Bug Summary

File: lib/Target/ARM/ARMISelLowering.cpp
Warning: line 454, column 18
Excessive padding in 'struct (anonymous at /tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/ARM/ARMISelLowering.cpp:454:18)' (8 padding bytes, where 0 is optimal). Optimal fields order: Name, Op, CC, consider reordering the fields or adding explicit padding members
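
The struct flagged here is the anonymous entry type of the Windows libcall table at line 454: its fields are declared in the order (Op, Name, CC), so the 8-byte Name pointer forces alignment padding around the two 4-byte enum-sized members, and every element of the LibraryCalls array carries 8 wasted bytes. Below is a minimal sketch of the effect with stand-in types (not the LLVM definitions), assuming a typical 64-bit host where RTLIB::Libcall and CallingConv::ID occupy 4 bytes and pointers occupy 8; exact sizes depend on the ABI.

  #include <cstdio>

  // Stand-ins for RTLIB::Libcall and CallingConv::ID (assumed 4-byte unsigned values).
  enum Libcall : unsigned { DUMMY_LIBCALL };
  using CallConvID = unsigned;

  // Field order as declared at line 454 (Op, Name, CC): 4 bytes of padding are
  // inserted after Op so the pointer is 8-byte aligned, and 4 bytes of tail
  // padding follow CC -- typically 24 bytes per entry.
  struct AsDeclared {
    Libcall Op;
    const char *Name;
    CallConvID CC;
  };

  // Field order suggested by the warning (Name, Op, CC): the pointer comes
  // first and the two 4-byte members pack together -- typically 16 bytes.
  struct Reordered {
    const char *Name;
    Libcall Op;
    CallConvID CC;
  };

  int main() {
    std::printf("as declared: %zu bytes\n", sizeof(AsDeclared)); // 24 on common LP64 ABIs
    std::printf("reordered:   %zu bytes\n", sizeof(Reordered));  // 16 on common LP64 ABIs
    return 0;
  }

Reordering the fields as the analyzer suggests (or adding explicit padding members) removes the per-entry waste without changing the table's behavior.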

Annotated Source Code

1//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the interfaces that ARM uses to lower LLVM code into a
11// selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#include "ARMBaseInstrInfo.h"
16#include "ARMBaseRegisterInfo.h"
17#include "ARMCallingConv.h"
18#include "ARMConstantPoolValue.h"
19#include "ARMISelLowering.h"
20#include "ARMMachineFunctionInfo.h"
21#include "ARMPerfectShuffle.h"
22#include "ARMRegisterInfo.h"
23#include "ARMSelectionDAGInfo.h"
24#include "ARMSubtarget.h"
25#include "MCTargetDesc/ARMAddressingModes.h"
26#include "MCTargetDesc/ARMBaseInfo.h"
27#include "llvm/ADT/APFloat.h"
28#include "llvm/ADT/APInt.h"
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/BitVector.h"
31#include "llvm/ADT/DenseMap.h"
32#include "llvm/ADT/SmallPtrSet.h"
33#include "llvm/ADT/SmallVector.h"
34#include "llvm/ADT/Statistic.h"
35#include "llvm/ADT/STLExtras.h"
36#include "llvm/ADT/StringExtras.h"
37#include "llvm/ADT/StringSwitch.h"
38#include "llvm/ADT/StringRef.h"
39#include "llvm/ADT/Triple.h"
40#include "llvm/ADT/Twine.h"
41#include "llvm/Analysis/VectorUtils.h"
42#include "llvm/CodeGen/CallingConvLower.h"
43#include "llvm/CodeGen/ISDOpcodes.h"
44#include "llvm/CodeGen/IntrinsicLowering.h"
45#include "llvm/CodeGen/MachineBasicBlock.h"
46#include "llvm/CodeGen/MachineConstantPool.h"
47#include "llvm/CodeGen/MachineFrameInfo.h"
48#include "llvm/CodeGen/MachineFunction.h"
49#include "llvm/CodeGen/MachineInstr.h"
50#include "llvm/CodeGen/MachineInstrBuilder.h"
51#include "llvm/CodeGen/MachineJumpTableInfo.h"
52#include "llvm/CodeGen/MachineMemOperand.h"
53#include "llvm/CodeGen/MachineOperand.h"
54#include "llvm/CodeGen/MachineRegisterInfo.h"
55#include "llvm/CodeGen/MachineValueType.h"
56#include "llvm/CodeGen/RuntimeLibcalls.h"
57#include "llvm/CodeGen/SelectionDAG.h"
58#include "llvm/CodeGen/SelectionDAGNodes.h"
59#include "llvm/CodeGen/ValueTypes.h"
60#include "llvm/IR/Attributes.h"
61#include "llvm/IR/CallingConv.h"
62#include "llvm/IR/Constant.h"
63#include "llvm/IR/Constants.h"
64#include "llvm/IR/Function.h"
65#include "llvm/IR/DataLayout.h"
66#include "llvm/IR/DebugLoc.h"
67#include "llvm/IR/DerivedTypes.h"
68#include "llvm/IR/Function.h"
69#include "llvm/IR/GlobalAlias.h"
70#include "llvm/IR/GlobalValue.h"
71#include "llvm/IR/GlobalVariable.h"
72#include "llvm/IR/IRBuilder.h"
73#include "llvm/IR/InlineAsm.h"
74#include "llvm/IR/Instruction.h"
75#include "llvm/IR/Instructions.h"
76#include "llvm/IR/IntrinsicInst.h"
77#include "llvm/IR/Intrinsics.h"
78#include "llvm/IR/Module.h"
79#include "llvm/IR/Type.h"
80#include "llvm/IR/User.h"
81#include "llvm/IR/Value.h"
82#include "llvm/MC/MCInstrDesc.h"
83#include "llvm/MC/MCInstrItineraries.h"
84#include "llvm/MC/MCRegisterInfo.h"
85#include "llvm/MC/MCSchedule.h"
86#include "llvm/Support/AtomicOrdering.h"
87#include "llvm/Support/BranchProbability.h"
88#include "llvm/Support/Casting.h"
89#include "llvm/Support/CodeGen.h"
90#include "llvm/Support/CommandLine.h"
91#include "llvm/Support/Compiler.h"
92#include "llvm/Support/Debug.h"
93#include "llvm/Support/ErrorHandling.h"
94#include "llvm/Support/KnownBits.h"
95#include "llvm/Support/MathExtras.h"
96#include "llvm/Support/raw_ostream.h"
97#include "llvm/Target/TargetInstrInfo.h"
98#include "llvm/Target/TargetMachine.h"
99#include "llvm/Target/TargetOptions.h"
100#include <algorithm>
101#include <cassert>
102#include <cstdint>
103#include <cstdlib>
104#include <iterator>
105#include <limits>
106#include <tuple>
107#include <string>
108#include <utility>
109#include <vector>
110
111using namespace llvm;
112
113#define DEBUG_TYPE "arm-isel"
114
115STATISTIC(NumTailCalls, "Number of tail calls");
116STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
117STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
118STATISTIC(NumConstpoolPromoted,
119 "Number of constants with their storage promoted into constant pools");
120
121static cl::opt<bool>
122ARMInterworking("arm-interworking", cl::Hidden,
123 cl::desc("Enable / disable ARM interworking (for debugging only)"),
124 cl::init(true));
125
126static cl::opt<bool> EnableConstpoolPromotion(
127 "arm-promote-constant", cl::Hidden,
128 cl::desc("Enable / disable promotion of unnamed_addr constants into "
129 "constant pools"),
130 cl::init(true));
131static cl::opt<unsigned> ConstpoolPromotionMaxSize(
132 "arm-promote-constant-max-size", cl::Hidden,
133 cl::desc("Maximum size of constant to promote into a constant pool"),
134 cl::init(64));
135static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
136 "arm-promote-constant-max-total", cl::Hidden,
137 cl::desc("Maximum size of ALL constants to promote into a constant pool"),
138 cl::init(128));
139
140// The APCS parameter registers.
141static const MCPhysReg GPRArgRegs[] = {
142 ARM::R0, ARM::R1, ARM::R2, ARM::R3
143};
144
145void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
146 MVT PromotedBitwiseVT) {
147 if (VT != PromotedLdStVT) {
148 setOperationAction(ISD::LOAD, VT, Promote);
149 AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
150
151 setOperationAction(ISD::STORE, VT, Promote);
152 AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
153 }
154
155 MVT ElemTy = VT.getVectorElementType();
156 if (ElemTy != MVT::f64)
157 setOperationAction(ISD::SETCC, VT, Custom);
158 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
159 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
160 if (ElemTy == MVT::i32) {
161 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
162 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
163 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
164 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
165 } else {
166 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
167 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
168 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
169 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
170 }
171 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
172 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
173 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
174 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
175 setOperationAction(ISD::SELECT, VT, Expand);
176 setOperationAction(ISD::SELECT_CC, VT, Expand);
177 setOperationAction(ISD::VSELECT, VT, Expand);
178 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
179 if (VT.isInteger()) {
180 setOperationAction(ISD::SHL, VT, Custom);
181 setOperationAction(ISD::SRA, VT, Custom);
182 setOperationAction(ISD::SRL, VT, Custom);
183 }
184
185 // Promote all bit-wise operations.
186 if (VT.isInteger() && VT != PromotedBitwiseVT) {
187 setOperationAction(ISD::AND, VT, Promote);
188 AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
189 setOperationAction(ISD::OR, VT, Promote);
190 AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
191 setOperationAction(ISD::XOR, VT, Promote);
192 AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
193 }
194
195 // Neon does not support vector divide/remainder operations.
196 setOperationAction(ISD::SDIV, VT, Expand);
197 setOperationAction(ISD::UDIV, VT, Expand);
198 setOperationAction(ISD::FDIV, VT, Expand);
199 setOperationAction(ISD::SREM, VT, Expand);
200 setOperationAction(ISD::UREM, VT, Expand);
201 setOperationAction(ISD::FREM, VT, Expand);
202
203 if (!VT.isFloatingPoint() &&
204 VT != MVT::v2i64 && VT != MVT::v1i64)
205 for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
206 setOperationAction(Opcode, VT, Legal);
207}
208
209void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
210 addRegisterClass(VT, &ARM::DPRRegClass);
211 addTypeForNEON(VT, MVT::f64, MVT::v2i32);
212}
213
214void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
215 addRegisterClass(VT, &ARM::DPairRegClass);
216 addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
217}
218
219ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
220 const ARMSubtarget &STI)
221 : TargetLowering(TM), Subtarget(&STI) {
222 RegInfo = Subtarget->getRegisterInfo();
223 Itins = Subtarget->getInstrItineraryData();
224
225 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
226
227 if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
228 !Subtarget->isTargetWatchOS()) {
229 const auto &E = Subtarget->getTargetTriple().getEnvironment();
230
231 bool IsHFTarget = E == Triple::EABIHF || E == Triple::GNUEABIHF ||
232 E == Triple::MuslEABIHF;
233 // Windows is a special case. Technically, we will replace all of the "GNU"
234 // calls with calls to MSVCRT if appropriate and adjust the calling
235 // convention then.
236 IsHFTarget = IsHFTarget || Subtarget->isTargetWindows();
237
238 for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
239 setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
240 IsHFTarget ? CallingConv::ARM_AAPCS_VFP
241 : CallingConv::ARM_AAPCS);
242 }
243
244 if (Subtarget->isTargetMachO()) {
245 // Uses VFP for Thumb libfuncs if available.
246 if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
247 Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
248 static const struct {
249 const RTLIB::Libcall Op;
250 const char * const Name;
251 const ISD::CondCode Cond;
252 } LibraryCalls[] = {
253 // Single-precision floating-point arithmetic.
254 { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
255 { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
256 { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
257 { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
258
259 // Double-precision floating-point arithmetic.
260 { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
261 { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
262 { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
263 { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
264
265 // Single-precision comparisons.
266 { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
267 { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
268 { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
269 { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
270 { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
271 { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
272 { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
273 { RTLIB::O_F32, "__unordsf2vfp", ISD::SETEQ },
274
275 // Double-precision comparisons.
276 { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
277 { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
278 { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
279 { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
280 { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
281 { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
282 { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
283 { RTLIB::O_F64, "__unorddf2vfp", ISD::SETEQ },
284
285 // Floating-point to integer conversions.
286 // i64 conversions are done via library routines even when generating VFP
287 // instructions, so use the same ones.
288 { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
289 { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
290 { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
291 { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
292
293 // Conversions between floating types.
294 { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
295 { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
296
297 // Integer to floating-point conversions.
298 // i64 conversions are done via library routines even when generating VFP
299 // instructions, so use the same ones.
300 // FIXME: There appears to be some naming inconsistency in ARM libgcc:
301 // e.g., __floatunsidf vs. __floatunssidfvfp.
302 { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
303 { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
304 { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
305 { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
306 };
307
308 for (const auto &LC : LibraryCalls) {
309 setLibcallName(LC.Op, LC.Name);
310 if (LC.Cond != ISD::SETCC_INVALID)
311 setCmpLibcallCC(LC.Op, LC.Cond);
312 }
313 }
314
315 // Set the correct calling convention for ARMv7k WatchOS. It's just
316 // AAPCS_VFP for functions as simple as libcalls.
317 if (Subtarget->isTargetWatchABI()) {
318 for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i)
319 setLibcallCallingConv((RTLIB::Libcall)i, CallingConv::ARM_AAPCS_VFP);
320 }
321 }
322
323 // These libcalls are not available in 32-bit.
324 setLibcallName(RTLIB::SHL_I128, nullptr);
325 setLibcallName(RTLIB::SRL_I128, nullptr);
326 setLibcallName(RTLIB::SRA_I128, nullptr);
327
328 // RTLIB
329 if (Subtarget->isAAPCS_ABI() &&
330 (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
331 Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
332 static const struct {
333 const RTLIB::Libcall Op;
334 const char * const Name;
335 const CallingConv::ID CC;
336 const ISD::CondCode Cond;
337 } LibraryCalls[] = {
338 // Double-precision floating-point arithmetic helper functions
339 // RTABI chapter 4.1.2, Table 2
340 { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
341 { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
342 { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
343 { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
344
345 // Double-precision floating-point comparison helper functions
346 // RTABI chapter 4.1.2, Table 3
347 { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
348 { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
349 { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
350 { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
351 { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
352 { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
353 { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
354 { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
355
356 // Single-precision floating-point arithmetic helper functions
357 // RTABI chapter 4.1.2, Table 4
358 { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
359 { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
360 { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
361 { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
362
363 // Single-precision floating-point comparison helper functions
364 // RTABI chapter 4.1.2, Table 5
365 { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
366 { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
367 { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
368 { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
369 { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
370 { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
371 { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
372 { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
373
374 // Floating-point to integer conversions.
375 // RTABI chapter 4.1.2, Table 6
376 { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
377 { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
378 { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
379 { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
380 { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
381 { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
382 { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
383 { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
384
385 // Conversions between floating types.
386 // RTABI chapter 4.1.2, Table 7
387 { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
388 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
389 { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
390
391 // Integer to floating-point conversions.
392 // RTABI chapter 4.1.2, Table 8
393 { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
394 { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
395 { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
396 { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
397 { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
398 { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
399 { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
400 { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
401
402 // Long long helper functions
403 // RTABI chapter 4.2, Table 9
404 { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
405 { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
406 { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
407 { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
408
409 // Integer division functions
410 // RTABI chapter 4.3.1
411 { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
412 { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
413 { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
414 { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
415 { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
416 { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
417 { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
418 { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
419 };
420
421 for (const auto &LC : LibraryCalls) {
422 setLibcallName(LC.Op, LC.Name);
423 setLibcallCallingConv(LC.Op, LC.CC);
424 if (LC.Cond != ISD::SETCC_INVALID)
425 setCmpLibcallCC(LC.Op, LC.Cond);
426 }
427
428 // EABI dependent RTLIB
429 if (TM.Options.EABIVersion == EABI::EABI4 ||
430 TM.Options.EABIVersion == EABI::EABI5) {
431 static const struct {
432 const RTLIB::Libcall Op;
433 const char *const Name;
434 const CallingConv::ID CC;
435 const ISD::CondCode Cond;
436 } MemOpsLibraryCalls[] = {
437 // Memory operations
438 // RTABI chapter 4.3.4
439 { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
440 { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
441 { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
442 };
443
444 for (const auto &LC : MemOpsLibraryCalls) {
445 setLibcallName(LC.Op, LC.Name);
446 setLibcallCallingConv(LC.Op, LC.CC);
447 if (LC.Cond != ISD::SETCC_INVALID)
448 setCmpLibcallCC(LC.Op, LC.Cond);
449 }
450 }
451 }
452
453 if (Subtarget->isTargetWindows()) {
454 static const struct {
Excessive padding in 'struct (anonymous at /tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/ARM/ARMISelLowering.cpp:454:18)' (8 padding bytes, where 0 is optimal). Optimal fields order: Name, Op, CC, consider reordering the fields or adding explicit padding members
455 const RTLIB::Libcall Op;
456 const char * const Name;
457 const CallingConv::ID CC;
458 } LibraryCalls[] = {
459 { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
460 { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
461 { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
462 { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
463 { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
464 { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
465 { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
466 { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
467 };
468
469 for (const auto &LC : LibraryCalls) {
470 setLibcallName(LC.Op, LC.Name);
471 setLibcallCallingConv(LC.Op, LC.CC);
472 }
473 }
474
475 // Use divmod compiler-rt calls for iOS 5.0 and later.
476 if (Subtarget->isTargetMachO() &&
477 !(Subtarget->isTargetIOS() &&
478 Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
479 setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
480 setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
481 }
482
483 // The half <-> float conversion functions are always soft-float on
484 // non-watchos platforms, but are needed for some targets which use a
485 // hard-float calling convention by default.
486 if (!Subtarget->isTargetWatchABI()) {
487 if (Subtarget->isAAPCS_ABI()) {
488 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
489 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
490 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
491 } else {
492 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
493 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
494 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
495 }
496 }
497
498 // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
499 // a __gnu_ prefix (which is the default).
500 if (Subtarget->isTargetAEABI()) {
501 static const struct {
502 const RTLIB::Libcall Op;
503 const char * const Name;
504 const CallingConv::ID CC;
505 } LibraryCalls[] = {
506 { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
507 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
508 { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
509 };
510
511 for (const auto &LC : LibraryCalls) {
512 setLibcallName(LC.Op, LC.Name);
513 setLibcallCallingConv(LC.Op, LC.CC);
514 }
515 }
516
517 if (Subtarget->isThumb1Only())
518 addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
519 else
520 addRegisterClass(MVT::i32, &ARM::GPRRegClass);
521
522 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
523 !Subtarget->isThumb1Only()) {
524 addRegisterClass(MVT::f32, &ARM::SPRRegClass);
525 addRegisterClass(MVT::f64, &ARM::DPRRegClass);
526 }
527
528 for (MVT VT : MVT::vector_valuetypes()) {
529 for (MVT InnerVT : MVT::vector_valuetypes()) {
530 setTruncStoreAction(VT, InnerVT, Expand);
531 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
532 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
533 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
534 }
535
536 setOperationAction(ISD::MULHS, VT, Expand);
537 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
538 setOperationAction(ISD::MULHU, VT, Expand);
539 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
540
541 setOperationAction(ISD::BSWAP, VT, Expand);
542 }
543
544 setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
545 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
546
547 setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
548 setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
549
550 if (Subtarget->hasNEON()) {
551 addDRTypeForNEON(MVT::v2f32);
552 addDRTypeForNEON(MVT::v8i8);
553 addDRTypeForNEON(MVT::v4i16);
554 addDRTypeForNEON(MVT::v2i32);
555 addDRTypeForNEON(MVT::v1i64);
556
557 addQRTypeForNEON(MVT::v4f32);
558 addQRTypeForNEON(MVT::v2f64);
559 addQRTypeForNEON(MVT::v16i8);
560 addQRTypeForNEON(MVT::v8i16);
561 addQRTypeForNEON(MVT::v4i32);
562 addQRTypeForNEON(MVT::v2i64);
563
564 // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
565 // neither Neon nor VFP support any arithmetic operations on it.
566 // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
567 // supported for v4f32.
568 setOperationAction(ISD::FADD, MVT::v2f64, Expand);
569 setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
570 setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
571 // FIXME: Code duplication: FDIV and FREM are expanded always, see
572 // ARMTargetLowering::addTypeForNEON method for details.
573 setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
574 setOperationAction(ISD::FREM, MVT::v2f64, Expand);
575 // FIXME: Create unittest.
576 // In other words, find a way when "copysign" appears in DAG with vector
577 // operands.
578 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
579 // FIXME: Code duplication: SETCC has custom operation action, see
580 // ARMTargetLowering::addTypeForNEON method for details.
581 setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
582 // FIXME: Create unittest for FNEG and for FABS.
583 setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
584 setOperationAction(ISD::FABS, MVT::v2f64, Expand);
585 setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
586 setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
587 setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
588 setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
589 setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
590 setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
591 setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
592 setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
593 setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
594 setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
595 // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
596 setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
597 setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
598 setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
599 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
600 setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
601 setOperationAction(ISD::FMA, MVT::v2f64, Expand);
602
603 setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
604 setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
605 setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
606 setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
607 setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
608 setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
609 setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
610 setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
611 setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
612 setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
613 setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
614 setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
615 setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
616 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
617 setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
618
619 // Mark v2f32 intrinsics.
620 setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
621 setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
622 setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
623 setOperationAction(ISD::FPOWI, MVT::v2f32, Expand);
624 setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
625 setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
626 setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
627 setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
628 setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
629 setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
630 setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
631 setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
632 setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
633 setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
634 setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);
635
636 // Neon does not support some operations on v1i64 and v2i64 types.
637 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
638 // Custom handling for some quad-vector types to detect VMULL.
639 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
640 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
641 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
642 // Custom handling for some vector types to avoid expensive expansions
643 setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
644 setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
645 setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
646 setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
647 // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
648 // a destination type that is wider than the source, nor does
649 // it have an FP_TO_[SU]INT instruction with a narrower destination than
650 // source.
651 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
652 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
653 setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
654 setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
655
656 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
657 setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
658
659 // NEON does not have single instruction CTPOP for vectors with element
660 // types wider than 8-bits. However, custom lowering can leverage the
661 // v8i8/v16i8 vcnt instruction.
662 setOperationAction(ISD::CTPOP, MVT::v2i32, Custom);
663 setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
664 setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
665 setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
666 setOperationAction(ISD::CTPOP, MVT::v1i64, Expand);
667 setOperationAction(ISD::CTPOP, MVT::v2i64, Expand);
668
669 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
670 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
671
672 // NEON does not have single instruction CTTZ for vectors.
673 setOperationAction(ISD::CTTZ, MVT::v8i8, Custom);
674 setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);
675 setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);
676 setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
677
678 setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
679 setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
680 setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
681 setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);
682
683 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i8, Custom);
684 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i16, Custom);
685 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i32, Custom);
686 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v1i64, Custom);
687
688 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom);
689 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom);
690 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
691 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);
692
693 // NEON only has FMA instructions as of VFP4.
694 if (!Subtarget->hasVFP4()) {
695 setOperationAction(ISD::FMA, MVT::v2f32, Expand);
696 setOperationAction(ISD::FMA, MVT::v4f32, Expand);
697 }
698
699 setTargetDAGCombine(ISD::INTRINSIC_VOID);
700 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
701 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
702 setTargetDAGCombine(ISD::SHL);
703 setTargetDAGCombine(ISD::SRL);
704 setTargetDAGCombine(ISD::SRA);
705 setTargetDAGCombine(ISD::SIGN_EXTEND);
706 setTargetDAGCombine(ISD::ZERO_EXTEND);
707 setTargetDAGCombine(ISD::ANY_EXTEND);
708 setTargetDAGCombine(ISD::BUILD_VECTOR);
709 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
710 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
711 setTargetDAGCombine(ISD::STORE);
712 setTargetDAGCombine(ISD::FP_TO_SINT);
713 setTargetDAGCombine(ISD::FP_TO_UINT);
714 setTargetDAGCombine(ISD::FDIV);
715 setTargetDAGCombine(ISD::LOAD);
716
717 // It is legal to extload from v4i8 to v4i16 or v4i32.
718 for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
719 MVT::v2i32}) {
720 for (MVT VT : MVT::integer_vector_valuetypes()) {
721 setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal);
722 setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal);
723 setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal);
724 }
725 }
726 }
727
728 if (Subtarget->isFPOnlySP()) {
729 // When targeting a floating-point unit with only single-precision
730 // operations, f64 is legal for the few double-precision instructions which
731 // are present. However, no double-precision operations other than moves,
732 // loads and stores are provided by the hardware.
733 setOperationAction(ISD::FADD, MVT::f64, Expand);
734 setOperationAction(ISD::FSUB, MVT::f64, Expand);
735 setOperationAction(ISD::FMUL, MVT::f64, Expand);
736 setOperationAction(ISD::FMA, MVT::f64, Expand);
737 setOperationAction(ISD::FDIV, MVT::f64, Expand);
738 setOperationAction(ISD::FREM, MVT::f64, Expand);
739 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
740 setOperationAction(ISD::FGETSIGN, MVT::f64, Expand);
741 setOperationAction(ISD::FNEG, MVT::f64, Expand);
742 setOperationAction(ISD::FABS, MVT::f64, Expand);
743 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
744 setOperationAction(ISD::FSIN, MVT::f64, Expand);
745 setOperationAction(ISD::FCOS, MVT::f64, Expand);
746 setOperationAction(ISD::FPOWI, MVT::f64, Expand);
747 setOperationAction(ISD::FPOW, MVT::f64, Expand);
748 setOperationAction(ISD::FLOG, MVT::f64, Expand);
749 setOperationAction(ISD::FLOG2, MVT::f64, Expand);
750 setOperationAction(ISD::FLOG10, MVT::f64, Expand);
751 setOperationAction(ISD::FEXP, MVT::f64, Expand);
752 setOperationAction(ISD::FEXP2, MVT::f64, Expand);
753 setOperationAction(ISD::FCEIL, MVT::f64, Expand);
754 setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
755 setOperationAction(ISD::FRINT, MVT::f64, Expand);
756 setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
757 setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
758 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
759 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
760 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
761 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
762 setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom);
763 setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom);
764 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
765 setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
766 }
767
768 computeRegisterProperties(Subtarget->getRegisterInfo());
769
770 // ARM does not have floating-point extending loads.
771 for (MVT VT : MVT::fp_valuetypes()) {
772 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
773 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
774 }
775
776 // ... or truncating stores
777 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
778 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
779 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
780
781 // ARM does not have i1 sign extending load.
782 for (MVT VT : MVT::integer_valuetypes())
783 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
784
785 // ARM supports all 4 flavors of integer indexed load / store.
786 if (!Subtarget->isThumb1Only()) {
787 for (unsigned im = (unsigned)ISD::PRE_INC;
788 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
789 setIndexedLoadAction(im, MVT::i1, Legal);
790 setIndexedLoadAction(im, MVT::i8, Legal);
791 setIndexedLoadAction(im, MVT::i16, Legal);
792 setIndexedLoadAction(im, MVT::i32, Legal);
793 setIndexedStoreAction(im, MVT::i1, Legal);
794 setIndexedStoreAction(im, MVT::i8, Legal);
795 setIndexedStoreAction(im, MVT::i16, Legal);
796 setIndexedStoreAction(im, MVT::i32, Legal);
797 }
798 } else {
799 // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
800 setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
801 setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
802 }
803
804 setOperationAction(ISD::SADDO, MVT::i32, Custom);
805 setOperationAction(ISD::UADDO, MVT::i32, Custom);
806 setOperationAction(ISD::SSUBO, MVT::i32, Custom);
807 setOperationAction(ISD::USUBO, MVT::i32, Custom);
808
809 // i64 operation support.
810 setOperationAction(ISD::MUL, MVT::i64, Expand);
811 setOperationAction(ISD::MULHU, MVT::i32, Expand);
812 if (Subtarget->isThumb1Only()) {
813 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
814 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
815 }
816 if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
817 || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
818 setOperationAction(ISD::MULHS, MVT::i32, Expand);
819
820 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
821 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
822 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
823 setOperationAction(ISD::SRL, MVT::i64, Custom);
824 setOperationAction(ISD::SRA, MVT::i64, Custom);
825 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
826
827 setOperationAction(ISD::ADDC, MVT::i32, Custom);
828 setOperationAction(ISD::ADDE, MVT::i32, Custom);
829 setOperationAction(ISD::SUBC, MVT::i32, Custom);
830 setOperationAction(ISD::SUBE, MVT::i32, Custom);
831
832 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
833 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
834
835 // ARM does not have ROTL.
836 setOperationAction(ISD::ROTL, MVT::i32, Expand);
837 for (MVT VT : MVT::vector_valuetypes()) {
838 setOperationAction(ISD::ROTL, VT, Expand);
839 setOperationAction(ISD::ROTR, VT, Expand);
840 }
841 setOperationAction(ISD::CTTZ, MVT::i32, Custom);
842 setOperationAction(ISD::CTPOP, MVT::i32, Expand);
843 if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
844 setOperationAction(ISD::CTLZ, MVT::i32, Expand);
845
846 // @llvm.readcyclecounter requires the Performance Monitors extension.
847 // Default to the 0 expansion on unsupported platforms.
848 // FIXME: Technically there are older ARM CPUs that have
849 // implementation-specific ways of obtaining this information.
850 if (Subtarget->hasPerfMon())
851 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
852
853 // Only ARMv6 has BSWAP.
854 if (!Subtarget->hasV6Ops())
855 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
856
857 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
858 : Subtarget->hasDivideInARMMode();
859 if (!hasDivide) {
860 // These are expanded into libcalls if the cpu doesn't have HW divider.
861 setOperationAction(ISD::SDIV, MVT::i32, LibCall);
862 setOperationAction(ISD::UDIV, MVT::i32, LibCall);
863 }
864
865 if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
866 setOperationAction(ISD::SDIV, MVT::i32, Custom);
867 setOperationAction(ISD::UDIV, MVT::i32, Custom);
868
869 setOperationAction(ISD::SDIV, MVT::i64, Custom);
870 setOperationAction(ISD::UDIV, MVT::i64, Custom);
871 }
872
873 setOperationAction(ISD::SREM, MVT::i32, Expand);
874 setOperationAction(ISD::UREM, MVT::i32, Expand);
875
876 // Register based DivRem for AEABI (RTABI 4.2)
877 if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
878 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
879 Subtarget->isTargetWindows()) {
880 setOperationAction(ISD::SREM, MVT::i64, Custom);
881 setOperationAction(ISD::UREM, MVT::i64, Custom);
882 HasStandaloneRem = false;
883
884 if (Subtarget->isTargetWindows()) {
885 const struct {
886 const RTLIB::Libcall Op;
887 const char * const Name;
888 const CallingConv::ID CC;
889 } LibraryCalls[] = {
890 { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
891 { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
892 { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
893 { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
894
895 { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
896 { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
897 { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
898 { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
899 };
900
901 for (const auto &LC : LibraryCalls) {
902 setLibcallName(LC.Op, LC.Name);
903 setLibcallCallingConv(LC.Op, LC.CC);
904 }
905 } else {
906 const struct {
907 const RTLIB::Libcall Op;
908 const char * const Name;
909 const CallingConv::ID CC;
910 } LibraryCalls[] = {
911 { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
912 { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
913 { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
914 { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
915
916 { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
917 { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
918 { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
919 { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
920 };
921
922 for (const auto &LC : LibraryCalls) {
923 setLibcallName(LC.Op, LC.Name);
924 setLibcallCallingConv(LC.Op, LC.CC);
925 }
926 }
927
928 setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
929 setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
930 setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
931 setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
932 } else {
933 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
934 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
935 }
936
937 if (Subtarget->isTargetWindows() && Subtarget->getTargetTriple().isOSMSVCRT())
938 for (auto &VT : {MVT::f32, MVT::f64})
939 setOperationAction(ISD::FPOWI, VT, Custom);
940
941 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
942 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
943 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
944 setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
945
946 setOperationAction(ISD::TRAP, MVT::Other, Legal);
947
948 // Use the default implementation.
949 setOperationAction(ISD::VASTART, MVT::Other, Custom);
950 setOperationAction(ISD::VAARG, MVT::Other, Expand);
951 setOperationAction(ISD::VACOPY, MVT::Other, Expand);
952 setOperationAction(ISD::VAEND, MVT::Other, Expand);
953 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
954 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
955
956 if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
957 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
958 else
959 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
960
961 // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
962 // the default expansion.
963 InsertFencesForAtomic = false;
964 if (Subtarget->hasAnyDataBarrier() &&
965 (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
966 // ATOMIC_FENCE needs custom lowering; the others should have been expanded
967 // to ldrex/strex loops already.
968 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
969 if (!Subtarget->isThumb() || !Subtarget->isMClass())
970 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
971
972 // On v8, we have particularly efficient implementations of atomic fences
973 // if they can be combined with nearby atomic loads and stores.
974 if (!Subtarget->hasV8Ops() || getTargetMachine().getOptLevel() == 0) {
975 // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
976 InsertFencesForAtomic = true;
977 }
978 } else {
979 // If there's anything we can use as a barrier, go through custom lowering
980 // for ATOMIC_FENCE.
981 // If target has DMB in thumb, Fences can be inserted.
982 if (Subtarget->hasDataBarrier())
983 InsertFencesForAtomic = true;
984
985 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
986 Subtarget->hasAnyDataBarrier() ? Custom : Expand);
987
988 // Set them all for expansion, which will force libcalls.
989 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
990 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
991 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
992 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
993 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
994 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
995 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
996 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
997 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
998 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
999 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
1000 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
1001 // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1002 // Unordered/Monotonic case.
1003 if (!InsertFencesForAtomic) {
1004 setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
1005 setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
1006 }
1007 }
1008
1009 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
1010
1011 // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1012 if (!Subtarget->hasV6Ops()) {
1013 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
1014 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
1015 }
1016 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
1017
1018 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
1019 !Subtarget->isThumb1Only()) {
1020 // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1021 // iff target supports vfp2.
1022 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
1023 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
1024 }
1025
1026 // We want to custom lower some of our intrinsics.
1027 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1028 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
1029 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
1030 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
1031 if (Subtarget->useSjLjEH())
1032 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1033
1034 setOperationAction(ISD::SETCC, MVT::i32, Expand);
1035 setOperationAction(ISD::SETCC, MVT::f32, Expand);
1036 setOperationAction(ISD::SETCC, MVT::f64, Expand);
1037 setOperationAction(ISD::SELECT, MVT::i32, Custom);
1038 setOperationAction(ISD::SELECT, MVT::f32, Custom);
1039 setOperationAction(ISD::SELECT, MVT::f64, Custom);
1040 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
1041 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
1042 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
1043
1044 // Thumb-1 cannot currently select ARMISD::SUBE.
1045 if (!Subtarget->isThumb1Only())
1046 setOperationAction(ISD::SETCCE, MVT::i32, Custom);
1047
1048 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
1049 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
1050 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
1051 setOperationAction(ISD::BR_CC, MVT::f64, Custom);
1052 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
1053
1054 // We don't support sin/cos/fmod/copysign/pow
1055 setOperationAction(ISD::FSIN, MVT::f64, Expand);
1056 setOperationAction(ISD::FSIN, MVT::f32, Expand);
1057 setOperationAction(ISD::FCOS, MVT::f32, Expand);
1058 setOperationAction(ISD::FCOS, MVT::f64, Expand);
1059 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
1060 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
1061 setOperationAction(ISD::FREM, MVT::f64, Expand);
1062 setOperationAction(ISD::FREM, MVT::f32, Expand);
1063 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
1064 !Subtarget->isThumb1Only()) {
1065 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
1066 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
1067 }
1068 setOperationAction(ISD::FPOW, MVT::f64, Expand);
1069 setOperationAction(ISD::FPOW, MVT::f32, Expand);
1070
1071 if (!Subtarget->hasVFP4()) {
1072 setOperationAction(ISD::FMA, MVT::f64, Expand);
1073 setOperationAction(ISD::FMA, MVT::f32, Expand);
1074 }
1075
1076 // Various VFP goodness
1077 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1078 // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1079 if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {
1080 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
1081 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
1082 }
1083
1084 // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1085 if (!Subtarget->hasFP16()) {
1086 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
1087 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
1088 }
1089 }
1090
1091 // Combine sin / cos into one node or libcall if possible.
1092 if (Subtarget->hasSinCos()) {
1093 setLibcallName(RTLIB::SINCOS_F32, "sincosf");
1094 setLibcallName(RTLIB::SINCOS_F64, "sincos");
1095 if (Subtarget->isTargetWatchABI()) {
1096 setLibcallCallingConv(RTLIB::SINCOS_F32, CallingConv::ARM_AAPCS_VFP);
1097 setLibcallCallingConv(RTLIB::SINCOS_F64, CallingConv::ARM_AAPCS_VFP);
1098 }
1099 if (Subtarget->isTargetIOS() || Subtarget->isTargetWatchOS()) {
1100 // For iOS, we don't want the normal expansion of a libcall to
1101 // sincos. We want to issue a libcall to __sincos_stret.
1102 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1103 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1104 }
1105 }
1106
1107 // FP-ARMv8 implements a lot of rounding-like FP operations.
1108 if (Subtarget->hasFPARMv8()) {
1109 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
1110 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
1111 setOperationAction(ISD::FROUND, MVT::f32, Legal);
1112 setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
1113 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
1114 setOperationAction(ISD::FRINT, MVT::f32, Legal);
1115 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
1116 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
1117 setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);
1118 setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);
1119 setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
1120 setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
1121
1122 if (!Subtarget->isFPOnlySP()) {
1123 setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
1124 setOperationAction(ISD::FCEIL, MVT::f64, Legal);
1125 setOperationAction(ISD::FROUND, MVT::f64, Legal);
1126 setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
1127 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
1128 setOperationAction(ISD::FRINT, MVT::f64, Legal);
1129 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
1130 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
1131 }
1132 }
1133
1134 if (Subtarget->hasNEON()) {
1135 // vmin and vmax aren't available in a scalar form, so we use
1136 // a NEON instruction with an undef lane instead.
1137 setOperationAction(ISD::FMINNAN, MVT::f32, Legal);
1138 setOperationAction(ISD::FMAXNAN, MVT::f32, Legal);
1139 setOperationAction(ISD::FMINNAN, MVT::v2f32, Legal);
1140 setOperationAction(ISD::FMAXNAN, MVT::v2f32, Legal);
1141 setOperationAction(ISD::FMINNAN, MVT::v4f32, Legal);
1142 setOperationAction(ISD::FMAXNAN, MVT::v4f32, Legal);
1143 }
1144
1145 // We have target-specific dag combine patterns for the following nodes:
1146 // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1147 setTargetDAGCombine(ISD::ADD);
1148 setTargetDAGCombine(ISD::SUB);
1149 setTargetDAGCombine(ISD::MUL);
1150 setTargetDAGCombine(ISD::AND);
1151 setTargetDAGCombine(ISD::OR);
1152 setTargetDAGCombine(ISD::XOR);
1153
1154 if (Subtarget->hasV6Ops())
1155 setTargetDAGCombine(ISD::SRL);
1156
1157 setStackPointerRegisterToSaveRestore(ARM::SP);
1158
1159 if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1160 !Subtarget->hasVFP2())
1161 setSchedulingPreference(Sched::RegPressure);
1162 else
1163 setSchedulingPreference(Sched::Hybrid);
1164
1165 //// temporary - rewrite interface to use type
1166 MaxStoresPerMemset = 8;
1167 MaxStoresPerMemsetOptSize = 4;
1168 MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1169 MaxStoresPerMemcpyOptSize = 2;
1170 MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1171 MaxStoresPerMemmoveOptSize = 2;
1172
1173 // On ARM arguments smaller than 4 bytes are extended, so all arguments
1174 // are at least 4 bytes aligned.
1175 setMinStackArgumentAlignment(4);
1176
1177 // Prefer likely predicted branches to selects on out-of-order cores.
1178 PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1179
1180 setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
1181}
1182
1183bool ARMTargetLowering::useSoftFloat() const {
1184 return Subtarget->useSoftFloat();
1185}
1186
1187// FIXME: It might make sense to define the representative register class as the
1188// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1189// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1190// SPR's representative would be DPR_VFP2. This should work well if register
1191// pressure tracking were modified such that a register use would increment the
1192// pressure of the register class's representative and all of its super
1193// classes' representatives transitively. We have not implemented this because
1194// of the difficulty prior to coalescing of modeling operand register classes
1195// due to the common occurrence of cross class copies and subregister insertions
1196// and extractions.
1197std::pair<const TargetRegisterClass *, uint8_t>
1198ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
1199 MVT VT) const {
1200 const TargetRegisterClass *RRC = nullptr;
1201 uint8_t Cost = 1;
1202 switch (VT.SimpleTy) {
1203 default:
1204 return TargetLowering::findRepresentativeClass(TRI, VT);
1205 // Use DPR as representative register class for all floating point
1206 // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1207 // the cost is 1 for both f32 and f64.
1208 case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1209 case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1210 RRC = &ARM::DPRRegClass;
1211 // When NEON is used for SP, only half of the register file is available
1212 // because operations that define both SP and DP results will be constrained
1213 // to the VFP2 class (D0-D15). We currently model this constraint prior to
1214 // coalescing by double-counting the SP regs. See the FIXME above.
1215 if (Subtarget->useNEONForSinglePrecisionFP())
1216 Cost = 2;
1217 break;
1218 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1219 case MVT::v4f32: case MVT::v2f64:
1220 RRC = &ARM::DPRRegClass;
1221 Cost = 2;
1222 break;
1223 case MVT::v4i64:
1224 RRC = &ARM::DPRRegClass;
1225 Cost = 4;
1226 break;
1227 case MVT::v8i64:
1228 RRC = &ARM::DPRRegClass;
1229 Cost = 8;
1230 break;
1231 }
1232 return std::make_pair(RRC, Cost);
1233}
1234
1235const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1236 switch ((ARMISD::NodeType)Opcode) {
1237 case ARMISD::FIRST_NUMBER: break;
1238 case ARMISD::Wrapper: return "ARMISD::Wrapper";
1239 case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
1240 case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
1241 case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL";
1242 case ARMISD::CALL: return "ARMISD::CALL";
1243 case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
1244 case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
1245 case ARMISD::BRCOND: return "ARMISD::BRCOND";
1246 case ARMISD::BR_JT: return "ARMISD::BR_JT";
1247 case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
1248 case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
1249 case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
1250 case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
1251 case ARMISD::CMP: return "ARMISD::CMP";
1252 case ARMISD::CMN: return "ARMISD::CMN";
1253 case ARMISD::CMPZ: return "ARMISD::CMPZ";
1254 case ARMISD::CMPFP: return "ARMISD::CMPFP";
1255 case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
1256 case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
1257 case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
1258
1259 case ARMISD::CMOV: return "ARMISD::CMOV";
1260
1261 case ARMISD::SSAT: return "ARMISD::SSAT";
1262
1263 case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
1264 case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
1265 case ARMISD::RRX: return "ARMISD::RRX";
1266
1267 case ARMISD::ADDC: return "ARMISD::ADDC";
1268 case ARMISD::ADDE: return "ARMISD::ADDE";
1269 case ARMISD::SUBC: return "ARMISD::SUBC";
1270 case ARMISD::SUBE: return "ARMISD::SUBE";
1271
1272 case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
1273 case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
1274
1275 case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
1276 case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
1277 case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH";
1278
1279 case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
1280
1281 case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
1282
1283 case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
1284
1285 case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
1286
1287 case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
1288
1289 case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
1290 case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
1291
1292 case ARMISD::VCEQ: return "ARMISD::VCEQ";
1293 case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
1294 case ARMISD::VCGE: return "ARMISD::VCGE";
1295 case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
1296 case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
1297 case ARMISD::VCGEU: return "ARMISD::VCGEU";
1298 case ARMISD::VCGT: return "ARMISD::VCGT";
1299 case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
1300 case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
1301 case ARMISD::VCGTU: return "ARMISD::VCGTU";
1302 case ARMISD::VTST: return "ARMISD::VTST";
1303
1304 case ARMISD::VSHL: return "ARMISD::VSHL";
1305 case ARMISD::VSHRs: return "ARMISD::VSHRs";
1306 case ARMISD::VSHRu: return "ARMISD::VSHRu";
1307 case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
1308 case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
1309 case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
1310 case ARMISD::VQSHLs: return "ARMISD::VQSHLs";
1311 case ARMISD::VQSHLu: return "ARMISD::VQSHLu";
1312 case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";
1313 case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs";
1314 case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu";
1315 case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu";
1316 case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";
1317 case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";
1318 case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
1319 case ARMISD::VSLI: return "ARMISD::VSLI";
1320 case ARMISD::VSRI: return "ARMISD::VSRI";
1321 case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
1322 case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
1323 case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
1324 case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
1325 case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
1326 case ARMISD::VDUP: return "ARMISD::VDUP";
1327 case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
1328 case ARMISD::VEXT: return "ARMISD::VEXT";
1329 case ARMISD::VREV64: return "ARMISD::VREV64";
1330 case ARMISD::VREV32: return "ARMISD::VREV32";
1331 case ARMISD::VREV16: return "ARMISD::VREV16";
1332 case ARMISD::VZIP: return "ARMISD::VZIP";
1333 case ARMISD::VUZP: return "ARMISD::VUZP";
1334 case ARMISD::VTRN: return "ARMISD::VTRN";
1335 case ARMISD::VTBL1: return "ARMISD::VTBL1";
1336 case ARMISD::VTBL2: return "ARMISD::VTBL2";
1337 case ARMISD::VMULLs: return "ARMISD::VMULLs";
1338 case ARMISD::VMULLu: return "ARMISD::VMULLu";
1339 case ARMISD::UMAAL: return "ARMISD::UMAAL";
1340 case ARMISD::UMLAL: return "ARMISD::UMLAL";
1341 case ARMISD::SMLAL: return "ARMISD::SMLAL";
1342 case ARMISD::SMLALBB: return "ARMISD::SMLALBB";
1343 case ARMISD::SMLALBT: return "ARMISD::SMLALBT";
1344 case ARMISD::SMLALTB: return "ARMISD::SMLALTB";
1345 case ARMISD::SMLALTT: return "ARMISD::SMLALTT";
1346 case ARMISD::SMULWB: return "ARMISD::SMULWB";
1347 case ARMISD::SMULWT: return "ARMISD::SMULWT";
1348 case ARMISD::SMLALD: return "ARMISD::SMLALD";
1349 case ARMISD::SMLALDX: return "ARMISD::SMLALDX";
1350 case ARMISD::SMLSLD: return "ARMISD::SMLSLD";
1351 case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX";
1352 case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
1353 case ARMISD::BFI: return "ARMISD::BFI";
1354 case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
1355 case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
1356 case ARMISD::VBSL: return "ARMISD::VBSL";
1357 case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
1358 case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";
1359 case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
1360 case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
1361 case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
1362 case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
1363 case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
1364 case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
1365 case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
1366 case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
1367 case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
1368 case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
1369 case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD";
1370 case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
1371 case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
1372 case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
1373 case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
1374 case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
1375 case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
1376 case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
1377 case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
1378 case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
1379 case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
1380 }
1381 return nullptr;
1382}
1383
1384EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1385 EVT VT) const {
1386 if (!VT.isVector())
1387 return getPointerTy(DL);
1388 return VT.changeVectorElementTypeToInteger();
1389}
1390
1391/// getRegClassFor - Return the register class that should be used for the
1392/// specified value type.
1393const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
1394 // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1395 // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1396 // load / store 4 to 8 consecutive D registers.
1397 if (Subtarget->hasNEON()) {
1398 if (VT == MVT::v4i64)
1399 return &ARM::QQPRRegClass;
1400 if (VT == MVT::v8i64)
1401 return &ARM::QQQQPRRegClass;
1402 }
1403 return TargetLowering::getRegClassFor(VT);
1404}
1405
1406// memcpy, and other memory intrinsics, typically try to use LDM/STM if the
1407// source/dest is aligned and the copy size is large enough. We therefore want
1408// to align such objects passed to memory intrinsics.
1409bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
1410 unsigned &PrefAlign) const {
1411 if (!isa<MemIntrinsic>(CI))
1412 return false;
1413 MinSize = 8;
1414 // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1415 // cycle faster than 4-byte aligned LDM.
1416 PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
1417 return true;
1418}
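// Illustrative note (hypothetical caller code, not part of this file): with
// this hook, a memory intrinsic such as
//   char Buf[32];              // 4-byte-aligned local
//   memcpy(Buf, Src, 32);      // copy of at least MinSize (8) bytes
// may have its underlying object's alignment raised to PrefAlign (8 on v6+
// A/R-class cores), letting the expansion use 8-byte-aligned LDM/STM.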
1419
1420// Create a fast isel object.
1421FastISel *
1422ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1423 const TargetLibraryInfo *libInfo) const {
1424 return ARM::createFastISel(funcInfo, libInfo);
1425}
1426
1427Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
1428 unsigned NumVals = N->getNumValues();
1429 if (!NumVals)
1430 return Sched::RegPressure;
1431
1432 for (unsigned i = 0; i != NumVals; ++i) {
1433 EVT VT = N->getValueType(i);
1434 if (VT == MVT::Glue || VT == MVT::Other)
1435 continue;
1436 if (VT.isFloatingPoint() || VT.isVector())
1437 return Sched::ILP;
1438 }
1439
1440 if (!N->isMachineOpcode())
1441 return Sched::RegPressure;
1442
1443 // Loads are scheduled for latency even if the instruction itinerary
1444 // is not available.
1445 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1446 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1447
1448 if (MCID.getNumDefs() == 0)
1449 return Sched::RegPressure;
1450 if (!Itins->isEmpty() &&
1451 Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1452 return Sched::ILP;
1453
1454 return Sched::RegPressure;
1455}
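// Illustrative summary (restating the heuristic above, not new policy): a node
// producing an f32/f64 or vector result is scheduled for ILP; a node producing
// only integer/chain results falls back to register-pressure scheduling unless
// its itinerary reports a high-latency (> 2 cycle) result, e.g. a load.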
1456
1457//===----------------------------------------------------------------------===//
1458// Lowering Code
1459//===----------------------------------------------------------------------===//
1460
1461static bool isSRL16(const SDValue &Op) {
1462 if (Op.getOpcode() != ISD::SRL)
1463 return false;
1464 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1465 return Const->getZExtValue() == 16;
1466 return false;
1467}
1468
1469static bool isSRA16(const SDValue &Op) {
1470 if (Op.getOpcode() != ISD::SRA)
1471 return false;
1472 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1473 return Const->getZExtValue() == 16;
1474 return false;
1475}
1476
1477static bool isSHL16(const SDValue &Op) {
1478 if (Op.getOpcode() != ISD::SHL)
1479 return false;
1480 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1481 return Const->getZExtValue() == 16;
1482 return false;
1483}
1484
1485// Check for a signed 16-bit value. We special-case SRA because it makes it
1486// simpler when also looking for SRAs that aren't sign-extending a
1487// smaller value. Without the check, we'd need to take extra care with
1488// checking order for some operations.
1489static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
1490 if (isSRA16(Op))
1491 return isSHL16(Op.getOperand(0));
1492 return DAG.ComputeNumSignBits(Op) == 17;
1493}
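// Worked example (hypothetical constants, illustration only): a 32-bit value
// that is really a sign-extended i16 has bits [31:15] all equal to the sign
// bit, i.e. exactly 17 known sign bits:
//   0xFFFF8000 (sext i16 0x8000) -> 17 sign bits -> isS16 == true
//   0x00012345                   -> 15 sign bits -> isS16 == false
// The SRA special case accepts (sra (shl x, 16), 16) directly, since that
// pattern produces exactly such a value.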
1494
1495/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
1496static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
1497 switch (CC) {
1498 default: llvm_unreachable("Unknown condition code!");
1499 case ISD::SETNE: return ARMCC::NE;
1500 case ISD::SETEQ: return ARMCC::EQ;
1501 case ISD::SETGT: return ARMCC::GT;
1502 case ISD::SETGE: return ARMCC::GE;
1503 case ISD::SETLT: return ARMCC::LT;
1504 case ISD::SETLE: return ARMCC::LE;
1505 case ISD::SETUGT: return ARMCC::HI;
1506 case ISD::SETUGE: return ARMCC::HS;
1507 case ISD::SETULT: return ARMCC::LO;
1508 case ISD::SETULE: return ARMCC::LS;
1509 }
1510}
1511
1512/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
1513static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
1514 ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN) {
1515 CondCode2 = ARMCC::AL;
1516 InvalidOnQNaN = true;
1517 switch (CC) {
1518 default: llvm_unreachable("Unknown FP condition!");
1519 case ISD::SETEQ:
1520 case ISD::SETOEQ:
1521 CondCode = ARMCC::EQ;
1522 InvalidOnQNaN = false;
1523 break;
1524 case ISD::SETGT:
1525 case ISD::SETOGT: CondCode = ARMCC::GT; break;
1526 case ISD::SETGE:
1527 case ISD::SETOGE: CondCode = ARMCC::GE; break;
1528 case ISD::SETOLT: CondCode = ARMCC::MI; break;
1529 case ISD::SETOLE: CondCode = ARMCC::LS; break;
1530 case ISD::SETONE:
1531 CondCode = ARMCC::MI;
1532 CondCode2 = ARMCC::GT;
1533 InvalidOnQNaN = false;
1534 break;
1535 case ISD::SETO: CondCode = ARMCC::VC; break;
1536 case ISD::SETUO: CondCode = ARMCC::VS; break;
1537 case ISD::SETUEQ:
1538 CondCode = ARMCC::EQ;
1539 CondCode2 = ARMCC::VS;
1540 InvalidOnQNaN = false;
1541 break;
1542 case ISD::SETUGT: CondCode = ARMCC::HI; break;
1543 case ISD::SETUGE: CondCode = ARMCC::PL; break;
1544 case ISD::SETLT:
1545 case ISD::SETULT: CondCode = ARMCC::LT; break;
1546 case ISD::SETLE:
1547 case ISD::SETULE: CondCode = ARMCC::LE; break;
1548 case ISD::SETNE:
1549 case ISD::SETUNE:
1550 CondCode = ARMCC::NE;
1551 InvalidOnQNaN = false;
1552 break;
1553 }
1554}
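// Illustrative sketch (assumed lowering, condensed from the mapping above): an
// ordered not-equal compare (SETONE) has no single ARM condition code, so it
// is materialized as two predicated operations after the compare, e.g.
//   vcmp.f64 d0, d1
//   vmrs     APSR_nzcv, fpscr
//   movmi    r0, #1        @ CondCode  = MI ("less than", ordered)
//   movgt    r0, #1        @ CondCode2 = GT ("greater than")
// Either condition firing means the operands compared ordered and unequal.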
1555
1556//===----------------------------------------------------------------------===//
1557// Calling Convention Implementation
1558//===----------------------------------------------------------------------===//
1559
1560#include "ARMGenCallingConv.inc"
1561
1562/// getEffectiveCallingConv - Get the effective calling convention, taking into
1563/// account presence of floating point hardware and calling convention
1564/// limitations, such as support for variadic functions.
1565CallingConv::ID
1566ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
1567 bool isVarArg) const {
1568 switch (CC) {
1569 default:
1570 llvm_unreachable("Unsupported calling convention");
1571 case CallingConv::ARM_AAPCS:
1572 case CallingConv::ARM_APCS:
1573 case CallingConv::GHC:
1574 return CC;
1575 case CallingConv::PreserveMost:
1576 return CallingConv::PreserveMost;
1577 case CallingConv::ARM_AAPCS_VFP:
1578 case CallingConv::Swift:
1579 return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
1580 case CallingConv::C:
1581 if (!Subtarget->isAAPCS_ABI())
1582 return CallingConv::ARM_APCS;
1583 else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() &&
1584 getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
1585 !isVarArg)
1586 return CallingConv::ARM_AAPCS_VFP;
1587 else
1588 return CallingConv::ARM_AAPCS;
1589 case CallingConv::Fast:
1590 case CallingConv::CXX_FAST_TLS:
1591 if (!Subtarget->isAAPCS_ABI()) {
1592 if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1593 return CallingConv::Fast;
1594 return CallingConv::ARM_APCS;
1595 } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1596 return CallingConv::ARM_AAPCS_VFP;
1597 else
1598 return CallingConv::ARM_AAPCS;
1599 }
1600}
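// Illustrative mapping (hypothetical subtarget configurations, not exhaustive):
//   CallingConv::C, AAPCS ABI, VFP2, hard-float, not variadic -> ARM_AAPCS_VFP
//   CallingConv::C, AAPCS ABI, hard-float, but variadic       -> ARM_AAPCS
//   CallingConv::C, pre-AAPCS (APCS) ABI                      -> ARM_APCS
//   CallingConv::Swift or ARM_AAPCS_VFP with a variadic call  -> ARM_AAPCS
// i.e. VFP register passing is only used when FP hardware is present and the
// call is not variadic.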
1601
1602CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
1603 bool isVarArg) const {
1604 return CCAssignFnForNode(CC, false, isVarArg);
1605}
1606
1607CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
1608 bool isVarArg) const {
1609 return CCAssignFnForNode(CC, true, isVarArg);
1610}
1611
1612/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
1613/// CallingConvention.
1614CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
1615 bool Return,
1616 bool isVarArg) const {
1617 switch (getEffectiveCallingConv(CC, isVarArg)) {
1618 default:
1619 llvm_unreachable("Unsupported calling convention");
1620 case CallingConv::ARM_APCS:
1621 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
1622 case CallingConv::ARM_AAPCS:
1623 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1624 case CallingConv::ARM_AAPCS_VFP:
1625 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1626 case CallingConv::Fast:
1627 return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
1628 case CallingConv::GHC:
1629 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
1630 case CallingConv::PreserveMost:
1631 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1632 }
1633}
1634
1635/// LowerCallResult - Lower the result values of a call into the
1636/// appropriate copies out of appropriate physical registers.
1637SDValue ARMTargetLowering::LowerCallResult(
1638 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
1639 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1640 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
1641 SDValue ThisVal) const {
1642
1643 // Assign locations to each value returned by this call.
1644 SmallVector<CCValAssign, 16> RVLocs;
1645 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1646 *DAG.getContext());
1647 CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
1648
1649 // Copy all of the result registers out of their specified physreg.
1650 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1651 CCValAssign VA = RVLocs[i];
1652
1653 // Pass 'this' value directly from the argument to return value, to avoid
1654 // reg unit interference
1655 if (i == 0 && isThisReturn) {
1656 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
1657 "unexpected return calling convention register assignment");
1658 InVals.push_back(ThisVal);
1659 continue;
1660 }
1661
1662 SDValue Val;
1663 if (VA.needsCustom()) {
1664 // Handle f64 or half of a v2f64.
1665 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1666 InFlag);
1667 Chain = Lo.getValue(1);
1668 InFlag = Lo.getValue(2);
1669 VA = RVLocs[++i]; // skip ahead to next loc
1670 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1671 InFlag);
1672 Chain = Hi.getValue(1);
1673 InFlag = Hi.getValue(2);
1674 if (!Subtarget->isLittle())
1675 std::swap (Lo, Hi);
1676 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1677
1678 if (VA.getLocVT() == MVT::v2f64) {
1679 SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
1680 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1681 DAG.getConstant(0, dl, MVT::i32));
1682
1683 VA = RVLocs[++i]; // skip ahead to next loc
1684 Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1685 Chain = Lo.getValue(1);
1686 InFlag = Lo.getValue(2);
1687 VA = RVLocs[++i]; // skip ahead to next loc
1688 Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1689 Chain = Hi.getValue(1);
1690 InFlag = Hi.getValue(2);
1691 if (!Subtarget->isLittle())
1692 std::swap (Lo, Hi);
1693 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1694 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1695 DAG.getConstant(1, dl, MVT::i32));
1696 }
1697 } else {
1698 Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
1699 InFlag);
1700 Chain = Val.getValue(1);
1701 InFlag = Val.getValue(2);
1702 }
1703
1704 switch (VA.getLocInfo()) {
1705 default: llvm_unreachable("Unknown loc info!");
1706 case CCValAssign::Full: break;
1707 case CCValAssign::BCvt:
1708 Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
1709 break;
1710 }
1711
1712 InVals.push_back(Val);
1713 }
1714
1715 return Chain;
1716}
1717
1718/// LowerMemOpCallTo - Store the argument to the stack.
1719SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
1720 SDValue Arg, const SDLoc &dl,
1721 SelectionDAG &DAG,
1722 const CCValAssign &VA,
1723 ISD::ArgFlagsTy Flags) const {
1724 unsigned LocMemOffset = VA.getLocMemOffset();
1725 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1726 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
1727 StackPtr, PtrOff);
1728 return DAG.getStore(
1729 Chain, dl, Arg, PtrOff,
1730 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
1731}
1732
1733void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
1734 SDValue Chain, SDValue &Arg,
1735 RegsToPassVector &RegsToPass,
1736 CCValAssign &VA, CCValAssign &NextVA,
1737 SDValue &StackPtr,
1738 SmallVectorImpl<SDValue> &MemOpChains,
1739 ISD::ArgFlagsTy Flags) const {
1740
1741 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1742 DAG.getVTList(MVT::i32, MVT::i32), Arg);
1743 unsigned id = Subtarget->isLittle() ? 0 : 1;
1744 RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
1745
1746 if (NextVA.isRegLoc())
1747 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
1748 else {
1749 assert(NextVA.isMemLoc());
1750 if (!StackPtr.getNode())
1751 StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
1752 getPointerTy(DAG.getDataLayout()));
1753
1754 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
1755 dl, DAG, NextVA,
1756 Flags));
1757 }
1758}
1759
1760/// LowerCall - Lowering a call into a callseq_start <-
1761/// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
1762/// nodes.
1763SDValue
1764ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
1765 SmallVectorImpl<SDValue> &InVals) const {
1766 SelectionDAG &DAG = CLI.DAG;
1767 SDLoc &dl = CLI.DL;
1768 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1769 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1770 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1771 SDValue Chain = CLI.Chain;
1772 SDValue Callee = CLI.Callee;
1773 bool &isTailCall = CLI.IsTailCall;
1774 CallingConv::ID CallConv = CLI.CallConv;
1775 bool doesNotRet = CLI.DoesNotReturn;
1776 bool isVarArg = CLI.IsVarArg;
1777
1778 MachineFunction &MF = DAG.getMachineFunction();
1779 bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
1780 bool isThisReturn = false;
1781 bool isSibCall = false;
1782 auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
1783
1784 // Disable tail calls if they're not supported.
1785 if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
1786 isTailCall = false;
1787
1788 if (isTailCall) {
1789 // Check if it's really possible to do a tail call.
1790 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
1791 isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(),
1792 Outs, OutVals, Ins, DAG);
1793 if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
1794 report_fatal_error("failed to perform tail call elimination on a call "
1795 "site marked musttail");
1796 // We don't support GuaranteedTailCallOpt for ARM, only automatically
1797 // detected sibcalls.
1798 if (isTailCall) {
1799 ++NumTailCalls;
1800 isSibCall = true;
1801 }
1802 }
1803
1804 // Analyze operands of the call, assigning locations to each operand.
1805 SmallVector<CCValAssign, 16> ArgLocs;
1806 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1807 *DAG.getContext());
1808 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
1809
1810 // Get a count of how many bytes are to be pushed on the stack.
1811 unsigned NumBytes = CCInfo.getNextStackOffset();
1812
1813 // For tail calls, memory operands are available in our caller's stack.
1814 if (isSibCall)
1815 NumBytes = 0;
1816
1817 // Adjust the stack pointer for the new arguments...
1818 // These operations are automatically eliminated by the prolog/epilog pass
1819 if (!isSibCall)
1820 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
1821
1822 SDValue StackPtr =
1823 DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
1824
1825 RegsToPassVector RegsToPass;
1826 SmallVector<SDValue, 8> MemOpChains;
1827
1828 // Walk the register/memloc assignments, inserting copies/loads. In the case
1829 // of tail call optimization, arguments are handled later.
1830 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
1831 i != e;
1832 ++i, ++realArgIdx) {
1833 CCValAssign &VA = ArgLocs[i];
1834 SDValue Arg = OutVals[realArgIdx];
1835 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1836 bool isByVal = Flags.isByVal();
1837
1838 // Promote the value if needed.
1839 switch (VA.getLocInfo()) {
1840 default: llvm_unreachable("Unknown loc info!");
1841 case CCValAssign::Full: break;
1842 case CCValAssign::SExt:
1843 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
1844 break;
1845 case CCValAssign::ZExt:
1846 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
1847 break;
1848 case CCValAssign::AExt:
1849 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
1850 break;
1851 case CCValAssign::BCvt:
1852 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
1853 break;
1854 }
1855
1856 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
1857 if (VA.needsCustom()) {
1858 if (VA.getLocVT() == MVT::v2f64) {
1859 SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1860 DAG.getConstant(0, dl, MVT::i32));
1861 SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1862 DAG.getConstant(1, dl, MVT::i32));
1863
1864 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
1865 VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1866
1867 VA = ArgLocs[++i]; // skip ahead to next loc
1868 if (VA.isRegLoc()) {
1869 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
1870 VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1871 } else {
1872 assert(VA.isMemLoc());
1873
1874 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
1875 dl, DAG, VA, Flags));
1876 }
1877 } else {
1878 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
1879 StackPtr, MemOpChains, Flags);
1880 }
1881 } else if (VA.isRegLoc()) {
1882 if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
1883 Outs[0].VT == MVT::i32) {
1884 assert(VA.getLocVT() == MVT::i32 &&
1885 "unexpected calling convention register assignment");
1886 assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
1887 "unexpected use of 'returned'");
1888 isThisReturn = true;
1889 }
1890 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1891 } else if (isByVal) {
1892 assert(VA.isMemLoc());
1893 unsigned offset = 0;
1894
1895 // True if this byval aggregate will be split between registers
1896 // and memory.
1897 unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
1898 unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
1899
1900 if (CurByValIdx < ByValArgsCount) {
1901
1902 unsigned RegBegin, RegEnd;
1903 CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
1904
1905 EVT PtrVT =
1906 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
1907 unsigned int i, j;
1908 for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
1909 SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
1910 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
1911 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
1912 MachinePointerInfo(),
1913 DAG.InferPtrAlignment(AddArg));
1914 MemOpChains.push_back(Load.getValue(1));
1915 RegsToPass.push_back(std::make_pair(j, Load));
1916 }
1917
1918 // If the parameter size exceeds the register area, the "offset" value
1919 // helps us calculate the stack slot for the remaining part properly.
1920 offset = RegEnd - RegBegin;
1921
1922 CCInfo.nextInRegsParam();
1923 }
1924
1925 if (Flags.getByValSize() > 4*offset) {
1926 auto PtrVT = getPointerTy(DAG.getDataLayout());
1927 unsigned LocMemOffset = VA.getLocMemOffset();
1928 SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1929 SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff);
1930 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
1931 SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
1932 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
1933 MVT::i32);
1934 SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl,
1935 MVT::i32);
1936
1937 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
1938 SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
1939 MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
1940 Ops));
1941 }
1942 } else if (!isSibCall) {
1943 assert(VA.isMemLoc());
1944
1945 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
1946 dl, DAG, VA, Flags));
1947 }
1948 }
1949
1950 if (!MemOpChains.empty())
1951 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
1952
1953 // Build a sequence of copy-to-reg nodes chained together with token chain
1954 // and flag operands which copy the outgoing args into the appropriate regs.
1955 SDValue InFlag;
1956 // Tail call byval lowering might overwrite argument registers so in case of
1957 // tail call optimization the copies to registers are lowered later.
1958 if (!isTailCall)
1959 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1960 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1961 RegsToPass[i].second, InFlag);
1962 InFlag = Chain.getValue(1);
1963 }
1964
1965 // For tail calls lower the arguments to the 'real' stack slot.
1966 if (isTailCall) {
1967 // Force all the incoming stack arguments to be loaded from the stack
1968 // before any new outgoing arguments are stored to the stack, because the
1969 // outgoing stack slots may alias the incoming argument stack slots, and
1970 // the alias isn't otherwise explicit. This is slightly more conservative
1971 // than necessary, because it means that each store effectively depends
1972 // on every argument instead of just those arguments it would clobber.
1973
1974 // Do not flag preceding copytoreg stuff together with the following stuff.
1975 InFlag = SDValue();
1976 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1977 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1978 RegsToPass[i].second, InFlag);
1979 InFlag = Chain.getValue(1);
1980 }
1981 InFlag = SDValue();
1982 }
1983
1984 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1985 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1986 // node so that legalize doesn't hack it.
1987 bool isDirect = false;
1988
1989 const TargetMachine &TM = getTargetMachine();
1990 const Module *Mod = MF.getFunction()->getParent();
1991 const GlobalValue *GV = nullptr;
1992 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
1993 GV = G->getGlobal();
1994 bool isStub =
1995 !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();
1996
1997 bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
1998 bool isLocalARMFunc = false;
1999 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2000 auto PtrVt = getPointerTy(DAG.getDataLayout());
2001
2002 if (Subtarget->genLongCalls()) {
2003 assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2004 "long-calls codegen is not position independent!");
2005 // Handle a global address or an external symbol. If it's not one of
2006 // those, the target's already in a register, so we don't need to do
2007 // anything extra.
2008 if (isa<GlobalAddressSDNode>(Callee)) {
2009 // Create a constant pool entry for the callee address
2010 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2011 ARMConstantPoolValue *CPV =
2012 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
2013
2014 // Get the address of the callee into a register
2015 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2016 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2017 Callee = DAG.getLoad(
2018 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2019 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2020 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2021 const char *Sym = S->getSymbol();
2022
2023 // Create a constant pool entry for the callee address
2024 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2025 ARMConstantPoolValue *CPV =
2026 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2027 ARMPCLabelIndex, 0);
2028 // Get the address of the callee into a register
2029 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2030 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2031 Callee = DAG.getLoad(
2032 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2033 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2034 }
2035 } else if (isa<GlobalAddressSDNode>(Callee)) {
2036 // If we're optimizing for minimum size and the function is called three or
2037 // more times in this block, we can improve codesize by calling indirectly
2038 // as BLXr has a 16-bit encoding.
2039 auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2040 auto *BB = CLI.CS->getParent();
2041 bool PreferIndirect =
2042 Subtarget->isThumb() && MF.getFunction()->optForMinSize() &&
2043 count_if(GV->users(), [&BB](const User *U) {
2044 return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
2045 }) > 2;
2046
2047 if (!PreferIndirect) {
2048 isDirect = true;
2049 bool isDef = GV->isStrongDefinitionForLinker();
2050
2051 // ARM call to a local ARM function is predicable.
2052 isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2053 // tBX takes a register source operand.
2054 if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2055 assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2056 Callee = DAG.getNode(
2057 ARMISD::WrapperPIC, dl, PtrVt,
2058 DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2059 Callee = DAG.getLoad(
2060 PtrVt, dl, DAG.getEntryNode(), Callee,
2061 MachinePointerInfo::getGOT(DAG.getMachineFunction()),
2062 /* Alignment = */ 0, MachineMemOperand::MODereferenceable |
2063 MachineMemOperand::MOInvariant);
2064 } else if (Subtarget->isTargetCOFF()) {
2065 assert(Subtarget->isTargetWindows() &&
2066 "Windows is the only supported COFF target");
2067 unsigned TargetFlags = GV->hasDLLImportStorageClass()
2068 ? ARMII::MO_DLLIMPORT
2069 : ARMII::MO_NO_FLAG;
2070 Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0,
2071 TargetFlags);
2072 if (GV->hasDLLImportStorageClass())
2073 Callee =
2074 DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2075 DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2076 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2077 } else {
2078 Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
2079 }
2080 }
2081 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2082 isDirect = true;
2083 // tBX takes a register source operand.
2084 const char *Sym = S->getSymbol();
2085 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2086 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2087 ARMConstantPoolValue *CPV =
2088 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2089 ARMPCLabelIndex, 4);
2090 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2091 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2092 Callee = DAG.getLoad(
2093 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2094 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2095 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2096 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2097 } else {
2098 Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2099 }
2100 }
2101
2102 // FIXME: handle tail calls differently.
2103 unsigned CallOpc;
2104 if (Subtarget->isThumb()) {
2105 if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2106 CallOpc = ARMISD::CALL_NOLINK;
2107 else
2108 CallOpc = ARMISD::CALL;
2109 } else {
2110 if (!isDirect && !Subtarget->hasV5TOps())
2111 CallOpc = ARMISD::CALL_NOLINK;
2112 else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2113 // Emit regular call when code size is the priority
2114 !MF.getFunction()->optForMinSize())
2115 // "mov lr, pc; b _foo" to avoid confusing the RSP
2116 CallOpc = ARMISD::CALL_NOLINK;
2117 else
2118 CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2119 }
2120
2121 std::vector<SDValue> Ops;
2122 Ops.push_back(Chain);
2123 Ops.push_back(Callee);
2124
2125 // Add argument registers to the end of the list so that they are known live
2126 // into the call.
2127 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2128 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2129 RegsToPass[i].second.getValueType()));
2130
2131 // Add a register mask operand representing the call-preserved registers.
2132 if (!isTailCall) {
2133 const uint32_t *Mask;
2134 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2135 if (isThisReturn) {
2136 // For 'this' returns, use the R0-preserving mask if applicable
2137 Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2138 if (!Mask) {
2139 // Set isThisReturn to false if the calling convention is not one that
2140 // allows 'returned' to be modeled in this way, so LowerCallResult does
2141 // not try to pass 'this' straight through
2142 isThisReturn = false;
2143 Mask = ARI->getCallPreservedMask(MF, CallConv);
2144 }
2145 } else
2146 Mask = ARI->getCallPreservedMask(MF, CallConv);
2147
2148 assert(Mask && "Missing call preserved mask for calling convention");
2149 Ops.push_back(DAG.getRegisterMask(Mask));
2150 }
2151
2152 if (InFlag.getNode())
2153 Ops.push_back(InFlag);
2154
2155 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2156 if (isTailCall) {
2157 MF.getFrameInfo().setHasTailCall();
2158 return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2159 }
2160
2161 // Returns a chain and a flag for retval copy to use.
2162 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2163 InFlag = Chain.getValue(1);
2164
2165 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
2166 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
2167 if (!Ins.empty())
2168 InFlag = Chain.getValue(1);
2169
2170 // Handle result values, copying them out of physregs into vregs that we
2171 // return.
2172 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
2173 InVals, isThisReturn,
2174 isThisReturn ? OutVals[0] : SDValue());
2175}
2176
2177/// HandleByVal - Every parameter *after* a byval parameter is passed
2178/// on the stack. Remember the next parameter register to allocate,
2179/// and then confiscate the rest of the parameter registers to ensure
2180/// this.
2181void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2182 unsigned Align) const {
2183 // Byval (as with any stack) slots are always at least 4 byte aligned.
2184 Align = std::max(Align, 4U);
2185
2186 unsigned Reg = State->AllocateReg(GPRArgRegs);
2187 if (!Reg)
2188 return;
2189
2190 unsigned AlignInRegs = Align / 4;
2191 unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2192 for (unsigned i = 0; i < Waste; ++i)
2193 Reg = State->AllocateReg(GPRArgRegs);
2194
2195 if (!Reg)
2196 return;
2197
2198 unsigned Excess = 4 * (ARM::R4 - Reg);
2199
2200 // Special case when NSAA != SP and the parameter size is greater than the
2201 // size of all remaining GPR regs. In that case we can't split the parameter;
2202 // we must send it to the stack. We also must set the NCRN to R4, so we waste
2203 // all remaining registers.
2204 const unsigned NSAAOffset = State->getNextStackOffset();
2205 if (NSAAOffset != 0 && Size > Excess) {
2206 while (State->AllocateReg(GPRArgRegs))
2207 ;
2208 return;
2209 }
2210
2211 // The first register for the byval parameter is the first register that
2212 // wasn't allocated before this method call, so it would be "reg".
2213 // If the parameter is small enough to be saved in the range [reg, r4), the
2214 // end (first after last) register would be reg + param-size-in-regs;
2215 // otherwise the parameter is split between registers and the stack, and the
2216 // end register would be r4 in that case.
2217 unsigned ByValRegBegin = Reg;
2218 unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2219 State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2220 // Note: the first register was already allocated at the beginning of the
2221 // function; allocate the remaining registers we need.
2222 for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2223 State->AllocateReg(GPRArgRegs);
2224 // A byval parameter that is split between registers and memory needs its
2225 // size truncated here.
2226 // In the case where the entire structure fits in registers, we set the
2227 // size in memory to zero.
2228 Size = std::max<int>(Size - Excess, 0);
2229}
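// Worked example (hypothetical argument layout, assuming no stack arguments
// have been allocated yet so NSAAOffset == 0): r0 already holds a previous
// argument and the byval parameter has Size = 20 and Align = 8. Then:
//   Reg = r1, AlignInRegs = 2, Waste = (r4 - r1) % 2 = 1 -> r1 is skipped,
//   Reg = r2, Excess = 4 * (r4 - r2) = 8,
//   ByValRegBegin = r2, ByValRegEnd = min(r2 + 20/4, r4) = r4,
//   Size = max(20 - 8, 0) = 12.
// So r2-r3 carry the first 8 bytes and the remaining 12 bytes go on the stack.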
2230
2231/// MatchingStackOffset - Return true if the given stack call argument is
2232/// already available in the same position (relatively) of the caller's
2233/// incoming argument stack.
2234static
2235bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2236 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2237 const TargetInstrInfo *TII) {
2238 unsigned Bytes = Arg.getValueSizeInBits() / 8;
2239 int FI = std::numeric_limits<int>::max();
2240 if (Arg.getOpcode() == ISD::CopyFromReg) {
2241 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2242 if (!TargetRegisterInfo::isVirtualRegister(VR))
2243 return false;
2244 MachineInstr *Def = MRI->getVRegDef(VR);
2245 if (!Def)
2246 return false;
2247 if (!Flags.isByVal()) {
2248 if (!TII->isLoadFromStackSlot(*Def, FI))
2249 return false;
2250 } else {
2251 return false;
2252 }
2253 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2254 if (Flags.isByVal())
2255 // ByVal argument is passed in as a pointer but it's now being
2256 // dereferenced. e.g.
2257 // define @foo(%struct.X* %A) {
2258 // tail call @bar(%struct.X* byval %A)
2259 // }
2260 return false;
2261 SDValue Ptr = Ld->getBasePtr();
2262 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2263 if (!FINode)
2264 return false;
2265 FI = FINode->getIndex();
2266 } else
2267 return false;
2268
2269 assert(FI != std::numeric_limits<int>::max());
2270 if (!MFI.isFixedObjectIndex(FI))
2271 return false;
2272 return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2273}
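// Illustrative case (hypothetical IR, illustration only): when a function
// simply forwards its own fifth integer argument to a tail callee, that value
// is reloaded from a fixed frame index with the same offset and size the
// callee expects, so MatchingStackOffset returns true and no outgoing store is
// needed for it.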
2274
2275/// IsEligibleForTailCallOptimization - Check whether the call is eligible
2276/// for tail call optimization. Targets which want to do tail call
2277/// optimization should implement this function.
2278bool
2279ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
2280 CallingConv::ID CalleeCC,
2281 bool isVarArg,
2282 bool isCalleeStructRet,
2283 bool isCallerStructRet,
2284 const SmallVectorImpl<ISD::OutputArg> &Outs,
2285 const SmallVectorImpl<SDValue> &OutVals,
2286 const SmallVectorImpl<ISD::InputArg> &Ins,
2287 SelectionDAG& DAG) const {
2288 MachineFunction &MF = DAG.getMachineFunction();
2289 const Function *CallerF = MF.getFunction();
2290 CallingConv::ID CallerCC = CallerF->getCallingConv();
2291
2292 assert(Subtarget->supportsTailCall());
2293
2294 // Look for obvious safe cases to perform tail call optimization that do not
2295 // require ABI changes. This is what gcc calls sibcall.
2296
2297 // Exception-handling functions need a special set of instructions to indicate
2298 // a return to the hardware. Tail-calling another function would probably
2299 // break this.
2300 if (CallerF->hasFnAttribute("interrupt"))
2301 return false;
2302
2303 // Also avoid sibcall optimization if either caller or callee uses struct
2304 // return semantics.
2305 if (isCalleeStructRet || isCallerStructRet)
2306 return false;
2307
2308 // Externally-defined functions with weak linkage should not be
2309 // tail-called on ARM when the OS does not support dynamic
2310 // pre-emption of symbols, as the AAELF spec requires normal calls
2311 // to undefined weak functions to be replaced with a NOP or jump to the
2312 // next instruction. The behaviour of branch instructions in this
2313 // situation (as used for tail calls) is implementation-defined, so we
2314 // cannot rely on the linker replacing the tail call with a return.
2315 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2316 const GlobalValue *GV = G->getGlobal();
2317 const Triple &TT = getTargetMachine().getTargetTriple();
2318 if (GV->hasExternalWeakLinkage() &&
2319 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
2320 return false;
2321 }
2322
2323 // Check that the call results are passed in the same way.
2324 LLVMContext &C = *DAG.getContext();
2325 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
2326 CCAssignFnForReturn(CalleeCC, isVarArg),
2327 CCAssignFnForReturn(CallerCC, isVarArg)))
2328 return false;
2329 // The callee has to preserve all registers the caller needs to preserve.
2330 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2331 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2332 if (CalleeCC != CallerCC) {
2333 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2334 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2335 return false;
2336 }
2337
2338 // If Caller's vararg or byval argument has been split between registers and
2339 // stack, do not perform tail call, since part of the argument is in caller's
2340 // local frame.
2341 const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
2342 if (AFI_Caller->getArgRegsSaveSize())
2343 return false;
2344
2345 // If the callee takes no arguments then go on to check the results of the
2346 // call.
2347 if (!Outs.empty()) {
2348 // Check if stack adjustment is needed. For now, do not do this if any
2349 // argument is passed on the stack.
2350 SmallVector<CCValAssign, 16> ArgLocs;
2351 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
2352 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
2353 if (CCInfo.getNextStackOffset()) {
2354 // Check if the arguments are already laid out in the right way as
2355 // the caller's fixed stack objects.
2356 MachineFrameInfo &MFI = MF.getFrameInfo();
2357 const MachineRegisterInfo *MRI = &MF.getRegInfo();
2358 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2359 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2360 i != e;
2361 ++i, ++realArgIdx) {
2362 CCValAssign &VA = ArgLocs[i];
2363 EVT RegVT = VA.getLocVT();
2364 SDValue Arg = OutVals[realArgIdx];
2365 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2366 if (VA.getLocInfo() == CCValAssign::Indirect)
2367 return false;
2368 if (VA.needsCustom()) {
2369 // f64 and vector types are split into multiple registers or
2370 // register/stack-slot combinations. The types will not match
2371 // the registers; give up on memory f64 refs until we figure
2372 // out what to do about this.
2373 if (!VA.isRegLoc())
2374 return false;
2375 if (!ArgLocs[++i].isRegLoc())
2376 return false;
2377 if (RegVT == MVT::v2f64) {
2378 if (!ArgLocs[++i].isRegLoc())
2379 return false;
2380 if (!ArgLocs[++i].isRegLoc())
2381 return false;
2382 }
2383 } else if (!VA.isRegLoc()) {
2384 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
2385 MFI, MRI, TII))
2386 return false;
2387 }
2388 }
2389 }
2390
2391 const MachineRegisterInfo &MRI = MF.getRegInfo();
2392 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
2393 return false;
2394 }
2395
2396 return true;
2397}
2398
2399bool
2400ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
2401 MachineFunction &MF, bool isVarArg,
2402 const SmallVectorImpl<ISD::OutputArg> &Outs,
2403 LLVMContext &Context) const {
2404 SmallVector<CCValAssign, 16> RVLocs;
2405 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2406 return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2407}
2408
2409static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
2410 const SDLoc &DL, SelectionDAG &DAG) {
2411 const MachineFunction &MF = DAG.getMachineFunction();
2412 const Function *F = MF.getFunction();
2413
2414 StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString();
2415
2416 // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
2417 // version of the "preferred return address". These offsets affect the return
2418 // instruction if this is a return from PL1 without hypervisor extensions.
2419 // IRQ/FIQ: +4 "subs pc, lr, #4"
2420 // SWI: 0 "subs pc, lr, #0"
2421 // ABORT: +4 "subs pc, lr, #4"
2422 // UNDEF: +4/+2 "subs pc, lr, #0"
2423 // UNDEF varies depending on where the exception came from ARM or Thumb
2424 // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
2425
2426 int64_t LROffset;
2427 if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
2428 IntKind == "ABORT")
2429 LROffset = 4;
2430 else if (IntKind == "SWI" || IntKind == "UNDEF")
2431 LROffset = 0;
2432 else
2433 report_fatal_error("Unsupported interrupt attribute. If present, value "
2434 "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
2435
2436 RetOps.insert(RetOps.begin() + 1,
2437 DAG.getConstant(LROffset, DL, MVT::i32, false));
2438
2439 return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
2440}
2441
2442SDValue
2443ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2444 bool isVarArg,
2445 const SmallVectorImpl<ISD::OutputArg> &Outs,
2446 const SmallVectorImpl<SDValue> &OutVals,
2447 const SDLoc &dl, SelectionDAG &DAG) const {
2448
2449 // CCValAssign - represent the assignment of the return value to a location.
2450 SmallVector<CCValAssign, 16> RVLocs;
2451
2452 // CCState - Info about the registers and stack slots.
2453 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2454 *DAG.getContext());
2455
2456 // Analyze outgoing return values.
2457 CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2458
2459 SDValue Flag;
2460 SmallVector<SDValue, 4> RetOps;
2461 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2462 bool isLittleEndian = Subtarget->isLittle();
2463
2464 MachineFunction &MF = DAG.getMachineFunction();
2465 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2466 AFI->setReturnRegsCount(RVLocs.size());
2467
2468 // Copy the result values into the output registers.
2469 for (unsigned i = 0, realRVLocIdx = 0;
2470 i != RVLocs.size();
2471 ++i, ++realRVLocIdx) {
2472 CCValAssign &VA = RVLocs[i];
2473 assert(VA.isRegLoc() && "Can only return in registers!");
2474
2475 SDValue Arg = OutVals[realRVLocIdx];
2476
2477 switch (VA.getLocInfo()) {
2478 default: llvm_unreachable("Unknown loc info!");
2479 case CCValAssign::Full: break;
2480 case CCValAssign::BCvt:
2481 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2482 break;
2483 }
2484
2485 if (VA.needsCustom()) {
2486 if (VA.getLocVT() == MVT::v2f64) {
2487 // Extract the first half and return it in two registers.
2488 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2489 DAG.getConstant(0, dl, MVT::i32));
2490 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
2491 DAG.getVTList(MVT::i32, MVT::i32), Half);
2492
2493 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2494 HalfGPRs.getValue(isLittleEndian ? 0 : 1),
2495 Flag);
2496 Flag = Chain.getValue(1);
2497 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2498 VA = RVLocs[++i]; // skip ahead to next loc
2499 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2500 HalfGPRs.getValue(isLittleEndian ? 1 : 0),
2501 Flag);
2502 Flag = Chain.getValue(1);
2503 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2504 VA = RVLocs[++i]; // skip ahead to next loc
2505
2506 // Extract the 2nd half and fall through to handle it as an f64 value.
2507 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2508 DAG.getConstant(1, dl, MVT::i32));
2509 }
2510 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
2511 // available.
2512 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2513 DAG.getVTList(MVT::i32, MVT::i32), Arg);
2514 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2515 fmrrd.getValue(isLittleEndian ? 0 : 1),
2516 Flag);
2517 Flag = Chain.getValue(1);
2518 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2519 VA = RVLocs[++i]; // skip ahead to next loc
2520 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2521 fmrrd.getValue(isLittleEndian ? 1 : 0),
2522 Flag);
2523 } else
2524 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
2525
2526    // Glue the emitted copies together so that nothing can be scheduled
2527    // between them.
2528 Flag = Chain.getValue(1);
2529 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2530 }
2531 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2532 const MCPhysReg *I =
2533 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2534 if (I) {
2535 for (; *I; ++I) {
2536 if (ARM::GPRRegClass.contains(*I))
2537 RetOps.push_back(DAG.getRegister(*I, MVT::i32));
2538 else if (ARM::DPRRegClass.contains(*I))
2539 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
2540 else
2541        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
2542 }
2543 }
2544
2545 // Update chain and glue.
2546 RetOps[0] = Chain;
2547 if (Flag.getNode())
2548 RetOps.push_back(Flag);
2549
2550 // CPUs which aren't M-class use a special sequence to return from
2551 // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
2552 // though we use "subs pc, lr, #N").
2553 //
2554 // M-class CPUs actually use a normal return sequence with a special
2555 // (hardware-provided) value in LR, so the normal code path works.
2556 if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") &&
2557 !Subtarget->isMClass()) {
2558 if (Subtarget->isThumb1Only())
2559 report_fatal_error("interrupt attribute is not supported in Thumb1");
2560 return LowerInterruptReturn(RetOps, dl, DAG);
2561 }
2562
2563 return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
2564}
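
As a rough illustration of the custom f64 path above: under a calling convention that returns f64 in a GPR pair (for example soft-float AAPCS on a core that still has VFP, which is an assumed configuration here), a function like the sketch below has its double result moved into r0/r1 via the VMOVRRD copies emitted in the loop.

    // Minimal sketch: on a little-endian soft-float ABI the f64 return value
    // ends up in r0 (low word) and r1 (high word).
    double return_pi() { return 3.141592653589793; }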
2565
2566bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2567 if (N->getNumValues() != 1)
2568 return false;
2569 if (!N->hasNUsesOfValue(1, 0))
2570 return false;
2571
2572 SDValue TCChain = Chain;
2573 SDNode *Copy = *N->use_begin();
2574 if (Copy->getOpcode() == ISD::CopyToReg) {
2575 // If the copy has a glue operand, we conservatively assume it isn't safe to
2576 // perform a tail call.
2577 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2578 return false;
2579 TCChain = Copy->getOperand(0);
2580 } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
2581 SDNode *VMov = Copy;
2582 // f64 returned in a pair of GPRs.
2583 SmallPtrSet<SDNode*, 2> Copies;
2584 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2585 UI != UE; ++UI) {
2586 if (UI->getOpcode() != ISD::CopyToReg)
2587 return false;
2588 Copies.insert(*UI);
2589 }
2590 if (Copies.size() > 2)
2591 return false;
2592
2593 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2594 UI != UE; ++UI) {
2595 SDValue UseChain = UI->getOperand(0);
2596 if (Copies.count(UseChain.getNode()))
2597 // Second CopyToReg
2598 Copy = *UI;
2599 else {
2600 // We are at the top of this chain.
2601 // If the copy has a glue operand, we conservatively assume it
2602 // isn't safe to perform a tail call.
2603 if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
2604 return false;
2605 // First CopyToReg
2606 TCChain = UseChain;
2607 }
2608 }
2609 } else if (Copy->getOpcode() == ISD::BITCAST) {
2610 // f32 returned in a single GPR.
2611 if (!Copy->hasOneUse())
2612 return false;
2613 Copy = *Copy->use_begin();
2614 if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
2615 return false;
2616 // If the copy has a glue operand, we conservatively assume it isn't safe to
2617 // perform a tail call.
2618 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2619 return false;
2620 TCChain = Copy->getOperand(0);
2621 } else {
2622 return false;
2623 }
2624
2625 bool HasRet = false;
2626 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2627 UI != UE; ++UI) {
2628 if (UI->getOpcode() != ARMISD::RET_FLAG &&
2629 UI->getOpcode() != ARMISD::INTRET_FLAG)
2630 return false;
2631 HasRet = true;
2632 }
2633
2634 if (!HasRet)
2635 return false;
2636
2637 Chain = TCChain;
2638 return true;
2639}
2640
2641bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2642 if (!Subtarget->supportsTailCall())
2643 return false;
2644
2645 auto Attr =
2646 CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2647 if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2648 return false;
2649
2650 return true;
2651}
2652
2653// Writing a 64-bit value to a register requires splitting it into two 32-bit
2654// halves first, then passing the low and high parts through separately.
2655static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
2656 SDLoc DL(Op);
2657 SDValue WriteValue = Op->getOperand(2);
2658
2659 // This function is only supposed to be called for i64 type argument.
2660  assert(WriteValue.getValueType() == MVT::i64
2661         && "LowerWRITE_REGISTER called for non-i64 type argument.");
2662
2663 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2664 DAG.getConstant(0, DL, MVT::i32));
2665 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2666 DAG.getConstant(1, DL, MVT::i32));
2667 SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
2668 return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
2669}
2670
2671// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
2672// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
2673// one of the above mentioned nodes. It has to be wrapped because otherwise
2674// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
2675// be used to form an addressing mode. These wrapped nodes will be selected
2676// into MOVi.
2677static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
2678 EVT PtrVT = Op.getValueType();
2679 // FIXME there is no actual debug info here
2680 SDLoc dl(Op);
2681 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2682 SDValue Res;
2683 if (CP->isMachineConstantPoolEntry())
2684 Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2685 CP->getAlignment());
2686 else
2687 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
2688 CP->getAlignment());
2689 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
2690}
2691
2692unsigned ARMTargetLowering::getJumpTableEncoding() const {
2693 return MachineJumpTableInfo::EK_Inline;
2694}
2695
2696SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
2697 SelectionDAG &DAG) const {
2698 MachineFunction &MF = DAG.getMachineFunction();
2699 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2700 unsigned ARMPCLabelIndex = 0;
2701 SDLoc DL(Op);
2702 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2703 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
2704 SDValue CPAddr;
2705 bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
2706 if (!IsPositionIndependent) {
2707 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
2708 } else {
2709 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
2710 ARMPCLabelIndex = AFI->createPICLabelUId();
2711 ARMConstantPoolValue *CPV =
2712 ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
2713 ARMCP::CPBlockAddress, PCAdj);
2714 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2715 }
2716 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
2717 SDValue Result = DAG.getLoad(
2718 PtrVT, DL, DAG.getEntryNode(), CPAddr,
2719 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2720 if (!IsPositionIndependent)
2721 return Result;
2722 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
2723 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
2724}
2725
2726/// \brief Convert a TLS address reference into the correct sequence of loads
2727/// and calls to compute the variable's address for Darwin, and return an
2728/// SDValue containing the final node.
2729
2730/// Darwin only has one TLS scheme which must be capable of dealing with the
2731/// fully general situation, in the worst case. This means:
2732/// + "extern __thread" declaration.
2733/// + Defined in a possibly unknown dynamic library.
2734///
2735/// The general system is that each __thread variable has a [3 x i32] descriptor
2736/// which contains information used by the runtime to calculate the address. The
2737/// only part of this the compiler needs to know about is the first word, which
2738/// contains a function pointer that must be called with the address of the
2739/// entire descriptor in "r0".
2740///
2741/// Since this descriptor may be in a different unit, in general access must
2742/// proceed along the usual ARM rules. A common sequence to produce is:
2743///
2744/// movw rT1, :lower16:_var$non_lazy_ptr
2745/// movt rT1, :upper16:_var$non_lazy_ptr
2746/// ldr r0, [rT1]
2747/// ldr rT2, [r0]
2748/// blx rT2
2749/// [...address now in r0...]
2750SDValue
2751ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
2752 SelectionDAG &DAG) const {
2753  assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin");
2754 SDLoc DL(Op);
2755
2756  // First step is to get the address of the actual global symbol. This is where
2757 // the TLS descriptor lives.
2758 SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
2759
2760 // The first entry in the descriptor is a function pointer that we must call
2761 // to obtain the address of the variable.
2762 SDValue Chain = DAG.getEntryNode();
2763 SDValue FuncTLVGet = DAG.getLoad(
2764 MVT::i32, DL, Chain, DescAddr,
2765 MachinePointerInfo::getGOT(DAG.getMachineFunction()),
2766 /* Alignment = */ 4,
2767 MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
2768 MachineMemOperand::MOInvariant);
2769 Chain = FuncTLVGet.getValue(1);
2770
2771 MachineFunction &F = DAG.getMachineFunction();
2772 MachineFrameInfo &MFI = F.getFrameInfo();
2773 MFI.setAdjustsStack(true);
2774
2775 // TLS calls preserve all registers except those that absolutely must be
2776 // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
2777 // silly).
2778 auto TRI =
2779 getTargetMachine().getSubtargetImpl(*F.getFunction())->getRegisterInfo();
2780 auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
2781 const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
2782
2783 // Finally, we can make the call. This is just a degenerate version of a
2784  // normal ARM call node: r0 takes the address of the descriptor, and
2785 // returns the address of the variable in this thread.
2786 Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
2787 Chain =
2788 DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
2789 Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
2790 DAG.getRegisterMask(Mask), Chain.getValue(1));
2791 return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
2792}
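
A small sketch of source that exercises this Darwin lowering (the variable name is illustrative, and a Darwin ARM target with TLS support is assumed): accessing a thread-local variable produces the movw/movt plus descriptor-call sequence shown in the comment above, with the variable's address returned in r0.

    // Minimal sketch, assuming a Darwin ARM target with TLS support.
    __thread int tls_counter;
    int bump_tls() { return ++tls_counter; }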
2793
2794SDValue
2795ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
2796 SelectionDAG &DAG) const {
2797  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
2798
2799 SDValue Chain = DAG.getEntryNode();
2800 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2801 SDLoc DL(Op);
2802
2803 // Load the current TEB (thread environment block)
2804 SDValue Ops[] = {Chain,
2805 DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
2806 DAG.getConstant(15, DL, MVT::i32),
2807 DAG.getConstant(0, DL, MVT::i32),
2808 DAG.getConstant(13, DL, MVT::i32),
2809 DAG.getConstant(0, DL, MVT::i32),
2810 DAG.getConstant(2, DL, MVT::i32)};
2811 SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
2812 DAG.getVTList(MVT::i32, MVT::Other), Ops);
2813
2814 SDValue TEB = CurrentTEB.getValue(0);
2815 Chain = CurrentTEB.getValue(1);
2816
2817 // Load the ThreadLocalStoragePointer from the TEB
2818 // A pointer to the TLS array is located at offset 0x2c from the TEB.
2819 SDValue TLSArray =
2820 DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
2821 TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
2822
2823  // The pointer to the thread's TLS data area is located in the TLSArray at
2824  // an offset of the TLS index scaled by 4.
2825
2826 // Load the TLS index from the C runtime
2827 SDValue TLSIndex =
2828 DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
2829 TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
2830 TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
2831
2832 SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
2833 DAG.getConstant(2, DL, MVT::i32));
2834 SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
2835 DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
2836 MachinePointerInfo());
2837
2838 // Get the offset of the start of the .tls section (section base)
2839 const auto *GA = cast<GlobalAddressSDNode>(Op);
2840 auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
2841 SDValue Offset = DAG.getLoad(
2842 PtrVT, DL, Chain, DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
2843 DAG.getTargetConstantPool(CPV, PtrVT, 4)),
2844 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2845
2846 return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
2847}
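
For orientation, a hedged sketch of what this computes: the generated code loads ThreadLocalStoragePointer from TEB+0x2c, indexes that array by _tls_index, and adds the variable's SECREL offset. Source as simple as the following reaches this path on Windows on ARM; the names below are illustrative.

    // Minimal sketch, assuming a Windows-on-ARM target.
    thread_local int request_count;
    int next_request() { return ++request_count; }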
2848
2849// Lower ISD::GlobalTLSAddress using the "general dynamic" model
2850SDValue
2851ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
2852 SelectionDAG &DAG) const {
2853 SDLoc dl(GA);
2854 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2855 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2856 MachineFunction &MF = DAG.getMachineFunction();
2857 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2858 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2859 ARMConstantPoolValue *CPV =
2860 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2861 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
2862 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2863 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
2864 Argument = DAG.getLoad(
2865 PtrVT, dl, DAG.getEntryNode(), Argument,
2866 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2867 SDValue Chain = Argument.getValue(1);
2868
2869 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2870 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
2871
2872 // call __tls_get_addr.
2873 ArgListTy Args;
2874 ArgListEntry Entry;
2875 Entry.Node = Argument;
2876 Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
2877 Args.push_back(Entry);
2878
2879 // FIXME: is there useful debug info available here?
2880 TargetLowering::CallLoweringInfo CLI(DAG);
2881 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
2882 CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
2883 DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
2884
2885 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2886 return CallResult.first;
2887}
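
A brief illustrative example (assumed to be compiled as position-independent code for ELF): a thread-local variable that may be defined in another shared object is accessed through the __tls_get_addr call constructed above.

    // Minimal sketch; an extern thread-local typically uses the general-dynamic
    // model unless the compiler can prove a tighter model is safe.
    extern thread_local int shared_tls;
    int read_shared() { return shared_tls; }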
2888
2889// Lower ISD::GlobalTLSAddress using the "initial exec" or
2890// "local exec" model.
2891SDValue
2892ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
2893 SelectionDAG &DAG,
2894 TLSModel::Model model) const {
2895 const GlobalValue *GV = GA->getGlobal();
2896 SDLoc dl(GA);
2897 SDValue Offset;
2898 SDValue Chain = DAG.getEntryNode();
2899 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2900 // Get the Thread Pointer
2901 SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
2902
2903 if (model == TLSModel::InitialExec) {
2904 MachineFunction &MF = DAG.getMachineFunction();
2905 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2906 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2907 // Initial exec model.
2908 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2909 ARMConstantPoolValue *CPV =
2910 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2911 ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
2912 true);
2913 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2914 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2915 Offset = DAG.getLoad(
2916 PtrVT, dl, Chain, Offset,
2917 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2918 Chain = Offset.getValue(1);
2919
2920 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2921 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
2922
2923 Offset = DAG.getLoad(
2924 PtrVT, dl, Chain, Offset,
2925 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2926 } else {
2927 // local exec model
2928    assert(model == TLSModel::LocalExec);
2929 ARMConstantPoolValue *CPV =
2930 ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
2931 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2932 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2933 Offset = DAG.getLoad(
2934 PtrVT, dl, Chain, Offset,
2935 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2936 }
2937
2938 // The address of the thread local variable is the add of the thread
2939 // pointer with the offset of the variable.
2940 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
2941}
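
As a sketch of the exec models handled above (target and TLS model are assumptions for illustration): a thread-local variable private to the final executable can use local-exec, where the address is just the thread pointer plus a TPOFF offset, matching the final ADD returned here; initial-exec additionally loads the offset through the GOT.

    // Minimal sketch; with internal linkage in an executable this typically
    // lowers via the local-exec path (thread pointer + TPOFF).
    static thread_local int per_thread_count;
    int tick() { return ++per_thread_count; }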
2942
2943SDValue
2944ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
2945 if (Subtarget->isTargetDarwin())
2946 return LowerGlobalTLSAddressDarwin(Op, DAG);
2947
2948 if (Subtarget->isTargetWindows())
2949 return LowerGlobalTLSAddressWindows(Op, DAG);
2950
2951 // TODO: implement the "local dynamic" model
2952  assert(Subtarget->isTargetELF() && "Only ELF implemented here");
2953 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
2954 if (DAG.getTarget().Options.EmulatedTLS)
2955 return LowerToTLSEmulatedModel(GA, DAG);
2956
2957 TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
2958
2959 switch (model) {
2960 case TLSModel::GeneralDynamic:
2961 case TLSModel::LocalDynamic:
2962 return LowerToTLSGeneralDynamicModel(GA, DAG);
2963 case TLSModel::InitialExec:
2964 case TLSModel::LocalExec:
2965 return LowerToTLSExecModels(GA, DAG, model);
2966 }
2967  llvm_unreachable("bogus TLS model");
2968}
2969
2970/// Return true if all users of V are within function F, looking through
2971/// ConstantExprs.
2972static bool allUsersAreInFunction(const Value *V, const Function *F) {
2973 SmallVector<const User*,4> Worklist;
2974 for (auto *U : V->users())
2975 Worklist.push_back(U);
2976 while (!Worklist.empty()) {
2977 auto *U = Worklist.pop_back_val();
2978 if (isa<ConstantExpr>(U)) {
2979 for (auto *UU : U->users())
2980 Worklist.push_back(UU);
2981 continue;
2982 }
2983
2984 auto *I = dyn_cast<Instruction>(U);
2985 if (!I || I->getParent()->getParent() != F)
2986 return false;
2987 }
2988 return true;
2989}
2990
2991/// Return true if all users of V are within some (any) function, looking through
2992/// ConstantExprs. In other words, are there any global constant users?
2993static bool allUsersAreInFunctions(const Value *V) {
2994 SmallVector<const User*,4> Worklist;
2995 for (auto *U : V->users())
2996 Worklist.push_back(U);
2997 while (!Worklist.empty()) {
2998 auto *U = Worklist.pop_back_val();
2999 if (isa<ConstantExpr>(U)) {
3000 for (auto *UU : U->users())
3001 Worklist.push_back(UU);
3002 continue;
3003 }
3004
3005 if (!isa<Instruction>(U))
3006 return false;
3007 }
3008 return true;
3009}
3010
3011// Return true if T is an integer, float or an array/vector of either.
3012static bool isSimpleType(Type *T) {
3013 if (T->isIntegerTy() || T->isFloatingPointTy())
3014 return true;
3015 Type *SubT = nullptr;
3016 if (T->isArrayTy())
3017 SubT = T->getArrayElementType();
3018 else if (T->isVectorTy())
3019 SubT = T->getVectorElementType();
3020 else
3021 return false;
3022 return SubT->isIntegerTy() || SubT->isFloatingPointTy();
3023}
3024
3025static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
3026 EVT PtrVT, const SDLoc &dl) {
3027 // If we're creating a pool entry for a constant global with unnamed address,
3028 // and the global is small enough, we can emit it inline into the constant pool
3029 // to save ourselves an indirection.
3030 //
3031 // This is a win if the constant is only used in one function (so it doesn't
3032 // need to be duplicated) or duplicating the constant wouldn't increase code
3033 // size (implying the constant is no larger than 4 bytes).
3034 const Function *F = DAG.getMachineFunction().getFunction();
3035
3036  // We rely on this decision to inline being idempotent and unrelated to the
3037 // use-site. We know that if we inline a variable at one use site, we'll
3038 // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3039  // doesn't know about this optimization, so bail out if it's enabled;
3040  // otherwise we could decide to inline here (and thus never emit the GV) while
3041  // fast-isel generated code still requires the GV.
3042 if (!EnableConstpoolPromotion ||
3043 DAG.getMachineFunction().getTarget().Options.EnableFastISel)
3044 return SDValue();
3045
3046 auto *GVar = dyn_cast<GlobalVariable>(GV);
3047 if (!GVar || !GVar->hasInitializer() ||
3048 !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3049 !GVar->hasLocalLinkage())
3050 return SDValue();
3051
3052 // Ensure that we don't try and inline any type that contains pointers. If
3053 // we inline a value that contains relocations, we move the relocations from
3054 // .data to .text which is not ideal.
3055 auto *Init = GVar->getInitializer();
3056 if (!isSimpleType(Init->getType()))
3057 return SDValue();
3058
3059 // The constant islands pass can only really deal with alignment requests
3060 // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3061  // any type requiring alignment greater than 4 bytes. We also
3062  // can only promote constants that are multiples of 4 bytes in size or
3063  // are paddable to a multiple of 4. Currently we only try to pad constants
3064 // that are strings for simplicity.
3065 auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3066 unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3067 unsigned Align = GVar->getAlignment();
3068 unsigned RequiredPadding = 4 - (Size % 4);
3069 bool PaddingPossible =
3070 RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3071 if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize ||
3072 Size == 0)
3073 return SDValue();
3074
3075 unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3076 MachineFunction &MF = DAG.getMachineFunction();
3077 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3078
3079 // We can't bloat the constant pool too much, else the ConstantIslands pass
3080 // may fail to converge. If we haven't promoted this global yet (it may have
3081 // multiple uses), and promoting it would increase the constant pool size (Sz
3082 // > 4), ensure we have space to do so up to MaxTotal.
3083 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3084 if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3085 ConstpoolPromotionMaxTotal)
3086 return SDValue();
3087
3088 // This is only valid if all users are in a single function OR it has users
3089  // in multiple functions but it is no larger than a pointer. We also check if
3090 // GVar has constant (non-ConstantExpr) users. If so, it essentially has its
3091 // address taken.
3092 if (!allUsersAreInFunction(GVar, F) &&
3093 !(Size <= 4 && allUsersAreInFunctions(GVar)))
3094 return SDValue();
3095
3096 // We're going to inline this global. Pad it out if needed.
3097 if (RequiredPadding != 4) {
3098 StringRef S = CDAInit->getAsString();
3099
3100 SmallVector<uint8_t,16> V(S.size());
3101 std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3102 while (RequiredPadding--)
3103 V.push_back(0);
3104 Init = ConstantDataArray::get(*DAG.getContext(), V);
3105 }
3106
3107 auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3108 SDValue CPAddr =
3109 DAG.getTargetConstantPool(CPVal, PtrVT, /*Align=*/4);
3110 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3111 AFI->markGlobalAsPromotedToConstantPool(GVar);
3112 AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
3113 PaddedSize - 4);
3114 }
3115 ++NumConstpoolPromoted;
3116 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3117}
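
A hedged example of a candidate for this promotion (whether it actually fires depends on the promotion flag and on how the front end marks the global): a small, constant, local-linkage string such as the one below could be emitted directly into the constant pool, with its 6 bytes padded to 8 so the entry stays a multiple of 4.

    // Minimal sketch; kGreeting is illustrative and assumes the front end
    // gives it local linkage and unnamed_addr.
    static const char kGreeting[] = "hello";
    const char *greeting() { return kGreeting; }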
3118
3119static bool isReadOnly(const GlobalValue *GV) {
3120 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3121 GV = GA->getBaseObject();
3122 return (isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) ||
3123 isa<Function>(GV);
3124}
3125
3126SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3127 SelectionDAG &DAG) const {
3128 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3129 SDLoc dl(Op);
3130 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3131 const TargetMachine &TM = getTargetMachine();
3132 bool IsRO = isReadOnly(GV);
3133
3134 // promoteToConstantPool only if not generating XO text section
3135 if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
3136 if (SDValue V = promoteToConstantPool(GV, DAG, PtrVT, dl))
3137 return V;
3138
3139 if (isPositionIndependent()) {
3140 bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
3141
3142 MachineFunction &MF = DAG.getMachineFunction();
3143 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3144 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3145 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3146 SDLoc dl(Op);
3147 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3148 ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
3149 GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj,
3150 UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
3151 /*AddCurrentAddress=*/UseGOT_PREL);
3152 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3153 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3154 SDValue Result = DAG.getLoad(
3155 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3156 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3157 SDValue Chain = Result.getValue(1);
3158 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3159 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
3160 if (UseGOT_PREL)
3161 Result =
3162 DAG.getLoad(PtrVT, dl, Chain, Result,
3163 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3164 return Result;
3165 } else if (Subtarget->isROPI() && IsRO) {
3166 // PC-relative.
3167 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3168 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3169 return Result;
3170 } else if (Subtarget->isRWPI() && !IsRO) {
3171 // SB-relative.
3172 SDValue RelAddr;
3173 if (Subtarget->useMovt(DAG.getMachineFunction())) {
3174 ++NumMovwMovt;
3175 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3176 RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3177 } else { // use literal pool for address constant
3178 ARMConstantPoolValue *CPV =
3179 ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
3180 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3181 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3182 RelAddr = DAG.getLoad(
3183 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3184 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3185 }
3186 SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3187 SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3188 return Result;
3189 }
3190
3191 // If we have T2 ops, we can materialize the address directly via movt/movw
3192 // pair. This is always cheaper.
3193 if (Subtarget->useMovt(DAG.getMachineFunction())) {
3194 ++NumMovwMovt;
3195 // FIXME: Once remat is capable of dealing with instructions with register
3196 // operands, expand this into two nodes.
3197 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
3198 DAG.getTargetGlobalAddress(GV, dl, PtrVT));
3199 } else {
3200 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
3201 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3202 return DAG.getLoad(
3203 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3204 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3205 }
3206}
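
To make the movw/movt path above concrete, a small sketch (symbol name illustrative): in the non-PIC, non-ROPI/RWPI case on a target with MOVT, taking a global's address materializes it roughly as "movw r0, :lower16:global_var" followed by "movt r0, :upper16:global_var" rather than a literal-pool load.

    // Minimal sketch of source whose address materialization uses movw/movt
    // when the subtarget reports useMovt().
    extern int global_var;
    int *addr_of_global() { return &global_var; }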
3207
3208SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
3209 SelectionDAG &DAG) const {
3210  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3211         "ROPI/RWPI not currently supported for Darwin");
3212 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3213 SDLoc dl(Op);
3214 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3215
3216 if (Subtarget->useMovt(DAG.getMachineFunction()))
3217 ++NumMovwMovt;
3218
3219 // FIXME: Once remat is capable of dealing with instructions with register
3220 // operands, expand this into multiple nodes
3221 unsigned Wrapper =
3222 isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;
3223
3224 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
3225 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
3226
3227 if (Subtarget->isGVIndirectSymbol(GV))
3228 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3229 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3230 return Result;
3231}
3232
3233SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
3234 SelectionDAG &DAG) const {
3235  assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
3236  assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
3237         "Windows on ARM expects to use movw/movt");
3238  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3239         "ROPI/RWPI not currently supported for Windows");
3240
3241 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3242 const ARMII::TOF TargetFlags =
3243 (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG);
3244 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3245 SDValue Result;
3246 SDLoc DL(Op);
3247
3248 ++NumMovwMovt;
3249
3250 // FIXME: Once remat is capable of dealing with instructions with register
3251 // operands, expand this into two nodes.
3252 Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
3253 DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
3254 TargetFlags));
3255 if (GV->hasDLLImportStorageClass())
3256 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3257 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3258 return Result;
3259}
3260
3261SDValue
3262ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
3263 SDLoc dl(Op);
3264 SDValue Val = DAG.getConstant(0, dl, MVT::i32);
3265 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
3266 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
3267 Op.getOperand(1), Val);
3268}
3269
3270SDValue
3271ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
3272 SDLoc dl(Op);
3273 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
3274 Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
3275}
3276
3277SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
3278 SelectionDAG &DAG) const {
3279 SDLoc dl(Op);
3280 return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
3281 Op.getOperand(0));
3282}
3283
3284SDValue
3285ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
3286 const ARMSubtarget *Subtarget) const {
3287 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3288 SDLoc dl(Op);
3289 switch (IntNo) {
3290 default: return SDValue(); // Don't custom lower most intrinsics.
3291 case Intrinsic::thread_pointer: {
3292 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3293 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3294 }
3295 case Intrinsic::eh_sjlj_lsda: {
3296 MachineFunction &MF = DAG.getMachineFunction();
3297 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3298 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3299 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3300 SDValue CPAddr;
3301 bool IsPositionIndependent = isPositionIndependent();
3302 unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
3303 ARMConstantPoolValue *CPV =
3304 ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
3305 ARMCP::CPLSDA, PCAdj);
3306 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3307 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3308 SDValue Result = DAG.getLoad(
3309 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3310 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3311
3312 if (IsPositionIndependent) {
3313 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3314 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
3315 }
3316 return Result;
3317 }
3318 case Intrinsic::arm_neon_vabs:
3319 return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
3320 Op.getOperand(1));
3321 case Intrinsic::arm_neon_vmulls:
3322 case Intrinsic::arm_neon_vmullu: {
3323 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
3324 ? ARMISD::VMULLs : ARMISD::VMULLu;
3325 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3326 Op.getOperand(1), Op.getOperand(2));
3327 }
3328 case Intrinsic::arm_neon_vminnm:
3329 case Intrinsic::arm_neon_vmaxnm: {
3330 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
3331 ? ISD::FMINNUM : ISD::FMAXNUM;
3332 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3333 Op.getOperand(1), Op.getOperand(2));
3334 }
3335 case Intrinsic::arm_neon_vminu:
3336 case Intrinsic::arm_neon_vmaxu: {
3337 if (Op.getValueType().isFloatingPoint())
3338 return SDValue();
3339 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
3340 ? ISD::UMIN : ISD::UMAX;
3341 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3342 Op.getOperand(1), Op.getOperand(2));
3343 }
3344 case Intrinsic::arm_neon_vmins:
3345 case Intrinsic::arm_neon_vmaxs: {
3346 // v{min,max}s is overloaded between signed integers and floats.
3347 if (!Op.getValueType().isFloatingPoint()) {
3348 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3349 ? ISD::SMIN : ISD::SMAX;
3350 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3351 Op.getOperand(1), Op.getOperand(2));
3352 }
3353 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3354 ? ISD::FMINNAN : ISD::FMAXNAN;
3355 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3356 Op.getOperand(1), Op.getOperand(2));
3357 }
3358 case Intrinsic::arm_neon_vtbl1:
3359 return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
3360 Op.getOperand(1), Op.getOperand(2));
3361 case Intrinsic::arm_neon_vtbl2:
3362 return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
3363 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3364 }
3365}
3366
3367static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
3368 const ARMSubtarget *Subtarget) {
3369 SDLoc dl(Op);
3370 ConstantSDNode *ScopeN = cast<ConstantSDNode>(Op.getOperand(2));
3371 auto Scope = static_cast<SynchronizationScope>(ScopeN->getZExtValue());
3372 if (Scope == SynchronizationScope::SingleThread)
3373 return Op;
3374
3375 if (!Subtarget->hasDataBarrier()) {
3376 // Some ARMv6 cpus can support data barriers with an mcr instruction.
3377 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
3378 // here.
3379    assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
3380           "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
3381 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
3382 DAG.getConstant(0, dl, MVT::i32));
3383 }
3384
3385 ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
3386 AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
3387 ARM_MB::MemBOpt Domain = ARM_MB::ISH;
3388 if (Subtarget->isMClass()) {
3389 // Only a full system barrier exists in the M-class architectures.
3390 Domain = ARM_MB::SY;
3391 } else if (Subtarget->preferISHSTBarriers() &&
3392 Ord == AtomicOrdering::Release) {
3393 // Swift happens to implement ISHST barriers in a way that's compatible with
3394 // Release semantics but weaker than ISH so we'd be fools not to use
3395 // it. Beware: other processors probably don't!
3396 Domain = ARM_MB::ISHST;
3397 }
3398
3399 return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
3400 DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
3401 DAG.getConstant(Domain, dl, MVT::i32));
3402}
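
A short sketch of a fence that reaches this lowering (standard C++; the exact barrier chosen depends on the subtarget checks above): on A-class cores it becomes "dmb ish", on M-class "dmb sy", and a release fence on Swift may use "dmb ishst".

    #include <atomic>
    // Minimal sketch: publish a value, then issue a full barrier.
    void publish(int *slot, int value) {
      *slot = value;
      std::atomic_thread_fence(std::memory_order_seq_cst);
    }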
3403
3404static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
3405 const ARMSubtarget *Subtarget) {
3406  // ARM pre-v5TE and Thumb1 do not have preload instructions.
3407 if (!(Subtarget->isThumb2() ||
3408 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
3409 // Just preserve the chain.
3410 return Op.getOperand(0);
3411
3412 SDLoc dl(Op);
3413 unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
3414 if (!isRead &&
3415 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
3416 // ARMv7 with MP extension has PLDW.
3417 return Op.getOperand(0);
3418
3419 unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3420 if (Subtarget->isThumb()) {
3421 // Invert the bits.
3422 isRead = ~isRead & 1;
3423 isData = ~isData & 1;
3424 }
3425
3426 return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
3427 Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
3428 DAG.getConstant(isData, dl, MVT::i32));
3429}
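
For illustration, a __builtin_prefetch call like the sketch below lowers through this function: reads become PLD on targets with the instructions, and writes become PLDW only when the v7 MP-extension checks above pass (otherwise only the chain is preserved).

    // Minimal sketch; the rw/locality arguments follow the GCC builtin's
    // convention (0 = read, 3 = high temporal locality).
    void warm_cache(const char *p) {
      __builtin_prefetch(p, 0, 3);
    }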
3430
3431static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
3432 MachineFunction &MF = DAG.getMachineFunction();
3433 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
3434
3435 // vastart just stores the address of the VarArgsFrameIndex slot into the
3436 // memory location argument.
3437 SDLoc dl(Op);
3438 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
3439 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3440 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3441 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3442 MachinePointerInfo(SV));
3443}
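
A minimal variadic function, shown as a sketch, makes the behaviour above concrete: va_start simply stores the address of the VarArgsFrameIndex slot (set up by VarArgStyleRegisters below) into the va_list.

    #include <cstdarg>
    // Minimal sketch of a variadic callee whose va_start hits LowerVASTART.
    int sum(int count, ...) {
      va_list ap;
      va_start(ap, count);
      int total = 0;
      for (int i = 0; i < count; ++i)
        total += va_arg(ap, int);
      va_end(ap);
      return total;
    }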
3444
3445SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
3446 CCValAssign &NextVA,
3447 SDValue &Root,
3448 SelectionDAG &DAG,
3449 const SDLoc &dl) const {
3450 MachineFunction &MF = DAG.getMachineFunction();
3451 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3452
3453 const TargetRegisterClass *RC;
3454 if (AFI->isThumb1OnlyFunction())
3455 RC = &ARM::tGPRRegClass;
3456 else
3457 RC = &ARM::GPRRegClass;
3458
3459 // Transform the arguments stored in physical registers into virtual ones.
3460 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3461 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3462
3463 SDValue ArgValue2;
3464 if (NextVA.isMemLoc()) {
3465 MachineFrameInfo &MFI = MF.getFrameInfo();
3466 int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
3467
3468 // Create load node to retrieve arguments from the stack.
3469 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3470 ArgValue2 = DAG.getLoad(
3471 MVT::i32, dl, Root, FIN,
3472 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3473 } else {
3474 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
3475 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3476 }
3477 if (!Subtarget->isLittle())
3478 std::swap (ArgValue, ArgValue2);
3479 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
3480}
3481
3482// The remaining GPRs hold either the beginning of variable-argument
3483// data, or the beginning of an aggregate passed by value (usually
3484// byval). Either way, we allocate stack slots adjacent to the data
3485// provided by our caller, and store the unallocated registers there.
3486// If this is a variadic function, the va_list pointer will begin with
3487// these values; otherwise, this reassembles a (byval) structure that
3488// was split between registers and memory.
3489// Return: The frame index the registers were stored into.
3490int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
3491 const SDLoc &dl, SDValue &Chain,
3492 const Value *OrigArg,
3493 unsigned InRegsParamRecordIdx,
3494 int ArgOffset, unsigned ArgSize) const {
3495  // Currently, two use cases are possible:
3496  // Case #1. Non-var-args function, and we meet the first byval parameter.
3497  //          Set up the first unallocated register as the first byval register;
3498  //          eat all remaining registers
3499  //          (these two actions are performed by the HandleByVal method).
3500  //          Then, here, we initialize the stack frame with
3501  //          "store-reg" instructions.
3502  // Case #2. Var-args function that doesn't contain byval parameters.
3503  //          The same: eat all remaining unallocated registers,
3504  //          initialize the stack frame.
3505
3506 MachineFunction &MF = DAG.getMachineFunction();
3507 MachineFrameInfo &MFI = MF.getFrameInfo();
3508 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3509 unsigned RBegin, REnd;
3510 if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
3511 CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
3512 } else {
3513 unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3514 RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
3515 REnd = ARM::R4;
3516 }
3517
3518 if (REnd != RBegin)
3519 ArgOffset = -4 * (ARM::R4 - RBegin);
3520
3521 auto PtrVT = getPointerTy(DAG.getDataLayout());
3522 int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
3523 SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
3524
3525 SmallVector<SDValue, 4> MemOps;
3526 const TargetRegisterClass *RC =
3527 AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
3528
3529 for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
3530 unsigned VReg = MF.addLiveIn(Reg, RC);
3531 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
3532 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3533 MachinePointerInfo(OrigArg, 4 * i));
3534 MemOps.push_back(Store);
3535 FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
3536 }
3537
3538 if (!MemOps.empty())
3539 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3540 return FrameIndex;
3541}
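
A hedged sketch of the byval case described above (whether the front end actually passes a given aggregate byval is ABI- and size-dependent, so treat this as an assumption): when part of such an aggregate arrives in r0-r3 and the rest on the stack, the register portion is spilled into slots adjacent to the caller-provided memory so the callee sees one contiguous object.

    // Minimal sketch; assumes the aggregate is passed byval and split
    // between registers and the stack under AAPCS.
    struct Payload { int words[8]; };
    int first_word(Payload p) { return p.words[0]; }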
3542
3543// Set up the stack frame that the va_list pointer will start from.
3544void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
3545 const SDLoc &dl, SDValue &Chain,
3546 unsigned ArgOffset,
3547 unsigned TotalArgRegsSaveSize,
3548 bool ForceMutable) const {
3549 MachineFunction &MF = DAG.getMachineFunction();
3550 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3551
3552 // Try to store any remaining integer argument regs
3553 // to their spots on the stack so that they may be loaded by dereferencing
3554 // the result of va_next.
3555  // If there are no regs to be stored, just point the address after the last
3556  // argument passed via the stack.
3557 int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
3558 CCInfo.getInRegsParamsCount(),
3559 CCInfo.getNextStackOffset(), 4);
3560 AFI->setVarArgsFrameIndex(FrameIndex);
3561}
3562
3563SDValue ARMTargetLowering::LowerFormalArguments(
3564 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3565 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3566 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3567 MachineFunction &MF = DAG.getMachineFunction();
3568 MachineFrameInfo &MFI = MF.getFrameInfo();
3569
3570 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3571
3572 // Assign locations to all of the incoming arguments.
3573 SmallVector<CCValAssign, 16> ArgLocs;
3574 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3575 *DAG.getContext());
3576 CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
3577
3578 SmallVector<SDValue, 16> ArgValues;
3579 SDValue ArgValue;
3580 Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
3581 unsigned CurArgIdx = 0;
3582
3583 // Initially ArgRegsSaveSize is zero.
3584  // Then we increase this value each time we meet a byval parameter.
3585  // We also increase this value in the case of a varargs function.
3586 AFI->setArgRegsSaveSize(0);
3587
3588 // Calculate the amount of stack space that we need to allocate to store
3589 // byval and variadic arguments that are passed in registers.
3590 // We need to know this before we allocate the first byval or variadic
3591 // argument, as they will be allocated a stack slot below the CFA (Canonical
3592 // Frame Address, the stack pointer at entry to the function).
3593 unsigned ArgRegBegin = ARM::R4;
3594 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3595 if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
3596 break;
3597
3598 CCValAssign &VA = ArgLocs[i];
3599 unsigned Index = VA.getValNo();
3600 ISD::ArgFlagsTy Flags = Ins[Index].Flags;
3601 if (!Flags.isByVal())
3602 continue;
3603
3604    assert(VA.isMemLoc() && "unexpected byval pointer in reg");
3605 unsigned RBegin, REnd;
3606 CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
3607 ArgRegBegin = std::min(ArgRegBegin, RBegin);
3608
3609 CCInfo.nextInRegsParam();
3610 }
3611 CCInfo.rewindByValRegsInfo();
3612
3613 int lastInsIndex = -1;
3614 if (isVarArg && MFI.hasVAStart()) {
3615 unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3616 if (RegIdx != array_lengthof(GPRArgRegs))
3617 ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
3618 }
3619
3620 unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
3621 AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
3622 auto PtrVT = getPointerTy(DAG.getDataLayout());
3623
3624 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3625 CCValAssign &VA = ArgLocs[i];
3626 if (Ins[VA.getValNo()].isOrigArg()) {
3627 std::advance(CurOrigArg,
3628 Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
3629 CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
3630 }
3631 // Arguments stored in registers.
3632 if (VA.isRegLoc()) {
3633 EVT RegVT = VA.getLocVT();
3634
3635 if (VA.needsCustom()) {
3636 // f64 and vector types are split up into multiple registers or
3637 // combinations of registers and stack slots.
3638 if (VA.getLocVT() == MVT::v2f64) {
3639 SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
3640 Chain, DAG, dl);
3641 VA = ArgLocs[++i]; // skip ahead to next loc
3642 SDValue ArgValue2;
3643 if (VA.isMemLoc()) {
3644 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
3645 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3646 ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
3647 MachinePointerInfo::getFixedStack(
3648 DAG.getMachineFunction(), FI));
3649 } else {
3650 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
3651 Chain, DAG, dl);
3652 }
3653 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
3654 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3655 ArgValue, ArgValue1,
3656 DAG.getIntPtrConstant(0, dl));
3657 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3658 ArgValue, ArgValue2,
3659 DAG.getIntPtrConstant(1, dl));
3660 } else
3661 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
3662
3663 } else {
3664 const TargetRegisterClass *RC;
3665
3666 if (RegVT == MVT::f32)
3667 RC = &ARM::SPRRegClass;
3668 else if (RegVT == MVT::f64)
3669 RC = &ARM::DPRRegClass;
3670 else if (RegVT == MVT::v2f64)
3671 RC = &ARM::QPRRegClass;
3672 else if (RegVT == MVT::i32)
3673 RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
3674 : &ARM::GPRRegClass;
3675 else
3676          llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
3677
3678 // Transform the arguments in physical registers into virtual ones.
3679 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3680 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3681 }
3682
3683 // If this is an 8 or 16-bit value, it is really passed promoted
3684 // to 32 bits. Insert an assert[sz]ext to capture this, then
3685 // truncate to the right size.
3686 switch (VA.getLocInfo()) {
3687      default: llvm_unreachable("Unknown loc info!");
3688 case CCValAssign::Full: break;
3689 case CCValAssign::BCvt:
3690 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
3691 break;
3692 case CCValAssign::SExt:
3693 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3694 DAG.getValueType(VA.getValVT()));
3695 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3696 break;
3697 case CCValAssign::ZExt:
3698 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3699 DAG.getValueType(VA.getValVT()));
3700 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3701 break;
3702 }
3703
3704 InVals.push_back(ArgValue);
3705
3706 } else { // VA.isRegLoc()
3707 // sanity check
3708      assert(VA.isMemLoc());
3709      assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
3710
3711 int index = VA.getValNo();
3712
3713 // Some Ins[] entries become multiple ArgLoc[] entries.
3714 // Process them only once.
3715 if (index != lastInsIndex)
3716 {
3717 ISD::ArgFlagsTy Flags = Ins[index].Flags;
3718 // FIXME: For now, all byval parameter objects are marked mutable.
3719 // This can be changed with more analysis.
3720      // In case of tail call optimization, mark all arguments mutable,
3721      // since they could be overwritten by lowering of arguments in case of
3722 // a tail call.
3723 if (Flags.isByVal()) {
3724          assert(Ins[index].isOrigArg() &&
3725                 "Byval arguments cannot be implicit");
3726 unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
3727
3728 int FrameIndex = StoreByValRegs(
3729 CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
3730 VA.getLocMemOffset(), Flags.getByValSize());
3731 InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
3732 CCInfo.nextInRegsParam();
3733 } else {
3734 unsigned FIOffset = VA.getLocMemOffset();
3735 int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
3736 FIOffset, true);
3737
3738 // Create load nodes to retrieve arguments from the stack.
3739 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3740 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
3741 MachinePointerInfo::getFixedStack(
3742 DAG.getMachineFunction(), FI)));
3743 }
3744 lastInsIndex = index;
3745 }
3746 }
3747 }
3748
3749 // varargs
3750 if (isVarArg && MFI.hasVAStart())
3751 VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
3752 CCInfo.getNextStackOffset(),
3753 TotalArgRegsSaveSize);
3754
3755 AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
3756
3757 return Chain;
3758}
3759
3760/// isFloatingPointZero - Return true if this is +0.0.
3761static bool isFloatingPointZero(SDValue Op) {
3762 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
3763 return CFP->getValueAPF().isPosZero();
3764 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
3765 // Maybe this has already been legalized into the constant pool?
3766 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
3767 SDValue WrapperOp = Op.getOperand(1).getOperand(0);
3768 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
3769 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
3770 return CFP->getValueAPF().isPosZero();
3771 }
3772 } else if (Op->getOpcode() == ISD::BITCAST &&
3773 Op->getValueType(0) == MVT::f64) {
3774 // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
3775 // created by LowerConstantFP().
3776 SDValue BitcastOp = Op->getOperand(0);
3777 if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
3778 isNullConstant(BitcastOp->getOperand(0)))
3779 return true;
3780 }
3781 return false;
3782}
3783
3784/// Returns the appropriate ARM CMP (cmp) and corresponding condition code for
3785/// the given operands.
3786SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3787 SDValue &ARMcc, SelectionDAG &DAG,
3788 const SDLoc &dl) const {
3789 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
3790 unsigned C = RHSC->getZExtValue();
3791 if (!isLegalICmpImmediate(C)) {
3792 // Constant does not fit, try adjusting it by one?
3793 switch (CC) {
3794 default: break;
3795 case ISD::SETLT:
3796 case ISD::SETGE:
3797 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
3798 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
3799 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
3800 }
3801 break;
3802 case ISD::SETULT:
3803 case ISD::SETUGE:
3804 if (C != 0 && isLegalICmpImmediate(C-1)) {
3805 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
3806 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
3807 }
3808 break;
3809 case ISD::SETLE:
3810 case ISD::SETGT:
3811 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
3812 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
3813 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
3814 }
3815 break;
3816 case ISD::SETULE:
3817 case ISD::SETUGT:
3818 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
3819 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
3820 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
3821 }
3822 break;
3823 }
3824 }
3825 }
3826
3827 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
3828 ARMISD::NodeType CompareType;
3829 switch (CondCode) {
3830 default:
3831 CompareType = ARMISD::CMP;
3832 break;
3833 case ARMCC::EQ:
3834 case ARMCC::NE:
3835 // Uses only Z Flag
3836 CompareType = ARMISD::CMPZ;
3837 break;
3838 }
3839 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
3840 return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
3841}
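As a concrete illustration of the immediate adjustment performed above (a sketch only, not part of the annotated file): 0x1000001 is not a valid ARM modified immediate, while 0x1000000 is, so a signed less-than against the former can be rewritten as a less-than-or-equal against the latter.

#include <cstdint>

// Illustrative sketch, not from ARMISelLowering.cpp.
// "x < 0x1000001" and "x <= 0x1000000" are equivalent for every int32_t x,
// and only the second constant can be encoded directly as a CMP immediate.
bool lt_form(int32_t x) { return x <  0x1000001; }
bool le_form(int32_t x) { return x <= 0x1000000; }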
3842
3843/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
3844SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
3845 SelectionDAG &DAG, const SDLoc &dl,
3846 bool InvalidOnQNaN) const {
3847  assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
3848 SDValue Cmp;
3849 SDValue C = DAG.getConstant(InvalidOnQNaN, dl, MVT::i32);
3850 if (!isFloatingPointZero(RHS))
3851 Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS, C);
3852 else
3853 Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS, C);
3854 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
3855}
3856
3857/// duplicateCmp - Glue values can have only one use, so this function
3858/// duplicates a comparison node.
3859SDValue
3860ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
3861 unsigned Opc = Cmp.getOpcode();
3862 SDLoc DL(Cmp);
3863 if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
3864 return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
3865
3866  assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
3867 Cmp = Cmp.getOperand(0);
3868 Opc = Cmp.getOpcode();
3869 if (Opc == ARMISD::CMPFP)
3870 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
3871 Cmp.getOperand(1), Cmp.getOperand(2));
3872 else {
3873    assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
3874 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
3875 Cmp.getOperand(1));
3876 }
3877 return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
3878}
3879
3880std::pair<SDValue, SDValue>
3881ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
3882 SDValue &ARMcc) const {
3883  assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
3884
3885 SDValue Value, OverflowCmp;
3886 SDValue LHS = Op.getOperand(0);
3887 SDValue RHS = Op.getOperand(1);
3888 SDLoc dl(Op);
3889
3890 // FIXME: We are currently always generating CMPs because we don't support
3891 // generating CMN through the backend. This is not as good as the natural
3892 // CMP case because it causes a register dependency and cannot be folded
3893 // later.
3894
3895 switch (Op.getOpcode()) {
3896 default:
3897    llvm_unreachable("Unknown overflow instruction!");
3898 case ISD::SADDO:
3899 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
3900 Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
3901 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
3902 break;
3903 case ISD::UADDO:
3904 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
3905 Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
3906 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
3907 break;
3908 case ISD::SSUBO:
3909 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
3910 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
3911 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
3912 break;
3913 case ISD::USUBO:
3914 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
3915 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
3916 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
3917 break;
3918 } // switch (...)
3919
3920 return std::make_pair(Value, OverflowCmp);
3921}
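For the unsigned cases above, comparing the result against LHS works because of a standard identity: an unsigned add wraps exactly when the sum is smaller than either operand, and the carry flag of CMP(Value, LHS) exposes that. A minimal sketch (hypothetical helper name, independent of the DAG code):

#include <cstdint>

// Illustrative sketch, not from ARMISelLowering.cpp.
// Returns true iff a + b overflows 32-bit unsigned arithmetic.
bool uadd_overflows(uint32_t a, uint32_t b) {
  uint32_t sum = a + b;   // Value = ADD(LHS, RHS)
  return sum < a;         // wrapped iff the sum dropped below LHS
}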
3922
3923SDValue
3924ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
3925 // Let legalize expand this if it isn't a legal type yet.
3926 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
3927 return SDValue();
3928
3929 SDValue Value, OverflowCmp;
3930 SDValue ARMcc;
3931 std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
3932 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3933 SDLoc dl(Op);
3934 // We use 0 and 1 as false and true values.
3935 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
3936 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
3937 EVT VT = Op.getValueType();
3938
3939 SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
3940 ARMcc, CCR, OverflowCmp);
3941
3942 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
3943 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
3944}
3945
3946SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
3947 SDValue Cond = Op.getOperand(0);
3948 SDValue SelectTrue = Op.getOperand(1);
3949 SDValue SelectFalse = Op.getOperand(2);
3950 SDLoc dl(Op);
3951 unsigned Opc = Cond.getOpcode();
3952
3953 if (Cond.getResNo() == 1 &&
3954 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
3955 Opc == ISD::USUBO)) {
3956 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
3957 return SDValue();
3958
3959 SDValue Value, OverflowCmp;
3960 SDValue ARMcc;
3961 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
3962 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3963 EVT VT = Op.getValueType();
3964
3965 return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
3966 OverflowCmp, DAG);
3967 }
3968
3969 // Convert:
3970 //
3971 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
3972 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
3973 //
3974 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
3975 const ConstantSDNode *CMOVTrue =
3976 dyn_cast<ConstantSDNode>(Cond.getOperand(0));
3977 const ConstantSDNode *CMOVFalse =
3978 dyn_cast<ConstantSDNode>(Cond.getOperand(1));
3979
3980 if (CMOVTrue && CMOVFalse) {
3981 unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
3982 unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
3983
3984 SDValue True;
3985 SDValue False;
3986 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
3987 True = SelectTrue;
3988 False = SelectFalse;
3989 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
3990 True = SelectFalse;
3991 False = SelectTrue;
3992 }
3993
3994 if (True.getNode() && False.getNode()) {
3995 EVT VT = Op.getValueType();
3996 SDValue ARMcc = Cond.getOperand(2);
3997 SDValue CCR = Cond.getOperand(3);
3998 SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
3999        assert(True.getValueType() == VT);
4000 return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
4001 }
4002 }
4003 }
4004
4005 // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
4006 // undefined bits before doing a full-word comparison with zero.
4007 Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
4008 DAG.getConstant(1, dl, Cond.getValueType()));
4009
4010 return DAG.getSelectCC(dl, Cond,
4011 DAG.getConstant(0, dl, Cond.getValueType()),
4012 SelectTrue, SelectFalse, ISD::SETNE);
4013}
4014
4015static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
4016 bool &swpCmpOps, bool &swpVselOps) {
4017 // Start by selecting the GE condition code for opcodes that return true for
4018 // 'equality'
4019 if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
4020 CC == ISD::SETULE)
4021 CondCode = ARMCC::GE;
4022
4023 // and GT for opcodes that return false for 'equality'.
4024 else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
4025 CC == ISD::SETULT)
4026 CondCode = ARMCC::GT;
4027
4028 // Since we are constrained to GE/GT, if the opcode contains 'less', we need
4029 // to swap the compare operands.
4030 if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
4031 CC == ISD::SETULT)
4032 swpCmpOps = true;
4033
4034 // Both GT and GE are ordered comparisons, and return false for 'unordered'.
4035 // If we have an unordered opcode, we need to swap the operands to the VSEL
4036 // instruction (effectively negating the condition).
4037 //
4038 // This also has the effect of swapping which one of 'less' or 'greater'
4039 // returns true, so we also swap the compare operands. It also switches
4040 // whether we return true for 'equality', so we compensate by picking the
4041 // opposite condition code to our original choice.
4042 if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
4043 CC == ISD::SETUGT) {
4044 swpCmpOps = !swpCmpOps;
4045 swpVselOps = !swpVselOps;
4046 CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
4047 }
4048
4049 // 'ordered' is 'anything but unordered', so use the VS condition code and
4050 // swap the VSEL operands.
4051 if (CC == ISD::SETO) {
4052 CondCode = ARMCC::VS;
4053 swpVselOps = true;
4054 }
4055
4056 // 'unordered or not equal' is 'anything but equal', so use the EQ condition
4057 // code and swap the VSEL operands.
4058 if (CC == ISD::SETUNE) {
4059 CondCode = ARMCC::EQ;
4060 swpVselOps = true;
4061 }
4062}
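A few worked traces of the rules above (derived only from the branches in this function, with no claims about the final instruction selection):

//   Illustrative traces, not part of the original file:
//   SETOGE: CondCode = GE, no operand swaps.
//   SETOLT: CondCode = GT, swpCmpOps = true (swap the compare operands).
//   SETULT: CondCode = GT and swpCmpOps = true, then the unordered rule swaps
//           both again: swpCmpOps = false, swpVselOps = true, CondCode = GE.
//   SETO:   CondCode = VS, swpVselOps = true.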
4063
4064SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
4065 SDValue TrueVal, SDValue ARMcc, SDValue CCR,
4066 SDValue Cmp, SelectionDAG &DAG) const {
4067 if (Subtarget->isFPOnlySP() && VT == MVT::f64) {
4068 FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
4069 DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
4070 TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
4071 DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
4072
4073 SDValue TrueLow = TrueVal.getValue(0);
4074 SDValue TrueHigh = TrueVal.getValue(1);
4075 SDValue FalseLow = FalseVal.getValue(0);
4076 SDValue FalseHigh = FalseVal.getValue(1);
4077
4078 SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
4079 ARMcc, CCR, Cmp);
4080 SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
4081 ARMcc, CCR, duplicateCmp(Cmp, DAG));
4082
4083 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
4084 } else {
4085 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
4086 Cmp);
4087 }
4088}
4089
4090static bool isGTorGE(ISD::CondCode CC) {
4091 return CC == ISD::SETGT || CC == ISD::SETGE;
4092}
4093
4094static bool isLTorLE(ISD::CondCode CC) {
4095 return CC == ISD::SETLT || CC == ISD::SETLE;
4096}
4097
4098// See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
4099// All of these conditions (and their <= and >= counterparts) will do:
4100// x < k ? k : x
4101// x > k ? x : k
4102// k < x ? x : k
4103// k > x ? k : x
4104static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
4105 const SDValue TrueVal, const SDValue FalseVal,
4106 const ISD::CondCode CC, const SDValue K) {
4107 return (isGTorGE(CC) &&
4108 ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||
4109 (isLTorLE(CC) &&
4110 ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
4111}
4112
4113// Similar to isLowerSaturate(), but checks for upper-saturating conditions.
4114static bool isUpperSaturate(const SDValue LHS, const SDValue RHS,
4115 const SDValue TrueVal, const SDValue FalseVal,
4116 const ISD::CondCode CC, const SDValue K) {
4117 return (isGTorGE(CC) &&
4118 ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal))) ||
4119 (isLTorLE(CC) &&
4120 ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal)));
4121}
4122
4123// Check if two chained conditionals could be converted into SSAT.
4124//
4125// SSAT can replace a set of two conditional selectors that bound a number to an
4126// interval of the form [~k, k] when k + 1 is a power of 2. Here are some examples:
4127//
4128// x < -k ? -k : (x > k ? k : x)
4129// x < -k ? -k : (x < k ? x : k)
4130// x > -k ? (x > k ? k : x) : -k
4131// x < k ? (x < -k ? -k : x) : k
4132// etc.
4133//
4134// It returns true if the conversion can be done, false otherwise.
4135// Additionally, the variable is returned in parameter V and the constant in K.
4136static bool isSaturatingConditional(const SDValue &Op, SDValue &V,
4137 uint64_t &K) {
4138 SDValue LHS1 = Op.getOperand(0);
4139 SDValue RHS1 = Op.getOperand(1);
4140 SDValue TrueVal1 = Op.getOperand(2);
4141 SDValue FalseVal1 = Op.getOperand(3);
4142 ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4143
4144 const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
4145 if (Op2.getOpcode() != ISD::SELECT_CC)
4146 return false;
4147
4148 SDValue LHS2 = Op2.getOperand(0);
4149 SDValue RHS2 = Op2.getOperand(1);
4150 SDValue TrueVal2 = Op2.getOperand(2);
4151 SDValue FalseVal2 = Op2.getOperand(3);
4152 ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
4153
4154 // Find out which are the constants and which are the variables
4155 // in each conditional
4156 SDValue *K1 = isa<ConstantSDNode>(LHS1) ? &LHS1 : isa<ConstantSDNode>(RHS1)
4157 ? &RHS1
4158 : nullptr;
4159 SDValue *K2 = isa<ConstantSDNode>(LHS2) ? &LHS2 : isa<ConstantSDNode>(RHS2)
4160 ? &RHS2
4161 : nullptr;
4162 SDValue K2Tmp = isa<ConstantSDNode>(TrueVal2) ? TrueVal2 : FalseVal2;
4163 SDValue V1Tmp = (K1 && *K1 == LHS1) ? RHS1 : LHS1;
4164 SDValue V2Tmp = (K2 && *K2 == LHS2) ? RHS2 : LHS2;
4165 SDValue V2 = (K2Tmp == TrueVal2) ? FalseVal2 : TrueVal2;
4166
4167 // We must detect cases where the original operations worked with 16- or
4168  // 8-bit values. In such cases, V2Tmp != V2 because the comparison operations
4169 // must work with sign-extended values but the select operations return
4170 // the original non-extended value.
4171 SDValue V2TmpReg = V2Tmp;
4172 if (V2Tmp->getOpcode() == ISD::SIGN_EXTEND_INREG)
4173 V2TmpReg = V2Tmp->getOperand(0);
4174
4175 // Check that the registers and the constants have the correct values
4176 // in both conditionals
4177 if (!K1 || !K2 || *K1 == Op2 || *K2 != K2Tmp || V1Tmp != V2Tmp ||
4178 V2TmpReg != V2)
4179 return false;
4180
4181 // Figure out which conditional is saturating the lower/upper bound.
4182 const SDValue *LowerCheckOp =
4183 isLowerSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
4184 ? &Op
4185 : isLowerSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
4186 ? &Op2
4187 : nullptr;
4188 const SDValue *UpperCheckOp =
4189 isUpperSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
4190 ? &Op
4191 : isUpperSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
4192 ? &Op2
4193 : nullptr;
4194
4195 if (!UpperCheckOp || !LowerCheckOp || LowerCheckOp == UpperCheckOp)
4196 return false;
4197
4198  // Check that the constant in the lower-bound check is the
4199  // one's complement of the constant in the upper-bound check
4200  // (i.e., LowerBound == ~UpperBound).
4201 int64_t Val1 = cast<ConstantSDNode>(*K1)->getSExtValue();
4202 int64_t Val2 = cast<ConstantSDNode>(*K2)->getSExtValue();
4203 int64_t PosVal = std::max(Val1, Val2);
4204
4205 if (((Val1 > Val2 && UpperCheckOp == &Op) ||
4206 (Val1 < Val2 && UpperCheckOp == &Op2)) &&
4207 Val1 == ~Val2 && isPowerOf2_64(PosVal + 1)) {
4208
4209 V = V2;
4210 K = (uint64_t)PosVal; // At this point, PosVal is guaranteed to be positive
4211 return true;
4212 }
4213
4214 return false;
4215}
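A source-level pattern that meets all of these checks (illustrative sketch; the real matching is done on the SELECT_CC nodes, and the exact SSAT operand encoding is not shown here):

#include <cstdint>

// Illustrative sketch, not from ARMISelLowering.cpp.
// Clamp to [-128, 127]: the two constants satisfy -128 == ~127 and
// 127 + 1 == 128 is a power of two, so the nested selects can collapse
// into a single signed-saturate node.
int32_t clamp_to_i8(int32_t x) {
  return x < -128 ? -128 : (x > 127 ? 127 : x);
}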
4216
4217SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
4218 EVT VT = Op.getValueType();
4219 SDLoc dl(Op);
4220
4221 // Try to convert two saturating conditional selects into a single SSAT
4222 SDValue SatValue;
4223 uint64_t SatConstant;
4224 if (((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2()) &&
4225 isSaturatingConditional(Op, SatValue, SatConstant))
4226 return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue,
4227 DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));
4228
4229 SDValue LHS = Op.getOperand(0);
4230 SDValue RHS = Op.getOperand(1);
4231 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4232 SDValue TrueVal = Op.getOperand(2);
4233 SDValue FalseVal = Op.getOperand(3);
4234
4235 if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
4236 DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
4237 dl);
4238
4239 // If softenSetCCOperands only returned one value, we should compare it to
4240 // zero.
4241 if (!RHS.getNode()) {
4242 RHS = DAG.getConstant(0, dl, LHS.getValueType());
4243 CC = ISD::SETNE;
4244 }
4245 }
4246
4247 if (LHS.getValueType() == MVT::i32) {
4248 // Try to generate VSEL on ARMv8.
4249 // The VSEL instruction can't use all the usual ARM condition
4250 // codes: it only has two bits to select the condition code, so it's
4251 // constrained to use only GE, GT, VS and EQ.
4252 //
4253 // To implement all the various ISD::SETXXX opcodes, we sometimes need to
4254 // swap the operands of the previous compare instruction (effectively
4255 // inverting the compare condition, swapping 'less' and 'greater') and
4256 // sometimes need to swap the operands to the VSEL (which inverts the
4257 // condition in the sense of firing whenever the previous condition didn't)
4258 if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
4259 TrueVal.getValueType() == MVT::f64)) {
4260 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
4261 if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
4262 CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
4263 CC = ISD::getSetCCInverse(CC, true);
4264 std::swap(TrueVal, FalseVal);
4265 }
4266 }
4267
4268 SDValue ARMcc;
4269 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4270 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
4271 return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
4272 }
4273
4274 ARMCC::CondCodes CondCode, CondCode2;
4275 bool InvalidOnQNaN;
4276 FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
4277
4278 // Try to generate VMAXNM/VMINNM on ARMv8.
4279 if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
4280 TrueVal.getValueType() == MVT::f64)) {
4281 bool swpCmpOps = false;
4282 bool swpVselOps = false;
4283 checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
4284
4285 if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
4286 CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
4287 if (swpCmpOps)
4288 std::swap(LHS, RHS);
4289 if (swpVselOps)
4290 std::swap(TrueVal, FalseVal);
4291 }
4292 }
4293
4294 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4295 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
4296 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4297 SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
4298 if (CondCode2 != ARMCC::AL) {
4299 SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
4300 // FIXME: Needs another CMP because flag can have but one use.
4301 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
4302 Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
4303 }
4304 return Result;
4305}
4306
4307/// canChangeToInt - Given the fp compare operand, return true if it is suitable
4308/// to morph to an integer compare sequence.
4309static bool canChangeToInt(SDValue Op, bool &SeenZero,
4310 const ARMSubtarget *Subtarget) {
4311 SDNode *N = Op.getNode();
4312 if (!N->hasOneUse())
4313 // Otherwise it requires moving the value from fp to integer registers.
4314 return false;
4315 if (!N->getNumValues())
4316 return false;
4317 EVT VT = Op.getValueType();
4318 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
4319 // f32 case is generally profitable. f64 case only makes sense when vcmpe +
4320 // vmrs are very slow, e.g. cortex-a8.
4321 return false;
4322
4323 if (isFloatingPointZero(Op)) {
4324 SeenZero = true;
4325 return true;
4326 }
4327 return ISD::isNormalLoad(N);
4328}
4329
4330static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
4331 if (isFloatingPointZero(Op))
4332 return DAG.getConstant(0, SDLoc(Op), MVT::i32);
4333
4334 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
4335 return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
4336 Ld->getPointerInfo(), Ld->getAlignment(),
4337 Ld->getMemOperand()->getFlags());
4338
4339  llvm_unreachable("Unknown VFP cmp argument!");
4340}
4341
4342static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
4343 SDValue &RetVal1, SDValue &RetVal2) {
4344 SDLoc dl(Op);
4345
4346 if (isFloatingPointZero(Op)) {
4347 RetVal1 = DAG.getConstant(0, dl, MVT::i32);
4348 RetVal2 = DAG.getConstant(0, dl, MVT::i32);
4349 return;
4350 }
4351
4352 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
4353 SDValue Ptr = Ld->getBasePtr();
4354 RetVal1 =
4355 DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
4356 Ld->getAlignment(), Ld->getMemOperand()->getFlags());
4357
4358 EVT PtrType = Ptr.getValueType();
4359 unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
4360 SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
4361 PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
4362 RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
4363 Ld->getPointerInfo().getWithOffset(4), NewAlign,
4364 Ld->getMemOperand()->getFlags());
4365 return;
4366 }
4367
4368  llvm_unreachable("Unknown VFP cmp argument!");
4369}
4370
4371/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
4372/// f32 and even f64 comparisons to integer ones.
4373SDValue
4374ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
4375 SDValue Chain = Op.getOperand(0);
4376 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
4377 SDValue LHS = Op.getOperand(2);
4378 SDValue RHS = Op.getOperand(3);
4379 SDValue Dest = Op.getOperand(4);
4380 SDLoc dl(Op);
4381
4382 bool LHSSeenZero = false;
4383 bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
4384 bool RHSSeenZero = false;
4385 bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
4386 if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
4387 // If unsafe fp math optimization is enabled and there are no other uses of
4388 // the CMP operands, and the condition code is EQ or NE, we can optimize it
4389 // to an integer comparison.
4390 if (CC == ISD::SETOEQ)
4391 CC = ISD::SETEQ;
4392 else if (CC == ISD::SETUNE)
4393 CC = ISD::SETNE;
4394
4395 SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);
4396 SDValue ARMcc;
4397 if (LHS.getValueType() == MVT::f32) {
4398 LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
4399 bitcastf32Toi32(LHS, DAG), Mask);
4400 RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
4401 bitcastf32Toi32(RHS, DAG), Mask);
4402 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
4403 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4404 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
4405 Chain, Dest, ARMcc, CCR, Cmp);
4406 }
4407
4408 SDValue LHS1, LHS2;
4409 SDValue RHS1, RHS2;
4410 expandf64Toi32(LHS, DAG, LHS1, LHS2);
4411 expandf64Toi32(RHS, DAG, RHS1, RHS2);
4412 LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
4413 RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
4414 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
4415 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4416 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
4417 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
4418 return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
4419 }
4420
4421 return SDValue();
4422}
4423
4424SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
4425 SDValue Chain = Op.getOperand(0);
4426 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
4427 SDValue LHS = Op.getOperand(2);
4428 SDValue RHS = Op.getOperand(3);
4429 SDValue Dest = Op.getOperand(4);
4430 SDLoc dl(Op);
4431
4432 if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
4433 DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
4434 dl);
4435
4436 // If softenSetCCOperands only returned one value, we should compare it to
4437 // zero.
4438 if (!RHS.getNode()) {
4439 RHS = DAG.getConstant(0, dl, LHS.getValueType());
4440 CC = ISD::SETNE;
4441 }
4442 }
4443
4444 if (LHS.getValueType() == MVT::i32) {
4445 SDValue ARMcc;
4446 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
4447 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4448 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
4449 Chain, Dest, ARMcc, CCR, Cmp);
4450 }
4451
4452  assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
4453
4454 if (getTargetMachine().Options.UnsafeFPMath &&
4455 (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
4456 CC == ISD::SETNE || CC == ISD::SETUNE)) {
4457 if (SDValue Result = OptimizeVFPBrcond(Op, DAG))
4458 return Result;
4459 }
4460
4461 ARMCC::CondCodes CondCode, CondCode2;
4462 bool InvalidOnQNaN;
4463 FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
4464
4465 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4466 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
4467 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4468 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
4469 SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
4470 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
4471 if (CondCode2 != ARMCC::AL) {
4472 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
4473 SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
4474 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
4475 }
4476 return Res;
4477}
4478
4479SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
4480 SDValue Chain = Op.getOperand(0);
4481 SDValue Table = Op.getOperand(1);
4482 SDValue Index = Op.getOperand(2);
4483 SDLoc dl(Op);
4484
4485 EVT PTy = getPointerTy(DAG.getDataLayout());
4486 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
4487 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
4488 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
4489 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
4490 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
4491 if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
4492    // Thumb2 and ARMv8-M use a two-level jump. That is, they jump into the jump
4493    // table, which does another jump to the destination. This also makes it easier
4494 // to translate it to TBB / TBH later (Thumb2 only).
4495 // FIXME: This might not work if the function is extremely large.
4496 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
4497 Addr, Op.getOperand(2), JTI);
4498 }
4499 if (isPositionIndependent() || Subtarget->isROPI()) {
4500 Addr =
4501 DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
4502 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
4503 Chain = Addr.getValue(1);
4504 Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
4505 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
4506 } else {
4507 Addr =
4508 DAG.getLoad(PTy, dl, Chain, Addr,
4509 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
4510 Chain = Addr.getValue(1);
4511 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
4512 }
4513}
4514
4515static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
4516 EVT VT = Op.getValueType();
4517 SDLoc dl(Op);
4518
4519 if (Op.getValueType().getVectorElementType() == MVT::i32) {
4520 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
4521 return Op;
4522 return DAG.UnrollVectorOp(Op.getNode());
4523 }
4524
4525  assert(Op.getOperand(0).getValueType() == MVT::v4f32 &&
4526         "Invalid type for custom lowering!");
4527 if (VT != MVT::v4i16)
4528 return DAG.UnrollVectorOp(Op.getNode());
4529
4530 Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0));
4531 return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
4532}
4533
4534SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
4535 EVT VT = Op.getValueType();
4536 if (VT.isVector())
4537 return LowerVectorFP_TO_INT(Op, DAG);
4538 if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) {
4539 RTLIB::Libcall LC;
4540 if (Op.getOpcode() == ISD::FP_TO_SINT)
4541 LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(),
4542 Op.getValueType());
4543 else
4544 LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(),
4545 Op.getValueType());
4546 return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
4547 /*isSigned*/ false, SDLoc(Op)).first;
4548 }
4549
4550 return Op;
4551}
4552
4553static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
4554 EVT VT = Op.getValueType();
4555 SDLoc dl(Op);
4556
4557 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
4558 if (VT.getVectorElementType() == MVT::f32)
4559 return Op;
4560 return DAG.UnrollVectorOp(Op.getNode());
4561 }
4562
4563  assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
4564         "Invalid type for custom lowering!");
4565 if (VT != MVT::v4f32)
4566 return DAG.UnrollVectorOp(Op.getNode());
4567
4568 unsigned CastOpc;
4569 unsigned Opc;
4570 switch (Op.getOpcode()) {
4571  default: llvm_unreachable("Invalid opcode!");
4572 case ISD::SINT_TO_FP:
4573 CastOpc = ISD::SIGN_EXTEND;
4574 Opc = ISD::SINT_TO_FP;
4575 break;
4576 case ISD::UINT_TO_FP:
4577 CastOpc = ISD::ZERO_EXTEND;
4578 Opc = ISD::UINT_TO_FP;
4579 break;
4580 }
4581
4582 Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
4583 return DAG.getNode(Opc, dl, VT, Op);
4584}
4585
4586SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
4587 EVT VT = Op.getValueType();
4588 if (VT.isVector())
4589 return LowerVectorINT_TO_FP(Op, DAG);
4590 if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) {
4591 RTLIB::Libcall LC;
4592 if (Op.getOpcode() == ISD::SINT_TO_FP)
4593 LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
4594 Op.getValueType());
4595 else
4596 LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
4597 Op.getValueType());
4598 return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
4599 /*isSigned*/ false, SDLoc(Op)).first;
4600 }
4601
4602 return Op;
4603}
4604
4605SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
4606 // Implement fcopysign with a fabs and a conditional fneg.
4607 SDValue Tmp0 = Op.getOperand(0);
4608 SDValue Tmp1 = Op.getOperand(1);
4609 SDLoc dl(Op);
4610 EVT VT = Op.getValueType();
4611 EVT SrcVT = Tmp1.getValueType();
4612 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
4613 Tmp0.getOpcode() == ARMISD::VMOVDRR;
4614 bool UseNEON = !InGPR && Subtarget->hasNEON();
4615
4616 if (UseNEON) {
4617 // Use VBSL to copy the sign bit.
4618 unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
4619 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
4620 DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
4621 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
4622 if (VT == MVT::f64)
4623 Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
4624 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
4625 DAG.getConstant(32, dl, MVT::i32));
4626 else /*if (VT == MVT::f32)*/
4627 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
4628 if (SrcVT == MVT::f32) {
4629 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
4630 if (VT == MVT::f64)
4631 Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
4632 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
4633 DAG.getConstant(32, dl, MVT::i32));
4634 } else if (VT == MVT::f32)
4635 Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
4636 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
4637 DAG.getConstant(32, dl, MVT::i32));
4638 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
4639 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
4640
4641 SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
4642 dl, MVT::i32);
4643 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
4644 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
4645 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
4646
4647 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
4648 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
4649 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
4650 if (VT == MVT::f32) {
4651 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
4652 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
4653 DAG.getConstant(0, dl, MVT::i32));
4654 } else {
4655 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
4656 }
4657
4658 return Res;
4659 }
4660
4661 // Bitcast operand 1 to i32.
4662 if (SrcVT == MVT::f64)
4663 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
4664 Tmp1).getValue(1);
4665 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
4666
4667 // Or in the signbit with integer operations.
4668 SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32);
4669 SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32);
4670 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
4671 if (VT == MVT::f32) {
4672 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
4673 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
4674 return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
4675 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
4676 }
4677
4678 // f64: Or the high part with signbit and then combine two parts.
4679 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
4680 Tmp0);
4681 SDValue Lo = Tmp0.getValue(0);
4682 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
4683 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
4684 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
4685}
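The f32 integer path above is the classic bit-level formulation of copysign; a standalone sketch under the usual IEEE-754 single-precision layout (hypothetical helper, not the DAG code):

#include <cstdint>
#include <cstring>

// Illustrative sketch, not from ARMISelLowering.cpp.
// Keep the magnitude bits of 'mag' and OR in the sign bit of 'sgn'.
float copysign_f32(float mag, float sgn) {
  uint32_t m, s;
  std::memcpy(&m, &mag, sizeof m);               // ISD::BITCAST f32 -> i32
  std::memcpy(&s, &sgn, sizeof s);
  uint32_t r = (m & 0x7fffffffu) | (s & 0x80000000u);
  float out;
  std::memcpy(&out, &r, sizeof out);
  return out;
}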
4686
4687SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
4688 MachineFunction &MF = DAG.getMachineFunction();
4689 MachineFrameInfo &MFI = MF.getFrameInfo();
4690 MFI.setReturnAddressIsTaken(true);
4691
4692 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
4693 return SDValue();
4694
4695 EVT VT = Op.getValueType();
4696 SDLoc dl(Op);
4697 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4698 if (Depth) {
4699 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
4700 SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
4701 return DAG.getLoad(VT, dl, DAG.getEntryNode(),
4702 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
4703 MachinePointerInfo());
4704 }
4705
4706 // Return LR, which contains the return address. Mark it an implicit live-in.
4707 unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
4708 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
4709}
4710
4711SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
4712 const ARMBaseRegisterInfo &ARI =
4713 *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
4714 MachineFunction &MF = DAG.getMachineFunction();
4715 MachineFrameInfo &MFI = MF.getFrameInfo();
4716 MFI.setFrameAddressIsTaken(true);
4717
4718 EVT VT = Op.getValueType();
4719 SDLoc dl(Op); // FIXME probably not meaningful
4720 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4721 unsigned FrameReg = ARI.getFrameRegister(MF);
4722 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
4723 while (Depth--)
4724 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
4725 MachinePointerInfo());
4726 return FrameAddr;
4727}
4728
4729// FIXME? Maybe this could be a TableGen attribute on some registers and
4730// this table could be generated automatically from RegInfo.
4731unsigned ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT,
4732 SelectionDAG &DAG) const {
4733 unsigned Reg = StringSwitch<unsigned>(RegName)
4734 .Case("sp", ARM::SP)
4735 .Default(0);
4736 if (Reg)
4737 return Reg;
4738 report_fatal_error(Twine("Invalid register name \""
4739 + StringRef(RegName) + "\"."));
4740}
4741
4742// The result is a 64-bit value, so split it into two 32-bit values and return
4743// them as a pair of values.
4744static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
4745 SelectionDAG &DAG) {
4746 SDLoc DL(N);
4747
4748 // This function is only supposed to be called for i64 type destination.
4749  assert(N->getValueType(0) == MVT::i64
4750         && "ExpandREAD_REGISTER called for non-i64 type result.");
4751
4752 SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL,
4753 DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),
4754 N->getOperand(0),
4755 N->getOperand(1));
4756
4757 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0),
4758 Read.getValue(1)));
4759 Results.push_back(Read.getOperand(0));
4760}
4761
4762/// \p BC is a bitcast that is about to be turned into a VMOVDRR.
4763/// When \p DstVT, the destination type of \p BC, is on the vector
4764/// register bank and the source of bitcast, \p Op, operates on the same bank,
4765/// it might be possible to combine them, such that everything stays on the
4766/// vector register bank.
4767/// \return The node that would replace \p BC, if the combine
4768/// is possible.
4769static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC,
4770 SelectionDAG &DAG) {
4771 SDValue Op = BC->getOperand(0);
4772 EVT DstVT = BC->getValueType(0);
4773
4774 // The only vector instruction that can produce a scalar (remember,
4775 // since the bitcast was about to be turned into VMOVDRR, the source
4776 // type is i64) from a vector is EXTRACT_VECTOR_ELT.
4777 // Moreover, we can do this combine only if there is one use.
4778 // Finally, if the destination type is not a vector, there is not
4779  // much point in forcing everything on the vector bank.
4780 if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
4781 !Op.hasOneUse())
4782 return SDValue();
4783
4784 // If the index is not constant, we will introduce an additional
4785 // multiply that will stick.
4786 // Give up in that case.
4787 ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));
4788 if (!Index)
4789 return SDValue();
4790 unsigned DstNumElt = DstVT.getVectorNumElements();
4791
4792 // Compute the new index.
4793 const APInt &APIntIndex = Index->getAPIntValue();
4794 APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);
4795 NewIndex *= APIntIndex;
4796 // Check if the new constant index fits into i32.
4797 if (NewIndex.getBitWidth() > 32)
4798 return SDValue();
4799
4800 // vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
4801 // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M)
4802 SDLoc dl(Op);
4803 SDValue ExtractSrc = Op.getOperand(0);
4804 EVT VecVT = EVT::getVectorVT(
4805 *DAG.getContext(), DstVT.getScalarType(),
4806 ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);
4807 SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);
4808 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,
4809 DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));
4810}
4811
4812/// ExpandBITCAST - If the target supports VFP, this function is called to
4813/// expand a bit convert where either the source or destination type is i64 to
4814/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
4815/// operand type is illegal (e.g., v2f32 for a target that doesn't support
4816/// vectors), since the legalizer won't know what to do with that.
4817static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
4818 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4819 SDLoc dl(N);
4820 SDValue Op = N->getOperand(0);
4821
4822 // This function is only supposed to be called for i64 types, either as the
4823 // source or destination of the bit convert.
4824 EVT SrcVT = Op.getValueType();
4825 EVT DstVT = N->getValueType(0);
4826  assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
4827         "ExpandBITCAST called for non-i64 type");
4828
4829 // Turn i64->f64 into VMOVDRR.
4830 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
4831 // Do not force values to GPRs (this is what VMOVDRR does for the inputs)
4832 // if we can combine the bitcast with its source.
4833 if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG))
4834 return Val;
4835
4836 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
4837 DAG.getConstant(0, dl, MVT::i32));
4838 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
4839 DAG.getConstant(1, dl, MVT::i32));
4840 return DAG.getNode(ISD::BITCAST, dl, DstVT,
4841 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
4842 }
4843
4844 // Turn f64->i64 into VMOVRRD.
4845 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
4846 SDValue Cvt;
4847 if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() &&
4848 SrcVT.getVectorNumElements() > 1)
4849 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
4850 DAG.getVTList(MVT::i32, MVT::i32),
4851 DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
4852 else
4853 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
4854 DAG.getVTList(MVT::i32, MVT::i32), Op);
4855 // Merge the pieces into a single i64 value.
4856 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
4857 }
4858
4859 return SDValue();
4860}
4861
4862/// getZeroVector - Returns a vector of specified type with all zero elements.
4863/// Zero vectors are used to represent vector negation and in those cases
4864/// will be implemented with the NEON VNEG instruction. However, VNEG does
4865/// not support i64 elements, so sometimes the zero vectors will need to be
4866/// explicitly constructed. Regardless, use a canonical VMOV to create the
4867/// zero vector.
4868static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
4869  assert(VT.isVector() && "Expected a vector type");
4870 // The canonical modified immediate encoding of a zero vector is....0!
4871 SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32);
4872 EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
4873 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
4874 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
4875}
4876
4877/// LowerShiftRightParts - Lower SRA_PARTS/SRL_PARTS, which return two
4878/// i32 values and take a 2 x i32 value to shift plus a shift amount.
4879SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
4880 SelectionDAG &DAG) const {
4881  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
4882 EVT VT = Op.getValueType();
4883 unsigned VTBits = VT.getSizeInBits();
4884 SDLoc dl(Op);
4885 SDValue ShOpLo = Op.getOperand(0);
4886 SDValue ShOpHi = Op.getOperand(1);
4887 SDValue ShAmt = Op.getOperand(2);
4888 SDValue ARMcc;
4889 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4890 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
4891
4892  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
4893
4894 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
4895 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
4896 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
4897 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
4898 DAG.getConstant(VTBits, dl, MVT::i32));
4899 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
4900 SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
4901 SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
4902 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
4903 ISD::SETGE, ARMcc, DAG, dl);
4904 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift,
4905 ARMcc, CCR, CmpLo);
4906
4907
4908 SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
4909 SDValue HiBigShift = Opc == ISD::SRA
4910 ? DAG.getNode(Opc, dl, VT, ShOpHi,
4911 DAG.getConstant(VTBits - 1, dl, VT))
4912 : DAG.getConstant(0, dl, VT);
4913 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
4914 ISD::SETGE, ARMcc, DAG, dl);
4915 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
4916 ARMcc, CCR, CmpHi);
4917
4918 SDValue Ops[2] = { Lo, Hi };
4919 return DAG.getMergeValues(Ops, dl);
4920}
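The CMOV pair above implements the standard two-word right shift; a scalar sketch of the SRL_PARTS case for a 64-bit value split into i32 halves (illustrative only, assuming 0 < amt < 64):

#include <cstdint>

// Illustrative sketch, not from ARMISelLowering.cpp.
// Logical right shift of {hi:lo} by amt; the amt >= 32 branch corresponds to
// the "big shift" CMOV arm, and SRA_PARTS would use (int32_t)hi >> 31 instead
// of 0 for the high word.
void srl_parts(uint32_t lo, uint32_t hi, unsigned amt,
               uint32_t &outLo, uint32_t &outHi) {
  if (amt < 32) {
    outLo = (lo >> amt) | (hi << (32 - amt));
    outHi = hi >> amt;
  } else {
    outLo = hi >> (amt - 32);
    outHi = 0;
  }
}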
4921
4922/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
4923/// i32 values and takes a 2 x i32 value to shift plus a shift amount.
4924SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
4925 SelectionDAG &DAG) const {
4926  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
4927 EVT VT = Op.getValueType();
4928 unsigned VTBits = VT.getSizeInBits();
4929 SDLoc dl(Op);
4930 SDValue ShOpLo = Op.getOperand(0);
4931 SDValue ShOpHi = Op.getOperand(1);
4932 SDValue ShAmt = Op.getOperand(2);
4933 SDValue ARMcc;
4934 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4935
4936  assert(Op.getOpcode() == ISD::SHL_PARTS);
4937 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
4938 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
4939 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
4940 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
4941 SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
4942
4943 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
4944 DAG.getConstant(VTBits, dl, MVT::i32));
4945 SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
4946 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
4947 ISD::SETGE, ARMcc, DAG, dl);
4948 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
4949 ARMcc, CCR, CmpHi);
4950
4951 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
4952 ISD::SETGE, ARMcc, DAG, dl);
4953 SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
4954 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift,
4955 DAG.getConstant(0, dl, VT), ARMcc, CCR, CmpLo);
4956
4957 SDValue Ops[2] = { Lo, Hi };
4958 return DAG.getMergeValues(Ops, dl);
4959}
4960
4961SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
4962 SelectionDAG &DAG) const {
4963 // The rounding mode is in bits 23:22 of the FPSCR.
4964 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
4965  // The formula we use to implement this is (((FPSCR + (1 << 22)) >> 22) & 3),
4966  // so that the shift and the AND get folded into a bitfield extract.
4967 SDLoc dl(Op);
4968 SDValue Ops[] = { DAG.getEntryNode(),
4969 DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32) };
4970
4971 SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, MVT::i32, Ops);
4972 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
4973 DAG.getConstant(1U << 22, dl, MVT::i32));
4974 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
4975 DAG.getConstant(22, dl, MVT::i32));
4976 return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
4977 DAG.getConstant(3, dl, MVT::i32));
4978}
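Worked through for the four FPSCR rounding modes, the add-then-extract really does produce the 0->1, 1->2, 2->3, 3->0 mapping described in the comment above (illustrative table, not part of the original file):

//   RMode (FPSCR bits 23:22)   (RMode + 1) & 3   FLT_ROUNDS value
//   0  (round to nearest)             1          1 = to nearest
//   1  (round toward +inf)            2          2 = toward +infinity
//   2  (round toward -inf)            3          3 = toward -infinity
//   3  (round toward zero)            0          0 = toward zero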
4979
4980static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
4981 const ARMSubtarget *ST) {
4982 SDLoc dl(N);
4983 EVT VT = N->getValueType(0);
4984 if (VT.isVector()) {
4985    assert(ST->hasNEON());
4986
4987 // Compute the least significant set bit: LSB = X & -X
4988 SDValue X = N->getOperand(0);
4989 SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X);
4990 SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX);
4991
4992 EVT ElemTy = VT.getVectorElementType();
4993
4994 if (ElemTy == MVT::i8) {
4995 // Compute with: cttz(x) = ctpop(lsb - 1)
4996 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
4997 DAG.getTargetConstant(1, dl, ElemTy));
4998 SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
4999 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
5000 }
5001
5002 if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&
5003 (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {
5004 // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0
5005 unsigned NumBits = ElemTy.getSizeInBits();
5006 SDValue WidthMinus1 =
5007 DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5008 DAG.getTargetConstant(NumBits - 1, dl, ElemTy));
5009 SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB);
5010 return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ);
5011 }
5012
5013 // Compute with: cttz(x) = ctpop(lsb - 1)
5014
5015 // Since we can only compute the number of bits in a byte with vcnt.8, we
5016 // have to gather the result with pairwise addition (vpaddl) for i16, i32,
5017 // and i64.
5018
5019 // Compute LSB - 1.
5020 SDValue Bits;
5021 if (ElemTy == MVT::i64) {
5022 // Load constant 0xffff'ffff'ffff'ffff to register.
5023 SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5024 DAG.getTargetConstant(0x1eff, dl, MVT::i32));
5025 Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF);
5026 } else {
5027 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5028 DAG.getTargetConstant(1, dl, ElemTy));
5029 Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
5030 }
5031
5032 // Count #bits with vcnt.8.
5033 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
5034 SDValue BitsVT8 = DAG.getNode(ISD::BITCAST, dl, VT8Bit, Bits);
5035 SDValue Cnt8 = DAG.getNode(ISD::CTPOP, dl, VT8Bit, BitsVT8);
5036
5037 // Gather the #bits with vpaddl (pairwise add.)
5038 EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
5039 SDValue Cnt16 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT16Bit,
5040 DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
5041 Cnt8);
5042 if (ElemTy == MVT::i16)
5043 return Cnt16;
5044
5045 EVT VT32Bit = VT.is64BitVector() ? MVT::v2i32 : MVT::v4i32;
5046 SDValue Cnt32 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT32Bit,
5047 DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
5048 Cnt16);
5049 if (ElemTy == MVT::i32)
5050 return Cnt32;
5051
5052 assert(ElemTy == MVT::i64);
5053 SDValue Cnt64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
5054 DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
5055 Cnt32);
5056 return Cnt64;
5057 }
5058
5059 if (!ST->hasV6T2Ops())
5060 return SDValue();
5061
5062 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
5063 return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
5064}
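
The vector path relies on the identity cttz(x) = ctpop((x & -x) - 1), with the (width - 1) - ctlz(x & -x) variant used for CTTZ_ZERO_UNDEF on i16/i32 lanes. A scalar sketch of both identities for 32-bit values, assuming x != 0 for the ctlz-based form (popcount32 and ctlz32 are plain helpers written only for this check):

    #include <cassert>
    #include <cstdint>

    static unsigned popcount32(uint32_t X) {
      unsigned N = 0;
      for (; X; X &= X - 1) ++N;          // clear the lowest set bit each round
      return N;
    }

    static unsigned ctlz32(uint32_t X) {  // assumes X != 0
      unsigned N = 0;
      for (uint32_t Bit = 1u << 31; !(X & Bit); Bit >>= 1) ++N;
      return N;
    }

    int main() {
      for (uint32_t X : {1u, 2u, 12u, 0x80000000u, 0xdeadbeefu}) {
        uint32_t LSB = X & (0u - X);      // least significant set bit
        // cttz via ctpop(LSB - 1) equals cttz via (31 - ctlz(LSB)).
        assert(popcount32(LSB - 1) == 31u - ctlz32(LSB));
      }
    }
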
5065
5066/// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count
5067/// for each 16-bit element from operand, repeated. The basic idea is to
5068/// leverage vcnt to get the 8-bit counts, gather and add the results.
5069///
5070/// Trace for v4i16:
5071/// input = [v0 v1 v2 v3 ] (vi 16-bit element)
5072/// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element)
5073/// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi)
5074/// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6]
5075/// [b0 b1 b2 b3 b4 b5 b6 b7]
5076/// +[b1 b0 b3 b2 b5 b4 b7 b6]
5077/// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0,
5078/// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits)
5079static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) {
5080 EVT VT = N->getValueType(0);
5081 SDLoc DL(N);
5082
5083 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
5084 SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0));
5085 SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0);
5086 SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1);
5087 SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2);
5088 return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3);
5089}
5090
5091/// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the
5092/// bit-count for each 16-bit element from the operand. We need slightly
5093/// different sequencing for v4i16 and v8i16 to stay within NEON's available
5094/// 64/128-bit registers.
5095///
5096/// Trace for v4i16:
5097/// input = [v0 v1 v2 v3 ] (vi 16-bit element)
5098/// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi)
5099/// v8i16:Extended = [k0 k1 k2 k3 k0 k1 k2 k3 ]
5100/// v4i16:Extracted = [k0 k1 k2 k3 ]
5101static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) {
5102 EVT VT = N->getValueType(0);
5103 SDLoc DL(N);
5104
5105 SDValue BitCounts = getCTPOP16BitCounts(N, DAG);
5106 if (VT.is64BitVector()) {
5107 SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts);
5108 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended,
5109 DAG.getIntPtrConstant(0, DL));
5110 } else {
5111 SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8,
5112 BitCounts, DAG.getIntPtrConstant(0, DL));
5113 return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted);
5114 }
5115}
5116
5117/// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the
5118/// bit-count for each 32-bit element from the operand. The idea here is
5119/// to split the vector into 16-bit elements, leverage the 16-bit count
5120/// routine, and then combine the results.
5121///
5122/// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged):
5123/// input = [v0 v1 ] (vi: 32-bit elements)
5124/// Bitcast = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1])
5125/// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi)
5126/// vrev: N0 = [k1 k0 k3 k2 ]
5127/// [k0 k1 k2 k3 ]
5128/// N1 =+[k1 k0 k3 k2 ]
5129/// [k0 k2 k1 k3 ]
5130/// N2 =+[k1 k3 k0 k2 ]
5131/// [k0 k2 k1 k3 ]
5132/// Extended =+[k1 k3 k0 k2 ]
5133/// [k0 k2 ]
5134/// Extracted=+[k1 k3 ]
5135///
5136static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) {
5137 EVT VT = N->getValueType(0);
5138 SDLoc DL(N);
5139
5140 EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
5141
5142 SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0));
5143 SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG);
5144 SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16);
5145 SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0);
5146 SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1);
5147
5148 if (VT.is64BitVector()) {
5149 SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2);
5150 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended,
5151 DAG.getIntPtrConstant(0, DL));
5152 } else {
5153 SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2,
5154 DAG.getIntPtrConstant(0, DL));
5155 return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted);
5156 }
5157}
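
Both CTPOP helpers follow the same pattern: take narrower per-lane counts, add them to a copy whose neighbouring lanes are swapped (VREV), then de-interleave (VUZP) so each wider lane holds the sum of its sub-lane counts. A scalar sketch of the add-with-swapped-neighbours step for the 32-bit case, using a plain array in place of a NEON register (the count values are made up for the check):

    #include <cassert>
    #include <cstdint>

    int main() {
      // Per-16-bit-lane popcounts of some v2i32 input, laid out as v4i16.
      uint16_t Counts16[4] = {3, 5, 7, 2};

      // VREV32.16: swap the 16-bit lanes within each 32-bit group.
      uint16_t Rev[4] = {Counts16[1], Counts16[0], Counts16[3], Counts16[2]};

      // ADD: every 16-bit lane now holds the popcount of its 32-bit parent.
      uint16_t Sum[4];
      for (int I = 0; I < 4; ++I) Sum[I] = (uint16_t)(Counts16[I] + Rev[I]);

      assert(Sum[0] == 8 && Sum[1] == 8 && Sum[2] == 9 && Sum[3] == 9);
      // A VUZP-style de-interleave would then keep {Sum[0], Sum[2]} as the
      // two 32-bit results.
    }
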
5158
5159static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
5160 const ARMSubtarget *ST) {
5161 EVT VT = N->getValueType(0);
5162
5163 assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
5164 assert((VT == MVT::v2i32 || VT == MVT::v4i32 ||
5165 VT == MVT::v4i16 || VT == MVT::v8i16) &&
5166 "Unexpected type for custom ctpop lowering");
5167
5168 if (VT.getVectorElementType() == MVT::i32)
5169 return lowerCTPOP32BitElements(N, DAG);
5170 else
5171 return lowerCTPOP16BitElements(N, DAG);
5172}
5173
5174static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
5175 const ARMSubtarget *ST) {
5176 EVT VT = N->getValueType(0);
5177 SDLoc dl(N);
5178
5179 if (!VT.isVector())
5180 return SDValue();
5181
5182 // Lower vector shifts on NEON to use VSHL.
5183 assert(ST->hasNEON() && "unexpected vector shift");
5184
5185 // Left shifts translate directly to the vshiftu intrinsic.
5186 if (N->getOpcode() == ISD::SHL)
5187 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
5188 DAG.getConstant(Intrinsic::arm_neon_vshiftu, dl,
5189 MVT::i32),
5190 N->getOperand(0), N->getOperand(1));
5191
5192 assert((N->getOpcode() == ISD::SRA ||
5193 N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
5194
5195 // NEON uses the same intrinsics for both left and right shifts. For
5196 // right shifts, the shift amounts are negative, so negate the vector of
5197 // shift amounts.
5198 EVT ShiftVT = N->getOperand(1).getValueType();
5199 SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
5200 getZeroVector(ShiftVT, DAG, dl),
5201 N->getOperand(1));
5202 Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
5203 Intrinsic::arm_neon_vshifts :
5204 Intrinsic::arm_neon_vshiftu);
5205 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
5206 DAG.getConstant(vshiftInt, dl, MVT::i32),
5207 N->getOperand(0), NegatedCount);
5208}
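
NEON's VSHL takes a per-lane shift amount that may be negative, which is why the right-shift cases are lowered by negating the counts and reusing the shift intrinsics. A one-lane scalar model of that equivalence for the unsigned case (vshlLane is an illustrative stand-in, not the intrinsic's definition):

    #include <cassert>
    #include <cstdint>

    // One-lane model of VSHL on an unsigned element: positive counts shift
    // left, negative counts shift right.
    uint32_t vshlLane(uint32_t X, int Count) {
      return Count >= 0 ? (X << Count) : (X >> -Count);
    }

    int main() {
      uint32_t X = 0xf0u;
      int ShiftAmt = 3;
      // SRL lowering: negate the per-lane count and reuse the left-shift form.
      assert(vshlLane(X, -ShiftAmt) == (X >> ShiftAmt));
    }
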
5209
5210static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
5211 const ARMSubtarget *ST) {
5212 EVT VT = N->getValueType(0);
5213 SDLoc dl(N);
5214
5215 // We can get here for a node like i32 = ISD::SHL i32, i64
5216 if (VT != MVT::i64)
5217 return SDValue();
5218
5219 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
5220 "Unknown shift to lower!");
5221
5222 // We only lower SRA, SRL of 1 here, all others use generic lowering.
5223 if (!isOneConstant(N->getOperand(1)))
5224 return SDValue();
5225
5226 // If we are in thumb mode, we don't have RRX.
5227 if (ST->isThumb1Only()) return SDValue();
5228
5229 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
5230 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
5231 DAG.getConstant(0, dl, MVT::i32));
5232 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
5233 DAG.getConstant(1, dl, MVT::i32));
5234
5235 // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
5236 // captures the result into a carry flag.
5237 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
5238 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
5239
5240 // The low part is an ARMISD::RRX operand, which shifts the carry in.
5241 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
5242
5243 // Merge the pieces into a single i64 value.
5244 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
5245}
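
The shift-by-one case works because SRL_FLAG/SRA_FLAG shift the high word and leave the bit shifted out in the carry flag, which RRX then rotates into bit 31 of the low word. A scalar sketch of the same data flow for a logical shift, with the carry modelled as an ordinary variable:

    #include <cassert>
    #include <cstdint>

    // Sketch of a 64-bit logical shift right by 1 done as SRL_FLAG + RRX.
    uint64_t lsr64by1(uint64_t V) {
      uint32_t Lo = (uint32_t)V, Hi = (uint32_t)(V >> 32);
      uint32_t Carry = Hi & 1;            // bit shifted out by SRL_FLAG
      Hi >>= 1;                           // SRL_FLAG on the high word
      Lo = (Lo >> 1) | (Carry << 31);     // RRX rotates the carry into bit 31
      return ((uint64_t)Hi << 32) | Lo;
    }

    int main() {
      assert(lsr64by1(0x0000000300000001ull) == 0x0000000180000000ull);
    }
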
5246
5247static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
5248 SDValue TmpOp0, TmpOp1;
5249 bool Invert = false;
5250 bool Swap = false;
5251 unsigned Opc = 0;
5252
5253 SDValue Op0 = Op.getOperand(0);
5254 SDValue Op1 = Op.getOperand(1);
5255 SDValue CC = Op.getOperand(2);
5256 EVT CmpVT = Op0.getValueType().changeVectorElementTypeToInteger();
5257 EVT VT = Op.getValueType();
5258 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
5259 SDLoc dl(Op);
5260
5261 if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
5262 (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
5263 // Special-case integer 64-bit equality comparisons. They aren't legal,
5264 // but they can be lowered with a few vector instructions.
5265 unsigned CmpElements = CmpVT.getVectorNumElements() * 2;
5266 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements);
5267 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0);
5268 SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1);
5269 SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1,
5270 DAG.getCondCode(ISD::SETEQ));
5271 SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp);
5272 SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed);
5273 Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged);
5274 if (SetCCOpcode == ISD::SETNE)
5275 Merged = DAG.getNOT(dl, Merged, CmpVT);
5276 Merged = DAG.getSExtOrTrunc(Merged, dl, VT);
5277 return Merged;
5278 }
5279
5280 if (CmpVT.getVectorElementType() == MVT::i64)
5281 // 64-bit comparisons are not legal in general.
5282 return SDValue();
5283
5284 if (Op1.getValueType().isFloatingPoint()) {
5285 switch (SetCCOpcode) {
5286 default: llvm_unreachable("Illegal FP comparison");
5287 case ISD::SETUNE:
5288 case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH;
5289 case ISD::SETOEQ:
5290 case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
5291 case ISD::SETOLT:
5292 case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
5293 case ISD::SETOGT:
5294 case ISD::SETGT: Opc = ARMISD::VCGT; break;
5295 case ISD::SETOLE:
5296 case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH;
5297 case ISD::SETOGE:
5298 case ISD::SETGE: Opc = ARMISD::VCGE; break;
5299 case ISD::SETUGE: Swap = true; LLVM_FALLTHROUGH;
5300 case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
5301 case ISD::SETUGT: Swap = true; LLVM_FALLTHROUGH;
5302 case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
5303 case ISD::SETUEQ: Invert = true; LLVM_FALLTHROUGH;
5304 case ISD::SETONE:
5305 // Expand this to (OLT | OGT).
5306 TmpOp0 = Op0;
5307 TmpOp1 = Op1;
5308 Opc = ISD::OR;
5309 Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
5310 Op1 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp0, TmpOp1);
5311 break;
5312 case ISD::SETUO:
5313 Invert = true;
5314 LLVM_FALLTHROUGH;
5315 case ISD::SETO:
5316 // Expand this to (OLT | OGE).
5317 TmpOp0 = Op0;
5318 TmpOp1 = Op1;
5319 Opc = ISD::OR;
5320 Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
5321 Op1 = DAG.getNode(ARMISD::VCGE, dl, CmpVT, TmpOp0, TmpOp1);
5322 break;
5323 }
5324 } else {
5325 // Integer comparisons.
5326 switch (SetCCOpcode) {
5327 default: llvm_unreachable("Illegal integer comparison");
5328 case ISD::SETNE: Invert = true;
5329 case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
5330 case ISD::SETLT: Swap = true;
5331 case ISD::SETGT: Opc = ARMISD::VCGT; break;
5332 case ISD::SETLE: Swap = true;
5333 case ISD::SETGE: Opc = ARMISD::VCGE; break;
5334 case ISD::SETULT: Swap = true;
5335 case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
5336 case ISD::SETULE: Swap = true;
5337 case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
5338 }
5339
5340 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
5341 if (Opc == ARMISD::VCEQ) {
5342
5343 SDValue AndOp;
5344 if (ISD::isBuildVectorAllZeros(Op1.getNode()))
5345 AndOp = Op0;
5346 else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
5347 AndOp = Op1;
5348
5349 // Ignore bitconvert.
5350 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
5351 AndOp = AndOp.getOperand(0);
5352
5353 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
5354 Opc = ARMISD::VTST;
5355 Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
5356 Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
5357 Invert = !Invert;
5358 }
5359 }
5360 }
5361
5362 if (Swap)
5363 std::swap(Op0, Op1);
5364
5365 // If one of the operands is a constant vector zero, attempt to fold the
5366 // comparison to a specialized compare-against-zero form.
5367 SDValue SingleOp;
5368 if (ISD::isBuildVectorAllZeros(Op1.getNode()))
5369 SingleOp = Op0;
5370 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
5371 if (Opc == ARMISD::VCGE)
5372 Opc = ARMISD::VCLEZ;
5373 else if (Opc == ARMISD::VCGT)
5374 Opc = ARMISD::VCLTZ;
5375 SingleOp = Op1;
5376 }
5377
5378 SDValue Result;
5379 if (SingleOp.getNode()) {
5380 switch (Opc) {
5381 case ARMISD::VCEQ:
5382 Result = DAG.getNode(ARMISD::VCEQZ, dl, CmpVT, SingleOp); break;
5383 case ARMISD::VCGE:
5384 Result = DAG.getNode(ARMISD::VCGEZ, dl, CmpVT, SingleOp); break;
5385 case ARMISD::VCLEZ:
5386 Result = DAG.getNode(ARMISD::VCLEZ, dl, CmpVT, SingleOp); break;
5387 case ARMISD::VCGT:
5388 Result = DAG.getNode(ARMISD::VCGTZ, dl, CmpVT, SingleOp); break;
5389 case ARMISD::VCLTZ:
5390 Result = DAG.getNode(ARMISD::VCLTZ, dl, CmpVT, SingleOp); break;
5391 default:
5392 Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
5393 }
5394 } else {
5395 Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
5396 }
5397
5398 Result = DAG.getSExtOrTrunc(Result, dl, VT);
5399
5400 if (Invert)
5401 Result = DAG.getNOT(dl, Result, VT);
5402
5403 return Result;
5404}
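
The 64-bit equality path compares the value as 32-bit lanes, then ANDs the result with a copy whose lanes are swapped within each 64-bit group (VREV64), so a 64-bit lane is all-ones only when both halves matched. A one-lane scalar sketch of that combination, with the compare masks modelled as 0/0xffffffff words:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t A = 0x1234567800000000ull, B = 0x1234567800000001ull;

      // SETCC on the 32-bit halves, producing all-ones/all-zeros lane masks.
      uint32_t CmpLo = ((uint32_t)A == (uint32_t)B) ? 0xffffffffu : 0u;
      uint32_t CmpHi = ((uint32_t)(A >> 32) == (uint32_t)(B >> 32)) ? 0xffffffffu : 0u;

      // VREV64.32 swaps the two lanes; the AND forces both halves to agree.
      uint32_t Lo = CmpLo & CmpHi;
      uint32_t Hi = CmpHi & CmpLo;

      assert(Lo == 0u && Hi == 0u);   // A != B, so the 64-bit "equal" mask is zero
    }
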
5405
5406static SDValue LowerSETCCE(SDValue Op, SelectionDAG &DAG) {
5407 SDValue LHS = Op.getOperand(0);
5408 SDValue RHS = Op.getOperand(1);
5409 SDValue Carry = Op.getOperand(2);
5410 SDValue Cond = Op.getOperand(3);
5411 SDLoc DL(Op);
5412
5413 assert(LHS.getSimpleValueType().isInteger() && "SETCCE is integer only.");
5414
5415 assert(Carry.getOpcode() != ISD::CARRY_FALSE);
5416 SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
5417 SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry);
5418
5419 SDValue FVal = DAG.getConstant(0, DL, MVT::i32);
5420 SDValue TVal = DAG.getConstant(1, DL, MVT::i32);
5421 SDValue ARMcc = DAG.getConstant(
5422 IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);
5423 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5424 SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM::CPSR,
5425 Cmp.getValue(1), SDValue());
5426 return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc,
5427 CCR, Chain.getValue(1));
5428}
5429
5430/// isNEONModifiedImm - Check if the specified splat value corresponds to a
5431/// valid vector constant for a NEON instruction with a "modified immediate"
5432/// operand (e.g., VMOV). If so, return the encoded value.
5433static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
5434 unsigned SplatBitSize, SelectionDAG &DAG,
5435 const SDLoc &dl, EVT &VT, bool is128Bits,
5436 NEONModImmType type) {
5437 unsigned OpCmode, Imm;
5438
5439 // SplatBitSize is set to the smallest size that splats the vector, so a
5440 // zero vector will always have SplatBitSize == 8. However, NEON modified
5441 // immediate instructions other than VMOV do not support the 8-bit encoding
5442 // of a zero vector, and the default encoding of zero is supposed to be the
5443 // 32-bit version.
5444 if (SplatBits == 0)
5445 SplatBitSize = 32;
5446
5447 switch (SplatBitSize) {
5448 case 8:
5449 if (type != VMOVModImm)
5450 return SDValue();
5451 // Any 1-byte value is OK. Op=0, Cmode=1110.
5452 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
5453 OpCmode = 0xe;
5454 Imm = SplatBits;
5455 VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
5456 break;
5457
5458 case 16:
5459 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
5460 VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
5461 if ((SplatBits & ~0xff) == 0) {
5462 // Value = 0x00nn: Op=x, Cmode=100x.
5463 OpCmode = 0x8;
5464 Imm = SplatBits;
5465 break;
5466 }
5467 if ((SplatBits & ~0xff00) == 0) {
5468 // Value = 0xnn00: Op=x, Cmode=101x.
5469 OpCmode = 0xa;
5470 Imm = SplatBits >> 8;
5471 break;
5472 }
5473 return SDValue();
5474
5475 case 32:
5476 // NEON's 32-bit VMOV supports splat values where:
5477 // * only one byte is nonzero, or
5478 // * the least significant byte is 0xff and the second byte is nonzero, or
5479 // * the least significant 2 bytes are 0xff and the third is nonzero.
5480 VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
5481 if ((SplatBits & ~0xff) == 0) {
5482 // Value = 0x000000nn: Op=x, Cmode=000x.
5483 OpCmode = 0;
5484 Imm = SplatBits;
5485 break;
5486 }
5487 if ((SplatBits & ~0xff00) == 0) {
5488 // Value = 0x0000nn00: Op=x, Cmode=001x.
5489 OpCmode = 0x2;
5490 Imm = SplatBits >> 8;
5491 break;
5492 }
5493 if ((SplatBits & ~0xff0000) == 0) {
5494 // Value = 0x00nn0000: Op=x, Cmode=010x.
5495 OpCmode = 0x4;
5496 Imm = SplatBits >> 16;
5497 break;
5498 }
5499 if ((SplatBits & ~0xff000000) == 0) {
5500 // Value = 0xnn000000: Op=x, Cmode=011x.
5501 OpCmode = 0x6;
5502 Imm = SplatBits >> 24;
5503 break;
5504 }
5505
5506 // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
5507 if (type == OtherModImm) return SDValue();
5508
5509 if ((SplatBits & ~0xffff) == 0 &&
5510 ((SplatBits | SplatUndef) & 0xff) == 0xff) {
5511 // Value = 0x0000nnff: Op=x, Cmode=1100.
5512 OpCmode = 0xc;
5513 Imm = SplatBits >> 8;
5514 break;
5515 }
5516
5517 if ((SplatBits & ~0xffffff) == 0 &&
5518 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
5519 // Value = 0x00nnffff: Op=x, Cmode=1101.
5520 OpCmode = 0xd;
5521 Imm = SplatBits >> 16;
5522 break;
5523 }
5524
5525 // Note: there are a few 32-bit splat values (specifically: 00ffff00,
5526 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
5527 // VMOV.I32. A (very) minor optimization would be to replicate the value
5528 // and fall through here to test for a valid 64-bit splat. But, then the
5529 // caller would also need to check and handle the change in size.
5530 return SDValue();
5531
5532 case 64: {
5533 if (type != VMOVModImm)
5534 return SDValue();
5535 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
5536 uint64_t BitMask = 0xff;
5537 uint64_t Val = 0;
5538 unsigned ImmMask = 1;
5539 Imm = 0;
5540 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
5541 if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
5542 Val |= BitMask;
5543 Imm |= ImmMask;
5544 } else if ((SplatBits & BitMask) != 0) {
5545 return SDValue();
5546 }
5547 BitMask <<= 8;
5548 ImmMask <<= 1;
5549 }
5550
5551 if (DAG.getDataLayout().isBigEndian())
5552 // Swap the higher and lower 32-bit words.
5553 Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4);
5554
5555 // Op=1, Cmode=1110.
5556 OpCmode = 0x1e;
5557 VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
5558 break;
5559 }
5560
5561 default:
5562 llvm_unreachable("unexpected size for isNEONModifiedImm");
5563 }
5564
5565 unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
5566 return DAG.getTargetConstant(EncodedVal, dl, MVT::i32);
5567}
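
For the common single-nonzero-byte 32-bit splats, the Cmode/Imm pairs produced above are easy to state: 0x000000nn gives Cmode 000x, 0x0000nn00 gives 001x, and so on. A partial, standalone sketch of just those four cases, mirroring the checks above but not the full helper (the 0xff-filled and 64-bit forms are deliberately left out):

    #include <cassert>
    #include <cstdint>

    // Partial sketch of the 32-bit VMOV cases: fill (OpCmode, Imm) when the
    // splat value has exactly one nonzero byte (Cmode 000x/001x/010x/011x).
    bool encodeVMOV32(uint32_t SplatBits, unsigned &OpCmode, unsigned &Imm) {
      if ((SplatBits & ~0xffu) == 0)       { OpCmode = 0x0; Imm = SplatBits;       return true; }
      if ((SplatBits & ~0xff00u) == 0)     { OpCmode = 0x2; Imm = SplatBits >> 8;  return true; }
      if ((SplatBits & ~0xff0000u) == 0)   { OpCmode = 0x4; Imm = SplatBits >> 16; return true; }
      if ((SplatBits & ~0xff000000u) == 0) { OpCmode = 0x6; Imm = SplatBits >> 24; return true; }
      return false;                        // would need the 0xff-filled or 64-bit forms
    }

    int main() {
      unsigned OpCmode, Imm;
      assert(encodeVMOV32(0x00004500u, OpCmode, Imm) && OpCmode == 0x2 && Imm == 0x45);
      assert(!encodeVMOV32(0x00120034u, OpCmode, Imm));   // two nonzero bytes
    }
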
5568
5569SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
5570 const ARMSubtarget *ST) const {
5571 bool IsDouble = Op.getValueType() == MVT::f64;
5572 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
5573 const APFloat &FPVal = CFP->getValueAPF();
5574
5575 // Prevent floating-point constants from using literal loads
5576 // when execute-only is enabled.
5577 if (ST->genExecuteOnly()) {
5578 APInt INTVal = FPVal.bitcastToAPInt();
5579 SDLoc DL(CFP);
5580 if (IsDouble) {
5581 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
5582 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
5583 if (!ST->isLittle())
5584 std::swap(Lo, Hi);
5585 return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);
5586 } else {
5587 return DAG.getConstant(INTVal, DL, MVT::i32);
5588 }
5589 }
5590
5591 if (!ST->hasVFP3())
5592 return SDValue();
5593
5594 // Use the default (constant pool) lowering for double constants when we have
5595 // an SP-only FPU
5596 if (IsDouble && Subtarget->isFPOnlySP())
5597 return SDValue();
5598
5599 // Try splatting with a VMOV.f32...
5600 int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
5601
5602 if (ImmVal != -1) {
5603 if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
5604 // We have code in place to select a valid ConstantFP already, no need to
5605 // do any mangling.
5606 return Op;
5607 }
5608
5609 // It's a float and we are trying to use NEON operations where
5610 // possible. Lower it to a splat followed by an extract.
5611 SDLoc DL(Op);
5612 SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32);
5613 SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
5614 NewVal);
5615 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
5616 DAG.getConstant(0, DL, MVT::i32));
5617 }
5618
5619 // The rest of our options are NEON only; make sure that's allowed before
5620 // proceeding.
5621 if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
5622 return SDValue();
5623
5624 EVT VMovVT;
5625 uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
5626
5627 // It wouldn't really be worth bothering for doubles except for one very
5628 // important value, which does happen to match: 0.0. So make sure we don't do
5629 // anything stupid.
5630 if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
5631 return SDValue();
5632
5633 // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
5634 SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
5635 VMovVT, false, VMOVModImm);
5636 if (NewVal != SDValue()) {
5637 SDLoc DL(Op);
5638 SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
5639 NewVal);
5640 if (IsDouble)
5641 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
5642
5643 // It's a float: cast and extract a vector element.
5644 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
5645 VecConstant);
5646 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
5647 DAG.getConstant(0, DL, MVT::i32));
5648 }
5649
5650 // Finally, try a VMVN.i32
5651 NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
5652 false, VMVNModImm);
5653 if (NewVal != SDValue()) {
5654 SDLoc DL(Op);
5655 SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
5656
5657 if (IsDouble)
5658 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
5659
5660 // It's a float: cast and extract a vector element.
5661 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
5662 VecConstant);
5663 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
5664 DAG.getConstant(0, DL, MVT::i32));
5665 }
5666
5667 return SDValue();
5668}
5669
5670// Check if a VEXT instruction can handle the shuffle mask when the
5671// vector sources of the shuffle are the same.
5672static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
5673 unsigned NumElts = VT.getVectorNumElements();
5674
5675 // Assume that the first shuffle index is not UNDEF. Fail if it is.
5676 if (M[0] < 0)
5677 return false;
5678
5679 Imm = M[0];
5680
5681 // If this is a VEXT shuffle, the immediate value is the index of the first
5682 // element. The other shuffle indices must be the successive elements after
5683 // the first one.
5684 unsigned ExpectedElt = Imm;
5685 for (unsigned i = 1; i < NumElts; ++i) {
5686 // Increment the expected index. If it wraps around, just follow it
5687 // back to index zero and keep going.
5688 ++ExpectedElt;
5689 if (ExpectedElt == NumElts)
5690 ExpectedElt = 0;
5691
5692 if (M[i] < 0) continue; // ignore UNDEF indices
5693 if (ExpectedElt != static_cast<unsigned>(M[i]))
5694 return false;
5695 }
5696
5697 return true;
5698}
5699
5700static bool isVEXTMask(ArrayRef<int> M, EVT VT,
5701 bool &ReverseVEXT, unsigned &Imm) {
5702 unsigned NumElts = VT.getVectorNumElements();
5703 ReverseVEXT = false;
5704
5705 // Assume that the first shuffle index is not UNDEF. Fail if it is.
5706 if (M[0] < 0)
5707 return false;
5708
5709 Imm = M[0];
5710
5711 // If this is a VEXT shuffle, the immediate value is the index of the first
5712 // element. The other shuffle indices must be the successive elements after
5713 // the first one.
5714 unsigned ExpectedElt = Imm;
5715 for (unsigned i = 1; i < NumElts; ++i) {
5716 // Increment the expected index. If it wraps around, it may still be
5717 // a VEXT but the source vectors must be swapped.
5718 ExpectedElt += 1;
5719 if (ExpectedElt == NumElts * 2) {
5720 ExpectedElt = 0;
5721 ReverseVEXT = true;
5722 }
5723
5724 if (M[i] < 0) continue; // ignore UNDEF indices
5725 if (ExpectedElt != static_cast<unsigned>(M[i]))
5726 return false;
5727 }
5728
5729 // Adjust the index value if the source operands will be swapped.
5730 if (ReverseVEXT)
5731 Imm -= NumElts;
5732
5733 return true;
5734}
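
Both VEXT checks accept a mask whose indices walk successive elements starting from Imm, e.g. <1, 2, 3, 0> for a single-source v4 rotation (Imm = 1), or <1, 2, 3, 4> for the two-source form. A standalone sketch of the single-source property with plain ints for the mask (isSingletonVEXT is illustrative, not the function above):

    #include <cassert>
    #include <vector>

    // Single-source VEXT test: indices must be successive modulo the vector
    // length, with -1 (UNDEF) lanes ignored.
    bool isSingletonVEXT(const std::vector<int> &M, unsigned NumElts, unsigned &Imm) {
      if (M.size() != NumElts || M[0] < 0)
        return false;
      Imm = (unsigned)M[0];
      unsigned Expected = Imm;
      for (unsigned I = 1; I < NumElts; ++I) {
        Expected = (Expected + 1) % NumElts;
        if (M[I] >= 0 && (unsigned)M[I] != Expected)
          return false;
      }
      return true;
    }

    int main() {
      unsigned Imm;
      assert(isSingletonVEXT({1, 2, 3, 0}, 4, Imm) && Imm == 1);   // vext #1, same source
      assert(isSingletonVEXT({2, -1, 0, 1}, 4, Imm) && Imm == 2);  // UNDEF lane ignored
      assert(!isSingletonVEXT({0, 2, 1, 3}, 4, Imm));              // not a rotation
    }
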
5735
5736/// isVREVMask - Check if a vector shuffle corresponds to a VREV
5737/// instruction with the specified blocksize. (The order of the elements
5738/// within each block of the vector is reversed.)
5739static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
5740 assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
5741 "Only possible block sizes for VREV are: 16, 32, 64");
5742
5743 unsigned EltSz = VT.getScalarSizeInBits();
5744 if (EltSz == 64)
5745 return false;
5746
5747 unsigned NumElts = VT.getVectorNumElements();
5748 unsigned BlockElts = M[0] + 1;
5749 // If the first shuffle index is UNDEF, be optimistic.
5750 if (M[0] < 0)
5751 BlockElts = BlockSize / EltSz;
5752
5753 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
5754 return false;
5755
5756 for (unsigned i = 0; i < NumElts; ++i) {
5757 if (M[i] < 0) continue; // ignore UNDEF indices
5758 if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
5759 return false;
5760 }
5761
5762 return true;
5763}
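
For example, with 8-bit elements a VREV with BlockSize 16 expects the mask <1, 0, 3, 2, 5, 4, 7, 6>, and BlockSize 32 expects <3, 2, 1, 0, 7, 6, 5, 4>. A small check of the per-index formula used in the loop above:

    #include <cassert>

    int main() {
      // Expected VREV index for element I with BlockElts elements per block,
      // as computed in isVREVMask above.
      auto Rev = [](unsigned I, unsigned BlockElts) {
        return (I - I % BlockElts) + (BlockElts - 1 - I % BlockElts);
      };

      unsigned VREV16[8] = {1, 0, 3, 2, 5, 4, 7, 6};   // v8i8, 16-bit blocks
      unsigned VREV32[8] = {3, 2, 1, 0, 7, 6, 5, 4};   // v8i8, 32-bit blocks
      for (unsigned I = 0; I < 8; ++I) {
        assert(Rev(I, 2) == VREV16[I]);
        assert(Rev(I, 4) == VREV32[I]);
      }
    }
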
5764
5765static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
5766 // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
5767 // range, then 0 is placed into the resulting vector. So pretty much any mask
5768 // of 8 elements can work here.
5769 return VT == MVT::v8i8 && M.size() == 8;
5770}
5771
5772// Checks whether the shuffle mask represents a vector transpose (VTRN) by
5773// checking that pairs of elements in the shuffle mask represent the same index
5774// in each vector, incrementing the expected index by 2 at each step.
5775// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6]
5776// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g}
5777// v2={e,f,g,h}
5778// WhichResult gives the offset for each element in the mask based on which
5779// of the two results it belongs to.
5780//
5781// The transpose can be represented either as:
5782// result1 = shufflevector v1, v2, result1_shuffle_mask
5783// result2 = shufflevector v1, v2, result2_shuffle_mask
5784// where v1/v2 and the shuffle masks have the same number of elements
5785// (here WhichResult (see below) indicates which result is being checked)
5786//
5787// or as:
5788// results = shufflevector v1, v2, shuffle_mask
5789// where both results are returned in one vector and the shuffle mask has twice
5790// as many elements as v1/v2 (here WhichResult will always be 0 if true). Here we
5791// want to check the low half and high half of the shuffle mask as if it were
5792// the other case.
5793static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5794 unsigned EltSz = VT.getScalarSizeInBits();
5795 if (EltSz == 64)
5796 return false;
5797
5798 unsigned NumElts = VT.getVectorNumElements();
5799 if (M.size() != NumElts && M.size() != NumElts*2)
5800 return false;
5801
5802 // If the mask is twice as long as the input vector then we need to check the
5803 // upper and lower parts of the mask with a matching value for WhichResult
5804 // FIXME: A mask with only even values will be rejected in case the first
5805 // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
5806 // M[0] is used to determine WhichResult
5807 for (unsigned i = 0; i < M.size(); i += NumElts) {
5808 if (M.size() == NumElts * 2)
5809 WhichResult = i / NumElts;
5810 else
5811 WhichResult = M[i] == 0 ? 0 : 1;
5812 for (unsigned j = 0; j < NumElts; j += 2) {
5813 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
5814 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
5815 return false;
5816 }
5817 }
5818
5819 if (M.size() == NumElts*2)
5820 WhichResult = 0;
5821
5822 return true;
5823}
5824
5825/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
5826/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5827/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
5828static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
5829 unsigned EltSz = VT.getScalarSizeInBits();
5830 if (EltSz == 64)
5831 return false;
5832
5833 unsigned NumElts = VT.getVectorNumElements();
5834 if (M.size() != NumElts && M.size() != NumElts*2)
5835 return false;
5836
5837 for (unsigned i = 0; i < M.size(); i += NumElts) {
5838 if (M.size() == NumElts * 2)
5839 WhichResult = i / NumElts;
5840 else
5841 WhichResult = M[i] == 0 ? 0 : 1;
5842 for (unsigned j = 0; j < NumElts; j += 2) {
5843 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
5844 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
5845 return false;
5846 }
5847 }
5848
5849 if (M.size() == NumElts*2)
5850 WhichResult = 0;
5851
5852 return true;
5853}
5854
5855// Checks whether the shuffle mask represents a vector unzip (VUZP) by checking
5856// that the mask elements are either all even and in steps of size 2 or all odd
5857// and in steps of size 2.
5858// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6]
5859// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g}
5860// v2={e,f,g,h}
5861// Requires similar checks to that of isVTRNMask with
5862// respect to how results are returned.
5863static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5864 unsigned EltSz = VT.getScalarSizeInBits();
5865 if (EltSz == 64)
5866 return false;
5867
5868 unsigned NumElts = VT.getVectorNumElements();
5869 if (M.size() != NumElts && M.size() != NumElts*2)
5870 return false;
5871
5872 for (unsigned i = 0; i < M.size(); i += NumElts) {
5873 WhichResult = M[i] == 0 ? 0 : 1;
5874 for (unsigned j = 0; j < NumElts; ++j) {
5875 if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
5876 return false;
5877 }
5878 }
5879
5880 if (M.size() == NumElts*2)
5881 WhichResult = 0;
5882
5883 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
5884 if (VT.is64BitVector() && EltSz == 32)
5885 return false;
5886
5887 return true;
5888}
5889
5890/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
5891/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5892/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
5893static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
5894 unsigned EltSz = VT.getScalarSizeInBits();
5895 if (EltSz == 64)
5896 return false;
5897
5898 unsigned NumElts = VT.getVectorNumElements();
5899 if (M.size() != NumElts && M.size() != NumElts*2)
5900 return false;
5901
5902 unsigned Half = NumElts / 2;
5903 for (unsigned i = 0; i < M.size(); i += NumElts) {
5904 WhichResult = M[i] == 0 ? 0 : 1;
5905 for (unsigned j = 0; j < NumElts; j += Half) {
5906 unsigned Idx = WhichResult;
5907 for (unsigned k = 0; k < Half; ++k) {
5908 int MIdx = M[i + j + k];
5909 if (MIdx >= 0 && (unsigned) MIdx != Idx)
5910 return false;
5911 Idx += 2;
5912 }
5913 }
5914 }
5915
5916 if (M.size() == NumElts*2)
5917 WhichResult = 0;
5918
5919 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
5920 if (VT.is64BitVector() && EltSz == 32)
5921 return false;
5922
5923 return true;
5924}
5925
5926// Checks whether the shuffle mask represents a vector zip (VZIP) by checking
5927// that pairs of elements of the shufflemask represent the same index in each
5928// vector incrementing sequentially through the vectors.
5929// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5]
5930// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f}
5931// v2={e,f,g,h}
5932// Requires similar checks to that of isVTRNMask with respect to how results
5933// are returned.
5934static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5935 unsigned EltSz = VT.getScalarSizeInBits();
5936 if (EltSz == 64)
5937 return false;
5938
5939 unsigned NumElts = VT.getVectorNumElements();
5940 if (M.size() != NumElts && M.size() != NumElts*2)
5941 return false;
5942
5943 for (unsigned i = 0; i < M.size(); i += NumElts) {
5944 WhichResult = M[i] == 0 ? 0 : 1;
5945 unsigned Idx = WhichResult * NumElts / 2;
5946 for (unsigned j = 0; j < NumElts; j += 2) {
5947 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
5948 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts))
5949 return false;
5950 Idx += 1;
5951 }
5952 }
5953
5954 if (M.size() == NumElts*2)
5955 WhichResult = 0;
5956
5957 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
5958 if (VT.is64BitVector() && EltSz == 32)
5959 return false;
5960
5961 return true;
5962}
5963
5964/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
5965/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5966/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
5967static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
5968 unsigned EltSz = VT.getScalarSizeInBits();
5969 if (EltSz == 64)
5970 return false;
5971
5972 unsigned NumElts = VT.getVectorNumElements();
5973 if (M.size() != NumElts && M.size() != NumElts*2)
5974 return false;
5975
5976 for (unsigned i = 0; i < M.size(); i += NumElts) {
5977 WhichResult = M[i] == 0 ? 0 : 1;
5978 unsigned Idx = WhichResult * NumElts / 2;
5979 for (unsigned j = 0; j < NumElts; j += 2) {
5980 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
5981 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx))
5982 return false;
5983 Idx += 1;
5984 }
5985 }
5986
5987 if (M.size() == NumElts*2)
5988 WhichResult = 0;
5989
5990 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
5991 if (VT.is64BitVector() && EltSz == 32)
5992 return false;
5993
5994 return true;
5995}
5996
5997/// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN),
5998/// and return the corresponding ARMISD opcode if it is, or 0 if it isn't.
5999static unsigned isNEONTwoResultShuffleMask(ArrayRef<int> ShuffleMask, EVT VT,
6000 unsigned &WhichResult,
6001 bool &isV_UNDEF) {
6002 isV_UNDEF = false;
6003 if (isVTRNMask(ShuffleMask, VT, WhichResult))
6004 return ARMISD::VTRN;
6005 if (isVUZPMask(ShuffleMask, VT, WhichResult))
6006 return ARMISD::VUZP;
6007 if (isVZIPMask(ShuffleMask, VT, WhichResult))
6008 return ARMISD::VZIP;
6009
6010 isV_UNDEF = true;
6011 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
6012 return ARMISD::VTRN;
6013 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
6014 return ARMISD::VUZP;
6015 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
6016 return ARMISD::VZIP;
6017
6018 return 0;
6019}
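
For v4i32 sources v1 = {a,b,c,d} and v2 = {e,f,g,h}, the WhichResult == 0 masks recognised here are VTRN <0,4,2,6> = {a,e,c,g}, VUZP <0,2,4,6> = {a,c,e,g} and VZIP <0,4,1,5> = {a,e,b,f}, as the comments above describe. A small worked example evaluating those three masks on concrete values (shuffle is a plain helper written only for this check):

    #include <array>
    #include <cassert>

    // Evaluate a v4i32 shuffle of two sources with a WhichResult == 0 mask.
    std::array<int, 4> shuffle(std::array<int, 4> V1, std::array<int, 4> V2,
                               std::array<int, 4> Mask) {
      std::array<int, 4> R{};
      for (int I = 0; I < 4; ++I)
        R[I] = Mask[I] < 4 ? V1[Mask[I]] : V2[Mask[I] - 4];
      return R;
    }

    int main() {
      std::array<int, 4> V1{10, 11, 12, 13}, V2{20, 21, 22, 23};
      assert((shuffle(V1, V2, {0, 4, 2, 6}) == std::array<int, 4>{10, 20, 12, 22})); // VTRN
      assert((shuffle(V1, V2, {0, 2, 4, 6}) == std::array<int, 4>{10, 12, 20, 22})); // VUZP
      assert((shuffle(V1, V2, {0, 4, 1, 5}) == std::array<int, 4>{10, 20, 11, 21})); // VZIP
    }
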
6020
6021/// \return true if this is a reverse operation on a vector.
6022static bool isReverseMask(ArrayRef<int> M, EVT VT) {
6023 unsigned NumElts = VT.getVectorNumElements();
6024 // Make sure the mask has the right size.
6025 if (NumElts != M.size())
6026 return false;
6027
6028 // Look for <15, ..., 3, -1, 1, 0>.
6029 for (unsigned i = 0; i != NumElts; ++i)
6030 if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
6031 return false;
6032
6033 return true;
6034}
6035
6036// If N is an integer constant that can be moved into a register in one
6037// instruction, return an SDValue of such a constant (will become a MOV
6038// instruction). Otherwise return null.
6039static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
6040 const ARMSubtarget *ST, const SDLoc &dl) {
6041 uint64_t Val;
6042 if (!isa<ConstantSDNode>(N))
6043 return SDValue();
6044 Val = cast<ConstantSDNode>(N)->getZExtValue();
6045
6046 if (ST->isThumb1Only()) {
6047 if (Val <= 255 || ~Val <= 255)
6048 return DAG.getConstant(Val, dl, MVT::i32);
6049 } else {
6050 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
6051 return DAG.getConstant(Val, dl, MVT::i32);
6052 }
6053 return SDValue();
6054}
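
For the non-Thumb1 path, getSOImmVal succeeds when the value is an 8-bit immediate rotated right by an even amount, and the complementary check covers MVN. A standalone sketch of that rotated-immediate test (isARMSOImm restates the classic ARM modified-immediate rule and is not the LLVM helper; the MVN/complement half is omitted):

    #include <cassert>
    #include <cstdint>

    // Classic ARM "modified immediate" rule: an 8-bit value rotated right by
    // an even amount. Equivalently, rotating Val left by some even amount
    // must leave a value that fits in 8 bits.
    bool isARMSOImm(uint32_t Val) {
      for (unsigned Rot = 0; Rot < 32; Rot += 2) {
        uint32_t Rotated = Rot ? ((Val << Rot) | (Val >> (32 - Rot))) : Val;
        if ((Rotated & ~0xffu) == 0)
          return true;
      }
      return false;
    }

    int main() {
      assert(isARMSOImm(0xff000000u));    // 0xff rotated right by 8
      assert(isARMSOImm(0x000003fcu));    // 0xff rotated right by 30
      assert(!isARMSOImm(0x00000101u));   // needs more than one instruction
    }
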
6055
6056// If this is a case we can't handle, return null and let the default
6057// expansion code take care of it.
6058SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
6059 const ARMSubtarget *ST) const {
6060 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
6061 SDLoc dl(Op);
6062 EVT VT = Op.getValueType();
6063
6064 APInt SplatBits, SplatUndef;
6065 unsigned SplatBitSize;
6066 bool HasAnyUndefs;
6067 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
6068 if (SplatUndef.isAllOnesValue())
6069 return DAG.getUNDEF(VT);
6070
6071 if (SplatBitSize <= 64) {
6072 // Check if an immediate VMOV works.
6073 EVT VmovVT;
6074 SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
6075 SplatUndef.getZExtValue(), SplatBitSize,
6076 DAG, dl, VmovVT, VT.is128BitVector(),
6077 VMOVModImm);
6078 if (Val.getNode()) {
6079 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
6080 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
6081 }
6082
6083 // Try an immediate VMVN.
6084 uint64_t NegatedImm = (~SplatBits).getZExtValue();
6085 Val = isNEONModifiedImm(NegatedImm,
6086 SplatUndef.getZExtValue(), SplatBitSize,
6087 DAG, dl, VmovVT, VT.is128BitVector(),
6088 VMVNModImm);
6089 if (Val.getNode()) {
6090 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
6091 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
6092 }
6093
6094 // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
6095 if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
6096 int ImmVal = ARM_AM::getFP32Imm(SplatBits);
6097 if (ImmVal != -1) {
6098 SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32);
6099 return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
6100 }
6101 }
6102 }
6103 }
6104
6105 // Scan through the operands to see if only one value is used.
6106 //
6107 // As an optimisation, even if more than one value is used it may be more
6108 // profitable to splat with one value and then change some lanes.
6109 //
6110 // Heuristically we decide to do this if the vector has a "dominant" value,
6111 // defined as splatted to more than half of the lanes.
6112 unsigned NumElts = VT.getVectorNumElements();
6113 bool isOnlyLowElement = true;
6114 bool usesOnlyOneValue = true;
6115 bool hasDominantValue = false;
6116 bool isConstant = true;
6117
6118 // Map of the number of times a particular SDValue appears in the
6119 // element list.
6120 DenseMap<SDValue, unsigned> ValueCounts;
6121 SDValue Value;
6122 for (unsigned i = 0; i < NumElts; ++i) {
6123 SDValue V = Op.getOperand(i);
6124 if (V.isUndef())
6125 continue;
6126 if (i > 0)
6127 isOnlyLowElement = false;
6128 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
6129 isConstant = false;
6130
6131 ValueCounts.insert(std::make_pair(V, 0));
6132 unsigned &Count = ValueCounts[V];
6133
6134 // Is this value dominant? (takes up more than half of the lanes)
6135 if (++Count > (NumElts / 2)) {
6136 hasDominantValue = true;
6137 Value = V;
6138 }
6139 }
6140 if (ValueCounts.size() != 1)
6141 usesOnlyOneValue = false;
6142 if (!Value.getNode() && !ValueCounts.empty())
6143 Value = ValueCounts.begin()->first;
6144
6145 if (ValueCounts.empty())
6146 return DAG.getUNDEF(VT);
6147
6148 // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
6149 // Keep going if we are hitting this case.
6150 if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
6151 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
6152
6153 unsigned EltSize = VT.getScalarSizeInBits();
6154
6155 // Use VDUP for non-constant splats. For f32 constant splats, reduce to
6156 // i32 and try again.
6157 if (hasDominantValue && EltSize <= 32) {
6158 if (!isConstant) {
6159 SDValue N;
6160
6161 // If we are VDUPing a value that comes directly from a vector, that will
6162 // cause an unnecessary move to and from a GPR, where instead we could
6163 // just use VDUPLANE. We can only do this if the lane being extracted
6164 // is at a constant index, as the VDUP from lane instructions only have
6165 // constant-index forms.
6166 ConstantSDNode *constIndex;
6167 if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6168 (constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
6169 // We need to create a new undef vector to use for the VDUPLANE if the
6170 // size of the vector from which we get the value is different than the
6171 // size of the vector that we need to create. We will insert the element
6172 // such that the register coalescer will remove unnecessary copies.
6173 if (VT != Value->getOperand(0).getValueType()) {
6174 unsigned index = constIndex->getAPIntValue().getLimitedValue() %
6175 VT.getVectorNumElements();
6176 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
6177 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
6178 Value, DAG.getConstant(index, dl, MVT::i32)),
6179 DAG.getConstant(index, dl, MVT::i32));
6180 } else
6181 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
6182 Value->getOperand(0), Value->getOperand(1));
6183 } else
6184 N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
6185
6186 if (!usesOnlyOneValue) {
6187 // The dominant value was splatted as 'N', but we now have to insert
6188 // all differing elements.
6189 for (unsigned I = 0; I < NumElts; ++I) {
6190 if (Op.getOperand(I) == Value)
6191 continue;
6192 SmallVector<SDValue, 3> Ops;
6193 Ops.push_back(N);
6194 Ops.push_back(Op.getOperand(I));
6195 Ops.push_back(DAG.getConstant(I, dl, MVT::i32));
6196 N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
6197 }
6198 }
6199 return N;
6200 }
6201 if (VT.getVectorElementType().isFloatingPoint()) {
6202 SmallVector<SDValue, 8> Ops;
6203 for (unsigned i = 0; i < NumElts; ++i)
6204 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
6205 Op.getOperand(i)));
6206 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
6207 SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
6208 Val = LowerBUILD_VECTOR(Val, DAG, ST);
6209 if (Val.getNode())
6210 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
6211 }
6212 if (usesOnlyOneValue) {
6213 SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
6214 if (isConstant && Val.getNode())
6215 return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
6216 }
6217 }
6218
6219 // If all elements are constants and the case above didn't get hit, fall back
6220 // to the default expansion, which will generate a load from the constant
6221 // pool.
6222 if (isConstant)
6223 return SDValue();
6224
6225 // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
6226 if (NumElts >= 4) {
6227 SDValue shuffle = ReconstructShuffle(Op, DAG);
6228 if (shuffle != SDValue())
6229 return shuffle;
6230 }
6231
6232 if (VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
6233 // If we haven't found an efficient lowering, try splitting a 128-bit vector
6234 // into two 64-bit vectors; we might discover a better way to lower it.
6235 SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts);
6236 EVT ExtVT = VT.getVectorElementType();
6237 EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElts / 2);
6238 SDValue Lower =
6239 DAG.getBuildVector(HVT, dl, makeArrayRef(&Ops[0], NumElts / 2));
6240 if (Lower.getOpcode() == ISD::BUILD_VECTOR)
6241 Lower = LowerBUILD_VECTOR(Lower, DAG, ST);
6242 SDValue Upper = DAG.getBuildVector(
6243 HVT, dl, makeArrayRef(&Ops[NumElts / 2], NumElts / 2));
6244 if (Upper.getOpcode() == ISD::BUILD_VECTOR)
6245 Upper = LowerBUILD_VECTOR(Upper, DAG, ST);
6246 if (Lower && Upper)
6247 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lower, Upper);
6248 }
6249
6250 // Vectors with 32- or 64-bit elements can be built by directly assigning
6251 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
6252 // will be legalized.
6253 if (EltSize >= 32) {
6254 // Do the expansion with floating-point types, since that is what the VFP
6255 // registers are defined to use, and since i64 is not legal.
6256 EVT EltVT = EVT::getFloatingPointVT(EltSize);
6257 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
6258 SmallVector<SDValue, 8> Ops;
6259 for (unsigned i = 0; i < NumElts; ++i)
6260 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
6261 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
6262 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
6263 }
6264
6265 // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
6266 // know the default expansion would otherwise fall back on something even
6267 // worse. For a vector with one or two non-undef values, that's
6268 // scalar_to_vector for the elements followed by a shuffle (provided the
6269 // shuffle is valid for the target) and materialization element by element
6270 // on the stack followed by a load for everything else.
6271 if (!isConstant && !usesOnlyOneValue) {
6272 SDValue Vec = DAG.getUNDEF(VT);
6273 for (unsigned i = 0 ; i < NumElts; ++i) {
6274 SDValue V = Op.getOperand(i);
6275 if (V.isUndef())
6276 continue;
6277 SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32);
6278 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
6279 }
6280 return Vec;
6281 }
6282
6283 return SDValue();
6284}
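
The dominant-value heuristic counts how often each operand value appears and treats one as dominant once it covers more than half of the lanes, so a vector like {x, x, x, y} becomes a VDUP of x plus a single lane insert. A minimal sketch of just the counting step, with strings standing in for SDValues and std::map for the DenseMap:

    #include <cassert>
    #include <map>
    #include <string>
    #include <vector>

    int main() {
      // Stand-ins for the BUILD_VECTOR operands ("" plays the role of undef).
      std::vector<std::string> Ops = {"x", "x", "x", "y"};

      std::map<std::string, unsigned> ValueCounts;
      std::string Dominant;
      for (const std::string &V : Ops) {
        if (V.empty())
          continue;                            // skip undef lanes
        if (++ValueCounts[V] > Ops.size() / 2) // dominant: more than half the lanes
          Dominant = V;
      }

      bool UsesOnlyOneValue = ValueCounts.size() == 1;
      assert(Dominant == "x" && !UsesOnlyOneValue); // splat "x", then insert lane 3
    }
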
6285
6286// Gather data to see if the operation can be modelled as a
6287// shuffle in combination with VEXTs.
6288SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
6289 SelectionDAG &DAG) const {
6290 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
6291 SDLoc dl(Op);
6292 EVT VT = Op.getValueType();
6293 unsigned NumElts = VT.getVectorNumElements();
6294
6295 struct ShuffleSourceInfo {
6296 SDValue Vec;
6297 unsigned MinElt = std::numeric_limits<unsigned>::max();
6298 unsigned MaxElt = 0;
6299
6300 // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
6301 // be compatible with the shuffle we intend to construct. As a result
6302 // ShuffleVec will be some sliding window into the original Vec.
6303 SDValue ShuffleVec;
6304
6305 // Code should guarantee that element i in Vec starts at element "WindowBase
6306 // + i * WindowScale in ShuffleVec".
6307 int WindowBase = 0;
6308 int WindowScale = 1;
6309
6310 ShuffleSourceInfo(SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {}
6311
6312 bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
6313 };
6314
6315 // First gather all vectors used as an immediate source for this BUILD_VECTOR
6316 // node.
6317 SmallVector<ShuffleSourceInfo, 2> Sources;
6318 for (unsigned i = 0; i < NumElts; ++i) {
6319 SDValue V = Op.getOperand(i);
6320 if (V.isUndef())
6321 continue;
6322 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
6323 // A shuffle can only come from building a vector from various
6324 // elements of other vectors.
6325 return SDValue();
6326 } else if (!isa<ConstantSDNode>(V.getOperand(1))) {
6327 // Furthermore, shuffles require a constant mask, whereas extractelts
6328 // accept variable indices.
6329 return SDValue();
6330 }
6331
6332 // Add this element source to the list if it's not already there.
6333 SDValue SourceVec = V.getOperand(0);
6334 auto Source = llvm::find(Sources, SourceVec);
6335 if (Source == Sources.end())
6336 Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
6337
6338 // Update the minimum and maximum lane number seen.
6339 unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
6340 Source->MinElt = std::min(Source->MinElt, EltNo);
6341 Source->MaxElt = std::max(Source->MaxElt, EltNo);
6342 }
6343
6344 // Currently only do something sane when at most two source vectors
6345 // are involved.
6346 if (Sources.size() > 2)
6347 return SDValue();
6348
6349 // Find out the smallest element size among the result and the two sources,
6350 // and use it as the element size to build the shuffle_vector.
6351 EVT SmallestEltTy = VT.getVectorElementType();
6352 for (auto &Source : Sources) {
6353 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
6354 if (SrcEltTy.bitsLT(SmallestEltTy))
6355 SmallestEltTy = SrcEltTy;
6356 }
6357 unsigned ResMultiplier =
6358 VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
6359 NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
6360 EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
6361
6362 // If the source vector is too wide or too narrow, we may nevertheless be able
6363 // to construct a compatible shuffle either by concatenating it with UNDEF or
6364 // extracting a suitable range of elements.
6365 for (auto &Src : Sources) {
6366 EVT SrcVT = Src.ShuffleVec.getValueType();
6367
6368 if (SrcVT.getSizeInBits() == VT.getSizeInBits())
6369 continue;
6370
6371 // This stage of the search produces a source with the same element type as
6372 // the original, but with a total width matching the BUILD_VECTOR output.
6373 EVT EltVT = SrcVT.getVectorElementType();
6374 unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();
6375 EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
6376
6377 if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
6378 if (2 * SrcVT.getSizeInBits() != VT.getSizeInBits())
6379 return SDValue();
6380 // We can pad out the smaller vector for free, so if it's part of a
6381 // shuffle...
6382 Src.ShuffleVec =
6383 DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
6384 DAG.getUNDEF(Src.ShuffleVec.getValueType()));
6385 continue;
6386 }
6387
6388 if (SrcVT.getSizeInBits() != 2 * VT.getSizeInBits())
6389 return SDValue();
6390
6391 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
6392 // Span too large for a VEXT to cope
6393 return SDValue();
6394 }
6395
6396 if (Src.MinElt >= NumSrcElts) {
6397 // The extraction can just take the second half
6398 Src.ShuffleVec =
6399 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6400 DAG.getConstant(NumSrcElts, dl, MVT::i32));
6401 Src.WindowBase = -NumSrcElts;
6402 } else if (Src.MaxElt < NumSrcElts) {
6403 // The extraction can just take the first half
6404 Src.ShuffleVec =
6405 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6406 DAG.getConstant(0, dl, MVT::i32));
6407 } else {
6408 // An actual VEXT is needed
6409 SDValue VEXTSrc1 =
6410 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6411 DAG.getConstant(0, dl, MVT::i32));
6412 SDValue VEXTSrc2 =
6413 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6414 DAG.getConstant(NumSrcElts, dl, MVT::i32));
6415
6416 Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
6417 VEXTSrc2,
6418 DAG.getConstant(Src.MinElt, dl, MVT::i32));
6419 Src.WindowBase = -Src.MinElt;
6420 }
6421 }
6422
6423 // Another possible incompatibility occurs from the vector element types. We
6424 // can fix this by bitcasting the source vectors to the same type we intend
6425 // for the shuffle.
6426 for (auto &Src : Sources) {
6427 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
6428 if (SrcEltTy == SmallestEltTy)
6429 continue;
6430 assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
6431 Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
6432 Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
6433 Src.WindowBase *= Src.WindowScale;
6434 }
6435
6436 // Final sanity check before we try to actually produce a shuffle.
6437 DEBUG(
6438 for (auto Src : Sources)
6439 assert(Src.ShuffleVec.getValueType() == ShuffleVT);
6440 );
6441
6442 // The stars all align; our next step is to produce the mask for the shuffle.
6443 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
6444 int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
6445 for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
6446 SDValue Entry = Op.getOperand(i);
6447 if (Entry.isUndef())
6448 continue;
6449
6450 auto Src = llvm::find(Sources, Entry.getOperand(0));
6451 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
6452
6453 // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
6454 // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
6455 // segment.
6456 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
6457 int BitsDefined = std::min(OrigEltTy.getSizeInBits(),
6458 VT.getScalarSizeInBits());
6459 int LanesDefined = BitsDefined / BitsPerShuffleLane;
6460
6461 // This source is expected to fill ResMultiplier lanes of the final shuffle,
6462 // starting at the appropriate offset.
6463 int *LaneMask = &Mask[i * ResMultiplier];
6464
6465 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
6466 ExtractBase += NumElts * (Src - Sources.begin());
6467 for (int j = 0; j < LanesDefined; ++j)
6468 LaneMask[j] = ExtractBase + j;
6469 }
6470
6471 // Final check before we try to produce nonsense...
6472 if (!isShuffleMaskLegal(Mask, ShuffleVT))
6473 return SDValue();
6474
6475 // We can't handle more than two sources. This should have already
6476 // been checked before this point.
6477 assert(Sources.size() <= 2 && "Too many sources!");
6478
6479 SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
6480 for (unsigned i = 0; i < Sources.size(); ++i)
6481 ShuffleOps[i] = Sources[i].ShuffleVec;
6482
6483 SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
6484 ShuffleOps[1], Mask);
6485 return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
6486}
6487
6488/// isShuffleMaskLegal - Targets can use this to indicate that they only
6489/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
6490/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
6491/// are assumed to be legal.
6492bool
6493ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
6494 EVT VT) const {
6495 if (VT.getVectorNumElements() == 4 &&
6496 (VT.is128BitVector() || VT.is64BitVector())) {
6497 unsigned PFIndexes[4];
6498 for (unsigned i = 0; i != 4; ++i) {
6499 if (M[i] < 0)
6500 PFIndexes[i] = 8;
6501 else
6502 PFIndexes[i] = M[i];
6503 }
6504
6505 // Compute the index in the perfect shuffle table.
6506 unsigned PFTableIndex =
6507 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
6508 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
6509 unsigned Cost = (PFEntry >> 30);
6510
6511 if (Cost <= 4)
6512 return true;
6513 }
6514
6515 bool ReverseVEXT, isV_UNDEF;
6516 unsigned Imm, WhichResult;
6517
6518 unsigned EltSize = VT.getScalarSizeInBits();
6519 return (EltSize >= 32 ||
6520 ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
6521 isVREVMask(M, VT, 64) ||
6522 isVREVMask(M, VT, 32) ||
6523 isVREVMask(M, VT, 16) ||
6524 isVEXTMask(M, VT, ReverseVEXT, Imm) ||
6525 isVTBLMask(M, VT) ||
6526 isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF) ||
6527 ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)));
6528}
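// A worked example of the perfect-shuffle indexing used above: each of the
// four mask entries is reduced to a base-9 digit (8 standing in for an undef
// lane), so the mask <1, 1, 3, -1> yields
//   PFTableIndex = 1*729 + 1*81 + 3*9 + 8 = 845,
// and bits 31:30 of the table entry give the cost that is compared with 4.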
6529
6530/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
6531/// the specified operations to build the shuffle.
6532static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
6533 SDValue RHS, SelectionDAG &DAG,
6534 const SDLoc &dl) {
6535 unsigned OpNum = (PFEntry >> 26) & 0x0F;
6536 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
6537 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
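// Decoding note: a perfect-shuffle entry packs the cost in bits 31:30, the
// opcode in bits 29:26, and two 13-bit operand IDs. Each ID is itself a
// base-9 mask encoding, which is why OP_COPY below compares LHSID against
// (1*9+2)*9+3 (the identity mask <0,1,2,3>) and ((4*9+5)*9+6)*9+7 (the mask
// <4,5,6,7> that simply selects the RHS).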
6538
6539 enum {
6540 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
6541 OP_VREV,
6542 OP_VDUP0,
6543 OP_VDUP1,
6544 OP_VDUP2,
6545 OP_VDUP3,
6546 OP_VEXT1,
6547 OP_VEXT2,
6548 OP_VEXT3,
6549 OP_VUZPL, // VUZP, left result
6550 OP_VUZPR, // VUZP, right result
6551 OP_VZIPL, // VZIP, left result
6552 OP_VZIPR, // VZIP, right result
6553 OP_VTRNL, // VTRN, left result
6554 OP_VTRNR // VTRN, right result
6555 };
6556
6557 if (OpNum == OP_COPY) {
6558 if (LHSID == (1*9+2)*9+3) return LHS;
6559 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
6560 return RHS;
6561 }
6562
6563 SDValue OpLHS, OpRHS;
6564 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
6565 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
6566 EVT VT = OpLHS.getValueType();
6567
6568 switch (OpNum) {
6569 default: llvm_unreachable("Unknown shuffle opcode!");
6570 case OP_VREV:
6571 // VREV divides the vector in half and swaps within the half.
6572 if (VT.getVectorElementType() == MVT::i32 ||
6573 VT.getVectorElementType() == MVT::f32)
6574 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
6575 // vrev <4 x i16> -> VREV32
6576 if (VT.getVectorElementType() == MVT::i16)
6577 return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
6578 // vrev <4 x i8> -> VREV16
6579 assert(VT.getVectorElementType() == MVT::i8);
6580 return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
6581 case OP_VDUP0:
6582 case OP_VDUP1:
6583 case OP_VDUP2:
6584 case OP_VDUP3:
6585 return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
6586 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32));
6587 case OP_VEXT1:
6588 case OP_VEXT2:
6589 case OP_VEXT3:
6590 return DAG.getNode(ARMISD::VEXT, dl, VT,
6591 OpLHS, OpRHS,
6592 DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32));
6593 case OP_VUZPL:
6594 case OP_VUZPR:
6595 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
6596 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
6597 case OP_VZIPL:
6598 case OP_VZIPR:
6599 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
6600 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
6601 case OP_VTRNL:
6602 case OP_VTRNR:
6603 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
6604 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
6605 }
6606}
6607
6608static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
6609 ArrayRef<int> ShuffleMask,
6610 SelectionDAG &DAG) {
6611 // Check to see if we can use the VTBL instruction.
6612 SDValue V1 = Op.getOperand(0);
6613 SDValue V2 = Op.getOperand(1);
6614 SDLoc DL(Op);
6615
6616 SmallVector<SDValue, 8> VTBLMask;
6617 for (ArrayRef<int>::iterator
6618 I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
6619 VTBLMask.push_back(DAG.getConstant(*I, DL, MVT::i32));
6620
6621 if (V2.getNode()->isUndef())
6622 return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
6623 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
6624
6625 return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
6626 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
6627}
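// For instance, a mask of <8, 9, ..., 15> produces a constant index vector
// whose entries all fall in the second table register, so the two-register
// VTBL2 form is emitted; when V2 is undef there is nothing useful beyond V1
// and the single-register VTBL1 form is used instead.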
6628
6629static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
6630 SelectionDAG &DAG) {
6631 SDLoc DL(Op);
6632 SDValue OpLHS = Op.getOperand(0);
6633 EVT VT = OpLHS.getValueType();
6634
6635 assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
6636 "Expect an v8i16/v16i8 type");
6637 OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS);
6638 // For a v16i8 type: after the VREV, we have got <7, ..., 0, 15, ..., 8>. Now,
6639 // extract the first 8 bytes into the top double word and the last 8 bytes
6640 // into the bottom double word. The v8i16 case is similar.
6641 unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
6642 return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
6643 DAG.getConstant(ExtractNum, DL, MVT::i32));
6644}
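// Worked lane-by-lane example for v16i8: starting from <0, 1, ..., 15>, the
// VREV64 gives <7, 6, ..., 0, 15, 14, ..., 8>, and the VEXT with #8 then
// rotates the two halves to produce the fully reversed <15, 14, ..., 1, 0>.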
6645
6646static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
6647 SDValue V1 = Op.getOperand(0);
6648 SDValue V2 = Op.getOperand(1);
6649 SDLoc dl(Op);
6650 EVT VT = Op.getValueType();
6651 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
6652
6653 // Convert shuffles that are directly supported on NEON to target-specific
6654 // DAG nodes, instead of keeping them as shuffles and matching them again
6655 // during code selection. This is more efficient and avoids the possibility
6656 // of inconsistencies between legalization and selection.
6657 // FIXME: floating-point vectors should be canonicalized to integer vectors
6658 // of the same size so that they get CSEd properly.
6659 ArrayRef<int> ShuffleMask = SVN->getMask();
6660
6661 unsigned EltSize = VT.getScalarSizeInBits();
6662 if (EltSize <= 32) {
6663 if (SVN->isSplat()) {
6664 int Lane = SVN->getSplatIndex();
6665 // If this is an undef splat, generate it via "just" vdup, if possible.
6666 if (Lane == -1) Lane = 0;
6667
6668 // Test if V1 is a SCALAR_TO_VECTOR.
6669 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
6670 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
6671 }
6672 // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
6673 // (and probably will turn into a SCALAR_TO_VECTOR once legalization
6674 // reaches it).
6675 if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
6676 !isa<ConstantSDNode>(V1.getOperand(0))) {
6677 bool IsScalarToVector = true;
6678 for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
6679 if (!V1.getOperand(i).isUndef()) {
6680 IsScalarToVector = false;
6681 break;
6682 }
6683 if (IsScalarToVector)
6684 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
6685 }
6686 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
6687 DAG.getConstant(Lane, dl, MVT::i32));
6688 }
6689
6690 bool ReverseVEXT;
6691 unsigned Imm;
6692 if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
6693 if (ReverseVEXT)
6694 std::swap(V1, V2);
6695 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
6696 DAG.getConstant(Imm, dl, MVT::i32));
6697 }
6698
6699 if (isVREVMask(ShuffleMask, VT, 64))
6700 return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
6701 if (isVREVMask(ShuffleMask, VT, 32))
6702 return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
6703 if (isVREVMask(ShuffleMask, VT, 16))
6704 return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
6705
6706 if (V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
6707 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
6708 DAG.getConstant(Imm, dl, MVT::i32));
6709 }
6710
6711 // Check for Neon shuffles that modify both input vectors in place.
6712 // If both results are used, i.e., if there are two shuffles with the same
6713 // source operands and with masks corresponding to both results of one of
6714 // these operations, DAG memoization will ensure that a single node is
6715 // used for both shuffles.
6716 unsigned WhichResult;
6717 bool isV_UNDEF;
6718 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
6719 ShuffleMask, VT, WhichResult, isV_UNDEF)) {
6720 if (isV_UNDEF)
6721 V2 = V1;
6722 return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
6723 .getValue(WhichResult);
6724 }
6725
6726 // Also check for these shuffles through CONCAT_VECTORS: we canonicalize
6727 // shuffles that produce a result larger than their operands with:
6728 // shuffle(concat(v1, undef), concat(v2, undef))
6729 // ->
6730 // shuffle(concat(v1, v2), undef)
6731 // because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).
6732 //
6733 // This is useful in the general case, but there are special cases where
6734 // native shuffles produce larger results: the two-result ops.
6735 //
6736 // Look through the concat when lowering them:
6737 // shuffle(concat(v1, v2), undef)
6738 // ->
6739 // concat(VZIP(v1, v2):0, :1)
6740 //
6741 if (V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
6742 SDValue SubV1 = V1->getOperand(0);
6743 SDValue SubV2 = V1->getOperand(1);
6744 EVT SubVT = SubV1.getValueType();
6745
6746 // We expect these to have been canonicalized to -1.
6747 assert(llvm::all_of(ShuffleMask, [&](int i) {
6748 return i < (int)VT.getVectorNumElements();
6749 }) && "Unexpected shuffle index into UNDEF operand!");
6750
6751 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
6752 ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
6753 if (isV_UNDEF)
6754 SubV2 = SubV1;
6755 assert((WhichResult == 0) &&
6756 "In-place shuffle of concat can only have one result!");
6757 SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT),
6758 SubV1, SubV2);
6759 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0),
6760 Res.getValue(1));
6761 }
6762 }
6763 }
6764
6765 // If the shuffle is not directly supported and it has 4 elements, use
6766 // the PerfectShuffle-generated table to synthesize it from other shuffles.
6767 unsigned NumElts = VT.getVectorNumElements();
6768 if (NumElts == 4) {
6769 unsigned PFIndexes[4];
6770 for (unsigned i = 0; i != 4; ++i) {
6771 if (ShuffleMask[i] < 0)
6772 PFIndexes[i] = 8;
6773 else
6774 PFIndexes[i] = ShuffleMask[i];
6775 }
6776
6777 // Compute the index in the perfect shuffle table.
6778 unsigned PFTableIndex =
6779 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
6780 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
6781 unsigned Cost = (PFEntry >> 30);
6782
6783 if (Cost <= 4)
6784 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
6785 }
6786
6787 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
6788 if (EltSize >= 32) {
6789 // Do the expansion with floating-point types, since that is what the VFP
6790 // registers are defined to use, and since i64 is not legal.
6791 EVT EltVT = EVT::getFloatingPointVT(EltSize);
6792 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
6793 V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
6794 V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
6795 SmallVector<SDValue, 8> Ops;
6796 for (unsigned i = 0; i < NumElts; ++i) {
6797 if (ShuffleMask[i] < 0)
6798 Ops.push_back(DAG.getUNDEF(EltVT));
6799 else
6800 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
6801 ShuffleMask[i] < (int)NumElts ? V1 : V2,
6802 DAG.getConstant(ShuffleMask[i] & (NumElts-1),
6803 dl, MVT::i32)));
6804 }
6805 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
6806 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
6807 }
6808
6809 if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
6810 return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
6811
6812 if (VT == MVT::v8i8)
6813 if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
6814 return NewOp;
6815
6816 return SDValue();
6817}
6818
6819static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
6820 // INSERT_VECTOR_ELT is legal only for immediate indexes.
6821 SDValue Lane = Op.getOperand(2);
6822 if (!isa<ConstantSDNode>(Lane))
6823 return SDValue();
6824
6825 return Op;
6826}
6827
6828static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
6829 // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
6830 SDValue Lane = Op.getOperand(1);
6831 if (!isa<ConstantSDNode>(Lane))
6832 return SDValue();
6833
6834 SDValue Vec = Op.getOperand(0);
6835 if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) {
6836 SDLoc dl(Op);
6837 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
6838 }
6839
6840 return Op;
6841}
6842
6843static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
6844 // The only time a CONCAT_VECTORS operation can have legal types is when
6845 // two 64-bit vectors are concatenated to a 128-bit vector.
6846 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
6847 "unexpected CONCAT_VECTORS");
6848 SDLoc dl(Op);
6849 SDValue Val = DAG.getUNDEF(MVT::v2f64);
6850 SDValue Op0 = Op.getOperand(0);
6851 SDValue Op1 = Op.getOperand(1);
6852 if (!Op0.isUndef())
6853 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
6854 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
6855 DAG.getIntPtrConstant(0, dl));
6856 if (!Op1.isUndef())
6857 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
6858 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
6859 DAG.getIntPtrConstant(1, dl));
6860 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
6861}
6862
6863/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
6864/// element has been zero/sign-extended, depending on the isSigned parameter,
6865/// from an integer type half its size.
6866static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
6867 bool isSigned) {
6868 // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
6869 EVT VT = N->getValueType(0);
6870 if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
6871 SDNode *BVN = N->getOperand(0).getNode();
6872 if (BVN->getValueType(0) != MVT::v4i32 ||
6873 BVN->getOpcode() != ISD::BUILD_VECTOR)
6874 return false;
6875 unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
6876 unsigned HiElt = 1 - LoElt;
6877 ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
6878 ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
6879 ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
6880 ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
6881 if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
6882 return false;
6883 if (isSigned) {
6884 if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
6885 Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
6886 return true;
6887 } else {
6888 if (Hi0->isNullValue() && Hi1->isNullValue())
6889 return true;
6890 }
6891 return false;
6892 }
6893
6894 if (N->getOpcode() != ISD::BUILD_VECTOR)
6895 return false;
6896
6897 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
6898 SDNode *Elt = N->getOperand(i).getNode();
6899 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
6900 unsigned EltSize = VT.getScalarSizeInBits();
6901 unsigned HalfSize = EltSize / 2;
6902 if (isSigned) {
6903 if (!isIntN(HalfSize, C->getSExtValue()))
6904 return false;
6905 } else {
6906 if (!isUIntN(HalfSize, C->getZExtValue()))
6907 return false;
6908 }
6909 continue;
6910 }
6911 return false;
6912 }
6913
6914 return true;
6915}
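// For example, a v4i16 BUILD_VECTOR of the constants <1, 2, 200, 255> passes
// the unsigned check above because each value fits in the low 8 bits, so the
// node can be treated as a zero-extended v4i8 when forming a VMULL.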
6916
6917/// isSignExtended - Check if a node is a vector value that is sign-extended
6918/// or a constant BUILD_VECTOR with sign-extended elements.
6919static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
6920 if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
6921 return true;
6922 if (isExtendedBUILD_VECTOR(N, DAG, true))
6923 return true;
6924 return false;
6925}
6926
6927/// isZeroExtended - Check if a node is a vector value that is zero-extended
6928/// or a constant BUILD_VECTOR with zero-extended elements.
6929static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
6930 if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N))
6931 return true;
6932 if (isExtendedBUILD_VECTOR(N, DAG, false))
6933 return true;
6934 return false;
6935}
6936
6937static EVT getExtensionTo64Bits(const EVT &OrigVT) {
6938 if (OrigVT.getSizeInBits() >= 64)
6939 return OrigVT;
6940
6941 assert(OrigVT.isSimple() && "Expecting a simple value type");
6942
6943 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
6944 switch (OrigSimpleTy) {
6945 default: llvm_unreachable("Unexpected Vector Type");
6946 case MVT::v2i8:
6947 case MVT::v2i16:
6948 return MVT::v2i32;
6949 case MVT::v4i8:
6950 return MVT::v4i16;
6951 }
6952}
6953
6954/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
6955/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
6956/// We insert the required extension here to get the vector to fill a D register.
6957static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
6958 const EVT &OrigTy,
6959 const EVT &ExtTy,
6960 unsigned ExtOpcode) {
6961 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
6962 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
6963 // 64-bits we need to insert a new extension so that it will be 64-bits.
6964 assert(ExtTy.is128BitVector() && "Unexpected extension size");
6965 if (OrigTy.getSizeInBits() >= 64)
6966 return N;
6967
6968 // Must extend size to at least 64 bits to be used as an operand for VMULL.
6969 EVT NewVT = getExtensionTo64Bits(OrigTy);
6970
6971 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
6972}
6973
6974/// SkipLoadExtensionForVMULL - return a load of the original vector size that
6975/// does not do any sign/zero extension. If the original vector is less
6976/// than 64 bits, an appropriate extension will be added after the load to
6977/// reach a total size of 64 bits. We have to add the extension separately
6978/// because ARM does not have a sign/zero extending load for vectors.
6979static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
6980 EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
6981
6982 // The load already has the right type.
6983 if (ExtendedTy == LD->getMemoryVT())
6984 return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
6985 LD->getBasePtr(), LD->getPointerInfo(),
6986 LD->getAlignment(), LD->getMemOperand()->getFlags());
6987
6988 // We need to create a zextload/sextload. We cannot just create a load
6989 // followed by a zext/sext node because LowerMUL is also run during normal
6990 // operation legalization where we can't create illegal types.
6991 return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
6992 LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
6993 LD->getMemoryVT(), LD->getAlignment(),
6994 LD->getMemOperand()->getFlags());
6995}
6996
6997/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
6998/// extending load, or BUILD_VECTOR with extended elements, return the
6999/// unextended value. The unextended vector should be 64 bits so that it can
7000/// be used as an operand to a VMULL instruction. If the original vector size
7001 /// before extension is less than 64 bits, we add an extension to resize
7002/// the vector to 64 bits.
7003static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
7004 if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
7005 return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
7006 N->getOperand(0)->getValueType(0),
7007 N->getValueType(0),
7008 N->getOpcode());
7009
7010 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
7011 assert((ISD::isSEXTLoad(LD) || ISD::isZEXTLoad(LD)) &&
7012 "Expected extending load");
7013
7014 SDValue newLoad = SkipLoadExtensionForVMULL(LD, DAG);
7015 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), newLoad.getValue(1));
7016 unsigned Opcode = ISD::isSEXTLoad(LD) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
7017 SDValue extLoad =
7018 DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad);
7019 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), extLoad);
7020
7021 return newLoad;
7022 }
7023
7024 // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
7025 // have been legalized as a BITCAST from v4i32.
7026 if (N->getOpcode() == ISD::BITCAST) {
7027 SDNode *BVN = N->getOperand(0).getNode();
7028 assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
7029 BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
7030 unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
7031 return DAG.getBuildVector(
7032 MVT::v2i32, SDLoc(N),
7033 {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)});
7034 }
7035 // Construct a new BUILD_VECTOR with elements truncated to half the size.
7036 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
7037 EVT VT = N->getValueType(0);
7038 unsigned EltSize = VT.getScalarSizeInBits() / 2;
7039 unsigned NumElts = VT.getVectorNumElements();
7040 MVT TruncVT = MVT::getIntegerVT(EltSize);
7041 SmallVector<SDValue, 8> Ops;
7042 SDLoc dl(N);
7043 for (unsigned i = 0; i != NumElts; ++i) {
7044 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
7045 const APInt &CInt = C->getAPIntValue();
7046 // Element types smaller than 32 bits are not legal, so use i32 elements.
7047 // The values are implicitly truncated so sext vs. zext doesn't matter.
7048 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
7049 }
7050 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
7051}
7052
7053static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
7054 unsigned Opcode = N->getOpcode();
7055 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
7056 SDNode *N0 = N->getOperand(0).getNode();
7057 SDNode *N1 = N->getOperand(1).getNode();
7058 return N0->hasOneUse() && N1->hasOneUse() &&
7059 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
7060 }
7061 return false;
7062}
7063
7064static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
7065 unsigned Opcode = N->getOpcode();
7066 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
7067 SDNode *N0 = N->getOperand(0).getNode();
7068 SDNode *N1 = N->getOperand(1).getNode();
7069 return N0->hasOneUse() && N1->hasOneUse() &&
7070 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
7071 }
7072 return false;
7073}
7074
7075static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
7076 // Multiplications are only custom-lowered for 128-bit vectors so that
7077 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
7078 EVT VT = Op.getValueType();
7079 assert(VT.is128BitVector() && VT.isInteger() &&
7080 "unexpected type for custom-lowering ISD::MUL");
7081 SDNode *N0 = Op.getOperand(0).getNode();
7082 SDNode *N1 = Op.getOperand(1).getNode();
7083 unsigned NewOpc = 0;
7084 bool isMLA = false;
7085 bool isN0SExt = isSignExtended(N0, DAG);
7086 bool isN1SExt = isSignExtended(N1, DAG);
7087 if (isN0SExt && isN1SExt)
7088 NewOpc = ARMISD::VMULLs;
7089 else {
7090 bool isN0ZExt = isZeroExtended(N0, DAG);
7091 bool isN1ZExt = isZeroExtended(N1, DAG);
7092 if (isN0ZExt && isN1ZExt)
7093 NewOpc = ARMISD::VMULLu;
7094 else if (isN1SExt || isN1ZExt) {
7095 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
7096 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
7097 if (isN1SExt && isAddSubSExt(N0, DAG)) {
7098 NewOpc = ARMISD::VMULLs;
7099 isMLA = true;
7100 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
7101 NewOpc = ARMISD::VMULLu;
7102 isMLA = true;
7103 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
7104 std::swap(N0, N1);
7105 NewOpc = ARMISD::VMULLu;
7106 isMLA = true;
7107 }
7108 }
7109
7110 if (!NewOpc) {
7111 if (VT == MVT::v2i64)
7112 // Fall through to expand this. It is not legal.
7113 return SDValue();
7114 else
7115 // Other vector multiplications are legal.
7116 return Op;
7117 }
7118 }
7119
7120 // Legalize to a VMULL instruction.
7121 SDLoc DL(Op);
7122 SDValue Op0;
7123 SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
7124 if (!isMLA) {
7125 Op0 = SkipExtensionForVMULL(N0, DAG);
7126 assert(Op0.getValueType().is64BitVector() &&
7127 Op1.getValueType().is64BitVector() &&
7128 "unexpected types for extended operands to VMULL");
7129 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
7130 }
7131
7132 // Optimize (zext A + zext B) * C to (VMULL A, C) + (VMULL B, C) during
7133 // isel lowering to take advantage of no-stall back-to-back vmul + vmla.
7134 // vmull q0, d4, d6
7135 // vmlal q0, d5, d6
7136 // is faster than
7137 // vaddl q0, d4, d5
7138 // vmovl q1, d6
7139 // vmul q0, q0, q1
7140 SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
7141 SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
7142 EVT Op1VT = Op1.getValueType();
7143 return DAG.getNode(N0->getOpcode(), DL, VT,
7144 DAG.getNode(NewOpc, DL, VT,
7145 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
7146 DAG.getNode(NewOpc, DL, VT,
7147 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
7148}
7149
7150static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl,
7151 SelectionDAG &DAG) {
7152 // TODO: Should this propagate fast-math-flags?
7153
7154 // Convert to float
7155 // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
7156 // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
7157 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
7158 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
7159 X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
7160 Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
7161 // Get reciprocal estimate.
7162 // float4 recip = vrecpeq_f32(yf);
7163 Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7164 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
7165 Y);
7166 // Because char has a smaller range than uchar, we can actually get away
7167 // without any Newton steps. This requires that we use a weird bias
7168 // of 0xb000, however (again, this has been exhaustively tested).
7169 // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
7170 X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
7171 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
7172 Y = DAG.getConstant(0xb000, dl, MVT::v4i32);
7173 X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
7174 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
7175 // Convert back to short.
7176 X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
7177 X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
7178 return X;
7179}
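// A rough scalar analogue of the sequence above, assuming a hypothetical
// recip_estimate() and bit_cast() in place of the VRECPE intrinsic and the
// BITCAST nodes (illustration only, not part of the lowering itself):
//   float xf = (float)x, yf = (float)y;
//   float r = recip_estimate(yf);                   // low-precision 1/yf
//   int   q = bit_cast<int>(xf * r) + 0xb000;       // nudge the raw bits up
//   short result = (short)(int)bit_cast<float>(q);  // truncate back to i16
// The bias raises the slightly-low estimate so the truncation lands on the
// correct quotient (per the comment above, the value was found by testing).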
7180
7181static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl,
7182 SelectionDAG &DAG) {
7183 // TODO: Should this propagate fast-math-flags?
7184
7185 SDValue N2;
7186 // Convert to float.
7187 // float4 yf = vcvt_f32_s32(vmovl_s16(y));
7188 // float4 xf = vcvt_f32_s32(vmovl_s16(x));
7189 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
7190 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
7191 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
7192 N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
7193
7194 // Use reciprocal estimate and one refinement step.
7195 // float4 recip = vrecpeq_f32(yf);
7196 // recip *= vrecpsq_f32(yf, recip);
7197 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7198 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
7199 N1);
7200 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7201 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
7202 N1, N2);
7203 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
7204 // Because short has a smaller range than ushort, we can actually get away
7205 // with only a single Newton step. This requires that we use a weird bias
7206 // of 0x89, however (again, this has been exhaustively tested).
7207 // float4 result = as_float4(as_int4(xf*recip) + 0x89);
7208 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
7209 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
7210 N1 = DAG.getConstant(0x89, dl, MVT::v4i32);
7211 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
7212 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
7213 // Convert back to integer and return.
7214 // return vmovn_s32(vcvt_s32_f32(result));
7215 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
7216 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
7217 return N0;
7218}
7219
7220static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
7221 EVT VT = Op.getValueType();
7222 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
7223 "unexpected type for custom-lowering ISD::SDIV");
7224
7225 SDLoc dl(Op);
7226 SDValue N0 = Op.getOperand(0);
7227 SDValue N1 = Op.getOperand(1);
7228 SDValue N2, N3;
7229
7230 if (VT == MVT::v8i8) {
7231 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
7232 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
7233
7234 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7235 DAG.getIntPtrConstant(4, dl));
7236 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7237 DAG.getIntPtrConstant(4, dl));
7238 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7239 DAG.getIntPtrConstant(0, dl));
7240 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7241 DAG.getIntPtrConstant(0, dl));
7242
7243 N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
7244 N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
7245
7246 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
7247 N0 = LowerCONCAT_VECTORS(N0, DAG);
7248
7249 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
7250 return N0;
7251 }
7252 return LowerSDIV_v4i16(N0, N1, dl, DAG);
7253}
7254
7255static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
7256 // TODO: Should this propagate fast-math-flags?
7257 EVT VT = Op.getValueType();
7258 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
7259 "unexpected type for custom-lowering ISD::UDIV");
7260
7261 SDLoc dl(Op);
7262 SDValue N0 = Op.getOperand(0);
7263 SDValue N1 = Op.getOperand(1);
7264 SDValue N2, N3;
7265
7266 if (VT == MVT::v8i8) {
7267 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
7268 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
7269
7270 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7271 DAG.getIntPtrConstant(4, dl));
7272 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7273 DAG.getIntPtrConstant(4, dl));
7274 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7275 DAG.getIntPtrConstant(0, dl));
7276 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7277 DAG.getIntPtrConstant(0, dl));
7278
7279 N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
7280 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
7281
7282 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
7283 N0 = LowerCONCAT_VECTORS(N0, DAG);
7284
7285 N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
7286 DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl,
7287 MVT::i32),
7288 N0);
7289 return N0;
7290 }
7291
7292 // v4i16 udiv ... Convert to float.
7293 // float4 yf = vcvt_f32_s32(vmovl_u16(y));
7294 // float4 xf = vcvt_f32_s32(vmovl_u16(x));
7295 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
7296 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
7297 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
7298 SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
7299
7300 // Use reciprocal estimate and two refinement steps.
7301 // float4 recip = vrecpeq_f32(yf);
7302 // recip *= vrecpsq_f32(yf, recip);
7303 // recip *= vrecpsq_f32(yf, recip);
7304 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7305 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
7306 BN1);
7307 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7308 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
7309 BN1, N2);
7310 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
7311 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7312 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
7313 BN1, N2);
7314 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
7315 // Simply multiplying by the reciprocal estimate can leave us a few ulps
7316 // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
7317 // and that it will never cause us to return an answer too large).
7318 // float4 result = as_float4(as_int4(xf*recip) + 2);
7319 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
7320 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
7321 N1 = DAG.getConstant(2, dl, MVT::v4i32);
7322 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
7323 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
7324 // Convert back to integer and return.
7325 // return vmovn_u32(vcvt_s32_f32(result));
7326 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
7327 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
7328 return N0;
7329}
7330
7331static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
7332 EVT VT = Op.getNode()->getValueType(0);
7333 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
7334
7335 unsigned Opc;
7336 bool ExtraOp = false;
7337 switch (Op.getOpcode()) {
7338 default: llvm_unreachable("Invalid code");
7339 case ISD::ADDC: Opc = ARMISD::ADDC; break;
7340 case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break;
7341 case ISD::SUBC: Opc = ARMISD::SUBC; break;
7342 case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break;
7343 }
7344
7345 if (!ExtraOp)
7346 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
7347 Op.getOperand(1));
7348 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
7349 Op.getOperand(1), Op.getOperand(2));
7350}
7351
7352SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
7353 assert(Subtarget->isTargetDarwin());
7354
7355 // For iOS, we want to call an alternative entry point: __sincos_stret;
7356 // the return values are passed via sret.
7357 SDLoc dl(Op);
7358 SDValue Arg = Op.getOperand(0);
7359 EVT ArgVT = Arg.getValueType();
7360 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
7361 auto PtrVT = getPointerTy(DAG.getDataLayout());
7362
7363 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
7364 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7365
7366 // Pair of floats / doubles used to pass the result.
7367 Type *RetTy = StructType::get(ArgTy, ArgTy);
7368 auto &DL = DAG.getDataLayout();
7369
7370 ArgListTy Args;
7371 bool ShouldUseSRet = Subtarget->isAPCS_ABI();
7372 SDValue SRet;
7373 if (ShouldUseSRet) {
7374 // Create stack object for sret.
7375 const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
7376 const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy);
7377 int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
7378 SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL));
7379
7380 ArgListEntry Entry;
7381 Entry.Node = SRet;
7382 Entry.Ty = RetTy->getPointerTo();
7383 Entry.IsSExt = false;
7384 Entry.IsZExt = false;
7385 Entry.IsSRet = true;
7386 Args.push_back(Entry);
7387 RetTy = Type::getVoidTy(*DAG.getContext());
7388 }
7389
7390 ArgListEntry Entry;
7391 Entry.Node = Arg;
7392 Entry.Ty = ArgTy;
7393 Entry.IsSExt = false;
7394 Entry.IsZExt = false;
7395 Args.push_back(Entry);
7396
7397 const char *LibcallName =
7398 (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret";
7399 RTLIB::Libcall LC =
7400 (ArgVT == MVT::f64) ? RTLIB::SINCOS_F64 : RTLIB::SINCOS_F32;
7401 CallingConv::ID CC = getLibcallCallingConv(LC);
7402 SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
7403
7404 TargetLowering::CallLoweringInfo CLI(DAG);
7405 CLI.setDebugLoc(dl)
7406 .setChain(DAG.getEntryNode())
7407 .setCallee(CC, RetTy, Callee, std::move(Args))
7408 .setDiscardResult(ShouldUseSRet);
7409 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
7410
7411 if (!ShouldUseSRet)
7412 return CallResult.first;
7413
7414 SDValue LoadSin =
7415 DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo());
7416
7417 // Address of cos field.
7418 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
7419 DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
7420 SDValue LoadCos =
7421 DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo());
7422
7423 SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
7424 return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
7425 LoadSin.getValue(0), LoadCos.getValue(0));
7426}
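// Sketch of the sret layout assumed above: the stack slot passed to
// __sincos_stret holds { ArgTy sin; ArgTy cos; }, so the sine is loaded from
// offset 0 and the cosine from offset ArgVT.getStoreSize(), which is exactly
// the address formed with the ISD::ADD node.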
7427
7428SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
7429 bool Signed,
7430 SDValue &Chain) const {
7431 EVT VT = Op.getValueType();
7432 assert((VT == MVT::i32 || VT == MVT::i64) &&
7433 "unexpected type for custom lowering DIV");
7434 SDLoc dl(Op);
7435
7436 const auto &DL = DAG.getDataLayout();
7437 const auto &TLI = DAG.getTargetLoweringInfo();
7438
7439 const char *Name = nullptr;
7440 if (Signed)
7441 Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64";
7442 else
7443 Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64";
7444
7445 SDValue ES = DAG.getExternalSymbol(Name, TLI.getPointerTy(DL));
7446
7447 ARMTargetLowering::ArgListTy Args;
7448
7449 for (auto AI : {1, 0}) {
7450 ArgListEntry Arg;
7451 Arg.Node = Op.getOperand(AI);
7452 Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext());
7453 Args.push_back(Arg);
7454 }
7455
7456 CallLoweringInfo CLI(DAG);
7457 CLI.setDebugLoc(dl)
7458 .setChain(Chain)
7459 .setCallee(CallingConv::ARM_AAPCS_VFP, VT.getTypeForEVT(*DAG.getContext()),
7460 ES, std::move(Args));
7461
7462 return LowerCallTo(CLI).first;
7463}
7464
7465SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
7466 bool Signed) const {
7467 assert(Op.getValueType() == MVT::i32 &&
7468 "unexpected type for custom lowering DIV");
7469 SDLoc dl(Op);
7470
7471 SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
7472 DAG.getEntryNode(), Op.getOperand(1));
7473
7474 return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
7475}
7476
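// Builds the ARMISD::WIN__DBZCHK node that checks the denominator for zero.
// For an i64 divide the two 32-bit halves are ORed together first, so the
// check fires exactly when the full 64-bit divisor is zero.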
7477static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain) {
7478 SDLoc DL(N);
7479 SDValue Op = N->getOperand(1);
7480 if (N->getValueType(0) == MVT::i32)
7481 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op);
7482 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
7483 DAG.getConstant(0, DL, MVT::i32));
7484 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
7485 DAG.getConstant(1, DL, MVT::i32));
7486 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain,
7487 DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi));
7488}
7489
7490void ARMTargetLowering::ExpandDIV_Windows(
7491 SDValue Op, SelectionDAG &DAG, bool Signed,
7492 SmallVectorImpl<SDValue> &Results) const {
7493 const auto &DL = DAG.getDataLayout();
7494 const auto &TLI = DAG.getTargetLoweringInfo();
7495
7496 assert(Op.getValueType() == MVT::i64 &&
7497 "unexpected type for custom lowering DIV");
7498 SDLoc dl(Op);
7499
7500 SDValue DBZCHK = WinDBZCheckDenominator(DAG, Op.getNode(), DAG.getEntryNode());
7501
7502 SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
7503
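// Split the i64 libcall result into the low and high i32 halves that the
// type legalizer expects back from a custom expansion.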
7504 SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);
7505 SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,
7506 DAG.getConstant(32, dl, TLI.getPointerTy(DL)));
7507 Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);
7508
7509 Results.push_back(Lower);
7510 Results.push_back(Upper);
7511}
7512
7513static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
7514 if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getOrdering()))
7515 // Acquire/Release load/store is not legal for targets without a dmb or
7516 // equivalent available.
7517 return SDValue();
7518
7519 // Monotonic load/store is legal for all targets.
7520 return Op;
7521}
7522
7523static void ReplaceREADCYCLECOUNTER(SDNode *N,
7524 SmallVectorImpl<SDValue> &Results,
7525 SelectionDAG &DAG,
7526 const ARMSubtarget *Subtarget) {
7527 SDLoc DL(N);
7528 // Under Power Management extensions, the cycle-count is:
7529 // mrc p15, #0, <Rt>, c9, c13, #0
7530 SDValue Ops[] = { N->getOperand(0), // Chain
7531 DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
7532 DAG.getConstant(15, DL, MVT::i32),
7533 DAG.getConstant(0, DL, MVT::i32),
7534 DAG.getConstant(9, DL, MVT::i32),
7535 DAG.getConstant(13, DL, MVT::i32),
7536 DAG.getConstant(0, DL, MVT::i32)
7537 };
7538
7539 SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
7540 DAG.getVTList(MVT::i32, MVT::Other), Ops);
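// Only a 32-bit counter value is read; widen it to the i64 result type by
// pairing it with a zero high word.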
7541 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
7542 DAG.getConstant(0, DL, MVT::i32)));
7543 Results.push_back(Cycles32.getValue(1));
7544}
7545
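// Packs an i64 value into an untyped GPRPair by splitting it into two i32
// halves and emitting a REG_SEQUENCE with the gsub_0/gsub_1 subregister
// indices; used to build operands for the CMP_SWAP_64 pseudo below.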
7546static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
7547 SDLoc dl(V.getNode());
7548 SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i32);
7549 SDValue VHi = DAG.getAnyExtOrTrunc(
7550 DAG.getNode(ISD::SRL, dl, MVT::i64, V, DAG.getConstant(32, dl, MVT::i32)),
7551 dl, MVT::i32);
7552 SDValue RegClass =
7553 DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
7554 SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
7555 SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
7556 const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
7557 return SDValue(
7558 DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
7559}
7560
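// Expands a 64-bit atomic compare-and-swap into the CMP_SWAP_64 pseudo,
// which takes the expected and new values as GPRPairs. The two i32 halves
// of the result are extracted again via the gsub_0/gsub_1 subregisters.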
7561static void ReplaceCMP_SWAP_64Results(SDNode *N,
7562 SmallVectorImpl<SDValue> & Results,
7563 SelectionDAG &DAG) {
7564 assert(N->getValueType(0) == MVT::i64 &&
7565 "AtomicCmpSwap on types less than 64 should be legal");
7566 SDValue Ops[] = {N->getOperand(1),
7567 createGPRPairNode(DAG, N->getOperand(2)),
7568 createGPRPairNode(DAG, N->getOperand(3)),
7569 N->getOperand(0)};
7570 SDNode *CmpSwap = DAG.getMachineNode(
7571 ARM::CMP_SWAP_64, SDLoc(N),
7572 DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops);
7573
7574 MachineFunction &MF = DAG.getMachineFunction();
7575 MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
7576 MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
7577 cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
7578
7579 Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_0, SDLoc(N), MVT::i32,
7580 SDValue(CmpSwap, 0)));
7581 Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_1, SDLoc(N), MVT::i32,
7582 SDValue(CmpSwap, 0)));
7583 Results.push_back(SDValue(CmpSwap, 2));
7584}
7585
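// On MSVCRT targets FPOWI is lowered to a plain pow/powf libcall: the
// integer exponent is first converted with SINT_TO_FP, and the call is
// emitted as a tail call when the node is in tail-call position.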
7586static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget,
7587 SelectionDAG &DAG) {
7588 const auto &TLI = DAG.getTargetLoweringInfo();
7589
7590 assert(Subtarget.getTargetTriple().isOSMSVCRT() &&
7591 "Custom lowering is MSVCRT specific!");
7592
7593 SDLoc dl(Op);
7594 SDValue Val = Op.getOperand(0);
7595 MVT Ty = Val->getSimpleValueType(0);
7596 SDValue Exponent = DAG.getNode(ISD::SINT_TO_FP, dl, Ty, Op.getOperand(1));
7597 SDValue Callee = DAG.getExternalSymbol(Ty == MVT::f32 ? "powf" : "pow",
7598 TLI.getPointerTy(DAG.getDataLayout()));
7599
7600 TargetLowering::ArgListTy Args;
7601 TargetLowering::ArgListEntry Entry;
7602
7603 Entry.Node = Val;
7604 Entry.Ty = Val.getValueType().getTypeForEVT(*DAG.getContext());
7605 Entry.IsZExt = true;
7606 Args.push_back(Entry);
7607
7608 Entry.Node = Exponent;
7609 Entry.Ty = Exponent.getValueType().getTypeForEVT(*DAG.getContext());
7610 Entry.IsZExt = true;
7611 Args.push_back(Entry);
7612
7613 Type *LCRTy = Val.getValueType().getTypeForEVT(*DAG.getContext());
7614
7615 // The in-chain to the call is the entry node. If we are emitting a
7616 // tailcall, the chain will be mutated if the node has a non-entry input
7617 // chain.
7618 SDValue InChain = DAG.getEntryNode();
7619 SDValue TCChain = InChain;
7620
7621 const auto *F = DAG.getMachineFunction().getFunction();
7622 bool IsTC = TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
7623 F->getReturnType() == LCRTy;
7624 if (IsTC)
7625 InChain = TCChain;
7626
7627 TargetLowering::CallLoweringInfo CLI(DAG);
7628 CLI.setDebugLoc(dl)
7629 .setChain(InChain)
7630 .setCallee(CallingConv::ARM_AAPCS_VFP, LCRTy, Callee, std::move(Args))
7631 .setTailCall(IsTC);
7632 std::pair<SDValue, SDValue> CI = TLI.LowerCallTo(CLI);
7633
7634 // Return the chain (the DAG root) if it is a tail call
7635 return !CI.second.getNode() ? DAG.getRoot() : CI.first;
7636}
7637
7638SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
7639 switch (Op.getOpcode()) {
7640 default: llvm_unreachable("Don't know how to custom lower this!");
7641 case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
7642 case ISD::ConstantPool:
7643 if (Subtarget->genExecuteOnly())
7644 llvm_unreachable("execute-only should not generate constant pools");
7645 return LowerConstantPool(Op, DAG);
7646 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
7647 case ISD::GlobalAddress:
7648 switch (Subtarget->getTargetTriple().getObjectFormat()) {
7649 default: llvm_unreachable("unknown object format");
7650 case Triple::COFF:
7651 return LowerGlobalAddressWindows(Op, DAG);
7652 case Triple::ELF:
7653 return LowerGlobalAddressELF(Op, DAG);
7654 case Triple::MachO:
7655 return LowerGlobalAddressDarwin(Op, DAG);
7656 }
7657 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
7658 case ISD::SELECT: return LowerSELECT(Op, DAG);
7659 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
7660 case ISD::BR_CC: return LowerBR_CC(Op, DAG);
7661 case ISD::BR_JT: return LowerBR_JT(Op, DAG);
7662 case ISD::VASTART: return LowerVASTART(Op, DAG);
7663 case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
7664 case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
7665 case ISD::SINT_TO_FP:
7666 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
7667 case ISD::FP_TO_SINT:
7668 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
7669 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
7670 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
7671 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
7672 case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
7673 case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
7674 case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
7675 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
7676 Subtarget);
7677 case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG);
7678 case ISD::SHL:
7679 case ISD::SRL:
7680 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
7681 case ISD::SREM: return LowerREM(Op.getNode(), DAG);
7682 case ISD::UREM: return LowerREM(Op.getNode(), DAG);
7683 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
7684 case ISD::SRL_PARTS:
7685 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
7686 case ISD::CTTZ:
7687 case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
7688 case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
7689 case ISD::SETCC: return LowerVSETCC(Op, DAG);
7690 case ISD::SETCCE: return LowerSETCCE(Op, DAG);
7691 case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
7692 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
7693 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
7694 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
7695 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
7696 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
7697 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
7698 case ISD::MUL: return LowerMUL(Op, DAG);
7699 case ISD::SDIV:
7700 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
7701 return LowerDIV_Windows(Op, DAG, /* Signed */ true);
7702 return LowerSDIV(Op, DAG);
7703 case ISD::UDIV:
7704 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
7705 return LowerDIV_Windows(Op, DAG, /* Signed */ false);
7706 return LowerUDIV(Op, DAG);
7707 case ISD::ADDC:
7708 case ISD::ADDE:
7709 case ISD::SUBC:
7710 case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
7711 case ISD::SADDO:
7712 case ISD::UADDO:
7713 case ISD::SSUBO:
7714 case ISD::USUBO:
7715 return LowerXALUO(Op, DAG);
7716 case ISD::ATOMIC_LOAD:
7717 case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
7718 case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
7719 case ISD::SDIVREM:
7720 case ISD::UDIVREM: return LowerDivRem(Op, DAG);
7721 case ISD::DYNAMIC_STACKALLOC:
7722 if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
7723 return LowerDYNAMIC_STACKALLOC(Op, DAG);
7724 llvm_unreachable("Don't know how to custom lower this!");
7725 case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
7726 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
7727 case ISD::FPOWI: return LowerFPOWI(Op, *Subtarget, DAG);
7728 case ARMISD::WIN__DBZCHK: return SDValue();
7729 }
7730}
7731
7732static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl<SDValue> &Results,
7733 SelectionDAG &DAG) {
7734 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
7735 unsigned Opc = 0;
7736