Line data Source code
1 : //===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : // This file defines the interfaces that ARM uses to lower LLVM code into a
11 : // selection DAG.
12 : //
13 : //===----------------------------------------------------------------------===//
14 :
15 : #include "ARMISelLowering.h"
16 : #include "ARMBaseInstrInfo.h"
17 : #include "ARMBaseRegisterInfo.h"
18 : #include "ARMCallingConv.h"
19 : #include "ARMConstantPoolValue.h"
20 : #include "ARMMachineFunctionInfo.h"
21 : #include "ARMPerfectShuffle.h"
22 : #include "ARMRegisterInfo.h"
23 : #include "ARMSelectionDAGInfo.h"
24 : #include "ARMSubtarget.h"
25 : #include "MCTargetDesc/ARMAddressingModes.h"
26 : #include "MCTargetDesc/ARMBaseInfo.h"
27 : #include "Utils/ARMBaseInfo.h"
28 : #include "llvm/ADT/APFloat.h"
29 : #include "llvm/ADT/APInt.h"
30 : #include "llvm/ADT/ArrayRef.h"
31 : #include "llvm/ADT/BitVector.h"
32 : #include "llvm/ADT/DenseMap.h"
33 : #include "llvm/ADT/STLExtras.h"
34 : #include "llvm/ADT/SmallPtrSet.h"
35 : #include "llvm/ADT/SmallVector.h"
36 : #include "llvm/ADT/Statistic.h"
37 : #include "llvm/ADT/StringExtras.h"
38 : #include "llvm/ADT/StringRef.h"
39 : #include "llvm/ADT/StringSwitch.h"
40 : #include "llvm/ADT/Triple.h"
41 : #include "llvm/ADT/Twine.h"
42 : #include "llvm/Analysis/VectorUtils.h"
43 : #include "llvm/CodeGen/CallingConvLower.h"
44 : #include "llvm/CodeGen/ISDOpcodes.h"
45 : #include "llvm/CodeGen/IntrinsicLowering.h"
46 : #include "llvm/CodeGen/MachineBasicBlock.h"
47 : #include "llvm/CodeGen/MachineConstantPool.h"
48 : #include "llvm/CodeGen/MachineFrameInfo.h"
49 : #include "llvm/CodeGen/MachineFunction.h"
50 : #include "llvm/CodeGen/MachineInstr.h"
51 : #include "llvm/CodeGen/MachineInstrBuilder.h"
52 : #include "llvm/CodeGen/MachineJumpTableInfo.h"
53 : #include "llvm/CodeGen/MachineMemOperand.h"
54 : #include "llvm/CodeGen/MachineOperand.h"
55 : #include "llvm/CodeGen/MachineRegisterInfo.h"
56 : #include "llvm/CodeGen/RuntimeLibcalls.h"
57 : #include "llvm/CodeGen/SelectionDAG.h"
58 : #include "llvm/CodeGen/SelectionDAGNodes.h"
59 : #include "llvm/CodeGen/TargetInstrInfo.h"
60 : #include "llvm/CodeGen/TargetLowering.h"
61 : #include "llvm/CodeGen/TargetOpcodes.h"
62 : #include "llvm/CodeGen/TargetRegisterInfo.h"
63 : #include "llvm/CodeGen/TargetSubtargetInfo.h"
64 : #include "llvm/CodeGen/ValueTypes.h"
65 : #include "llvm/IR/Attributes.h"
66 : #include "llvm/IR/CallingConv.h"
67 : #include "llvm/IR/Constant.h"
68 : #include "llvm/IR/Constants.h"
69 : #include "llvm/IR/DataLayout.h"
70 : #include "llvm/IR/DebugLoc.h"
71 : #include "llvm/IR/DerivedTypes.h"
72 : #include "llvm/IR/Function.h"
73 : #include "llvm/IR/GlobalAlias.h"
74 : #include "llvm/IR/GlobalValue.h"
75 : #include "llvm/IR/GlobalVariable.h"
76 : #include "llvm/IR/IRBuilder.h"
77 : #include "llvm/IR/InlineAsm.h"
78 : #include "llvm/IR/Instruction.h"
79 : #include "llvm/IR/Instructions.h"
80 : #include "llvm/IR/IntrinsicInst.h"
81 : #include "llvm/IR/Intrinsics.h"
82 : #include "llvm/IR/Module.h"
83 : #include "llvm/IR/Type.h"
84 : #include "llvm/IR/User.h"
85 : #include "llvm/IR/Value.h"
86 : #include "llvm/MC/MCInstrDesc.h"
87 : #include "llvm/MC/MCInstrItineraries.h"
88 : #include "llvm/MC/MCRegisterInfo.h"
89 : #include "llvm/MC/MCSchedule.h"
90 : #include "llvm/Support/AtomicOrdering.h"
91 : #include "llvm/Support/BranchProbability.h"
92 : #include "llvm/Support/Casting.h"
93 : #include "llvm/Support/CodeGen.h"
94 : #include "llvm/Support/CommandLine.h"
95 : #include "llvm/Support/Compiler.h"
96 : #include "llvm/Support/Debug.h"
97 : #include "llvm/Support/ErrorHandling.h"
98 : #include "llvm/Support/KnownBits.h"
99 : #include "llvm/Support/MachineValueType.h"
100 : #include "llvm/Support/MathExtras.h"
101 : #include "llvm/Support/raw_ostream.h"
102 : #include "llvm/Target/TargetMachine.h"
103 : #include "llvm/Target/TargetOptions.h"
104 : #include <algorithm>
105 : #include <cassert>
106 : #include <cstdint>
107 : #include <cstdlib>
108 : #include <iterator>
109 : #include <limits>
110 : #include <string>
111 : #include <tuple>
112 : #include <utility>
113 : #include <vector>
114 :
115 : using namespace llvm;
116 :
117 : #define DEBUG_TYPE "arm-isel"
118 :
119 : STATISTIC(NumTailCalls, "Number of tail calls");
120 : STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
121 : STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
122 : STATISTIC(NumConstpoolPromoted,
123 : "Number of constants with their storage promoted into constant pools");
124 :
125 : static cl::opt<bool>
126 : ARMInterworking("arm-interworking", cl::Hidden,
127 : cl::desc("Enable / disable ARM interworking (for debugging only)"),
128 : cl::init(true));
129 :
130 : static cl::opt<bool> EnableConstpoolPromotion(
131 : "arm-promote-constant", cl::Hidden,
132 : cl::desc("Enable / disable promotion of unnamed_addr constants into "
133 : "constant pools"),
134 : cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
135 : static cl::opt<unsigned> ConstpoolPromotionMaxSize(
136 : "arm-promote-constant-max-size", cl::Hidden,
137 : cl::desc("Maximum size of constant to promote into a constant pool"),
138 : cl::init(64));
139 : static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
140 : "arm-promote-constant-max-total", cl::Hidden,
141 : cl::desc("Maximum size of ALL constants to promote into a constant pool"),
142 : cl::init(128));
143 :
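// These cl::opt flags are hidden llc/debugging knobs rather than part of the
// public driver interface. As a rough illustration only (the triple and input
// file are placeholders), constant-pool promotion could be exercised with:
//   llc -mtriple=armv7a-none-eabi -arm-promote-constant test.ll
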
144 : // The APCS parameter registers.
145 : static const MCPhysReg GPRArgRegs[] = {
146 : ARM::R0, ARM::R1, ARM::R2, ARM::R3
147 : };
148 :
149 28648 : void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
150 : MVT PromotedBitwiseVT) {
151 28648 : if (VT != PromotedLdStVT) {
152 : setOperationAction(ISD::LOAD, VT, Promote);
153 : AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
154 :
155 : setOperationAction(ISD::STORE, VT, Promote);
156 : AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
157 : }
158 :
159 28648 : MVT ElemTy = VT.getVectorElementType();
160 28648 : if (ElemTy != MVT::f64)
161 : setOperationAction(ISD::SETCC, VT, Custom);
162 : setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
163 : setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
164 28648 : if (ElemTy == MVT::i32) {
165 : setOperationAction(ISD::SINT_TO_FP, VT, Custom);
166 : setOperationAction(ISD::UINT_TO_FP, VT, Custom);
167 : setOperationAction(ISD::FP_TO_SINT, VT, Custom);
168 : setOperationAction(ISD::FP_TO_UINT, VT, Custom);
169 : } else {
170 : setOperationAction(ISD::SINT_TO_FP, VT, Expand);
171 : setOperationAction(ISD::UINT_TO_FP, VT, Expand);
172 : setOperationAction(ISD::FP_TO_SINT, VT, Expand);
173 : setOperationAction(ISD::FP_TO_UINT, VT, Expand);
174 : }
175 : setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
176 : setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
177 : setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
178 : setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
179 : setOperationAction(ISD::SELECT, VT, Expand);
180 : setOperationAction(ISD::SELECT_CC, VT, Expand);
181 : setOperationAction(ISD::VSELECT, VT, Expand);
182 : setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
183 28648 : if (VT.isInteger()) {
184 : setOperationAction(ISD::SHL, VT, Custom);
185 : setOperationAction(ISD::SRA, VT, Custom);
186 : setOperationAction(ISD::SRL, VT, Custom);
187 : }
188 :
189 : // Promote all bit-wise operations.
190 28648 : if (VT.isInteger() && VT != PromotedBitwiseVT) {
191 : setOperationAction(ISD::AND, VT, Promote);
192 : AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
193 : setOperationAction(ISD::OR, VT, Promote);
194 : AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
195 : setOperationAction(ISD::XOR, VT, Promote);
196 : AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
197 : }
198 :
199 : // Neon does not support vector divide/remainder operations.
200 : setOperationAction(ISD::SDIV, VT, Expand);
201 : setOperationAction(ISD::UDIV, VT, Expand);
202 : setOperationAction(ISD::FDIV, VT, Expand);
203 : setOperationAction(ISD::SREM, VT, Expand);
204 : setOperationAction(ISD::UREM, VT, Expand);
205 : setOperationAction(ISD::FREM, VT, Expand);
206 :
207 20784 : if (!VT.isFloatingPoint() &&
208 46834 : VT != MVT::v2i64 && VT != MVT::v1i64)
209 93528 : for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
210 : setOperationAction(Opcode, VT, Legal);
211 28648 : }
212 :
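// Register a vector type with the 64-bit NEON D registers (addDRTypeForNEON)
// or with the 128-bit Q registers, modelled here as D-register pairs
// (addQRTypeForNEON), then apply the common NEON actions from addTypeForNEON.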
213 13025 : void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
214 : addRegisterClass(VT, &ARM::DPRRegClass);
215 13025 : addTypeForNEON(VT, MVT::f64, MVT::v2i32);
216 13025 : }
217 :
218 15623 : void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
219 : addRegisterClass(VT, &ARM::DPairRegClass);
220 15623 : addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
221 15623 : }
222 :
223 5050 : ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
224 5050 : const ARMSubtarget &STI)
225 5050 : : TargetLowering(TM), Subtarget(&STI) {
226 5050 : RegInfo = Subtarget->getRegisterInfo();
227 5050 : Itins = Subtarget->getInstrItineraryData();
228 :
229 : setBooleanContents(ZeroOrOneBooleanContent);
230 : setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
231 :
232 5050 : if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
233 : !Subtarget->isTargetWatchOS()) {
234 4273 : bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
235 2012583 : for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
236 2008310 : setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
237 : IsHFTarget ? CallingConv::ARM_AAPCS_VFP
238 : : CallingConv::ARM_AAPCS);
239 : }
240 :
241 10100 : if (Subtarget->isTargetMachO()) {
242 : // Uses VFP for Thumb libfuncs if available.
243 469 : if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
244 1184 : Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
245 : static const struct {
246 : const RTLIB::Libcall Op;
247 : const char * const Name;
248 : const ISD::CondCode Cond;
249 : } LibraryCalls[] = {
250 : // Single-precision floating-point arithmetic.
251 : { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
252 : { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
253 : { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
254 : { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
255 :
256 : // Double-precision floating-point arithmetic.
257 : { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
258 : { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
259 : { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
260 : { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
261 :
262 : // Single-precision comparisons.
263 : { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
264 : { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
265 : { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
266 : { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
267 : { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
268 : { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
269 : { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
270 : { RTLIB::O_F32, "__unordsf2vfp", ISD::SETEQ },
271 :
272 : // Double-precision comparisons.
273 : { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
274 : { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
275 : { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
276 : { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
277 : { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
278 : { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
279 : { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
280 : { RTLIB::O_F64, "__unorddf2vfp", ISD::SETEQ },
281 :
282 : // Floating-point to integer conversions.
283 : // i64 conversions are done via library routines even when generating VFP
284 : // instructions, so use the same ones.
285 : { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
286 : { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
287 : { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
288 : { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
289 :
290 : // Conversions between floating types.
291 : { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
292 : { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
293 :
294 : // Integer to floating-point conversions.
295 : // i64 conversions are done via library routines even when generating VFP
296 : // instructions, so use the same ones.
297 : // FIXME: There appears to be some naming inconsistency in ARM libgcc:
298 : // e.g., __floatunsidf vs. __floatunssidfvfp.
299 : { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
300 : { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
301 : { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
302 : { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
303 : };
304 :
305 12390 : for (const auto &LC : LibraryCalls) {
306 12036 : setLibcallName(LC.Op, LC.Name);
307 12036 : if (LC.Cond != ISD::SETCC_INVALID)
308 5664 : setCmpLibcallCC(LC.Op, LC.Cond);
309 : }
310 : }
311 : }
312 :
313 : // These libcalls are not available in 32-bit.
314 : setLibcallName(RTLIB::SHL_I128, nullptr);
315 : setLibcallName(RTLIB::SRL_I128, nullptr);
316 : setLibcallName(RTLIB::SRA_I128, nullptr);
317 :
318 : // RTLIB
319 5050 : if (Subtarget->isAAPCS_ABI() &&
320 4301 : (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
321 1034 : Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
322 : static const struct {
323 : const RTLIB::Libcall Op;
324 : const char * const Name;
325 : const CallingConv::ID CC;
326 : const ISD::CondCode Cond;
327 : } LibraryCalls[] = {
328 : // Double-precision floating-point arithmetic helper functions
329 : // RTABI chapter 4.1.2, Table 2
330 : { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
331 : { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
332 : { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
333 : { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
334 :
335 : // Double-precision floating-point comparison helper functions
336 : // RTABI chapter 4.1.2, Table 3
337 : { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
338 : { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
339 : { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
340 : { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
341 : { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
342 : { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
343 : { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
344 : { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
345 :
346 : // Single-precision floating-point arithmetic helper functions
347 : // RTABI chapter 4.1.2, Table 4
348 : { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
349 : { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
350 : { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
351 : { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
352 :
353 : // Single-precision floating-point comparison helper functions
354 : // RTABI chapter 4.1.2, Table 5
355 : { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
356 : { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
357 : { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
358 : { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
359 : { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
360 : { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
361 : { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
362 : { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
363 :
364 : // Floating-point to integer conversions.
365 : // RTABI chapter 4.1.2, Table 6
366 : { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
367 : { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
368 : { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
369 : { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
370 : { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
371 : { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
372 : { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
373 : { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
374 :
375 : // Conversions between floating types.
376 : // RTABI chapter 4.1.2, Table 7
377 : { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
378 : { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
379 : { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
380 :
381 : // Integer to floating-point conversions.
382 : // RTABI chapter 4.1.2, Table 8
383 : { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
384 : { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
385 : { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
386 : { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
387 : { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
388 : { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
389 : { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
390 : { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
391 :
392 : // Long long helper functions
393 : // RTABI chapter 4.2, Table 9
394 : { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
395 : { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
396 : { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
397 : { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
398 :
399 : // Integer division functions
400 : // RTABI chapter 4.3.1
401 : { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
402 : { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
403 : { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
404 : { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
405 : { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
406 : { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
407 : { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
408 : { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
409 : };
410 :
411 188048 : for (const auto &LC : LibraryCalls) {
412 184690 : setLibcallName(LC.Op, LC.Name);
413 184690 : setLibcallCallingConv(LC.Op, LC.CC);
414 184690 : if (LC.Cond != ISD::SETCC_INVALID)
415 53728 : setCmpLibcallCC(LC.Op, LC.Cond);
416 : }
417 :
418 : // EABI dependent RTLIB
419 3358 : if (TM.Options.EABIVersion == EABI::EABI4 ||
420 : TM.Options.EABIVersion == EABI::EABI5) {
421 : static const struct {
422 : const RTLIB::Libcall Op;
423 : const char *const Name;
424 : const CallingConv::ID CC;
425 : const ISD::CondCode Cond;
426 : } MemOpsLibraryCalls[] = {
427 : // Memory operations
428 : // RTABI chapter 4.3.4
429 : { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
430 : { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
431 : { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
432 : };
433 :
434 8012 : for (const auto &LC : MemOpsLibraryCalls) {
435 6009 : setLibcallName(LC.Op, LC.Name);
436 6009 : setLibcallCallingConv(LC.Op, LC.CC);
437 6009 : if (LC.Cond != ISD::SETCC_INVALID)
438 0 : setCmpLibcallCC(LC.Op, LC.Cond);
439 : }
440 : }
441 : }
442 :
443 10100 : if (Subtarget->isTargetWindows()) {
444 : static const struct {
445 : const RTLIB::Libcall Op;
446 : const char * const Name;
447 : const CallingConv::ID CC;
448 : } LibraryCalls[] = {
449 : { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
450 : { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
451 : { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
452 : { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
453 : { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
454 : { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
455 : { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
456 : { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
457 : };
458 :
459 756 : for (const auto &LC : LibraryCalls) {
460 672 : setLibcallName(LC.Op, LC.Name);
461 672 : setLibcallCallingConv(LC.Op, LC.CC);
462 : }
463 : }
464 :
465 : // Use divmod compiler-rt calls for iOS 5.0 and later.
466 5050 : if (Subtarget->isTargetMachO() &&
467 382 : !(Subtarget->isTargetIOS() &&
468 382 : Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
469 : setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
470 : setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
471 : }
472 :
473 : // The half <-> float conversion functions are always soft-float on
474 : // non-watchOS platforms, but are needed for some targets which use a
474 : // non-watchOS platforms, but are needed for some targets which use a
475 : // hard-float calling convention by default.
476 10100 : if (!Subtarget->isTargetWatchABI()) {
477 5025 : if (Subtarget->isAAPCS_ABI()) {
478 : setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
479 : setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
480 : setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
481 : } else {
482 : setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
483 : setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
484 : setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
485 : }
486 : }
487 :
488 : // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
489 : // a __gnu_ prefix (which is the default).
490 5050 : if (Subtarget->isTargetAEABI()) {
491 : static const struct {
492 : const RTLIB::Libcall Op;
493 : const char * const Name;
494 : const CallingConv::ID CC;
495 : } LibraryCalls[] = {
496 : { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
497 : { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
498 : { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
499 : };
500 :
501 7624 : for (const auto &LC : LibraryCalls) {
502 5718 : setLibcallName(LC.Op, LC.Name);
503 5718 : setLibcallCallingConv(LC.Op, LC.CC);
504 : }
505 : }
506 :
507 5050 : if (Subtarget->isThumb1Only())
508 : addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
509 : else
510 : addRegisterClass(MVT::i32, &ARM::GPRRegClass);
511 :
512 5050 : if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
513 3028 : !Subtarget->isThumb1Only()) {
514 : addRegisterClass(MVT::f32, &ARM::SPRRegClass);
515 : addRegisterClass(MVT::f64, &ARM::DPRRegClass);
516 : }
517 :
518 5050 : if (Subtarget->hasFullFP16()) {
519 : addRegisterClass(MVT::f16, &ARM::HPRRegClass);
520 : setOperationAction(ISD::BITCAST, MVT::i16, Custom);
521 : setOperationAction(ISD::BITCAST, MVT::i32, Custom);
522 : setOperationAction(ISD::BITCAST, MVT::f16, Custom);
523 :
524 : setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
525 : setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
526 : }
527 :
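// Start from conservative defaults for every vector type: expand extending
// loads, truncating stores, the widening multiplies and BSWAP. The NEON block
// below then re-legalizes the operations that are actually supported on the
// types it registers.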
528 484800 : for (MVT VT : MVT::vector_valuetypes()) {
529 46056000 : for (MVT InnerVT : MVT::vector_valuetypes()) {
530 : setTruncStoreAction(VT, InnerVT, Expand);
531 : setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
532 : setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
533 : setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
534 : }
535 :
536 : setOperationAction(ISD::MULHS, VT, Expand);
537 : setOperationAction(ISD::SMUL_LOHI, VT, Expand);
538 : setOperationAction(ISD::MULHU, VT, Expand);
539 : setOperationAction(ISD::UMUL_LOHI, VT, Expand);
540 :
541 : setOperationAction(ISD::BSWAP, VT, Expand);
542 : }
543 :
544 : setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
545 : setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
546 :
547 : setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
548 : setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
549 :
550 5050 : if (Subtarget->hasNEON()) {
551 2598 : addDRTypeForNEON(MVT::v2f32);
552 2598 : addDRTypeForNEON(MVT::v8i8);
553 2598 : addDRTypeForNEON(MVT::v4i16);
554 2598 : addDRTypeForNEON(MVT::v2i32);
555 2598 : addDRTypeForNEON(MVT::v1i64);
556 :
557 2598 : addQRTypeForNEON(MVT::v4f32);
558 2598 : addQRTypeForNEON(MVT::v2f64);
559 2598 : addQRTypeForNEON(MVT::v16i8);
560 2598 : addQRTypeForNEON(MVT::v8i16);
561 2598 : addQRTypeForNEON(MVT::v4i32);
562 2598 : addQRTypeForNEON(MVT::v2i64);
563 :
564 2598 : if (Subtarget->hasFullFP16()) {
565 35 : addQRTypeForNEON(MVT::v8f16);
566 35 : addDRTypeForNEON(MVT::v4f16);
567 : }
568 :
569 : // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
570 : // neither Neon nor VFP support any arithmetic operations on it.
571 : // The same applies to v4f32, but keep in mind that vadd, vsub and vmul are
572 : // natively supported for v4f32.
573 : setOperationAction(ISD::FADD, MVT::v2f64, Expand);
574 : setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
575 : setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
576 : // FIXME: Code duplication: FDIV and FREM are expanded always, see
577 : // ARMTargetLowering::addTypeForNEON method for details.
578 : setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
579 : setOperationAction(ISD::FREM, MVT::v2f64, Expand);
580 : // FIXME: Create unittest.
581 : // In other words, find a case where "copysign" appears in a DAG with vector
582 : // operands.
583 : setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
584 : // FIXME: Code duplication: SETCC has custom operation action, see
585 : // ARMTargetLowering::addTypeForNEON method for details.
586 : setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
587 : // FIXME: Create unittest for FNEG and for FABS.
588 : setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
589 : setOperationAction(ISD::FABS, MVT::v2f64, Expand);
590 : setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
591 : setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
592 : setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
593 : setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
594 : setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
595 : setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
596 : setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
597 : setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
598 : setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
599 : // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
600 : setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
601 : setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
602 : setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
603 : setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
604 : setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
605 : setOperationAction(ISD::FMA, MVT::v2f64, Expand);
606 :
607 : setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
608 : setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
609 : setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
610 : setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
611 : setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
612 : setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
613 : setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
614 : setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
615 : setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
616 : setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
617 : setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
618 : setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
619 : setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
620 : setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
621 :
622 : // Mark v2f32 intrinsics.
623 : setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
624 : setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
625 : setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
626 : setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
627 : setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
628 : setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
629 : setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
630 : setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
631 : setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
632 : setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
633 : setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
634 : setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
635 : setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
636 : setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);
637 :
638 : // Neon does not support some operations on v1i64 and v2i64 types.
639 : setOperationAction(ISD::MUL, MVT::v1i64, Expand);
640 : // Custom handling for some quad-vector types to detect VMULL.
641 : setOperationAction(ISD::MUL, MVT::v8i16, Custom);
642 : setOperationAction(ISD::MUL, MVT::v4i32, Custom);
643 : setOperationAction(ISD::MUL, MVT::v2i64, Custom);
644 : // Custom handling for some vector types to avoid expensive expansions
645 : setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
646 : setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
647 : setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
648 : setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
649 : // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
650 : // a destination type that is wider than the source, nor does
651 : // it have a FP_TO_[SU]INT instruction with a narrower destination than
652 : // source.
653 : setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
654 : setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
655 : setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
656 : setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
657 : setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
658 : setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom);
659 : setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
660 : setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
661 :
662 : setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
663 : setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
664 :
665 : // NEON does not have single instruction CTPOP for vectors with element
666 : // types wider than 8 bits. However, custom lowering can leverage the
667 : // v8i8/v16i8 vcnt instruction.
668 : setOperationAction(ISD::CTPOP, MVT::v2i32, Custom);
669 : setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
670 : setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
671 : setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
672 : setOperationAction(ISD::CTPOP, MVT::v1i64, Custom);
673 : setOperationAction(ISD::CTPOP, MVT::v2i64, Custom);
674 :
675 : setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
676 : setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
677 :
678 : // NEON does not have single instruction CTTZ for vectors.
679 : setOperationAction(ISD::CTTZ, MVT::v8i8, Custom);
680 : setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);
681 : setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);
682 : setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
683 :
684 : setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
685 : setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
686 : setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
687 : setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);
688 :
689 : setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i8, Custom);
690 : setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i16, Custom);
691 : setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i32, Custom);
692 : setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v1i64, Custom);
693 :
694 : setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom);
695 : setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom);
696 : setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
697 : setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);
698 :
699 : // NEON only has FMA instructions as of VFP4.
700 2598 : if (!Subtarget->hasVFP4()) {
701 : setOperationAction(ISD::FMA, MVT::v2f32, Expand);
702 : setOperationAction(ISD::FMA, MVT::v4f32, Expand);
703 : }
704 :
705 : setTargetDAGCombine(ISD::INTRINSIC_VOID);
706 : setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
707 : setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
708 : setTargetDAGCombine(ISD::SHL);
709 : setTargetDAGCombine(ISD::SRL);
710 : setTargetDAGCombine(ISD::SRA);
711 : setTargetDAGCombine(ISD::SIGN_EXTEND);
712 : setTargetDAGCombine(ISD::ZERO_EXTEND);
713 : setTargetDAGCombine(ISD::ANY_EXTEND);
714 : setTargetDAGCombine(ISD::BUILD_VECTOR);
715 : setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
716 : setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
717 : setTargetDAGCombine(ISD::STORE);
718 : setTargetDAGCombine(ISD::FP_TO_SINT);
719 : setTargetDAGCombine(ISD::FP_TO_UINT);
720 : setTargetDAGCombine(ISD::FDIV);
721 : setTargetDAGCombine(ISD::LOAD);
722 :
723 : // It is legal to extload from v4i8 to v4i16 or v4i32.
724 15588 : for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
725 18186 : MVT::v2i32}) {
726 1122336 : for (MVT VT : MVT::integer_vector_valuetypes()) {
727 : setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal);
728 : setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal);
729 : setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal);
730 : }
731 : }
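// As an illustrative example (not taken from this file), IR such as
//   %v = load <4 x i8>, <4 x i8>* %p
//   %w = zext <4 x i8> %v to <4 x i32>
// can then be matched roughly as a small load plus lane-widening moves
// instead of being scalarized.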
732 : }
733 :
734 5050 : if (Subtarget->isFPOnlySP()) {
735 : // When targeting a floating-point unit with only single-precision
736 : // operations, f64 is legal for the few double-precision instructions which
737 : // are present. However, no double-precision operations other than moves,
738 : // loads and stores are provided by the hardware.
739 : setOperationAction(ISD::FADD, MVT::f64, Expand);
740 : setOperationAction(ISD::FSUB, MVT::f64, Expand);
741 : setOperationAction(ISD::FMUL, MVT::f64, Expand);
742 : setOperationAction(ISD::FMA, MVT::f64, Expand);
743 : setOperationAction(ISD::FDIV, MVT::f64, Expand);
744 : setOperationAction(ISD::FREM, MVT::f64, Expand);
745 : setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
746 : setOperationAction(ISD::FGETSIGN, MVT::f64, Expand);
747 : setOperationAction(ISD::FNEG, MVT::f64, Expand);
748 : setOperationAction(ISD::FABS, MVT::f64, Expand);
749 : setOperationAction(ISD::FSQRT, MVT::f64, Expand);
750 : setOperationAction(ISD::FSIN, MVT::f64, Expand);
751 : setOperationAction(ISD::FCOS, MVT::f64, Expand);
752 : setOperationAction(ISD::FPOW, MVT::f64, Expand);
753 : setOperationAction(ISD::FLOG, MVT::f64, Expand);
754 : setOperationAction(ISD::FLOG2, MVT::f64, Expand);
755 : setOperationAction(ISD::FLOG10, MVT::f64, Expand);
756 : setOperationAction(ISD::FEXP, MVT::f64, Expand);
757 : setOperationAction(ISD::FEXP2, MVT::f64, Expand);
758 : setOperationAction(ISD::FCEIL, MVT::f64, Expand);
759 : setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
760 : setOperationAction(ISD::FRINT, MVT::f64, Expand);
761 : setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
762 : setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
763 : setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
764 : setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
765 : setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
766 : setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
767 : setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom);
768 : setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom);
769 : setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
770 : setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
771 : }
772 :
773 5050 : computeRegisterProperties(Subtarget->getRegisterInfo());
774 :
775 : // ARM does not have floating-point extending loads.
776 35350 : for (MVT VT : MVT::fp_valuetypes()) {
777 : setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
778 : setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
779 : }
780 :
781 : // ... or truncating stores
782 : setTruncStoreAction(MVT::f64, MVT::f32, Expand);
783 : setTruncStoreAction(MVT::f32, MVT::f16, Expand);
784 : setTruncStoreAction(MVT::f64, MVT::f16, Expand);
785 :
786 : // ARM does not have i1 sign extending load.
787 35350 : for (MVT VT : MVT::integer_valuetypes())
788 : setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
789 :
790 : // ARM supports all 4 flavors of integer indexed load / store.
791 5050 : if (!Subtarget->isThumb1Only()) {
792 18176 : for (unsigned im = (unsigned)ISD::PRE_INC;
793 22720 : im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
794 : setIndexedLoadAction(im, MVT::i1, Legal);
795 : setIndexedLoadAction(im, MVT::i8, Legal);
796 : setIndexedLoadAction(im, MVT::i16, Legal);
797 : setIndexedLoadAction(im, MVT::i32, Legal);
798 : setIndexedStoreAction(im, MVT::i1, Legal);
799 : setIndexedStoreAction(im, MVT::i8, Legal);
800 : setIndexedStoreAction(im, MVT::i16, Legal);
801 : setIndexedStoreAction(im, MVT::i32, Legal);
802 : }
803 : } else {
804 : // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
805 : setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
806 : setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
807 : }
808 :
809 : setOperationAction(ISD::SADDO, MVT::i32, Custom);
810 : setOperationAction(ISD::UADDO, MVT::i32, Custom);
811 : setOperationAction(ISD::SSUBO, MVT::i32, Custom);
812 : setOperationAction(ISD::USUBO, MVT::i32, Custom);
813 :
814 : setOperationAction(ISD::ADDCARRY, MVT::i32, Custom);
815 : setOperationAction(ISD::SUBCARRY, MVT::i32, Custom);
816 :
817 : // i64 operation support.
818 : setOperationAction(ISD::MUL, MVT::i64, Expand);
819 : setOperationAction(ISD::MULHU, MVT::i32, Expand);
820 5050 : if (Subtarget->isThumb1Only()) {
821 : setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
822 : setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
823 : }
824 6607 : if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
825 1557 : || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
826 : setOperationAction(ISD::MULHS, MVT::i32, Expand);
827 :
828 : setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
829 : setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
830 : setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
831 : setOperationAction(ISD::SRL, MVT::i64, Custom);
832 : setOperationAction(ISD::SRA, MVT::i64, Custom);
833 : setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
834 :
835 : // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
836 5050 : if (Subtarget->isThumb1Only()) {
837 : setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
838 : setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
839 : setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
840 : }
841 :
842 6607 : if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
843 : setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
844 :
845 : // ARM does not have ROTL.
846 : setOperationAction(ISD::ROTL, MVT::i32, Expand);
847 484800 : for (MVT VT : MVT::vector_valuetypes()) {
848 : setOperationAction(ISD::ROTL, VT, Expand);
849 : setOperationAction(ISD::ROTR, VT, Expand);
850 : }
851 : setOperationAction(ISD::CTTZ, MVT::i32, Custom);
852 : setOperationAction(ISD::CTPOP, MVT::i32, Expand);
853 5050 : if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
854 : setOperationAction(ISD::CTLZ, MVT::i32, Expand);
855 : setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, LibCall);
856 : }
857 :
858 : // @llvm.readcyclecounter requires the Performance Monitors extension.
859 : // Default to the 0 expansion on unsupported platforms.
860 : // FIXME: Technically there are older ARM CPUs that have
861 : // implementation-specific ways of obtaining this information.
862 5050 : if (Subtarget->hasPerfMon())
863 : setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
864 :
865 : // BSWAP (REV) is only available from ARMv6 onwards.
866 5050 : if (!Subtarget->hasV6Ops())
867 : setOperationAction(ISD::BSWAP, MVT::i32, Expand);
868 :
869 5050 : bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
870 2987 : : Subtarget->hasDivideInARMMode();
871 5050 : if (!hasDivide) {
872 : // These are expanded into libcalls if the CPU doesn't have a hardware divider.
873 : setOperationAction(ISD::SDIV, MVT::i32, LibCall);
874 : setOperationAction(ISD::UDIV, MVT::i32, LibCall);
875 : }
876 :
877 5050 : if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
878 : setOperationAction(ISD::SDIV, MVT::i32, Custom);
879 : setOperationAction(ISD::UDIV, MVT::i32, Custom);
880 :
881 : setOperationAction(ISD::SDIV, MVT::i64, Custom);
882 : setOperationAction(ISD::UDIV, MVT::i64, Custom);
883 : }
884 :
885 : setOperationAction(ISD::SREM, MVT::i32, Expand);
886 : setOperationAction(ISD::UREM, MVT::i32, Expand);
887 :
888 : // Register based DivRem for AEABI (RTABI 4.2)
889 8194 : if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
890 1690 : Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
891 : Subtarget->isTargetWindows()) {
892 : setOperationAction(ISD::SREM, MVT::i64, Custom);
893 : setOperationAction(ISD::UREM, MVT::i64, Custom);
894 3444 : HasStandaloneRem = false;
895 :
896 3444 : if (Subtarget->isTargetWindows()) {
897 : const struct {
898 : const RTLIB::Libcall Op;
899 : const char * const Name;
900 : const CallingConv::ID CC;
901 84 : } LibraryCalls[] = {
902 : { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
903 : { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
904 : { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
905 : { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
906 :
907 : { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
908 : { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
909 : { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
910 : { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
911 : };
912 :
913 756 : for (const auto &LC : LibraryCalls) {
914 672 : setLibcallName(LC.Op, LC.Name);
915 672 : setLibcallCallingConv(LC.Op, LC.CC);
916 : }
917 : } else {
918 : const struct {
919 : const RTLIB::Libcall Op;
920 : const char * const Name;
921 : const CallingConv::ID CC;
922 3360 : } LibraryCalls[] = {
923 : { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
924 : { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
925 : { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
926 : { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
927 :
928 : { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
929 : { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
930 : { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
931 : { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
932 : };
933 :
934 30240 : for (const auto &LC : LibraryCalls) {
935 26880 : setLibcallName(LC.Op, LC.Name);
936 26880 : setLibcallCallingConv(LC.Op, LC.CC);
937 : }
938 : }
939 :
940 : setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
941 : setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
942 : setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
943 : setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
944 : } else {
945 : setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
946 : setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
947 : }
948 :
949 10100 : if (Subtarget->isTargetWindows() && Subtarget->getTargetTriple().isOSMSVCRT())
950 252 : for (auto &VT : {MVT::f32, MVT::f64})
951 168 : setOperationAction(ISD::FPOWI, VT, Custom);
952 :
953 : setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
954 : setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
955 : setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
956 : setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
957 :
958 : setOperationAction(ISD::TRAP, MVT::Other, Legal);
959 :
960 : // Use the default implementation.
961 : setOperationAction(ISD::VASTART, MVT::Other, Custom);
962 : setOperationAction(ISD::VAARG, MVT::Other, Expand);
963 : setOperationAction(ISD::VACOPY, MVT::Other, Expand);
964 : setOperationAction(ISD::VAEND, MVT::Other, Expand);
965 : setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
966 : setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
967 :
968 10100 : if (Subtarget->isTargetWindows())
969 : setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
970 : else
971 : setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
972 :
973 : // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
974 : // the default expansion.
975 5050 : InsertFencesForAtomic = false;
976 3428 : if (Subtarget->hasAnyDataBarrier() &&
977 3428 : (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
978 : // ATOMIC_FENCE needs custom lowering; the others should have been expanded
979 : // to ldrex/strex loops already.
980 : setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
981 3217 : if (!Subtarget->isThumb() || !Subtarget->isMClass())
982 : setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
983 :
984 : // On v8, we have particularly efficient implementations of atomic fences
985 : // if they can be combined with nearby atomic loads and stores.
986 3217 : if (!Subtarget->hasV8Ops() || getTargetMachine().getOptLevel() == 0) {
987 : // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
988 2894 : InsertFencesForAtomic = true;
989 : }
990 : } else {
991 : // If there's anything we can use as a barrier, go through custom lowering
992 : // for ATOMIC_FENCE.
993 : // If the target has DMB in Thumb mode, fences can be inserted.
994 1833 : if (Subtarget->hasDataBarrier())
995 211 : InsertFencesForAtomic = true;
996 :
997 : setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
998 : Subtarget->hasAnyDataBarrier() ? Custom : Expand);
999 :
1000 : // Set them all for expansion, which will force libcalls.
1001 : setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
1002 : setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
1003 : setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
1004 : setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
1005 : setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
1006 : setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
1007 : setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
1008 : setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
1009 : setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
1010 : setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
1011 : setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
1012 : setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
1013 : // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1014 : // Unordered/Monotonic case.
1015 1833 : if (!InsertFencesForAtomic) {
1016 : setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
1017 : setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
1018 : }
1019 : }
1020 :
1021 : setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
1022 :
1023 : // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1024 5050 : if (!Subtarget->hasV6Ops()) {
1025 : setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
1026 : setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
1027 : }
1028 : setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
1029 :
1030 5050 : if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
1031 3028 : !Subtarget->isThumb1Only()) {
1032 : // Turn f64 -> i64 into VMOVRRD and i64 -> f64 into VMOVDRR,
1033 : // iff the target supports VFP2.
1034 : setOperationAction(ISD::BITCAST, MVT::i64, Custom);
1035 : setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
1036 : }
1037 :
1038 : // We want to custom lower some of our intrinsics.
1039 : setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1040 : setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
1041 : setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
1042 : setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
1043 5050 : if (Subtarget->useSjLjEH())
1044 : setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1045 :
1046 : setOperationAction(ISD::SETCC, MVT::i32, Expand);
1047 : setOperationAction(ISD::SETCC, MVT::f32, Expand);
1048 : setOperationAction(ISD::SETCC, MVT::f64, Expand);
1049 : setOperationAction(ISD::SELECT, MVT::i32, Custom);
1050 : setOperationAction(ISD::SELECT, MVT::f32, Custom);
1051 : setOperationAction(ISD::SELECT, MVT::f64, Custom);
1052 : setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
1053 : setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
1054 : setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
1055 5050 : if (Subtarget->hasFullFP16()) {
1056 : setOperationAction(ISD::SETCC, MVT::f16, Expand);
1057 : setOperationAction(ISD::SELECT, MVT::f16, Custom);
1058 : setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
1059 : }
1060 :
1061 : setOperationAction(ISD::SETCCCARRY, MVT::i32, Custom);
1062 :
1063 : setOperationAction(ISD::BRCOND, MVT::Other, Custom);
1064 : setOperationAction(ISD::BR_CC, MVT::i32, Custom);
1065 5050 : if (Subtarget->hasFullFP16())
1066 : setOperationAction(ISD::BR_CC, MVT::f16, Custom);
1067 : setOperationAction(ISD::BR_CC, MVT::f32, Custom);
1068 : setOperationAction(ISD::BR_CC, MVT::f64, Custom);
1069 : setOperationAction(ISD::BR_JT, MVT::Other, Custom);
1070 :
1071 : // We don't support sin/cos/fmod/copysign/pow
1072 : setOperationAction(ISD::FSIN, MVT::f64, Expand);
1073 : setOperationAction(ISD::FSIN, MVT::f32, Expand);
1074 : setOperationAction(ISD::FCOS, MVT::f32, Expand);
1075 : setOperationAction(ISD::FCOS, MVT::f64, Expand);
1076 : setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
1077 : setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
1078 : setOperationAction(ISD::FREM, MVT::f64, Expand);
1079 : setOperationAction(ISD::FREM, MVT::f32, Expand);
1080 5050 : if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
1081 3028 : !Subtarget->isThumb1Only()) {
1082 : setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
1083 : setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
1084 : }
1085 : setOperationAction(ISD::FPOW, MVT::f64, Expand);
1086 : setOperationAction(ISD::FPOW, MVT::f32, Expand);
1087 :
1088 5050 : if (!Subtarget->hasVFP4()) {
1089 : setOperationAction(ISD::FMA, MVT::f64, Expand);
1090 : setOperationAction(ISD::FMA, MVT::f32, Expand);
1091 : }
1092 :
1093 : // Various VFP goodness
1094 5050 : if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1095 : // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1096 4507 : if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {
1097 : setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
1098 : setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
1099 : }
1100 :
1101 : // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1102 4507 : if (!Subtarget->hasFP16()) {
1103 : setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
1104 : setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
1105 : }
1106 : }
1107 :
1108 : // Use __sincos_stret if available.
1109 5050 : if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1110 : getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1111 : setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1112 : setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1113 : }
1114 :
1115 : // FP-ARMv8 implements a lot of rounding-like FP operations.
1116 5050 : if (Subtarget->hasFPARMv8()) {
1117 : setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
1118 : setOperationAction(ISD::FCEIL, MVT::f32, Legal);
1119 : setOperationAction(ISD::FROUND, MVT::f32, Legal);
1120 : setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
1121 : setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
1122 : setOperationAction(ISD::FRINT, MVT::f32, Legal);
1123 : setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
1124 : setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
1125 : setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);
1126 : setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);
1127 : setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
1128 : setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
1129 :
1130 465 : if (!Subtarget->isFPOnlySP()) {
1131 : setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
1132 : setOperationAction(ISD::FCEIL, MVT::f64, Legal);
1133 : setOperationAction(ISD::FROUND, MVT::f64, Legal);
1134 : setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
1135 : setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
1136 : setOperationAction(ISD::FRINT, MVT::f64, Legal);
1137 : setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
1138 : setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
1139 : }
1140 : }
1141 :
1142 5050 : if (Subtarget->hasNEON()) {
1143 : // vmin and vmax aren't available in a scalar form, so we use
1144 : // a NEON instruction with an undef lane instead.
1145 : setOperationAction(ISD::FMINNAN, MVT::f16, Legal);
1146 : setOperationAction(ISD::FMAXNAN, MVT::f16, Legal);
1147 : setOperationAction(ISD::FMINNAN, MVT::f32, Legal);
1148 : setOperationAction(ISD::FMAXNAN, MVT::f32, Legal);
1149 : setOperationAction(ISD::FMINNAN, MVT::v2f32, Legal);
1150 : setOperationAction(ISD::FMAXNAN, MVT::v2f32, Legal);
1151 : setOperationAction(ISD::FMINNAN, MVT::v4f32, Legal);
1152 : setOperationAction(ISD::FMAXNAN, MVT::v4f32, Legal);
1153 :
1154 2598 : if (Subtarget->hasFullFP16()) {
1155 : setOperationAction(ISD::FMINNUM, MVT::v4f16, Legal);
1156 : setOperationAction(ISD::FMAXNUM, MVT::v4f16, Legal);
1157 : setOperationAction(ISD::FMINNUM, MVT::v8f16, Legal);
1158 : setOperationAction(ISD::FMAXNUM, MVT::v8f16, Legal);
1159 :
1160 : setOperationAction(ISD::FMINNAN, MVT::v4f16, Legal);
1161 : setOperationAction(ISD::FMAXNAN, MVT::v4f16, Legal);
1162 : setOperationAction(ISD::FMINNAN, MVT::v8f16, Legal);
1163 : setOperationAction(ISD::FMAXNAN, MVT::v8f16, Legal);
1164 : }
1165 : }
1166 :
1167 : // We have target-specific dag combine patterns for the following nodes:
1168 : // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1169 : setTargetDAGCombine(ISD::ADD);
1170 : setTargetDAGCombine(ISD::SUB);
1171 : setTargetDAGCombine(ISD::MUL);
1172 : setTargetDAGCombine(ISD::AND);
1173 : setTargetDAGCombine(ISD::OR);
1174 : setTargetDAGCombine(ISD::XOR);
1175 :
1176 5050 : if (Subtarget->hasV6Ops())
1177 : setTargetDAGCombine(ISD::SRL);
1178 :
1179 : setStackPointerRegisterToSaveRestore(ARM::SP);
1180 :
1181 5050 : if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1182 4507 : !Subtarget->hasVFP2())
1183 : setSchedulingPreference(Sched::RegPressure);
1184 : else
1185 : setSchedulingPreference(Sched::Hybrid);
1186 :
1187 : //// temporary - rewrite interface to use type
1188 5050 : MaxStoresPerMemset = 8;
1189 5050 : MaxStoresPerMemsetOptSize = 4;
1190 5050 : MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1191 5050 : MaxStoresPerMemcpyOptSize = 2;
1192 5050 : MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1193 5050 : MaxStoresPerMemmoveOptSize = 2;
1194 :
1195 : // On ARM, arguments smaller than 4 bytes are extended, so all arguments
1196 : // are at least 4-byte aligned.
1197 : setMinStackArgumentAlignment(4);
1198 :
1199 : // Prefer likely predicted branches to selects on out-of-order cores.
1200 5050 : PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1201 :
1202 5050 : setPrefLoopAlignment(Subtarget->getPrefLoopAlignment());
1203 :
1204 5050 : setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
1205 5050 : }
1206 :
1207 4386 : bool ARMTargetLowering::useSoftFloat() const {
1208 4386 : return Subtarget->useSoftFloat();
1209 : }
1210 :
1211 : // FIXME: It might make sense to define the representative register class as the
1212 : // nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1213 : // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1214 : // SPR's representative would be DPR_VFP2. This should work well if register
1215 : // pressure tracking were modified such that a register use would increment the
1216 : // pressure of the register class's representative and all of its super
1217 : // classes' representatives transitively. We have not implemented this because
1218 : // of the difficulty prior to coalescing of modeling operand register classes
1219 : // due to the common occurrence of cross class copies and subregister insertions
1220 : // and extractions.
1221 : std::pair<const TargetRegisterClass *, uint8_t>
1222 575700 : ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
1223 : MVT VT) const {
1224 : const TargetRegisterClass *RRC = nullptr;
1225 : uint8_t Cost = 1;
1226 575700 : switch (VT.SimpleTy) {
1227 499950 : default:
1228 499950 : return TargetLowering::findRepresentativeClass(TRI, VT);
1229 : // Use DPR as representative register class for all floating point
1230 : // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1231 : // the cost is 1 for both f32 and f64.
1232 35350 : case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1233 : case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1234 : RRC = &ARM::DPRRegClass;
1235 : // When NEON is used for SP, only half of the register file is available
1236 : // because operations that define both SP and DP results will be constrained
1237 : // to the VFP2 class (D0-D15). We currently model this constraint prior to
1238 : // coalescing by double-counting the SP regs. See the FIXME above.
1239 35350 : if (Subtarget->useNEONForSinglePrecisionFP())
1240 : Cost = 2;
1241 : break;
1242 : case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1243 : case MVT::v4f32: case MVT::v2f64:
1244 : RRC = &ARM::DPRRegClass;
1245 : Cost = 2;
1246 : break;
1247 5050 : case MVT::v4i64:
1248 : RRC = &ARM::DPRRegClass;
1249 : Cost = 4;
1250 5050 : break;
1251 5050 : case MVT::v8i64:
1252 : RRC = &ARM::DPRRegClass;
1253 : Cost = 8;
1254 5050 : break;
1255 : }
1256 : return std::make_pair(RRC, Cost);
1257 : }
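     : // For example, under the mapping above a quad-register value maps to
     : //   MVT::v4f32 -> { &ARM::DPRRegClass, /*Cost=*/2 }
     : // i.e. one Q register is charged as two D registers for pressure
     : // tracking, while MVT::f64 costs 1 (or 2 when NEON is used for
     : // single-precision FP, per the comment above).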
1258 :
1259 0 : const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1260 0 : switch ((ARMISD::NodeType)Opcode) {
1261 : case ARMISD::FIRST_NUMBER: break;
1262 : case ARMISD::Wrapper: return "ARMISD::Wrapper";
1263 0 : case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
1264 0 : case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
1265 0 : case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL";
1266 0 : case ARMISD::CALL: return "ARMISD::CALL";
1267 0 : case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
1268 0 : case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
1269 0 : case ARMISD::BRCOND: return "ARMISD::BRCOND";
1270 0 : case ARMISD::BR_JT: return "ARMISD::BR_JT";
1271 0 : case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
1272 0 : case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
1273 0 : case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
1274 0 : case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
1275 0 : case ARMISD::CMP: return "ARMISD::CMP";
1276 0 : case ARMISD::CMN: return "ARMISD::CMN";
1277 0 : case ARMISD::CMPZ: return "ARMISD::CMPZ";
1278 0 : case ARMISD::CMPFP: return "ARMISD::CMPFP";
1279 0 : case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
1280 0 : case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
1281 0 : case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
1282 :
1283 0 : case ARMISD::CMOV: return "ARMISD::CMOV";
1284 :
1285 0 : case ARMISD::SSAT: return "ARMISD::SSAT";
1286 0 : case ARMISD::USAT: return "ARMISD::USAT";
1287 :
1288 0 : case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
1289 0 : case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
1290 0 : case ARMISD::RRX: return "ARMISD::RRX";
1291 :
1292 0 : case ARMISD::ADDC: return "ARMISD::ADDC";
1293 0 : case ARMISD::ADDE: return "ARMISD::ADDE";
1294 0 : case ARMISD::SUBC: return "ARMISD::SUBC";
1295 0 : case ARMISD::SUBE: return "ARMISD::SUBE";
1296 :
1297 0 : case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
1298 0 : case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
1299 0 : case ARMISD::VMOVhr: return "ARMISD::VMOVhr";
1300 0 : case ARMISD::VMOVrh: return "ARMISD::VMOVrh";
1301 0 : case ARMISD::VMOVSR: return "ARMISD::VMOVSR";
1302 :
1303 0 : case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
1304 0 : case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
1305 0 : case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH";
1306 :
1307 0 : case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
1308 :
1309 0 : case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
1310 :
1311 0 : case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
1312 :
1313 0 : case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
1314 :
1315 0 : case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
1316 :
1317 0 : case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
1318 0 : case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
1319 :
1320 0 : case ARMISD::VCEQ: return "ARMISD::VCEQ";
1321 0 : case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
1322 0 : case ARMISD::VCGE: return "ARMISD::VCGE";
1323 0 : case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
1324 0 : case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
1325 0 : case ARMISD::VCGEU: return "ARMISD::VCGEU";
1326 0 : case ARMISD::VCGT: return "ARMISD::VCGT";
1327 0 : case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
1328 0 : case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
1329 0 : case ARMISD::VCGTU: return "ARMISD::VCGTU";
1330 0 : case ARMISD::VTST: return "ARMISD::VTST";
1331 :
1332 0 : case ARMISD::VSHL: return "ARMISD::VSHL";
1333 0 : case ARMISD::VSHRs: return "ARMISD::VSHRs";
1334 0 : case ARMISD::VSHRu: return "ARMISD::VSHRu";
1335 0 : case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
1336 0 : case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
1337 0 : case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
1338 0 : case ARMISD::VQSHLs: return "ARMISD::VQSHLs";
1339 0 : case ARMISD::VQSHLu: return "ARMISD::VQSHLu";
1340 0 : case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";
1341 0 : case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs";
1342 0 : case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu";
1343 0 : case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu";
1344 0 : case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";
1345 0 : case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";
1346 0 : case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
1347 0 : case ARMISD::VSLI: return "ARMISD::VSLI";
1348 0 : case ARMISD::VSRI: return "ARMISD::VSRI";
1349 0 : case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
1350 0 : case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
1351 0 : case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
1352 0 : case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
1353 0 : case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
1354 0 : case ARMISD::VDUP: return "ARMISD::VDUP";
1355 0 : case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
1356 0 : case ARMISD::VEXT: return "ARMISD::VEXT";
1357 0 : case ARMISD::VREV64: return "ARMISD::VREV64";
1358 0 : case ARMISD::VREV32: return "ARMISD::VREV32";
1359 0 : case ARMISD::VREV16: return "ARMISD::VREV16";
1360 0 : case ARMISD::VZIP: return "ARMISD::VZIP";
1361 0 : case ARMISD::VUZP: return "ARMISD::VUZP";
1362 0 : case ARMISD::VTRN: return "ARMISD::VTRN";
1363 0 : case ARMISD::VTBL1: return "ARMISD::VTBL1";
1364 0 : case ARMISD::VTBL2: return "ARMISD::VTBL2";
1365 0 : case ARMISD::VMULLs: return "ARMISD::VMULLs";
1366 0 : case ARMISD::VMULLu: return "ARMISD::VMULLu";
1367 0 : case ARMISD::UMAAL: return "ARMISD::UMAAL";
1368 0 : case ARMISD::UMLAL: return "ARMISD::UMLAL";
1369 0 : case ARMISD::SMLAL: return "ARMISD::SMLAL";
1370 0 : case ARMISD::SMLALBB: return "ARMISD::SMLALBB";
1371 0 : case ARMISD::SMLALBT: return "ARMISD::SMLALBT";
1372 0 : case ARMISD::SMLALTB: return "ARMISD::SMLALTB";
1373 0 : case ARMISD::SMLALTT: return "ARMISD::SMLALTT";
1374 0 : case ARMISD::SMULWB: return "ARMISD::SMULWB";
1375 0 : case ARMISD::SMULWT: return "ARMISD::SMULWT";
1376 0 : case ARMISD::SMLALD: return "ARMISD::SMLALD";
1377 0 : case ARMISD::SMLALDX: return "ARMISD::SMLALDX";
1378 0 : case ARMISD::SMLSLD: return "ARMISD::SMLSLD";
1379 0 : case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX";
1380 0 : case ARMISD::SMMLAR: return "ARMISD::SMMLAR";
1381 0 : case ARMISD::SMMLSR: return "ARMISD::SMMLSR";
1382 0 : case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
1383 0 : case ARMISD::BFI: return "ARMISD::BFI";
1384 0 : case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
1385 0 : case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
1386 0 : case ARMISD::VBSL: return "ARMISD::VBSL";
1387 0 : case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
1388 0 : case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";
1389 0 : case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
1390 0 : case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
1391 0 : case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
1392 0 : case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
1393 0 : case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
1394 0 : case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
1395 0 : case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
1396 0 : case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
1397 0 : case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
1398 0 : case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
1399 0 : case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD";
1400 0 : case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
1401 0 : case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
1402 0 : case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
1403 0 : case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
1404 0 : case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
1405 0 : case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
1406 0 : case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
1407 0 : case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
1408 0 : case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
1409 0 : case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
1410 : }
1411 0 : return nullptr;
1412 : }
1413 :
1414 10625 : EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1415 : EVT VT) const {
1416 10625 : if (!VT.isVector())
1417 10438 : return getPointerTy(DL);
1418 187 : return VT.changeVectorElementTypeToInteger();
1419 : }
1420 :
1421 : /// getRegClassFor - Return the register class that should be used for the
1422 : /// specified value type.
1423 218707 : const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
1424 : // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1425 : // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1426 : // load / store 4 to 8 consecutive D registers.
1427 218707 : if (Subtarget->hasNEON()) {
1428 139037 : if (VT == MVT::v4i64)
1429 : return &ARM::QQPRRegClass;
1430 139037 : if (VT == MVT::v8i64)
1431 : return &ARM::QQQQPRRegClass;
1432 : }
1433 218673 : return TargetLowering::getRegClassFor(VT);
1434 : }
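     : // E.g. the four consecutive D registers produced by a vld4 are glued
     : // together with REG_SEQUENCE and carried around as a single v4i64 value
     : // in a QQPR register, even though v4i64 itself is never made a legal
     : // type for ordinary arithmetic.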
1435 :
1436 : // memcpy and other memory intrinsics typically try to use LDM/STM if the
1437 : // source/dest is aligned and the copy size is large enough. We therefore want
1438 : // to align such objects passed to memory intrinsics.
1439 10913 : bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
1440 : unsigned &PrefAlign) const {
1441 10913 : if (!isa<MemIntrinsic>(CI))
1442 : return false;
1443 664 : MinSize = 8;
1444 : // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1445 : // cycle faster than 4-byte aligned LDM.
1446 664 : PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
1447 664 : return true;
1448 : }
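     : // For example, raising a 4-byte-aligned alloca to 8-byte alignment before
     : //   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 32, i1 false)
     : // lets the expanded copy use 8-byte-aligned LDM/STM sequences; on M-class
     : // or pre-v6 cores only 4-byte alignment is requested, matching PrefAlign
     : // above.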
1449 :
1450 : // Create a fast isel object.
1451 : FastISel *
1452 1221 : ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1453 : const TargetLibraryInfo *libInfo) const {
1454 1221 : return ARM::createFastISel(funcInfo, libInfo);
1455 : }
1456 :
1457 141597 : Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
1458 141597 : unsigned NumVals = N->getNumValues();
1459 141597 : if (!NumVals)
1460 : return Sched::RegPressure;
1461 :
1462 328292 : for (unsigned i = 0; i != NumVals; ++i) {
1463 426746 : EVT VT = N->getValueType(i);
1464 : if (VT == MVT::Glue || VT == MVT::Other)
1465 109095 : continue;
1466 186901 : if (VT.isFloatingPoint() || VT.isVector())
1467 26678 : return Sched::ILP;
1468 : }
1469 :
1470 114919 : if (!N->isMachineOpcode())
1471 : return Sched::RegPressure;
1472 :
1473 : // Loads are scheduled for latency even if the instruction itinerary
1474 : // is not available.
1475 73553 : const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1476 147106 : const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1477 :
1478 73553 : if (MCID.getNumDefs() == 0)
1479 : return Sched::RegPressure;
1480 65863 : if (!Itins->isEmpty() &&
1481 32957 : Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1482 12467 : return Sched::ILP;
1483 :
1484 : return Sched::RegPressure;
1485 : }
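     : // E.g. a node producing an f64 or vector result (such as a VLDR or a NEON
     : // op) is scheduled for ILP/latency, while a plain GPR add with a short
     : // operand latency stays on the register-pressure heuristic.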
1486 :
1487 : //===----------------------------------------------------------------------===//
1488 : // Lowering Code
1489 : //===----------------------------------------------------------------------===//
1490 :
1491 1616 : static bool isSRL16(const SDValue &Op) {
1492 3232 : if (Op.getOpcode() != ISD::SRL)
1493 : return false;
1494 : if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1495 118 : return Const->getZExtValue() == 16;
1496 : return false;
1497 : }
1498 :
1499 228 : static bool isSRA16(const SDValue &Op) {
1500 456 : if (Op.getOpcode() != ISD::SRA)
1501 : return false;
1502 : if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1503 282 : return Const->getZExtValue() == 16;
1504 : return false;
1505 : }
1506 :
1507 111 : static bool isSHL16(const SDValue &Op) {
1508 222 : if (Op.getOpcode() != ISD::SHL)
1509 : return false;
1510 : if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1511 64 : return Const->getZExtValue() == 16;
1512 : return false;
1513 : }
1514 :
1515 : // Check for a signed 16-bit value. We special-case SRA because it keeps
1516 : // things simpler when also looking for SRAs that aren't sign-extending a
1517 : // smaller value. Without the check, we'd need to take extra care with
1518 : // checking order for some operations.
1519 167 : static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
1520 167 : if (isSRA16(Op))
1521 160 : return isSHL16(Op.getOperand(0));
1522 87 : return DAG.ComputeNumSignBits(Op) == 17;
1523 : }
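     : // E.g. isS16 accepts both (sra (shl X, 16), 16), matched structurally via
     : // isSRA16/isSHL16, and any value for which the DAG can prove 17 sign bits
     : // (a sign-extended i16); this helps later DAG combines recognise
     : // candidates for the 16x16 multiply instructions (e.g. smlalbb).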
1524 :
1525 : /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
1526 3327 : static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
1527 3327 : switch (CC) {
1528 0 : default: llvm_unreachable("Unknown condition code!");
1529 : case ISD::SETNE: return ARMCC::NE;
1530 932 : case ISD::SETEQ: return ARMCC::EQ;
1531 172 : case ISD::SETGT: return ARMCC::GT;
1532 110 : case ISD::SETGE: return ARMCC::GE;
1533 274 : case ISD::SETLT: return ARMCC::LT;
1534 42 : case ISD::SETLE: return ARMCC::LE;
1535 154 : case ISD::SETUGT: return ARMCC::HI;
1536 22 : case ISD::SETUGE: return ARMCC::HS;
1537 122 : case ISD::SETULT: return ARMCC::LO;
1538 50 : case ISD::SETULE: return ARMCC::LS;
1539 : }
1540 : }
1541 :
1542 : /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
1543 570 : static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
1544 : ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN) {
1545 570 : CondCode2 = ARMCC::AL;
1546 570 : InvalidOnQNaN = true;
1547 570 : switch (CC) {
1548 0 : default: llvm_unreachable("Unknown FP condition!");
1549 47 : case ISD::SETEQ:
1550 : case ISD::SETOEQ:
1551 47 : CondCode = ARMCC::EQ;
1552 47 : InvalidOnQNaN = false;
1553 47 : break;
1554 61 : case ISD::SETGT:
1555 61 : case ISD::SETOGT: CondCode = ARMCC::GT; break;
1556 44 : case ISD::SETGE:
1557 44 : case ISD::SETOGE: CondCode = ARMCC::GE; break;
1558 113 : case ISD::SETOLT: CondCode = ARMCC::MI; break;
1559 40 : case ISD::SETOLE: CondCode = ARMCC::LS; break;
1560 7 : case ISD::SETONE:
1561 7 : CondCode = ARMCC::MI;
1562 7 : CondCode2 = ARMCC::GT;
1563 7 : InvalidOnQNaN = false;
1564 7 : break;
1565 7 : case ISD::SETO: CondCode = ARMCC::VC; break;
1566 17 : case ISD::SETUO: CondCode = ARMCC::VS; break;
1567 24 : case ISD::SETUEQ:
1568 24 : CondCode = ARMCC::EQ;
1569 24 : CondCode2 = ARMCC::VS;
1570 24 : InvalidOnQNaN = false;
1571 24 : break;
1572 30 : case ISD::SETUGT: CondCode = ARMCC::HI; break;
1573 38 : case ISD::SETUGE: CondCode = ARMCC::PL; break;
1574 31 : case ISD::SETLT:
1575 31 : case ISD::SETULT: CondCode = ARMCC::LT; break;
1576 51 : case ISD::SETLE:
1577 51 : case ISD::SETULE: CondCode = ARMCC::LE; break;
1578 60 : case ISD::SETNE:
1579 : case ISD::SETUNE:
1580 60 : CondCode = ARMCC::NE;
1581 60 : InvalidOnQNaN = false;
1582 60 : break;
1583 : }
1584 570 : }
1585 :
1586 : //===----------------------------------------------------------------------===//
1587 : // Calling Convention Implementation
1588 : //===----------------------------------------------------------------------===//
1589 :
1590 : #include "ARMGenCallingConv.inc"
1591 :
1592 : /// getEffectiveCallingConv - Get the effective calling convention, taking into
1593 : /// account presence of floating point hardware and calling convention
1594 : /// limitations, such as support for variadic functions.
1595 : CallingConv::ID
1596 102120 : ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
1597 : bool isVarArg) const {
1598 102120 : switch (CC) {
1599 0 : default:
1600 0 : report_fatal_error("Unsupported calling convention");
1601 : case CallingConv::ARM_AAPCS:
1602 : case CallingConv::ARM_APCS:
1603 : case CallingConv::GHC:
1604 : return CC;
1605 0 : case CallingConv::PreserveMost:
1606 0 : return CallingConv::PreserveMost;
1607 5288 : case CallingConv::ARM_AAPCS_VFP:
1608 : case CallingConv::Swift:
1609 5288 : return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
1610 82084 : case CallingConv::C:
1611 82084 : if (!Subtarget->isAAPCS_ABI())
1612 : return CallingConv::ARM_APCS;
1613 38311 : else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() &&
1614 98471 : getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
1615 : !isVarArg)
1616 11491 : return CallingConv::ARM_AAPCS_VFP;
1617 : else
1618 : return CallingConv::ARM_AAPCS;
1619 880 : case CallingConv::Fast:
1620 : case CallingConv::CXX_FAST_TLS:
1621 880 : if (!Subtarget->isAAPCS_ABI()) {
1622 724 : if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1623 399 : return CallingConv::Fast;
1624 : return CallingConv::ARM_APCS;
1625 406 : } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1626 205 : return CallingConv::ARM_AAPCS_VFP;
1627 : else
1628 : return CallingConv::ARM_AAPCS;
1629 : }
1630 : }
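     : // E.g. an ordinary C call on a hard-float AAPCS target with VFP2 and no
     : // varargs is treated as ARM_AAPCS_VFP (FP arguments travel in s/d
     : // registers), while the same call marked varargs degrades to plain
     : // ARM_AAPCS with everything passed in r0-r3 and on the stack.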
1631 :
1632 22361 : CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
1633 : bool isVarArg) const {
1634 22361 : return CCAssignFnForNode(CC, false, isVarArg);
1635 : }
1636 :
1637 44982 : CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
1638 : bool isVarArg) const {
1639 44982 : return CCAssignFnForNode(CC, true, isVarArg);
1640 : }
1641 :
1642 : /// CCAssignFnForNode - Selects the correct CCAssignFn for the given
1643 : /// CallingConvention.
1644 67343 : CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
1645 : bool Return,
1646 : bool isVarArg) const {
1647 67343 : switch (getEffectiveCallingConv(CC, isVarArg)) {
1648 0 : default:
1649 0 : report_fatal_error("Unsupported calling convention");
1650 13534 : case CallingConv::ARM_APCS:
1651 13534 : return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
1652 42020 : case CallingConv::ARM_AAPCS:
1653 42020 : return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1654 11501 : case CallingConv::ARM_AAPCS_VFP:
1655 11501 : return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1656 276 : case CallingConv::Fast:
1657 276 : return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
1658 12 : case CallingConv::GHC:
1659 12 : return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
1660 0 : case CallingConv::PreserveMost:
1661 0 : return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1662 : }
1663 : }
1664 :
1665 : /// LowerCallResult - Lower the result values of a call into the
1666 : /// appropriate copies out of appropriate physical registers.
1667 7026 : SDValue ARMTargetLowering::LowerCallResult(
1668 : SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
1669 : const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1670 : SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
1671 : SDValue ThisVal) const {
1672 : // Assign locations to each value returned by this call.
1673 : SmallVector<CCValAssign, 16> RVLocs;
1674 : CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1675 7026 : *DAG.getContext());
1676 7026 : CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
1677 :
1678 : // Copy all of the result registers out of their specified physreg.
1679 12444 : for (unsigned i = 0; i != RVLocs.size(); ++i) {
1680 5418 : CCValAssign VA = RVLocs[i];
1681 :
1682 : // Pass 'this' value directly from the argument to return value, to avoid
1683 : // reg unit interference
1684 5418 : if (i == 0 && isThisReturn) {
1685 : assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
1686 : "unexpected return calling convention register assignment");
1687 54 : InVals.push_back(ThisVal);
1688 54 : continue;
1689 : }
1690 :
1691 5364 : SDValue Val;
1692 5364 : if (VA.needsCustom()) {
1693 : // Handle f64 or half of a v2f64.
1694 : SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1695 229 : InFlag);
1696 229 : Chain = Lo.getValue(1);
1697 229 : InFlag = Lo.getValue(2);
1698 458 : VA = RVLocs[++i]; // skip ahead to next loc
1699 : SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1700 229 : InFlag);
1701 229 : Chain = Hi.getValue(1);
1702 229 : InFlag = Hi.getValue(2);
1703 229 : if (!Subtarget->isLittle())
1704 : std::swap (Lo, Hi);
1705 229 : Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1706 :
1707 229 : if (VA.getLocVT() == MVT::v2f64) {
1708 43 : SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
1709 43 : Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1710 43 : DAG.getConstant(0, dl, MVT::i32));
1711 :
1712 86 : VA = RVLocs[++i]; // skip ahead to next loc
1713 43 : Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1714 43 : Chain = Lo.getValue(1);
1715 43 : InFlag = Lo.getValue(2);
1716 86 : VA = RVLocs[++i]; // skip ahead to next loc
1717 43 : Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1718 43 : Chain = Hi.getValue(1);
1719 43 : InFlag = Hi.getValue(2);
1720 43 : if (!Subtarget->isLittle())
1721 : std::swap (Lo, Hi);
1722 43 : Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1723 43 : Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1724 43 : DAG.getConstant(1, dl, MVT::i32));
1725 : }
1726 : } else {
1727 5135 : Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
1728 5135 : InFlag);
1729 5135 : Chain = Val.getValue(1);
1730 5135 : InFlag = Val.getValue(2);
1731 : }
1732 :
1733 5364 : switch (VA.getLocInfo()) {
1734 0 : default: llvm_unreachable("Unknown loc info!");
1735 : case CCValAssign::Full: break;
1736 : case CCValAssign::BCvt:
1737 622 : Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
1738 622 : break;
1739 : }
1740 :
1741 5364 : InVals.push_back(Val);
1742 : }
1743 :
1744 7026 : return Chain;
1745 : }
1746 :
1747 : /// LowerMemOpCallTo - Store the argument to the stack.
1748 936 : SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
1749 : SDValue Arg, const SDLoc &dl,
1750 : SelectionDAG &DAG,
1751 : const CCValAssign &VA,
1752 : ISD::ArgFlagsTy Flags) const {
1753 936 : unsigned LocMemOffset = VA.getLocMemOffset();
1754 936 : SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1755 936 : PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
1756 1872 : StackPtr, PtrOff);
1757 : return DAG.getStore(
1758 : Chain, dl, Arg, PtrOff,
1759 936 : MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
1760 : }
1761 :
1762 551 : void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
1763 : SDValue Chain, SDValue &Arg,
1764 : RegsToPassVector &RegsToPass,
1765 : CCValAssign &VA, CCValAssign &NextVA,
1766 : SDValue &StackPtr,
1767 : SmallVectorImpl<SDValue> &MemOpChains,
1768 : ISD::ArgFlagsTy Flags) const {
1769 : SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1770 551 : DAG.getVTList(MVT::i32, MVT::i32), Arg);
1771 551 : unsigned id = Subtarget->isLittle() ? 0 : 1;
1772 1102 : RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
1773 :
1774 551 : if (NextVA.isRegLoc())
1775 1086 : RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
1776 : else {
1777 : assert(NextVA.isMemLoc());
1778 8 : if (!StackPtr.getNode())
1779 0 : StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
1780 0 : getPointerTy(DAG.getDataLayout()));
1781 :
1782 8 : MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
1783 : dl, DAG, NextVA,
1784 8 : Flags));
1785 : }
1786 551 : }
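     : // For instance, under the soft-float AAPCS an f64 in the first argument
     : // slot is moved out of its d-register with VMOVRRD and passed in r0/r1
     : // (word order swapped for big-endian); if only one GPR remains, the
     : // second word is stored to the outgoing stack area via LowerMemOpCallTo.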
1787 :
1788 : /// LowerCall - Lowering a call into a callseq_start <-
1789 : /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
1790 : /// nodes.
1791 : SDValue
1792 7641 : ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
1793 : SmallVectorImpl<SDValue> &InVals) const {
1794 7641 : SelectionDAG &DAG = CLI.DAG;
1795 7641 : SDLoc &dl = CLI.DL;
1796 7641 : SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1797 7641 : SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1798 7641 : SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1799 7641 : SDValue Chain = CLI.Chain;
1800 7641 : SDValue Callee = CLI.Callee;
1801 : bool &isTailCall = CLI.IsTailCall;
1802 7641 : CallingConv::ID CallConv = CLI.CallConv;
1803 7641 : bool doesNotRet = CLI.DoesNotReturn;
1804 7641 : bool isVarArg = CLI.IsVarArg;
1805 :
1806 7641 : MachineFunction &MF = DAG.getMachineFunction();
1807 7641 : bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
1808 : bool isThisReturn = false;
1809 : bool isSibCall = false;
1810 7641 : auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
1811 :
1812 : // Disable tail calls if they're not supported.
1813 7641 : if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
1814 2379 : isTailCall = false;
1815 :
1816 7641 : if (isTailCall) {
1817 : // Check if it's really possible to do a tail call.
1818 693 : isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
1819 693 : isVarArg, isStructRet, MF.getFunction().hasStructRetAttr(),
1820 : Outs, OutVals, Ins, DAG);
1821 693 : if (!isTailCall && CLI.CS && CLI.CS.isMustTailCall())
1822 0 : report_fatal_error("failed to perform tail call elimination on a call "
1823 : "site marked musttail");
1824 : // We don't support GuaranteedTailCallOpt for ARM, only automatically
1825 : // detected sibcalls.
1826 693 : if (isTailCall) {
1827 : ++NumTailCalls;
1828 : isSibCall = true;
1829 : }
1830 : }
1831 :
1832 : // Analyze operands of the call, assigning locations to each operand.
1833 : SmallVector<CCValAssign, 16> ArgLocs;
1834 : CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1835 15282 : *DAG.getContext());
1836 7641 : CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
1837 :
1838 : // Get a count of how many bytes are to be pushed on the stack.
1839 7641 : unsigned NumBytes = CCInfo.getNextStackOffset();
1840 :
1841 : // For tail calls, memory operands are available in our caller's stack.
1842 7641 : if (isSibCall)
1843 : NumBytes = 0;
1844 :
1845 : // Adjust the stack pointer for the new arguments...
1846 : // These operations are automatically eliminated by the prolog/epilog pass
1847 7641 : if (!isSibCall)
1848 7026 : Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
1849 :
1850 : SDValue StackPtr =
1851 15282 : DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
1852 :
1853 : RegsToPassVector RegsToPass;
1854 : SmallVector<SDValue, 8> MemOpChains;
1855 :
1856 : // Walk the register/memloc assignments, inserting copies/loads. In the case
1857 : // of tail call optimization, arguments are handled later.
1858 21450 : for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
1859 21450 : i != e;
1860 : ++i, ++realArgIdx) {
1861 13809 : CCValAssign &VA = ArgLocs[i];
1862 27618 : SDValue Arg = OutVals[realArgIdx];
1863 13809 : ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1864 13809 : bool isByVal = Flags.isByVal();
1865 :
1866 : // Promote the value if needed.
1867 13809 : switch (VA.getLocInfo()) {
1868 0 : default: llvm_unreachable("Unknown loc info!");
1869 : case CCValAssign::Full: break;
1870 : case CCValAssign::SExt:
1871 0 : Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
1872 0 : break;
1873 : case CCValAssign::ZExt:
1874 0 : Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
1875 0 : break;
1876 : case CCValAssign::AExt:
1877 0 : Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
1878 0 : break;
1879 : case CCValAssign::BCvt:
1880 1184 : Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
1881 1184 : break;
1882 : }
1883 :
1884 : // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
1885 13809 : if (VA.needsCustom()) {
1886 504 : if (VA.getLocVT() == MVT::v2f64) {
1887 : SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1888 48 : DAG.getConstant(0, dl, MVT::i32));
1889 : SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1890 48 : DAG.getConstant(1, dl, MVT::i32));
1891 :
1892 48 : PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
1893 48 : VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1894 :
1895 96 : VA = ArgLocs[++i]; // skip ahead to next loc
1896 48 : if (VA.isRegLoc()) {
1897 47 : PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
1898 47 : VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1899 : } else {
1900 : assert(VA.isMemLoc());
1901 :
1902 1 : MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
1903 1 : dl, DAG, VA, Flags));
1904 : }
1905 : } else {
1906 912 : PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
1907 : StackPtr, MemOpChains, Flags);
1908 : }
1909 13305 : } else if (VA.isRegLoc()) {
1910 11998 : if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
1911 61 : Outs[0].VT == MVT::i32) {
1912 : assert(VA.getLocVT() == MVT::i32 &&
1913 : "unexpected calling convention register assignment");
1914 : assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
1915 : "unexpected use of 'returned'");
1916 : isThisReturn = true;
1917 : }
1918 23996 : RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1919 1307 : } else if (isByVal) {
1920 : assert(VA.isMemLoc());
1921 : unsigned offset = 0;
1922 :
1923 : // True if this byval aggregate will be split between registers
1924 : // and memory.
1925 : unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
1926 369 : unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
1927 :
1928 369 : if (CurByValIdx < ByValArgsCount) {
1929 :
1930 : unsigned RegBegin, RegEnd;
1931 : CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
1932 :
1933 : EVT PtrVT =
1934 356 : DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
1935 : unsigned int i, j;
1936 1719 : for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
1937 1363 : SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
1938 1363 : SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
1939 : SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
1940 : MachinePointerInfo(),
1941 1363 : DAG.InferPtrAlignment(AddArg));
1942 1363 : MemOpChains.push_back(Load.getValue(1));
1943 1363 : RegsToPass.push_back(std::make_pair(j, Load));
1944 : }
1945 :
1946 : // If the parameter size exceeds the register area, the "offset" value
1947 : // helps us calculate the stack slot for the remaining part properly.
1948 356 : offset = RegEnd - RegBegin;
1949 :
1950 : CCInfo.nextInRegsParam();
1951 : }
1952 :
1953 369 : if (Flags.getByValSize() > 4*offset) {
1954 360 : auto PtrVT = getPointerTy(DAG.getDataLayout());
1955 360 : unsigned LocMemOffset = VA.getLocMemOffset();
1956 360 : SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1957 360 : SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff);
1958 360 : SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
1959 360 : SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
1960 360 : SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
1961 360 : MVT::i32);
1962 : SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl,
1963 360 : MVT::i32);
1964 :
1965 360 : SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
1966 360 : SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
1967 360 : MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
1968 720 : Ops));
1969 : }
1970 938 : } else if (!isSibCall) {
1971 : assert(VA.isMemLoc());
1972 :
1973 927 : MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
1974 927 : dl, DAG, VA, Flags));
1975 : }
1976 : }
1977 :
1978 7641 : if (!MemOpChains.empty())
1979 713 : Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
1980 :
1981 : // Build a sequence of copy-to-reg nodes chained together with token chain
1982 : // and flag operands which copy the outgoing args into the appropriate regs.
1983 7641 : SDValue InFlag;
1984 : // Tail call byval lowering might overwrite argument registers so in case of
1985 : // tail call optimization the copies to registers are lowered later.
1986 7641 : if (!isTailCall)
1987 20760 : for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1988 13734 : Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1989 27468 : RegsToPass[i].second, InFlag);
1990 13734 : InFlag = Chain.getValue(1);
1991 : }
1992 :
1993 : // For tail calls lower the arguments to the 'real' stack slot.
1994 7641 : if (isTailCall) {
1995 : // Force all the incoming stack arguments to be loaded from the stack
1996 : // before any new outgoing arguments are stored to the stack, because the
1997 : // outgoing stack slots may alias the incoming argument stack slots, and
1998 : // the alias isn't otherwise explicit. This is slightly more conservative
1999 : // than necessary, because it means that each store effectively depends
2000 : // on every argument instead of just those arguments it would clobber.
2001 :
2002 : // Do not flag preceding copytoreg stuff together with the following stuff.
2003 615 : InFlag = SDValue();
2004 1336 : for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2005 721 : Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2006 1442 : RegsToPass[i].second, InFlag);
2007 721 : InFlag = Chain.getValue(1);
2008 : }
2009 615 : InFlag = SDValue();
2010 : }
2011 :
2012 : // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2013 : // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2014 : // node so that legalize doesn't hack it.
2015 : bool isDirect = false;
2016 :
2017 7641 : const TargetMachine &TM = getTargetMachine();
2018 7641 : const Module *Mod = MF.getFunction().getParent();
2019 : const GlobalValue *GV = nullptr;
2020 : if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2021 4351 : GV = G->getGlobal();
2022 : bool isStub =
2023 7641 : !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();
2024 :
2025 7641 : bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2026 : bool isLocalARMFunc = false;
2027 7641 : ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2028 7641 : auto PtrVt = getPointerTy(DAG.getDataLayout());
2029 :
2030 7641 : if (Subtarget->genLongCalls()) {
2031 : assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2032 : "long-calls codegen is not position independent!");
2033 : // Handle a global address or an external symbol. If it's not one of
2034 : // those, the target's already in a register, so we don't need to do
2035 : // anything extra.
2036 : if (isa<GlobalAddressSDNode>(Callee)) {
2037 : // Create a constant pool entry for the callee address
2038 : unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2039 : ARMConstantPoolValue *CPV =
2040 7 : ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
2041 :
2042 : // Get the address of the callee into a register
2043 7 : SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2044 7 : CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2045 7 : Callee = DAG.getLoad(
2046 : PtrVt, dl, DAG.getEntryNode(), CPAddr,
2047 7 : MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2048 : } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2049 0 : const char *Sym = S->getSymbol();
2050 :
2051 : // Create a constant pool entry for the callee address
2052 : unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2053 : ARMConstantPoolValue *CPV =
2054 0 : ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2055 : ARMPCLabelIndex, 0);
2056 : // Get the address of the callee into a register
2057 0 : SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2058 0 : CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2059 0 : Callee = DAG.getLoad(
2060 : PtrVt, dl, DAG.getEntryNode(), CPAddr,
2061 0 : MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2062 : }
2063 : } else if (isa<GlobalAddressSDNode>(Callee)) {
2064 : // If we're optimizing for minimum size and the function is called three or
2065 : // more times in this block, we can improve codesize by calling indirectly
2066 : // as BLXr has a 16-bit encoding.
2067 4344 : auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2068 4344 : auto *BB = CLI.CS.getParent();
2069 : bool PreferIndirect =
2070 4416 : Subtarget->isThumb() && MF.getFunction().optForMinSize() &&
2071 342 : count_if(GV->users(), [&BB](const User *U) {
2072 347 : return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
2073 : }) > 2;
2074 :
2075 : if (!PreferIndirect) {
2076 : isDirect = true;
2077 4341 : bool isDef = GV->isStrongDefinitionForLinker();
2078 :
2079 : // ARM call to a local ARM function is predicable.
2080 4341 : isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2081 : // tBX takes a register source operand.
2082 4487 : if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2083 : assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2084 50 : Callee = DAG.getNode(
2085 : ARMISD::WrapperPIC, dl, PtrVt,
2086 100 : DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2087 50 : Callee = DAG.getLoad(
2088 : PtrVt, dl, DAG.getEntryNode(), Callee,
2089 : MachinePointerInfo::getGOT(DAG.getMachineFunction()),
2090 : /* Alignment = */ 0, MachineMemOperand::MODereferenceable |
2091 50 : MachineMemOperand::MOInvariant);
2092 4291 : } else if (Subtarget->isTargetCOFF()) {
2093 : assert(Subtarget->isTargetWindows() &&
2094 : "Windows is the only supported COFF target");
2095 : unsigned TargetFlags = GV->hasDLLImportStorageClass()
2096 52 : ? ARMII::MO_DLLIMPORT
2097 : : ARMII::MO_NO_FLAG;
2098 52 : Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0,
2099 52 : TargetFlags);
2100 52 : if (GV->hasDLLImportStorageClass())
2101 3 : Callee =
2102 3 : DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2103 : DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2104 3 : MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2105 : } else {
2106 4239 : Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
2107 : }
2108 : }
2109 : } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2110 : isDirect = true;
2111 : // tBX takes a register source operand.
2112 3198 : const char *Sym = S->getSymbol();
2113 3265 : if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2114 : unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2115 : ARMConstantPoolValue *CPV =
2116 9 : ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2117 : ARMPCLabelIndex, 4);
2118 9 : SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2119 9 : CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2120 9 : Callee = DAG.getLoad(
2121 : PtrVt, dl, DAG.getEntryNode(), CPAddr,
2122 9 : MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2123 9 : SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2124 9 : Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2125 : } else {
2126 3189 : Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2127 : }
2128 : }
2129 :
2130 : // FIXME: handle tail calls differently.
2131 : unsigned CallOpc;
2132 7641 : if (Subtarget->isThumb()) {
2133 3360 : if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2134 : CallOpc = ARMISD::CALL_NOLINK;
2135 : else
2136 : CallOpc = ARMISD::CALL;
2137 : } else {
2138 4281 : if (!isDirect && !Subtarget->hasV5TOps())
2139 : CallOpc = ARMISD::CALL_NOLINK;
2140 4284 : else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2141 : // Emit regular call when code size is the priority
2142 20 : !MF.getFunction().optForMinSize())
2143 : // "mov lr, pc; b _foo" to avoid confusing the RSP
2144 : CallOpc = ARMISD::CALL_NOLINK;
2145 : else
2146 4248 : CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2147 : }
2148 :
2149 : std::vector<SDValue> Ops;
2150 7641 : Ops.push_back(Chain);
2151 7641 : Ops.push_back(Callee);
2152 :
2153 : // Add argument registers to the end of the list so that they are known live
2154 : // into the call.
2155 22096 : for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2156 14455 : Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2157 43365 : RegsToPass[i].second.getValueType()));
2158 :
2159 : // Add a register mask operand representing the call-preserved registers.
2160 7641 : if (!isTailCall) {
2161 : const uint32_t *Mask;
2162 7026 : const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2163 7026 : if (isThisReturn) {
2164 : // For 'this' returns, use the R0-preserving mask if applicable
2165 54 : Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2166 54 : if (!Mask) {
2167 : // Set isThisReturn to false if the calling convention is not one that
2168 : // allows 'returned' to be modeled in this way, so LowerCallResult does
2169 : // not try to pass 'this' straight through
2170 : isThisReturn = false;
2171 0 : Mask = ARI->getCallPreservedMask(MF, CallConv);
2172 : }
2173 : } else
2174 6972 : Mask = ARI->getCallPreservedMask(MF, CallConv);
2175 :
2176 : assert(Mask && "Missing call preserved mask for calling convention");
2177 14052 : Ops.push_back(DAG.getRegisterMask(Mask));
2178 : }
2179 :
2180 7641 : if (InFlag.getNode())
2181 6200 : Ops.push_back(InFlag);
2182 :
2183 7641 : SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2184 7641 : if (isTailCall) {
2185 615 : MF.getFrameInfo().setHasTailCall();
2186 615 : return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2187 : }
2188 :
2189 : // Returns a chain and a flag for retval copy to use.
2190 7026 : Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2191 7026 : InFlag = Chain.getValue(1);
2192 :
2193 7026 : Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
2194 7026 : DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
2195 7026 : if (!Ins.empty())
2196 4490 : InFlag = Chain.getValue(1);
2197 :
2198 : // Handle result values, copying them out of physregs into vregs that we
2199 : // return.
2200 : return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
2201 : InVals, isThisReturn,
2202 7026 : isThisReturn ? OutVals[0] : SDValue());
2203 : }
2204 :
2205 : /// HandleByVal - Every parameter *after* a byval parameter is passed
2206 : /// on the stack. Remember the next parameter register to allocate,
2207 : /// and then confiscate the rest of the parameter registers to ensure
2208 : /// this.
2209 419 : void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2210 : unsigned Align) const {
2211 : // Byval (as with any stack) slots are always at least 4 byte aligned.
2212 419 : Align = std::max(Align, 4U);
2213 :
2214 419 : unsigned Reg = State->AllocateReg(GPRArgRegs);
2215 419 : if (!Reg)
2216 : return;
2217 :
2218 399 : unsigned AlignInRegs = Align / 4;
2219 399 : unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2220 415 : for (unsigned i = 0; i < Waste; ++i)
2221 16 : Reg = State->AllocateReg(GPRArgRegs);
2222 :
2223 399 : if (!Reg)
2224 : return;
2225 :
2226 395 : unsigned Excess = 4 * (ARM::R4 - Reg);
2227 :
2228 : // Special case when NSAA != SP and the parameter size is greater than the
2229 : // size of all remaining GPR regs. In that case we can't split the parameter;
2230 : // we must send it to the stack. We must also set NCRN to R4, so waste all
2231 : // remaining registers.
2232 395 : const unsigned NSAAOffset = State->getNextStackOffset();
2233 395 : if (NSAAOffset != 0 && Size > Excess) {
2234 15 : while (State->AllocateReg(GPRArgRegs))
2235 : ;
2236 : return;
2237 : }
2238 :
2239 : // First register for byval parameter is the first register that wasn't
2240 : // allocated before this method call, so it would be "reg".
2241 : // If parameter is small enough to be saved in range [reg, r4), then
2242 : // the end (first after last) register would be reg + param-size-in-regs,
2243 : // else the parameter would be split between registers and stack, and the
2244 : // end register would be r4 in this case.
2245 : unsigned ByValRegBegin = Reg;
2246 751 : unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2247 : State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2248 : // Note: the first register was already allocated at the beginning of this
2249 : // function, so allocate the remaining registers we need.
2250 1439 : for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2251 1049 : State->AllocateReg(GPRArgRegs);
2252 : // A byval parameter that is split between registers and memory needs its
2253 : // size truncated here.
2254 : // In the case where the entire structure fits in registers, we set the
2255 : // size in memory to zero.
2256 414 : Size = std::max<int>(Size - Excess, 0);
2257 : }
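     : // Worked example, assuming r0 already holds a preceding argument: a byval
     : // of size 20 with align 8 starts at r1, wastes r1 for alignment, occupies
     : // r2-r3 (Excess = 8 bytes), and Size is trimmed to 20 - 8 = 12 bytes that
     : // are passed on the stack.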
2258 :
2259 : /// MatchingStackOffset - Return true if the given stack call argument is
2260 : /// already available in the same relative position in the caller's
2261 : /// incoming argument stack.
2262 : static
2263 31 : bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2264 : MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2265 : const TargetInstrInfo *TII) {
2266 31 : unsigned Bytes = Arg.getValueSizeInBits() / 8;
2267 31 : int FI = std::numeric_limits<int>::max();
2268 62 : if (Arg.getOpcode() == ISD::CopyFromReg) {
2269 6 : unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2270 6 : if (!TargetRegisterInfo::isVirtualRegister(VR))
2271 : return false;
2272 6 : MachineInstr *Def = MRI->getVRegDef(VR);
2273 6 : if (!Def)
2274 : return false;
2275 1 : if (!Flags.isByVal()) {
2276 1 : if (!TII->isLoadFromStackSlot(*Def, FI))
2277 : return false;
2278 : } else {
2279 : return false;
2280 : }
2281 : } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2282 11 : if (Flags.isByVal())
2283 : // ByVal argument is passed in as a pointer but it's now being
2284 : // dereferenced. e.g.
2285 : // define @foo(%struct.X* %A) {
2286 : // tail call @bar(%struct.X* byval %A)
2287 : // }
2288 : return false;
2289 11 : SDValue Ptr = Ld->getBasePtr();
2290 : FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2291 : if (!FINode)
2292 : return false;
2293 11 : FI = FINode->getIndex();
2294 : } else
2295 : return false;
2296 :
2297 : assert(FI != std::numeric_limits<int>::max());
2298 12 : if (!MFI.isFixedObjectIndex(FI))
2299 : return false;
2300 24 : return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2301 : }
2302 :
2303 : /// IsEligibleForTailCallOptimization - Check whether the call is eligible
2304 : /// for tail call optimization. Targets which want to do tail call
2305 : /// optimization should implement this function.
2306 : bool
2307 693 : ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
2308 : CallingConv::ID CalleeCC,
2309 : bool isVarArg,
2310 : bool isCalleeStructRet,
2311 : bool isCallerStructRet,
2312 : const SmallVectorImpl<ISD::OutputArg> &Outs,
2313 : const SmallVectorImpl<SDValue> &OutVals,
2314 : const SmallVectorImpl<ISD::InputArg> &Ins,
2315 : SelectionDAG& DAG) const {
2316 693 : MachineFunction &MF = DAG.getMachineFunction();
2317 693 : const Function &CallerF = MF.getFunction();
2318 : CallingConv::ID CallerCC = CallerF.getCallingConv();
2319 :
2320 : assert(Subtarget->supportsTailCall());
2321 :
2322 : // Tail calls to function pointers cannot be optimized for Thumb1 if the args
2323 : // to the call take up r0-r3. The reason is that there are no legal registers
2324 : // left to hold the pointer to the function to be called.
2325 706 : if (Subtarget->isThumb1Only() && Outs.size() >= 4 &&
2326 : !isa<GlobalAddressSDNode>(Callee.getNode()))
2327 : return false;
2328 :
2329 : // Look for obvious safe cases to perform tail call optimization that do not
2330 : // require ABI changes. This is what gcc calls sibcall.
2331 :
2332 : // Exception-handling functions need a special set of instructions to indicate
2333 : // a return to the hardware. Tail-calling another function would probably
2334 : // break this.
2335 691 : if (CallerF.hasFnAttribute("interrupt"))
2336 : return false;
2337 :
2338 : // Also avoid sibcall optimization if either caller or callee uses struct
2339 : // return semantics.
2340 691 : if (isCalleeStructRet || isCallerStructRet)
2341 : return false;
2342 :
2343 : // Externally-defined functions with weak linkage should not be
2344 : // tail-called on ARM when the OS does not support dynamic
2345 : // pre-emption of symbols, as the AAELF spec requires normal calls
2346 : // to undefined weak functions to be replaced with a NOP or jump to the
2347 : // next instruction. The behaviour of branch instructions in this
2348 : // situation (as used for tail calls) is implementation-defined, so we
2349 : // cannot rely on the linker replacing the tail call with a return.
2350 : if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2351 398 : const GlobalValue *GV = G->getGlobal();
2352 398 : const Triple &TT = getTargetMachine().getTargetTriple();
2353 398 : if (GV->hasExternalWeakLinkage() &&
2354 1 : (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
2355 : return false;
2356 : }
2357 :
2358 : // Check that the call results are passed in the same way.
2359 685 : LLVMContext &C = *DAG.getContext();
2360 685 : if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
2361 : CCAssignFnForReturn(CalleeCC, isVarArg),
2362 : CCAssignFnForReturn(CallerCC, isVarArg)))
2363 : return false;
2364 : // The callee has to preserve all registers the caller needs to preserve.
2365 663 : const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2366 663 : const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2367 663 : if (CalleeCC != CallerCC) {
2368 259 : const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2369 259 : if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2370 : return false;
2371 : }
2372 :
2373 : // If Caller's vararg or byval argument has been split between registers and
2374 : // stack, do not perform tail call, since part of the argument is in caller's
2375 : // local frame.
2376 649 : const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
2377 649 : if (AFI_Caller->getArgRegsSaveSize())
2378 : return false;
2379 :
2380 : // If the callee takes no arguments then go on to check the results of the
2381 : // call.
2382 638 : if (!Outs.empty()) {
2383 : // Check if stack adjustment is needed. For now, do not do this if any
2384 : // argument is passed on the stack.
2385 : SmallVector<CCValAssign, 16> ArgLocs;
2386 957 : CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
2387 490 : CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
2388 490 : if (CCInfo.getNextStackOffset()) {
2389 : // Check if the arguments are already laid out in the right way as
2390 : // the caller's fixed stack objects.
2391 29 : MachineFrameInfo &MFI = MF.getFrameInfo();
2392 29 : const MachineRegisterInfo *MRI = &MF.getRegInfo();
2393 29 : const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2394 151 : for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2395 151 : i != e;
2396 : ++i, ++realArgIdx) {
2397 142 : CCValAssign &VA = ArgLocs[i];
2398 : EVT RegVT = VA.getLocVT();
2399 284 : SDValue Arg = OutVals[realArgIdx];
2400 142 : ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2401 142 : if (VA.getLocInfo() == CCValAssign::Indirect)
2402 20 : return false;
2403 142 : if (VA.needsCustom()) {
2404 : // f64 and vector types are split into multiple registers or
2405 : // register/stack-slot combinations. The types will not match
2406 : // the registers; give up on memory f64 refs until we figure
2407 : // out what to do about this.
2408 18 : if (!VA.isRegLoc())
2409 : return false;
2410 36 : if (!ArgLocs[++i].isRegLoc())
2411 : return false;
2412 : if (RegVT == MVT::v2f64) {
2413 0 : if (!ArgLocs[++i].isRegLoc())
2414 : return false;
2415 0 : if (!ArgLocs[++i].isRegLoc())
2416 : return false;
2417 : }
2418 124 : } else if (!VA.isRegLoc()) {
2419 31 : if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
2420 : MFI, MRI, TII))
2421 : return false;
2422 : }
2423 : }
2424 : }
2425 :
2426 470 : const MachineRegisterInfo &MRI = MF.getRegInfo();
2427 470 : if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
2428 : return false;
2429 : }
2430 :
2431 : return true;
2432 : }
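     : // In short, a call can be emitted as a sibcall (a plain "b callee" with no
     : // stack adjustment) when, for example, the callee preserves every register
     : // the caller needs preserved, no byval/vararg argument was split into the
     : // caller's frame, and each outgoing stack argument already sits in the
     : // matching fixed stack slot of the incoming arguments.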
2433 :
2434 : bool
2435 22675 : ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
2436 : MachineFunction &MF, bool isVarArg,
2437 : const SmallVectorImpl<ISD::OutputArg> &Outs,
2438 : LLVMContext &Context) const {
2439 : SmallVector<CCValAssign, 16> RVLocs;
2440 45350 : CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2441 22675 : return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2442 : }
2443 :
2444 12 : static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
2445 : const SDLoc &DL, SelectionDAG &DAG) {
2446 12 : const MachineFunction &MF = DAG.getMachineFunction();
2447 12 : const Function &F = MF.getFunction();
2448 :
2449 12 : StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
2450 :
2451 : // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
2452 : // version of the "preferred return address". These offsets affect the return
2453 : // instruction if this is a return from PL1 without hypervisor extensions.
2454 : // IRQ/FIQ: +4 "subs pc, lr, #4"
2455 : // SWI: 0 "subs pc, lr, #0"
2456 : // ABORT: +4 "subs pc, lr, #4"
2457 : // UNDEF: +4/+2 "subs pc, lr, #0"
2458 : // UNDEF varies depending on whether the exception came from ARM or Thumb
2459 : // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
2460 :
2461 : int64_t LROffset;
2462 : if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
2463 : IntKind == "ABORT")
2464 : LROffset = 4;
2465 : else if (IntKind == "SWI" || IntKind == "UNDEF")
2466 : LROffset = 0;
2467 : else
2468 0 : report_fatal_error("Unsupported interrupt attribute. If present, value "
2469 : "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
2470 :
2471 12 : RetOps.insert(RetOps.begin() + 1,
2472 24 : DAG.getConstant(LROffset, DL, MVT::i32, false));
2473 :
2474 12 : return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
2475 : }
2476 :
2477 : SDValue
2478 13429 : ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2479 : bool isVarArg,
2480 : const SmallVectorImpl<ISD::OutputArg> &Outs,
2481 : const SmallVectorImpl<SDValue> &OutVals,
2482 : const SDLoc &dl, SelectionDAG &DAG) const {
2483 : // CCValAssign - represents the assignment of the return value to a location.
2484 : SmallVector<CCValAssign, 16> RVLocs;
2485 :
2486 : // CCState - Info about the registers and stack slots.
2487 : CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2488 26858 : *DAG.getContext());
2489 :
2490 : // Analyze outgoing return values.
2491 13429 : CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2492 :
2493 13429 : SDValue Flag;
2494 : SmallVector<SDValue, 4> RetOps;
2495 13429 : RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2496 13429 : bool isLittleEndian = Subtarget->isLittle();
2497 :
2498 13429 : MachineFunction &MF = DAG.getMachineFunction();
2499 13429 : ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2500 13429 : AFI->setReturnRegsCount(RVLocs.size());
2501 :
2502 : // Copy the result values into the output registers.
2503 10954 : for (unsigned i = 0, realRVLocIdx = 0;
2504 24383 : i != RVLocs.size();
2505 : ++i, ++realRVLocIdx) {
2506 : CCValAssign &VA = RVLocs[i];
2507 : assert(VA.isRegLoc() && "Can only return in registers!");
2508 :
2509 10954 : SDValue Arg = OutVals[realRVLocIdx];
2510 : bool ReturnF16 = false;
2511 :
2512 10954 : if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
2513 : // Half-precision return values can be returned like this:
2514 : //
2515 : // t11: f16 = fadd ...
2516 : // t12: i16 = bitcast t11
2517 : // t13: i32 = zero_extend t12
2518 : // t14: f32 = bitcast t13 <~~~~~~~ Arg
2519 : //
2520 : // to avoid code generation for bitcasts, we simply set Arg to the node
2521 : // that produces the f16 value, t11 in this case.
2522 : //
2523 76 : if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
2524 68 : SDValue ZE = Arg.getOperand(0);
2525 68 : if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
2526 68 : SDValue BC = ZE.getOperand(0);
2527 68 : if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
2528 68 : Arg = BC.getOperand(0);
2529 : ReturnF16 = true;
2530 : }
2531 : }
2532 : }
2533 : }
2534 :
2535 10954 : switch (VA.getLocInfo()) {
2536 0 : default: llvm_unreachable("Unknown loc info!");
2537 : case CCValAssign::Full: break;
2538 2538 : case CCValAssign::BCvt:
2539 2538 : if (!ReturnF16)
2540 2538 : Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2541 : break;
2542 : }
2543 :
2544 10954 : if (VA.needsCustom()) {
2545 1815 : if (VA.getLocVT() == MVT::v2f64) {
2546 : // Extract the first half and return it in two registers.
2547 : SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2548 792 : DAG.getConstant(0, dl, MVT::i32));
2549 : SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
2550 792 : DAG.getVTList(MVT::i32, MVT::i32), Half);
2551 :
2552 792 : Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2553 : HalfGPRs.getValue(isLittleEndian ? 0 : 1),
2554 837 : Flag);
2555 792 : Flag = Chain.getValue(1);
2556 792 : RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2557 792 : VA = RVLocs[++i]; // skip ahead to next loc
2558 792 : Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2559 : HalfGPRs.getValue(isLittleEndian ? 1 : 0),
2560 837 : Flag);
2561 792 : Flag = Chain.getValue(1);
2562 792 : RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2563 1584 : VA = RVLocs[++i]; // skip ahead to next loc
2564 :
2565 : // Extract the 2nd half and fall through to handle it as an f64 value.
2566 792 : Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2567 792 : DAG.getConstant(1, dl, MVT::i32));
2568 : }
2569 : // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
2570 : // available.
2571 : SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2572 1815 : DAG.getVTList(MVT::i32, MVT::i32), Arg);
2573 1815 : Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2574 : fmrrd.getValue(isLittleEndian ? 0 : 1),
2575 1901 : Flag);
2576 1815 : Flag = Chain.getValue(1);
2577 1815 : RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2578 1815 : VA = RVLocs[++i]; // skip ahead to next loc
2579 1815 : Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2580 : fmrrd.getValue(isLittleEndian ? 1 : 0),
2581 1901 : Flag);
2582 : } else
2583 9139 : Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
2584 :
2585 : // Glue all emitted copies together so that nothing can be scheduled
2586 : // in between them and the return.
2587 10954 : Flag = Chain.getValue(1);
2588 10954 : RetOps.push_back(DAG.getRegister(VA.getLocReg(),
2589 32794 : ReturnF16 ? MVT::f16 : VA.getLocVT()));
2590 : }
2591 13429 : const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2592 : const MCPhysReg *I =
2593 13429 : TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2594 13429 : if (I) {
2595 600 : for (; *I; ++I) {
2596 585 : if (ARM::GPRRegClass.contains(*I))
2597 105 : RetOps.push_back(DAG.getRegister(*I, MVT::i32));
2598 480 : else if (ARM::DPRRegClass.contains(*I))
2599 480 : RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
2600 : else
2601 0 : llvm_unreachable("Unexpected register class in CSRsViaCopy!");
2602 : }
2603 : }
2604 :
2605 : // Update chain and glue.
2606 13429 : RetOps[0] = Chain;
2607 13429 : if (Flag.getNode())
2608 9925 : RetOps.push_back(Flag);
2609 :
2610 : // CPUs which aren't M-class use a special sequence to return from
2611 : // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
2612 : // though we use "subs pc, lr, #N").
2613 : //
2614 : // M-class CPUs actually use a normal return sequence with a special
2615 : // (hardware-provided) value in LR, so the normal code path works.
2616 13429 : if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
2617 18 : !Subtarget->isMClass()) {
2618 12 : if (Subtarget->isThumb1Only())
2619 0 : report_fatal_error("interrupt attribute is not supported in Thumb1");
2620 12 : return LowerInterruptReturn(RetOps, dl, DAG);
2621 : }
2622 :
2623 13417 : return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
2624 : }
2625 :
2626 985 : bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2627 985 : if (N->getNumValues() != 1)
2628 : return false;
2629 985 : if (!N->hasNUsesOfValue(1, 0))
2630 : return false;
2631 :
2632 961 : SDValue TCChain = Chain;
2633 961 : SDNode *Copy = *N->use_begin();
2634 1922 : if (Copy->getOpcode() == ISD::CopyToReg) {
2635 : // If the copy has a glue operand, we conservatively assume it isn't safe to
2636 : // perform a tail call.
2637 786 : if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2638 7 : return false;
2639 255 : TCChain = Copy->getOperand(0);
2640 699 : } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
2641 : SDNode *VMov = Copy;
2642 : // f64 returned in a pair of GPRs.
2643 : SmallPtrSet<SDNode*, 2> Copies;
2644 63 : for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2645 189 : UI != UE; ++UI) {
2646 126 : if (UI->getOpcode() != ISD::CopyToReg)
2647 : return false;
2648 126 : Copies.insert(*UI);
2649 : }
2650 126 : if (Copies.size() > 2)
2651 : return false;
2652 :
2653 63 : for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2654 181 : UI != UE; ++UI) {
2655 126 : SDValue UseChain = UI->getOperand(0);
2656 126 : if (Copies.count(UseChain.getNode()))
2657 : // Second CopyToReg
2658 : Copy = *UI;
2659 : else {
2660 : // We are at the top of this chain.
2661 : // If the copy has a glue operand, we conservatively assume it
2662 : // isn't safe to perform a tail call.
2663 189 : if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
2664 8 : return false;
2665 : // First CopyToReg
2666 55 : TCChain = UseChain;
2667 : }
2668 : }
2669 636 : } else if (Copy->getOpcode() == ISD::BITCAST) {
2670 : // f32 returned in a single GPR.
2671 : if (!Copy->hasOneUse())
2672 : return false;
2673 : Copy = *Copy->use_begin();
2674 46 : if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
2675 2 : return false;
2676 : // If the copy has a glue operand, we conservatively assume it isn't safe to
2677 : // perform a tail call.
2678 132 : if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2679 2 : return false;
2680 42 : TCChain = Copy->getOperand(0);
2681 : } else {
2682 : return false;
2683 : }
2684 :
2685 : bool HasRet = false;
2686 352 : for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2687 926 : UI != UE; ++UI) {
2688 1276 : if (UI->getOpcode() != ARMISD::RET_FLAG &&
2689 : UI->getOpcode() != ARMISD::INTRET_FLAG)
2690 : return false;
2691 : HasRet = true;
2692 : }
2693 :
2694 288 : if (!HasRet)
2695 : return false;
2696 :
2697 287 : Chain = TCChain;
2698 287 : return true;
2699 : }
2700 :
2701 142 : bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2702 142 : if (!Subtarget->supportsTailCall())
2703 : return false;
2704 :
2705 : auto Attr =
2706 121 : CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2707 121 : if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2708 33 : return false;
2709 :
2710 : return true;
2711 : }
2712 :
2713 : // Writing a 64-bit value requires splitting it into two 32-bit values first,
2714 : // and then passing the low and high parts through.
2715 0 : static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
2716 0 : SDLoc DL(Op);
2717 0 : SDValue WriteValue = Op->getOperand(2);
2718 :
2719 : // This function is only supposed to be called for i64 type argument.
2720 : assert(WriteValue.getValueType() == MVT::i64
2721 : && "LowerWRITE_REGISTER called for non-i64 type argument.");
2722 :
2723 : SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2724 0 : DAG.getConstant(0, DL, MVT::i32));
2725 : SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2726 0 : DAG.getConstant(1, DL, MVT::i32));
2727 0 : SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
2728 0 : return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
2729 : }
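
// Rough illustration (assumed IR; the register metadata is hypothetical):
// a write such as
//
//   call void @llvm.write_register.i64(metadata !0, i64 %val)
//
// is rebuilt here as a single ISD::WRITE_REGISTER node whose value operand has
// been split into lo/hi i32 halves with ISD::EXTRACT_ELEMENT, so the rest of
// selection only ever sees 32-bit pieces.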
2730 :
2731 : // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
2732 : // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
2733 : // one of the above-mentioned nodes. It has to be wrapped because otherwise
2734 : // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
2735 : // be used to form addressing modes. These wrapped nodes will be selected
2736 : // into MOVi.
2737 1266 : SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
2738 : SelectionDAG &DAG) const {
2739 : EVT PtrVT = Op.getValueType();
2740 : // FIXME there is no actual debug info here
2741 : SDLoc dl(Op);
2742 : ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2743 1266 : SDValue Res;
2744 :
2745 : // When generating execute-only code Constant Pools must be promoted to the
2746 : // global data section. It's a bit ugly that we can't share them across basic
2747 : // blocks, but this way we guarantee that execute-only behaves correct with
2748 : // blocks, but this way we guarantee that execute-only behaves correctly with
2749 1266 : if (Subtarget->genExecuteOnly()) {
2750 6 : auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
2751 6 : auto T = const_cast<Type*>(CP->getType());
2752 6 : auto C = const_cast<Constant*>(CP->getConstVal());
2753 6 : auto M = const_cast<Module*>(DAG.getMachineFunction().
2754 6 : getFunction().getParent());
2755 : auto GV = new GlobalVariable(
2756 : *M, T, /*isConst=*/true, GlobalVariable::InternalLinkage, C,
2757 6 : Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
2758 6 : Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
2759 6 : Twine(AFI->createPICLabelUId())
2760 6 : );
2761 : SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
2762 6 : dl, PtrVT);
2763 6 : return LowerGlobalAddress(GA, DAG);
2764 : }
2765 :
2766 1260 : if (CP->isMachineConstantPoolEntry())
2767 0 : Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2768 0 : CP->getAlignment());
2769 : else
2770 1260 : Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
2771 1260 : CP->getAlignment());
2772 1260 : return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
2773 : }
2774 :
2775 38 : unsigned ARMTargetLowering::getJumpTableEncoding() const {
2776 38 : return MachineJumpTableInfo::EK_Inline;
2777 : }
2778 :
2779 37 : SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
2780 : SelectionDAG &DAG) const {
2781 37 : MachineFunction &MF = DAG.getMachineFunction();
2782 37 : ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2783 : unsigned ARMPCLabelIndex = 0;
2784 : SDLoc DL(Op);
2785 37 : EVT PtrVT = getPointerTy(DAG.getDataLayout());
2786 37 : const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
2787 37 : SDValue CPAddr;
2788 37 : bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
2789 : if (!IsPositionIndependent) {
2790 19 : CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
2791 : } else {
2792 18 : unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
2793 : ARMPCLabelIndex = AFI->createPICLabelUId();
2794 : ARMConstantPoolValue *CPV =
2795 18 : ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
2796 : ARMCP::CPBlockAddress, PCAdj);
2797 18 : CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2798 : }
2799 37 : CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
2800 : SDValue Result = DAG.getLoad(
2801 : PtrVT, DL, DAG.getEntryNode(), CPAddr,
2802 37 : MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2803 37 : if (!IsPositionIndependent)
2804 19 : return Result;
2805 18 : SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
2806 18 : return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
2807 : }
2808 :
2809 : /// Convert a TLS address reference into the correct sequence of loads
2810 : /// and calls to compute the variable's address for Darwin, and return an
2811 : /// SDValue containing the final node.
2812 :
2813 : /// Darwin only has one TLS scheme which must be capable of dealing with the
2814 : /// fully general situation, in the worst case. This means:
2815 : /// + "extern __thread" declaration.
2816 : /// + Defined in a possibly unknown dynamic library.
2817 : ///
2818 : /// The general system is that each __thread variable has a [3 x i32] descriptor
2819 : /// which contains information used by the runtime to calculate the address. The
2820 : /// only part of this the compiler needs to know about is the first word, which
2821 : /// contains a function pointer that must be called with the address of the
2822 : /// entire descriptor in "r0".
2823 : ///
2824 : /// Since this descriptor may be in a different unit, in general access must
2825 : /// proceed along the usual ARM rules. A common sequence to produce is:
2826 : ///
2827 : /// movw rT1, :lower16:_var$non_lazy_ptr
2828 : /// movt rT1, :upper16:_var$non_lazy_ptr
2829 : /// ldr r0, [rT1]
2830 : /// ldr rT2, [r0]
2831 : /// blx rT2
2832 : /// [...address now in r0...]
2833 : SDValue
2834 79 : ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
2835 : SelectionDAG &DAG) const {
2836 : assert(Subtarget->isTargetDarwin() &&
2837 : "This function expects a Darwin target");
2838 : SDLoc DL(Op);
2839 :
2840 : // The first step is to get the address of the actual global symbol. This is where
2841 : // the TLS descriptor lives.
2842 79 : SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
2843 :
2844 : // The first entry in the descriptor is a function pointer that we must call
2845 : // to obtain the address of the variable.
2846 79 : SDValue Chain = DAG.getEntryNode();
2847 : SDValue FuncTLVGet = DAG.getLoad(
2848 : MVT::i32, DL, Chain, DescAddr,
2849 : MachinePointerInfo::getGOT(DAG.getMachineFunction()),
2850 : /* Alignment = */ 4,
2851 : MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
2852 79 : MachineMemOperand::MOInvariant);
2853 79 : Chain = FuncTLVGet.getValue(1);
2854 :
2855 79 : MachineFunction &F = DAG.getMachineFunction();
2856 79 : MachineFrameInfo &MFI = F.getFrameInfo();
2857 : MFI.setAdjustsStack(true);
2858 :
2859 : // TLS calls preserve all registers except those that absolutely must be
2860 : // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
2861 : // silly).
2862 : auto TRI =
2863 79 : getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo();
2864 : auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
2865 79 : const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
2866 :
2867 : // Finally, we can make the call. This is just a degenerate version of a
2868 : // normal ARM call node: r0 takes the address of the descriptor, and
2869 : // returns the address of the variable in this thread.
2870 79 : Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
2871 79 : Chain =
2872 79 : DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
2873 : Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
2874 79 : DAG.getRegisterMask(Mask), Chain.getValue(1));
2875 79 : return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
2876 : }
2877 :
2878 : SDValue
2879 7 : ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
2880 : SelectionDAG &DAG) const {
2881 : assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
2882 :
2883 7 : SDValue Chain = DAG.getEntryNode();
2884 7 : EVT PtrVT = getPointerTy(DAG.getDataLayout());
2885 : SDLoc DL(Op);
2886 :
2887 : // Load the current TEB (thread environment block)
2888 : SDValue Ops[] = {Chain,
2889 7 : DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
2890 7 : DAG.getConstant(15, DL, MVT::i32),
2891 7 : DAG.getConstant(0, DL, MVT::i32),
2892 7 : DAG.getConstant(13, DL, MVT::i32),
2893 7 : DAG.getConstant(0, DL, MVT::i32),
2894 14 : DAG.getConstant(2, DL, MVT::i32)};
2895 : SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
2896 7 : DAG.getVTList(MVT::i32, MVT::Other), Ops);
2897 :
2898 7 : SDValue TEB = CurrentTEB.getValue(0);
2899 7 : Chain = CurrentTEB.getValue(1);
2900 :
2901 : // Load the ThreadLocalStoragePointer from the TEB
2902 : // A pointer to the TLS array is located at offset 0x2c from the TEB.
2903 : SDValue TLSArray =
2904 7 : DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
2905 7 : TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
2906 :
2907 : // The pointer to the thread's TLS data area is found at an offset of
2908 : // (TLS index * 4) into the TLSArray.
2909 :
2910 : // Load the TLS index from the C runtime
2911 : SDValue TLSIndex =
2912 7 : DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
2913 7 : TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
2914 7 : TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
2915 :
2916 : SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
2917 7 : DAG.getConstant(2, DL, MVT::i32));
2918 : SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
2919 : DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
2920 7 : MachinePointerInfo());
2921 :
2922 : // Get the offset of the start of the .tls section (section base)
2923 : const auto *GA = cast<GlobalAddressSDNode>(Op);
2924 7 : auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
2925 : SDValue Offset = DAG.getLoad(
2926 : PtrVT, DL, Chain, DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
2927 : DAG.getTargetConstantPool(CPV, PtrVT, 4)),
2928 21 : MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2929 :
2930 7 : return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
2931 : }
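
// The loads built above compute, roughly (a sketch, not a literal listing):
//
//   TEB      = thread ID register        ; mrc p15, #0, rT, c13, c0, #2
//   TLSArray = [TEB + 0x2c]              ; ThreadLocalStoragePointer
//   TLSBase  = [TLSArray + _tls_index*4]
//   address  = TLSBase + SECREL(var)     ; section-relative offset, loaded
//                                        ; from the constant pool entry above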
2932 :
2933 : // Lower ISD::GlobalTLSAddress using the "general dynamic" model
2934 : SDValue
2935 15 : ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
2936 : SelectionDAG &DAG) const {
2937 : SDLoc dl(GA);
2938 15 : EVT PtrVT = getPointerTy(DAG.getDataLayout());
2939 15 : unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2940 15 : MachineFunction &MF = DAG.getMachineFunction();
2941 15 : ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2942 : unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2943 : ARMConstantPoolValue *CPV =
2944 15 : ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2945 : ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
2946 15 : SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2947 15 : Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
2948 15 : Argument = DAG.getLoad(
2949 : PtrVT, dl, DAG.getEntryNode(), Argument,
2950 15 : MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2951 : SDValue Chain = Argument.getValue(1);
2952 :
2953 15 : SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2954 15 : Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
2955 :
2956 : // call __tls_get_addr.
2957 : ArgListTy Args;
2958 : ArgListEntry Entry;
2959 15 : Entry.Node = Argument;
2960 15 : Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
2961 15 : Args.push_back(Entry);
2962 :
2963 : // FIXME: is there useful debug info available here?
2964 15 : TargetLowering::CallLoweringInfo CLI(DAG);
2965 15 : CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
2966 15 : CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
2967 30 : DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
2968 :
2969 15 : std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2970 15 : return CallResult.first;
2971 : }
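
// Shape of what gets built above (an approximate sketch; label names invented):
//
//   ldr   r0, .LCPIn_m          ; TLSGD constant-pool entry
//   .LPCn:
//   add   r0, pc, r0            ; ARMISD::PIC_ADD using the PIC label
//   bl    __tls_get_addr        ; returns the variable's address in r0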
2972 :
2973 : // Lower ISD::GlobalTLSAddress using the "initial exec" or
2974 : // "local exec" model.
2975 : SDValue
2976 39 : ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
2977 : SelectionDAG &DAG,
2978 : TLSModel::Model model) const {
2979 39 : const GlobalValue *GV = GA->getGlobal();
2980 : SDLoc dl(GA);
2981 39 : SDValue Offset;
2982 39 : SDValue Chain = DAG.getEntryNode();
2983 39 : EVT PtrVT = getPointerTy(DAG.getDataLayout());
2984 : // Get the Thread Pointer
2985 39 : SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
2986 :
2987 39 : if (model == TLSModel::InitialExec) {
2988 18 : MachineFunction &MF = DAG.getMachineFunction();
2989 18 : ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2990 : unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2991 : // Initial exec model.
2992 18 : unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2993 : ARMConstantPoolValue *CPV =
2994 18 : ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2995 : ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
2996 : true);
2997 18 : Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2998 18 : Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2999 18 : Offset = DAG.getLoad(
3000 : PtrVT, dl, Chain, Offset,
3001 18 : MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3002 18 : Chain = Offset.getValue(1);
3003 :
3004 18 : SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3005 18 : Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
3006 :
3007 18 : Offset = DAG.getLoad(
3008 : PtrVT, dl, Chain, Offset,
3009 18 : MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3010 : } else {
3011 : // local exec model
3012 : assert(model == TLSModel::LocalExec);
3013 : ARMConstantPoolValue *CPV =
3014 21 : ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
3015 21 : Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3016 21 : Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3017 21 : Offset = DAG.getLoad(
3018 : PtrVT, dl, Chain, Offset,
3019 21 : MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3020 : }
3021 :
3022 : // The address of the thread local variable is the add of the thread
3023 : // pointer with the offset of the variable.
3024 39 : return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
3025 : }
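
// Approximate shapes of the two paths above (sketches only):
//
//   Initial exec: load a GOTTPOFF entry from the constant pool, PIC-add the
//                 PC label, load through it, then add the thread pointer.
//   Local exec:   load a TPOFF constant from the constant pool and add the
//                 thread pointer directly.
//
// ARMISD::THREAD_POINTER itself typically becomes
// "mrc p15, #0, rT, c13, c0, #3" (TPIDRURO) or a call to __aeabi_read_tp,
// depending on the subtarget.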
3026 :
3027 : SDValue
3028 219 : ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
3029 : GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3030 219 : if (DAG.getTarget().useEmulatedTLS())
3031 79 : return LowerToTLSEmulatedModel(GA, DAG);
3032 :
3033 140 : if (Subtarget->isTargetDarwin())
3034 79 : return LowerGlobalTLSAddressDarwin(Op, DAG);
3035 :
3036 61 : if (Subtarget->isTargetWindows())
3037 7 : return LowerGlobalTLSAddressWindows(Op, DAG);
3038 :
3039 : // TODO: implement the "local dynamic" model
3040 : assert(Subtarget->isTargetELF() && "Only ELF implemented here");
3041 54 : TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
3042 :
3043 54 : switch (model) {
3044 15 : case TLSModel::GeneralDynamic:
3045 : case TLSModel::LocalDynamic:
3046 15 : return LowerToTLSGeneralDynamicModel(GA, DAG);
3047 39 : case TLSModel::InitialExec:
3048 : case TLSModel::LocalExec:
3049 39 : return LowerToTLSExecModels(GA, DAG, model);
3050 : }
3051 0 : llvm_unreachable("bogus TLS model");
3052 : }
3053 :
3054 : /// Return true if all users of V are within function F, looking through
3055 : /// ConstantExprs.
3056 148 : static bool allUsersAreInFunction(const Value *V, const Function *F) {
3057 : SmallVector<const User*,4> Worklist;
3058 298 : for (auto *U : V->users())
3059 150 : Worklist.push_back(U);
3060 418 : while (!Worklist.empty()) {
3061 : auto *U = Worklist.pop_back_val();
3062 332 : if (isa<ConstantExpr>(U)) {
3063 366 : for (auto *UU : U->users())
3064 219 : Worklist.push_back(UU);
3065 : continue;
3066 : }
3067 :
3068 : auto *I = dyn_cast<Instruction>(U);
3069 173 : if (!I || I->getParent()->getParent() != F)
3070 : return false;
3071 : }
3072 : return true;
3073 : }
3074 :
3075 1388 : static SDValue promoteToConstantPool(const ARMTargetLowering *TLI,
3076 : const GlobalValue *GV, SelectionDAG &DAG,
3077 : EVT PtrVT, const SDLoc &dl) {
3078 : // If we're creating a pool entry for a constant global with unnamed address,
3079 : // and the global is small enough, we can emit it inline into the constant pool
3080 : // to save ourselves an indirection.
3081 : //
3082 : // This is a win if the constant is only used in one function (so it doesn't
3083 : // need to be duplicated) or duplicating the constant wouldn't increase code
3084 : // size (implying the constant is no larger than 4 bytes).
3085 1388 : const Function &F = DAG.getMachineFunction().getFunction();
3086 :
3087 : // We rely on this decision to inline being idempotent and unrelated to the
3088 : // use-site. We know that if we inline a variable at one use site, we'll
3089 : // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3090 : // doesn't know about this optimization, so bail out if it's enabled;
3091 : // otherwise we could decide to inline here (and thus never emit the GV)
3092 : // while fast-isel generated code still requires the GV.
3093 1388 : if (!EnableConstpoolPromotion ||
3094 218 : DAG.getMachineFunction().getTarget().Options.EnableFastISel)
3095 1170 : return SDValue();
3096 :
3097 : auto *GVar = dyn_cast<GlobalVariable>(GV);
3098 218 : if (!GVar || !GVar->hasInitializer() ||
3099 218 : !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3100 : !GVar->hasLocalLinkage())
3101 0 : return SDValue();
3102 :
3103 : // If we inline a value that contains relocations, we move the relocations
3104 : // from .data to .text. This is not allowed in position-independent code.
3105 : auto *Init = GVar->getInitializer();
3106 324 : if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
3107 106 : Init->needsRelocation())
3108 6 : return SDValue();
3109 :
3110 : // The constant islands pass can only really deal with alignment requests
3111 : // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3112 : // any type with alignment requirements greater than 4 bytes. We also
3113 : // can only promote constants that are multiples of 4 bytes in size or
3114 : // are paddable to a multiple of 4. Currently we only try to pad constants
3115 : // that are strings for simplicity.
3116 : auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3117 212 : unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3118 212 : unsigned Align = DAG.getDataLayout().getPreferredAlignment(GVar);
3119 212 : unsigned RequiredPadding = 4 - (Size % 4);
3120 : bool PaddingPossible =
3121 212 : RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3122 212 : if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize ||
3123 : Size == 0)
3124 64 : return SDValue();
3125 :
3126 148 : unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3127 148 : MachineFunction &MF = DAG.getMachineFunction();
3128 148 : ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3129 :
3130 : // We can't bloat the constant pool too much, else the ConstantIslands pass
3131 : // may fail to converge. If we haven't promoted this global yet (it may have
3132 : // multiple uses), and promoting it would increase the constant pool size (Sz
3133 : // > 4), ensure we have space to do so up to MaxTotal.
3134 148 : if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3135 144 : if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3136 : ConstpoolPromotionMaxTotal)
3137 0 : return SDValue();
3138 :
3139 : // This is only valid if all users are in a single function; we can't clone
3140 : // the constant in general. The LLVM IR unnamed_addr allows merging
3141 : // constants, but not cloning them.
3142 : //
3143 : // We could potentially allow cloning if we could prove all uses of the
3144 : // constant in the current function don't care about the address, like
3145 : // printf format strings. But that isn't implemented for now.
3146 148 : if (!allUsersAreInFunction(GVar, &F))
3147 62 : return SDValue();
3148 :
3149 : // We're going to inline this global. Pad it out if needed.
3150 86 : if (RequiredPadding != 4) {
3151 48 : StringRef S = CDAInit->getAsString();
3152 :
3153 48 : SmallVector<uint8_t,16> V(S.size());
3154 : std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3155 132 : while (RequiredPadding--)
3156 84 : V.push_back(0);
3157 48 : Init = ConstantDataArray::get(*DAG.getContext(), V);
3158 : }
3159 :
3160 86 : auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3161 : SDValue CPAddr =
3162 86 : DAG.getTargetConstantPool(CPVal, PtrVT, /*Align=*/4);
3163 86 : if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3164 : AFI->markGlobalAsPromotedToConstantPool(GVar);
3165 172 : AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
3166 86 : PaddedSize - 4);
3167 : }
3168 : ++NumConstpoolPromoted;
3169 86 : return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3170 : }
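
// Hypothetical example of a candidate (names invented): a small, local,
// unnamed_addr constant such as
//
//   @.str = private unnamed_addr constant [3 x i8] c"hi\00"
//
// whose only users sit in one function can be padded to a multiple of 4 bytes
// and emitted directly as a constant-pool entry, so its address comes from a
// PC-relative literal instead of a separate global plus an extra indirection.
// This whole path is gated on the EnableConstpoolPromotion option checked above.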
3171 :
3172 1605 : bool ARMTargetLowering::isReadOnly(const GlobalValue *GV) const {
3173 : if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3174 4 : GV = GA->getBaseObject();
3175 1605 : return (isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) ||
3176 1605 : isa<Function>(GV);
3177 : }
3178 :
3179 2185 : SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
3180 : SelectionDAG &DAG) const {
3181 2185 : switch (Subtarget->getTargetTriple().getObjectFormat()) {
3182 0 : default: llvm_unreachable("unknown object format");
3183 52 : case Triple::COFF:
3184 52 : return LowerGlobalAddressWindows(Op, DAG);
3185 1541 : case Triple::ELF:
3186 1541 : return LowerGlobalAddressELF(Op, DAG);
3187 592 : case Triple::MachO:
3188 592 : return LowerGlobalAddressDarwin(Op, DAG);
3189 : }
3190 : }
3191 :
3192 1541 : SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3193 : SelectionDAG &DAG) const {
3194 1541 : EVT PtrVT = getPointerTy(DAG.getDataLayout());
3195 : SDLoc dl(Op);
3196 1541 : const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3197 1541 : const TargetMachine &TM = getTargetMachine();
3198 1541 : bool IsRO = isReadOnly(GV);
3199 :
3200 : // promoteToConstantPool only if not generating XO text section
3201 1541 : if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
3202 1388 : if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl))
3203 86 : return V;
3204 :
3205 1455 : if (isPositionIndependent()) {
3206 123 : bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
3207 : SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3208 246 : UseGOT_PREL ? ARMII::MO_GOT : 0);
3209 123 : SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3210 123 : if (UseGOT_PREL)
3211 61 : Result =
3212 61 : DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3213 61 : MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3214 123 : return Result;
3215 1332 : } else if (Subtarget->isROPI() && IsRO) {
3216 : // PC-relative.
3217 60 : SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3218 60 : SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3219 60 : return Result;
3220 1272 : } else if (Subtarget->isRWPI() && !IsRO) {
3221 : // SB-relative.
3222 30 : SDValue RelAddr;
3223 30 : if (Subtarget->useMovt(DAG.getMachineFunction())) {
3224 : ++NumMovwMovt;
3225 12 : SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3226 12 : RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3227 : } else { // use literal pool for address constant
3228 : ARMConstantPoolValue *CPV =
3229 18 : ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
3230 18 : SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3231 18 : CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3232 18 : RelAddr = DAG.getLoad(
3233 : PtrVT, dl, DAG.getEntryNode(), CPAddr,
3234 18 : MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3235 : }
3236 30 : SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3237 30 : SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3238 30 : return Result;
3239 : }
3240 :
3241 : // If we have T2 ops, we can materialize the address directly via movt/movw
3242 : // If we have T2 ops, we can materialize the address directly via a movw/movt
3243 : // pair. This is always cheaper.
3244 : ++NumMovwMovt;
3245 : // FIXME: Once remat is capable of dealing with instructions with register
3246 : // operands, expand this into two nodes.
3247 : return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
3248 764 : DAG.getTargetGlobalAddress(GV, dl, PtrVT));
3249 : } else {
3250 478 : SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
3251 478 : CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3252 : return DAG.getLoad(
3253 : PtrVT, dl, DAG.getEntryNode(), CPAddr,
3254 478 : MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3255 : }
3256 : }
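
// For the common non-PIC case, the wrapped address above typically ends up as
// either (sketches only; the symbol name is generic):
//
//   movw  rX, :lower16:sym
//   movt  rX, :upper16:sym        ; when movw/movt are available
//
// or a PC-relative literal-pool load of the symbol's address when they are not.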
3257 :
3258 671 : SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
3259 : SelectionDAG &DAG) const {
3260 : assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3261 : "ROPI/RWPI not currently supported for Darwin");
3262 671 : EVT PtrVT = getPointerTy(DAG.getDataLayout());
3263 : SDLoc dl(Op);
3264 671 : const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3265 :
3266 671 : if (Subtarget->useMovt(DAG.getMachineFunction()))
3267 : ++NumMovwMovt;
3268 :
3269 : // FIXME: Once remat is capable of dealing with instructions with register
3270 : // operands, expand this into multiple nodes
3271 : unsigned Wrapper =
3272 671 : isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;
3273 :
3274 671 : SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
3275 671 : SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
3276 :
3277 671 : if (Subtarget->isGVIndirectSymbol(GV))
3278 376 : Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3279 376 : MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3280 671 : return Result;
3281 : }
3282 :
3283 52 : SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
3284 : SelectionDAG &DAG) const {
3285 : assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
3286 : assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
3287 : "Windows on ARM expects to use movw/movt");
3288 : assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3289 : "ROPI/RWPI not currently supported for Windows");
3290 :
3291 52 : const TargetMachine &TM = getTargetMachine();
3292 52 : const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3293 : ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG;
3294 52 : if (GV->hasDLLImportStorageClass())
3295 : TargetFlags = ARMII::MO_DLLIMPORT;
3296 49 : else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
3297 : TargetFlags = ARMII::MO_COFFSTUB;
3298 52 : EVT PtrVT = getPointerTy(DAG.getDataLayout());
3299 : SDValue Result;
3300 : SDLoc DL(Op);
3301 :
3302 : ++NumMovwMovt;
3303 :
3304 : // FIXME: Once remat is capable of dealing with instructions with register
3305 : // operands, expand this into two nodes.
3306 52 : Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
3307 : DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
3308 52 : TargetFlags));
3309 52 : if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
3310 6 : Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3311 6 : MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3312 52 : return Result;
3313 : }
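
// Sketch of the dllimport case handled above (symbol name generic): the
// movw/movt pair materializes the import pointer, conventionally named
// __imp_<sym> on COFF, and the extra load dereferences it to reach the real
// definition in the other DLL. Plain locally-defined globals skip that load.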
3314 :
3315 : SDValue
3316 8 : ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
3317 : SDLoc dl(Op);
3318 8 : SDValue Val = DAG.getConstant(0, dl, MVT::i32);
3319 : return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
3320 : DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
3321 8 : Op.getOperand(1), Val);
3322 : }
3323 :
3324 : SDValue
3325 9 : ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
3326 : SDLoc dl(Op);
3327 : return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
3328 9 : Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
3329 : }
3330 :
3331 32 : SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
3332 : SelectionDAG &DAG) const {
3333 : SDLoc dl(Op);
3334 : return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
3335 32 : Op.getOperand(0));
3336 : }
3337 :
3338 : SDValue
3339 2477 : ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
3340 : const ARMSubtarget *Subtarget) const {
3341 4954 : unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3342 : SDLoc dl(Op);
3343 2477 : switch (IntNo) {
3344 2362 : default: return SDValue(); // Don't custom lower most intrinsics.
3345 1 : case Intrinsic::thread_pointer: {
3346 1 : EVT PtrVT = getPointerTy(DAG.getDataLayout());
3347 1 : return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3348 : }
3349 32 : case Intrinsic::eh_sjlj_lsda: {
3350 32 : MachineFunction &MF = DAG.getMachineFunction();
3351 32 : ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3352 : unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3353 32 : EVT PtrVT = getPointerTy(DAG.getDataLayout());
3354 : SDValue CPAddr;
3355 32 : bool IsPositionIndependent = isPositionIndependent();
3356 32 : unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
3357 : ARMConstantPoolValue *CPV =
3358 32 : ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex,
3359 : ARMCP::CPLSDA, PCAdj);
3360 32 : CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3361 32 : CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3362 : SDValue Result = DAG.getLoad(
3363 : PtrVT, dl, DAG.getEntryNode(), CPAddr,
3364 32 : MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3365 :
3366 32 : if (IsPositionIndependent) {
3367 25 : SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3368 25 : Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
3369 : }
3370 32 : return Result;
3371 : }
3372 : case Intrinsic::arm_neon_vabs:
3373 16 : return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
3374 32 : Op.getOperand(1));
3375 15 : case Intrinsic::arm_neon_vmulls:
3376 : case Intrinsic::arm_neon_vmullu: {
3377 : unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
3378 15 : ? ARMISD::VMULLs : ARMISD::VMULLu;
3379 15 : return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3380 30 : Op.getOperand(1), Op.getOperand(2));
3381 : }
3382 8 : case Intrinsic::arm_neon_vminnm:
3383 : case Intrinsic::arm_neon_vmaxnm: {
3384 : unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
3385 8 : ? ISD::FMINNUM : ISD::FMAXNUM;
3386 8 : return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3387 16 : Op.getOperand(1), Op.getOperand(2));
3388 : }
3389 16 : case Intrinsic::arm_neon_vminu:
3390 : case Intrinsic::arm_neon_vmaxu: {
3391 16 : if (Op.getValueType().isFloatingPoint())
3392 0 : return SDValue();
3393 : unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
3394 16 : ? ISD::UMIN : ISD::UMAX;
3395 16 : return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3396 32 : Op.getOperand(1), Op.getOperand(2));
3397 : }
3398 22 : case Intrinsic::arm_neon_vmins:
3399 : case Intrinsic::arm_neon_vmaxs: {
3400 : // v{min,max}s is overloaded between signed integers and floats.
3401 22 : if (!Op.getValueType().isFloatingPoint()) {
3402 : unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3403 12 : ? ISD::SMIN : ISD::SMAX;
3404 12 : return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3405 24 : Op.getOperand(1), Op.getOperand(2));
3406 : }
3407 : unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3408 10 : ? ISD::FMINNAN : ISD::FMAXNAN;
3409 10 : return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3410 20 : Op.getOperand(1), Op.getOperand(2));
3411 : }
3412 : case Intrinsic::arm_neon_vtbl1:
3413 1 : return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
3414 2 : Op.getOperand(1), Op.getOperand(2));
3415 : case Intrinsic::arm_neon_vtbl2:
3416 4 : return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
3417 8 : Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3418 : }
3419 : }
3420 :
3421 21 : static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
3422 : const ARMSubtarget *Subtarget) {
3423 : SDLoc dl(Op);
3424 : ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2));
3425 21 : auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue());
3426 21 : if (SSID == SyncScope::SingleThread)
3427 6 : return Op;
3428 :
3429 15 : if (!Subtarget->hasDataBarrier()) {
3430 : // Some ARMv6 CPUs can support data barriers with an mcr instruction.
3431 : // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
3432 : // here.
3433 : assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
3434 : "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
3435 : return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
3436 0 : DAG.getConstant(0, dl, MVT::i32));
3437 : }
3438 :
3439 : ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
3440 15 : AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
3441 : ARM_MB::MemBOpt Domain = ARM_MB::ISH;
3442 15 : if (Subtarget->isMClass()) {
3443 : // Only a full system barrier exists in the M-class architectures.
3444 : Domain = ARM_MB::SY;
3445 13 : } else if (Subtarget->preferISHSTBarriers() &&
3446 : Ord == AtomicOrdering::Release) {
3447 : // Swift happens to implement ISHST barriers in a way that's compatible with
3448 : // Release semantics but weaker than ISH so we'd be fools not to use
3449 : // it. Beware: other processors probably don't!
3450 : Domain = ARM_MB::ISHST;
3451 : }
3452 :
3453 : return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
3454 : DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
3455 15 : DAG.getConstant(Domain, dl, MVT::i32));
3456 : }
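
// Net effect (a summary of the cases above, not new behavior): an IR
// "fence seq_cst" typically becomes "dmb ish" on A/R-class targets with data
// barriers, "dmb sy" on M-class (only the full-system domain exists there),
// and "dmb ishst" for release fences on cores that prefer ISHST barriers.
// Single-thread fences need no dmb and are left as-is above.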
3457 :
3458 0 : static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
3459 : const ARMSubtarget *Subtarget) {
3460 : // Pre-v5TE ARM and Thumb1 do not have preload instructions.
3461 0 : if (!(Subtarget->isThumb2() ||
3462 0 : (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
3463 : // Just preserve the chain.
3464 0 : return Op.getOperand(0);
3465 :
3466 0 : SDLoc dl(Op);
3467 0 : unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
3468 0 : if (!isRead &&
3469 0 : (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
3470 : // ARMv7 with MP extension has PLDW.
3471 0 : return Op.getOperand(0);
3472 :
3473 0 : unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3474 0 : if (Subtarget->isThumb()) {
3475 : // Invert the bits.
3476 0 : isRead = ~isRead & 1;
3477 0 : isData = ~isData & 1;
3478 : }
3479 :
3480 : return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
3481 : Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
3482 0 : DAG.getConstant(isData, dl, MVT::i32));
3483 : }
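
// Summary sketch of the cases above: on targets without preload instructions
// the prefetch simply disappears (only the chain is kept); otherwise a data
// read prefetch becomes "pld [addr]", a data write prefetch needs v7+MP for
// "pldw [addr]" and is dropped without it, and an instruction prefetch maps
// to "pli [addr]" via the isData operand (with the Thumb bit inversions above).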
3484 :
3485 0 : static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
3486 0 : MachineFunction &MF = DAG.getMachineFunction();
3487 0 : ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
3488 :
3489 : // vastart just stores the address of the VarArgsFrameIndex slot into the
3490 : // memory location argument.
3491 0 : SDLoc dl(Op);
3492 0 : EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
3493 0 : SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3494 0 : const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3495 : return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3496 0 : MachinePointerInfo(SV));
3497 : }
3498 :
3499 1075 : SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
3500 : CCValAssign &NextVA,
3501 : SDValue &Root,
3502 : SelectionDAG &DAG,
3503 : const SDLoc &dl) const {
3504 1075 : MachineFunction &MF = DAG.getMachineFunction();
3505 1075 : ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3506 :
3507 : const TargetRegisterClass *RC;
3508 1075 : if (AFI->isThumb1OnlyFunction())
3509 : RC = &ARM::tGPRRegClass;
3510 : else
3511 : RC = &ARM::GPRRegClass;
3512 :
3513 : // Transform the arguments stored in physical registers into virtual ones.
3514 1075 : unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3515 1075 : SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3516 :
3517 : SDValue ArgValue2;
3518 1075 : if (NextVA.isMemLoc()) {
3519 29 : MachineFrameInfo &MFI = MF.getFrameInfo();
3520 29 : int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
3521 :
3522 : // Create load node to retrieve arguments from the stack.
3523 58 : SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3524 29 : ArgValue2 = DAG.getLoad(
3525 : MVT::i32, dl, Root, FIN,
3526 29 : MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3527 : } else {
3528 1046 : Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
3529 1046 : ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3530 : }
3531 1075 : if (!Subtarget->isLittle())
3532 : std::swap (ArgValue, ArgValue2);
3533 1075 : return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
3534 : }
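
// Typical shape (little-endian, soft-float ABI, register names illustrative):
// an f64 argument arriving in r0/r1 is reassembled with ARMISD::VMOVDRR, i.e.
// roughly "vmov d0, r0, r1"; if its second half was passed on the stack, that
// half is loaded from the fixed stack object created above instead.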
3535 :
3536 : // The remaining GPRs hold either the beginning of variable-argument
3537 : // data, or the beginning of an aggregate passed by value (usually
3538 : // byval). Either way, we allocate stack slots adjacent to the data
3539 : // provided by our caller, and store the unallocated registers there.
3540 : // If this is a variadic function, the va_list pointer will begin with
3541 : // these values; otherwise, this reassembles a (byval) structure that
3542 : // was split between registers and memory.
3543 : // Return: The frame index the registers were stored into.
3544 81 : int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
3545 : const SDLoc &dl, SDValue &Chain,
3546 : const Value *OrigArg,
3547 : unsigned InRegsParamRecordIdx,
3548 : int ArgOffset, unsigned ArgSize) const {
3549 : // Currently, two use-cases are possible:
3550 : // Case #1. Non-var-args function, and we meet the first byval parameter.
3551 : // Set up the first unallocated register as the first byval register;
3552 : // eat all remaining registers
3553 : // (these two actions are performed by the HandleByVal method).
3554 : // Then, here, we initialize the stack frame with
3555 : // "store-reg" instructions.
3556 : // Case #2. Var-args function that doesn't contain byval parameters.
3557 : // The same: eat all remaining unallocated registers,
3558 : // initialize the stack frame.
3559 :
3560 81 : MachineFunction &MF = DAG.getMachineFunction();
3561 81 : MachineFrameInfo &MFI = MF.getFrameInfo();
3562 81 : ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3563 : unsigned RBegin, REnd;
3564 81 : if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
3565 : CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
3566 : } else {
3567 : unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3568 47 : RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
3569 : REnd = ARM::R4;
3570 : }
3571 :
3572 81 : if (REnd != RBegin)
3573 67 : ArgOffset = -4 * (ARM::R4 - RBegin);
3574 :
3575 81 : auto PtrVT = getPointerTy(DAG.getDataLayout());
3576 81 : int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
3577 81 : SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
3578 :
3579 : SmallVector<SDValue, 4> MemOps;
3580 : const TargetRegisterClass *RC =
3581 81 : AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
3582 :
3583 261 : for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
3584 180 : unsigned VReg = MF.addLiveIn(Reg, RC);
3585 180 : SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
3586 : SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3587 256 : MachinePointerInfo(OrigArg, 4 * i));
3588 180 : MemOps.push_back(Store);
3589 180 : FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
3590 : }
3591 :
3592 81 : if (!MemOps.empty())
3593 67 : Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3594 81 : return FrameIndex;
3595 : }
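
// Worked example (hypothetical signature): for "void f(int a, ...)" only r0 is
// consumed by the fixed argument, so r1-r3 are stored just below the incoming
// stack arguments (ArgOffset becomes -4 * (R4 - R1), i.e. -12), making the
// register and stack portions of the va_list (or of a split byval aggregate)
// contiguous in memory.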
3596 :
3597 : // Set up the stack frame that the va_list pointer will start from.
3598 34 : void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
3599 : const SDLoc &dl, SDValue &Chain,
3600 : unsigned ArgOffset,
3601 : unsigned TotalArgRegsSaveSize,
3602 : bool ForceMutable) const {
3603 34 : MachineFunction &MF = DAG.getMachineFunction();
3604 34 : ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3605 :
3606 : // Try to store any remaining integer argument regs
3607 : // to their spots on the stack so that they may be loaded by dereferencing
3608 : // the result of va_next.
3609 : // If there are no regs to be stored, just point the address after the last
3610 : // argument passed via the stack.
3611 34 : int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
3612 : CCInfo.getInRegsParamsCount(),
3613 34 : CCInfo.getNextStackOffset(), 4);
3614 : AFI->setVarArgsFrameIndex(FrameIndex);
3615 34 : }
3616 :
3617 13769 : SDValue ARMTargetLowering::LowerFormalArguments(
3618 : SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3619 : const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3620 : SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3621 13769 : MachineFunction &MF = DAG.getMachineFunction();
3622 13769 : MachineFrameInfo &MFI = MF.getFrameInfo();
3623 :
3624 13769 : ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3625 :
3626 : // Assign locations to all of the incoming arguments.
3627 : SmallVector<CCValAssign, 16> ArgLocs;
3628 : CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3629 27538 : *DAG.getContext());
3630 13769 : CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
3631 :
3632 : SmallVector<SDValue, 16> ArgValues;
3633 13769 : SDValue ArgValue;
3634 13769 : Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
3635 : unsigned CurArgIdx = 0;
3636 :
3637 : // Initially ArgRegsSaveSize is zero.
3638 : // Then we increase this value each time we meet a byval parameter.
3639 : // We also increase this value in the case of a varargs function.
3640 : AFI->setArgRegsSaveSize(0);
3641 :
3642 : // Calculate the amount of stack space that we need to allocate to store
3643 : // byval and variadic arguments that are passed in registers.
3644 : // We need to know this before we allocate the first byval or variadic
3645 : // argument, as they will be allocated a stack slot below the CFA (Canonical
3646 : // Frame Address, the stack pointer at entry to the function).
3647 13769 : unsigned ArgRegBegin = ARM::R4;
3648 13823 : for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3649 22592 : if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
3650 : break;
3651 :
3652 54 : CCValAssign &VA = ArgLocs[i];
3653 54 : unsigned Index = VA.getValNo();
3654 108 : ISD::ArgFlagsTy Flags = Ins[Index].Flags;
3655 54 : if (!Flags.isByVal())
3656 20 : continue;
3657 :
3658 : assert(VA.isMemLoc() && "unexpected byval pointer in reg");
3659 : unsigned RBegin, REnd;
3660 : CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
3661 34 : ArgRegBegin = std::min(ArgRegBegin, RBegin);
3662 :
3663 : CCInfo.nextInRegsParam();
3664 : }
3665 : CCInfo.rewindByValRegsInfo();
3666 :
3667 : int lastInsIndex = -1;
3668 13769 : if (isVarArg && MFI.hasVAStart()) {
3669 : unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3670 34 : if (RegIdx != array_lengthof(GPRArgRegs))
3671 66 : ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
3672 : }
3673 :
3674 13769 : unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
3675 : AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
3676 13769 : auto PtrVT = getPointerTy(DAG.getDataLayout());
3677 :
3678 36693 : for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3679 22924 : CCValAssign &VA = ArgLocs[i];
3680 45848 : if (Ins[VA.getValNo()].isOrigArg()) {
3681 22665 : std::advance(CurOrigArg,
3682 : Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
3683 : CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
3684 : }
3685 : // Arguments stored in registers.
3686 22924 : if (VA.isRegLoc()) {
3687 : EVT RegVT = VA.getLocVT();
3688 :
3689 21302 : if (VA.needsCustom()) {
3690 : // f64 and vector types are split up into multiple registers or
3691 : // combinations of registers and stack slots.
3692 878 : if (VA.getLocVT() == MVT::v2f64) {
3693 216 : SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
3694 432 : Chain, DAG, dl);
3695 216 : VA = ArgLocs[++i]; // skip ahead to next loc
3696 216 : SDValue ArgValue2;
3697 216 : if (VA.isMemLoc()) {
3698 19 : int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
3699 19 : SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3700 19 : ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
3701 : MachinePointerInfo::getFixedStack(
3702 19 : DAG.getMachineFunction(), FI));
3703 : } else {
3704 197 : ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
3705 197 : Chain, DAG, dl);
3706 : }
3707 216 : ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
3708 216 : ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3709 : ArgValue, ArgValue1,
3710 216 : DAG.getIntPtrConstant(0, dl));
3711 216 : ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3712 : ArgValue, ArgValue2,
3713 216 : DAG.getIntPtrConstant(1, dl));
3714 : } else
3715 1324 : ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
3716 : } else {
3717 : const TargetRegisterClass *RC;
3718 :
3719 :
3720 : if (RegVT == MVT::f16)
3721 : RC = &ARM::HPRRegClass;
3722 : else if (RegVT == MVT::f32)
3723 : RC = &ARM::SPRRegClass;
3724 : else if (RegVT == MVT::f64 || RegVT == MVT::v4f16)
3725 : RC = &ARM::DPRRegClass;
3726 : else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16)
3727 : RC = &ARM::QPRRegClass;
3728 : else if (RegVT == MVT::i32)
3729 17817 : RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
3730 : : &ARM::GPRRegClass;
3731 : else
3732 0 : llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
3733 :
3734 : // Transform the arguments in physical registers into virtual ones.
3735 20424 : unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3736 20424 : ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3737 : }
3738 :
3739 : // If this is an 8 or 16-bit value, it is really passed promoted
3740 : // to 32 bits. Insert an assert[sz]ext to capture this, then
3741 : // truncate to the right size.
3742 21302 : switch (VA.getLocInfo()) {
3743 0 : default: llvm_unreachable("Unknown loc info!");
3744 : case CCValAssign::Full: break;
3745 : case CCValAssign::BCvt:
3746 2079 : ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
3747 2079 : break;
3748 : case CCValAssign::SExt:
3749 0 : ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3750 0 : DAG.getValueType(VA.getValVT()));
3751 0 : ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3752 0 : break;
3753 : case CCValAssign::ZExt:
3754 0 : ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3755 0 : DAG.getValueType(VA.getValVT()));
3756 0 : ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3757 0 : break;
3758 : }
3759 :
3760 21302 : InVals.push_back(ArgValue);
3761 : } else { // VA.isRegLoc()
3762 : // sanity check
3763 : assert(VA.isMemLoc());
3764 : assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
3765 :
3766 1622 : int index = VA.getValNo();
3767 :
3768 : // Some Ins[] entries become multiple ArgLoc[] entries.
3769 : // Process them only once.
3770 1622 : if (index != lastInsIndex)
3771 : {
3772 3244 : ISD::ArgFlagsTy Flags = Ins[index].Flags;
3773 : // FIXME: For now, all byval parameter objects are marked mutable.
3774 : // This can be changed with more analysis.
3775 : // In the case of tail call optimization, mark all arguments mutable,
3776 : // since they could be overwritten by the lowering of arguments for a
3777 : // tail call.
3778 1622 : if (Flags.isByVal()) {
3779 : assert(Ins[index].isOrigArg() &&
3780 : "Byval arguments cannot be implicit");
3781 47 : unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
3782 :
3783 47 : int FrameIndex = StoreByValRegs(
3784 : CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
3785 47 : VA.getLocMemOffset(), Flags.getByValSize());
3786 94 : InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
3787 : CCInfo.nextInRegsParam();
3788 : } else {
3789 1575 : unsigned FIOffset = VA.getLocMemOffset();
3790 1575 : int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
3791 : FIOffset, true);
3792 :
3793 : // Create load nodes to retrieve arguments from the stack.
3794 1575 : SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3795 3150 : InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
3796 : MachinePointerInfo::getFixedStack(
3797 3150 : DAG.getMachineFunction(), FI)));
3798 : }
3799 : lastInsIndex = index;
3800 : }
3801 : }
3802 : }
3803 :
3804 : // varargs
3805 13769 : if (isVarArg && MFI.hasVAStart())
3806 34 : VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
3807 : CCInfo.getNextStackOffset(),
3808 : TotalArgRegsSaveSize);
3809 :
3810 13769 : AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
3811 :
3812 13769 : return Chain;
3813 : }
3814 :
3815 : /// isFloatingPointZero - Return true if this is +0.0.
3816 0 : static bool isFloatingPointZero(SDValue Op) {
3817 : if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
3818 0 : return CFP->getValueAPF().isPosZero();
3819 : else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
3820 : // Maybe this has already been legalized into the constant pool?
3821 0 : if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
3822 0 : SDValue WrapperOp = Op.getOperand(1).getOperand(0);
3823 : if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
3824 0 : if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
3825 0 : return CFP->getValueAPF().isPosZero();
3826 : }
3827 0 : } else if (Op->getOpcode() == ISD::BITCAST &&
3828 0 : Op->getValueType(0) == MVT::f64) {
3829 : // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
3830 : // created by LowerConstantFP().
3831 0 : SDValue BitcastOp = Op->getOperand(0);
3832 0 : if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
3833 0 : isNullConstant(BitcastOp->getOperand(0)))
3834 0 : return true;
3835 : }
3836 : return false;
3837 : }
3838 :
3839 : /// Returns an appropriate ARM CMP (cmp) and the corresponding condition code
3840 : /// for the given operands.
3841 3223 : SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3842 : SDValue &ARMcc, SelectionDAG &DAG,
3843 : const SDLoc &dl) const {
3844 : if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
3845 2641 : unsigned C = RHSC->getZExtValue();
3846 2641 : if (!isLegalICmpImmediate((int32_t)C)) {
3847 : // Constant does not fit, try adjusting it by one.
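// For example (illustrative, assuming an ARM-mode target where compare
// immediates use the modified-immediate encoding): "x < 0x10001" cannot
// encode 0x10001 directly, but 0x10000 is encodable, so the compare is
// rewritten below as "x <= 0x10000" (SETLT with C becomes SETLE with
// C - 1) and still selects to a single CMP.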
3848 104 : switch (CC) {
3849 : default: break;
3850 25 : case ISD::SETLT:
3851 : case ISD::SETGE:
3852 25 : if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
3853 4 : CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
3854 4 : RHS = DAG.getConstant(C - 1, dl, MVT::i32);
3855 : }
3856 : break;
3857 3 : case ISD::SETULT:
3858 : case ISD::SETUGE:
3859 3 : if (C != 0 && isLegalICmpImmediate(C-1)) {
3860 1 : CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
3861 1 : RHS = DAG.getConstant(C - 1, dl, MVT::i32);
3862 : }
3863 : break;
3864 49 : case ISD::SETLE:
3865 : case ISD::SETGT:
3866 49 : if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
3867 42 : CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
3868 42 : RHS = DAG.getConstant(C + 1, dl, MVT::i32);
3869 : }
3870 : break;
3871 5 : case ISD::SETULE:
3872 : case ISD::SETUGT:
3873 5 : if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
3874 0 : CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
3875 0 : RHS = DAG.getConstant(C + 1, dl, MVT::i32);
3876 : }
3877 : break;
3878 : }
3879 : }
3880 24 : } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) &&
3881 : (ARM_AM::getShiftOpcForNode(RHS.getOpcode()) == ARM_AM::no_shift)) {
3882 : // In ARM and Thumb-2, the compare instructions can shift their second
3883 : // operand.
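// For example, "cmp r0, r1, lsl #2" folds the shift into the compare, so
// the operands are swapped below to put the shifted value on the RHS.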
3884 24 : CC = ISD::getSetCCSwappedOperands(CC);
3885 : std::swap(LHS, RHS);
3886 : }
3887 :
3888 3223 : ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
3889 : ARMISD::NodeType CompareType;
3890 3223 : switch (CondCode) {
3891 : default:
3892 : CompareType = ARMISD::CMP;
3893 : break;
3894 2374 : case ARMCC::EQ:
3895 : case ARMCC::NE:
3896 : // Uses only Z Flag
3897 : CompareType = ARMISD::CMPZ;
3898 2374 : break;
3899 : }
3900 3223 : ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
3901 3223 : return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
3902 : }
3903 :
3904 : /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
3905 599 : SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
3906 : SelectionDAG &DAG, const SDLoc &dl,
3907 : bool InvalidOnQNaN) const {
3908 : assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
3909 599 : SDValue Cmp;
3910 599 : SDValue C = DAG.getConstant(InvalidOnQNaN, dl, MVT::i32);
3911 599 : if (!isFloatingPointZero(RHS))
3912 517 : Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS, C);
3913 : else
3914 82 : Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS, C);
3915 599 : return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
3916 : }
3917 :
3918 : /// duplicateCmp - Glue values can have only one use, so this function
3919 : /// duplicates a comparison node.
3920 : SDValue
3921 2 : ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
3922 : unsigned Opc = Cmp.getOpcode();
3923 : SDLoc DL(Cmp);
3924 2 : if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
3925 1 : return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
3926 :
3927 : assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
3928 1 : Cmp = Cmp.getOperand(0);
3929 : Opc = Cmp.getOpcode();
3930 1 : if (Opc == ARMISD::CMPFP)
3931 0 : Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
3932 0 : Cmp.getOperand(1), Cmp.getOperand(2));
3933 : else {
3934 : assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
3935 1 : Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
3936 1 : Cmp.getOperand(1));
3937 : }
3938 1 : return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
3939 : }
3940 :
3941 : // This function returns three things: the arithmetic computation itself
3942 : // (Value), a comparison (OverflowCmp), and a condition code (ARMcc). The
3943 : // comparison and the condition code define the case in which the arithmetic
3944 : // computation *does not* overflow.
3945 : std::pair<SDValue, SDValue>
3946 36 : ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
3947 : SDValue &ARMcc) const {
3948 : assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
3949 :
3950 : SDValue Value, OverflowCmp;
3951 36 : SDValue LHS = Op.getOperand(0);
3952 36 : SDValue RHS = Op.getOperand(1);
3953 : SDLoc dl(Op);
3954 :
3955 : // FIXME: We are currently always generating CMPs because we don't support
3956 : // generating CMN through the backend. This is not as good as the natural
3957 : // CMP case because it causes a register dependency and cannot be folded
3958 : // later.
3959 :
3960 36 : switch (Op.getOpcode()) {
3961 0 : default:
3962 0 : llvm_unreachable("Unknown overflow instruction!");
3963 : case ISD::SADDO:
3964 19 : ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
3965 19 : Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
3966 19 : OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
3967 19 : break;
3968 : case ISD::UADDO:
3969 2 : ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
3970 : // We use ADDC here to correspond to its use in LowerUnsignedALUO.
3971 : // We do not use it in the USUBO case as Value may not be used.
3972 4 : Value = DAG.getNode(ARMISD::ADDC, dl,
3973 2 : DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS)
3974 2 : .getValue(0);
3975 2 : OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
3976 2 : break;
3977 : case ISD::SSUBO:
3978 11 : ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
3979 11 : Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
3980 11 : OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
3981 11 : break;
3982 : case ISD::USUBO:
3983 2 : ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
3984 2 : Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
3985 2 : OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
3986 2 : break;
3987 : case ISD::UMULO:
3988 : // We generate a UMUL_LOHI and then check if the high word is 0.
3989 1 : ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
3990 1 : Value = DAG.getNode(ISD::UMUL_LOHI, dl,
3991 : DAG.getVTList(Op.getValueType(), Op.getValueType()),
3992 1 : LHS, RHS);
3993 1 : OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
3994 1 : DAG.getConstant(0, dl, MVT::i32));
3995 : Value = Value.getValue(0); // We only want the low 32 bits for the result.
3996 1 : break;
3997 : case ISD::SMULO:
3998 : // We generate a SMUL_LOHI and then check if all the bits of the high word
3999 : // are the same as the sign bit of the low word.
4000 1 : ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
4001 1 : Value = DAG.getNode(ISD::SMUL_LOHI, dl,
4002 : DAG.getVTList(Op.getValueType(), Op.getValueType()),
4003 1 : LHS, RHS);
4004 1 : OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
4005 : DAG.getNode(ISD::SRA, dl, Op.getValueType(),
4006 : Value.getValue(0),
4007 1 : DAG.getConstant(31, dl, MVT::i32)));
4008 : Value = Value.getValue(0); // We only want the low 32 bits for the result.
4009 1 : break;
4010 : } // switch (...)
4011 :
4012 36 : return std::make_pair(Value, OverflowCmp);
4013 : }
4014 :
4015 : SDValue
4016 18 : ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
4017 : // Let legalize expand this if it isn't a legal type yet.
4018 18 : if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
4019 0 : return SDValue();
4020 :
4021 : SDValue Value, OverflowCmp;
4022 18 : SDValue ARMcc;
4023 18 : std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
4024 18 : SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4025 : SDLoc dl(Op);
4026 : // We use 0 and 1 as false and true values.
4027 18 : SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
4028 18 : SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
4029 18 : EVT VT = Op.getValueType();
4030 :
4031 : SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
4032 18 : ARMcc, CCR, OverflowCmp);
4033 :
4034 18 : SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
4035 18 : return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
4036 : }
4037 :
4038 652 : static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry,
4039 : SelectionDAG &DAG) {
4040 : SDLoc DL(BoolCarry);
4041 652 : EVT CarryVT = BoolCarry.getValueType();
4042 :
4043 : // This converts the boolean value carry into the carry flag by doing
4044 : // ARMISD::SUBC Carry, 1
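// For instance: ARM sets the carry flag when a subtraction does not
// borrow, so a boolean carry of 1 yields 1 - 1 = 0 with C = 1, and a
// boolean carry of 0 yields 0 - 1, which borrows, leaving C = 0.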
4045 : SDValue Carry = DAG.getNode(ARMISD::SUBC, DL,
4046 : DAG.getVTList(CarryVT, MVT::i32),
4047 652 : BoolCarry, DAG.getConstant(1, DL, CarryVT));
4048 652 : return Carry.getValue(1);
4049 : }
4050 :
4051 1218 : static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT,
4052 : SelectionDAG &DAG) {
4053 : SDLoc DL(Flags);
4054 :
4055 : // Now convert the carry flag into a boolean carry. We do this
4056 : // using ARMISD::ADDE 0, 0, Carry
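// (0 + 0 + carry is just the carry, so this materializes the flag as 0 or 1.)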
4057 : return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32),
4058 : DAG.getConstant(0, DL, MVT::i32),
4059 1218 : DAG.getConstant(0, DL, MVT::i32), Flags);
4060 : }
4061 :
4062 653 : SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
4063 : SelectionDAG &DAG) const {
4064 : // Let legalize expand this if it isn't a legal type yet.
4065 653 : if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
4066 0 : return SDValue();
4067 :
4068 653 : SDValue LHS = Op.getOperand(0);
4069 653 : SDValue RHS = Op.getOperand(1);
4070 : SDLoc dl(Op);
4071 :
4072 653 : EVT VT = Op.getValueType();
4073 653 : SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4074 653 : SDValue Value;
4075 653 : SDValue Overflow;
4076 653 : switch (Op.getOpcode()) {
4077 0 : default:
4078 0 : llvm_unreachable("Unknown overflow instruction!");
4079 452 : case ISD::UADDO:
4080 452 : Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS);
4081 : // Convert the carry flag into a boolean value.
4082 452 : Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
4083 452 : break;
4084 201 : case ISD::USUBO: {
4085 201 : Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS);
4086 : // Convert the carry flag into a boolean value.
4087 201 : Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
4088 : // ARMISD::SUBC returns 0 when we have to borrow, so make it an overflow
4089 : // value. So compute 1 - C.
4090 201 : Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32,
4091 201 : DAG.getConstant(1, dl, MVT::i32), Overflow);
4092 201 : break;
4093 : }
4094 : }
4095 :
4096 653 : return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
4097 : }
4098 :
4099 140 : SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
4100 140 : SDValue Cond = Op.getOperand(0);
4101 140 : SDValue SelectTrue = Op.getOperand(1);
4102 140 : SDValue SelectFalse = Op.getOperand(2);
4103 : SDLoc dl(Op);
4104 : unsigned Opc = Cond.getOpcode();
4105 :
4106 140 : if (Cond.getResNo() == 1 &&
4107 1 : (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
4108 : Opc == ISD::USUBO)) {
4109 0 : if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
4110 0 : return SDValue();
4111 :
4112 : SDValue Value, OverflowCmp;
4113 0 : SDValue ARMcc;
4114 0 : std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
4115 0 : SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4116 0 : EVT VT = Op.getValueType();
4117 :
4118 : return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
4119 0 : OverflowCmp, DAG);
4120 : }
4121 :
4122 : // Convert:
4123 : //
4124 : // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
4125 : // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
4126 : //
4127 140 : if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
4128 : const ConstantSDNode *CMOVTrue =
4129 : dyn_cast<ConstantSDNode>(Cond.getOperand(0));
4130 : const ConstantSDNode *CMOVFalse =
4131 : dyn_cast<ConstantSDNode>(Cond.getOperand(1));
4132 :
4133 0 : if (CMOVTrue && CMOVFalse) {
4134 0 : unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
4135 0 : unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
4136 :
4137 0 : SDValue True;
4138 0 : SDValue False;
4139 0 : if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
4140 0 : True = SelectTrue;
4141 0 : False = SelectFalse;
4142 0 : } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
4143 0 : True = SelectFalse;
4144 0 : False = SelectTrue;
4145 : }
4146 :
4147 0 : if (True.getNode() && False.getNode()) {
4148 0 : EVT VT = Op.getValueType();
4149 0 : SDValue ARMcc = Cond.getOperand(2);
4150 0 : SDValue CCR = Cond.getOperand(3);
4151 0 : SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
4152 : assert(True.getValueType() == VT);
4153 0 : return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
4154 : }
4155 : }
4156 : }
4157 :
4158 : // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
4159 : // undefined bits before doing a full-word comparison with zero.
4160 140 : Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
4161 140 : DAG.getConstant(1, dl, Cond.getValueType()));
4162 :
4163 : return DAG.getSelectCC(dl, Cond,
4164 : DAG.getConstant(0, dl, Cond.getValueType()),
4165 140 : SelectTrue, SelectFalse, ISD::SETNE);
4166 : }
4167 :
4168 156 : static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
4169 : bool &swpCmpOps, bool &swpVselOps) {
4170 : // Start by selecting the GE condition code for opcodes that return true for
4171 : // 'equality'
4172 156 : if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
4173 125 : CC == ISD::SETULE)
4174 60 : CondCode = ARMCC::GE;
4175 :
4176 : // and GT for opcodes that return false for 'equality'.
4177 96 : else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
4178 64 : CC == ISD::SETULT)
4179 72 : CondCode = ARMCC::GT;
4180 :
4181 : // Since we are constrained to GE/GT, if the opcode contains 'less', we need
4182 : // to swap the compare operands.
4183 156 : if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
4184 127 : CC == ISD::SETULT)
4185 69 : swpCmpOps = true;
4186 :
4187 : // Both GT and GE are ordered comparisons, and return false for 'unordered'.
4188 : // If we have an unordered opcode, we need to swap the operands to the VSEL
4189 : // instruction (effectively negating the condition).
4190 : //
4191 : // This also has the effect of swapping which one of 'less' or 'greater'
4192 : // returns true, so we also swap the compare operands. It also switches
4193 : // whether we return true for 'equality', so we compensate by picking the
4194 : // opposite condition code to our original choice.
4195 156 : if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
4196 : CC == ISD::SETUGT) {
4197 62 : swpCmpOps = !swpCmpOps;
4198 62 : swpVselOps = !swpVselOps;
4199 92 : CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
4200 : }
4201 :
4202 : // 'ordered' is 'anything but unordered', so use the VS condition code and
4203 : // swap the VSEL operands.
4204 156 : if (CC == ISD::SETO) {
4205 2 : CondCode = ARMCC::VS;
4206 2 : swpVselOps = true;
4207 : }
4208 :
4209 : // 'unordered or not equal' is 'anything but equal', so use the EQ condition
4210 : // code and swap the VSEL operands.
4211 156 : if (CC == ISD::SETUNE) {
4212 2 : CondCode = ARMCC::EQ;
4213 2 : swpVselOps = true;
4214 : }
4215 156 : }
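// Worked example for the rules above (illustrative): for CC == ISD::SETULT
// the second rule picks GT, the third rule sets swpCmpOps, and the fourth
// (unordered) rule flips both swap flags and turns GT into GE. The caller
// then compares the original operands and emits a VSEL on GE with its value
// operands swapped, which selects the original 'true' value exactly when the
// compare is not ordered greater-or-equal, i.e. when "a <u b" holds.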
4216 :
4217 1854 : SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
4218 : SDValue TrueVal, SDValue ARMcc, SDValue CCR,
4219 : SDValue Cmp, SelectionDAG &DAG) const {
4220 1854 : if (Subtarget->isFPOnlySP() && VT == MVT::f64) {
4221 2 : FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
4222 2 : DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
4223 2 : TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
4224 2 : DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
4225 :
4226 2 : SDValue TrueLow = TrueVal.getValue(0);
4227 2 : SDValue TrueHigh = TrueVal.getValue(1);
4228 2 : SDValue FalseLow = FalseVal.getValue(0);
4229 2 : SDValue FalseHigh = FalseVal.getValue(1);
4230 :
4231 : SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
4232 2 : ARMcc, CCR, Cmp);
4233 : SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
4234 2 : ARMcc, CCR, duplicateCmp(Cmp, DAG));
4235 :
4236 2 : return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
4237 : } else {
4238 : return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
4239 1852 : Cmp);
4240 : }
4241 : }
4242 :
4243 : static bool isGTorGE(ISD::CondCode CC) {
4244 0 : return CC == ISD::SETGT || CC == ISD::SETGE;
4245 : }
4246 :
4247 : static bool isLTorLE(ISD::CondCode CC) {
4248 0 : return CC == ISD::SETLT || CC == ISD::SETLE;
4249 : }
4250 :
4251 : // See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
4252 : // All of these conditions (and their <= and >= counterparts) will do:
4253 : // x < k ? k : x
4254 : // x > k ? x : k
4255 : // k < x ? x : k
4256 : // k > x ? k : x
4257 0 : static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
4258 : const SDValue TrueVal, const SDValue FalseVal,
4259 : const ISD::CondCode CC, const SDValue K) {
4260 : return (isGTorGE(CC) &&
4261 0 : ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||
4262 : (isLTorLE(CC) &&
4263 0 : ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
4264 : }
4265 :
4266 : // Similar to isLowerSaturate(), but checks for upper-saturating conditions.
4267 0 : static bool isUpperSaturate(const SDValue LHS, const SDValue RHS,
4268 : const SDValue TrueVal, const SDValue FalseVal,
4269 : const ISD::CondCode CC, const SDValue K) {
4270 : return (isGTorGE(CC) &&
4271 0 : ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal))) ||
4272 : (isLTorLE(CC) &&
4273 0 : ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal)));
4274 : }
4275 :
4276 : // Check if two chained conditionals could be converted into SSAT or USAT.
4277 : //
4278 : // SSAT can replace a set of two conditional selectors that bound a number to an
4279 : // interval of the form [~k, k] when k + 1 is a power of 2. Here are some examples:
4280 : //
4281 : // x < -k ? -k : (x > k ? k : x)
4282 : // x < -k ? -k : (x < k ? x : k)
4283 : // x > -k ? (x > k ? k : x) : -k
4284 : // x < k ? (x < -k ? -k : x) : k
4285 : // etc.
4286 : //
4287 : // USAT works similarly to SSAT but bounds the value to the interval [0, k],
4288 : // where k + 1 is a power of 2.
4289 : //
4290 : // It returns true if the conversion can be done, false otherwise.
4291 : // Additionally, the variable is returned in parameter V, the constant in K, and
4292 : // usat is set to true if the conditional represents an unsigned saturation.
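// For example (illustrative): clamping to [0, 255], as in
//   x < 0 ? 0 : (x > 255 ? 255 : x)
// is matched with K = 255 and usat = true and is later emitted as an
// unsigned saturate to 8 bits, while clamping to [-128, 127] satisfies
// Val1 == ~Val2 below and becomes a signed saturate.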
4293 1287 : static bool isSaturatingConditional(const SDValue &Op, SDValue &V,
4294 : uint64_t &K, bool &usat) {
4295 1287 : SDValue LHS1 = Op.getOperand(0);
4296 1287 : SDValue RHS1 = Op.getOperand(1);
4297 1287 : SDValue TrueVal1 = Op.getOperand(2);
4298 1287 : SDValue FalseVal1 = Op.getOperand(3);
4299 1287 : ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4300 :
4301 1287 : const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
4302 2574 : if (Op2.getOpcode() != ISD::SELECT_CC)
4303 : return false;
4304 :
4305 55 : SDValue LHS2 = Op2.getOperand(0);
4306 55 : SDValue RHS2 = Op2.getOperand(1);
4307 55 : SDValue TrueVal2 = Op2.getOperand(2);
4308 55 : SDValue FalseVal2 = Op2.getOperand(3);
4309 55 : ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
4310 :
4311 : // Find out which are the constants and which are the variables
4312 : // in each conditional
4313 : SDValue *K1 = isa<ConstantSDNode>(LHS1) ? &LHS1 : isa<ConstantSDNode>(RHS1)
4314 : ? &RHS1
4315 : : nullptr;
4316 : SDValue *K2 = isa<ConstantSDNode>(LHS2) ? &LHS2 : isa<ConstantSDNode>(RHS2)
4317 : ? &RHS2
4318 : : nullptr;
4319 55 : SDValue K2Tmp = isa<ConstantSDNode>(TrueVal2) ? TrueVal2 : FalseVal2;
4320 55 : SDValue V1Tmp = (K1 && *K1 == LHS1) ? RHS1 : LHS1;
4321 55 : SDValue V2Tmp = (K2 && *K2 == LHS2) ? RHS2 : LHS2;
4322 55 : SDValue V2 = (K2Tmp == TrueVal2) ? FalseVal2 : TrueVal2;
4323 :
4324 : // We must detect cases where the original operations worked with 16- or
4325 : // 8-bit values. In such case, V2Tmp != V2 because the comparison operations
4326 : // must work with sign-extended values but the select operations return
4327 : // the original non-extended value.
4328 : SDValue V2TmpReg = V2Tmp;
4329 55 : if (V2Tmp->getOpcode() == ISD::SIGN_EXTEND_INREG)
4330 9 : V2TmpReg = V2Tmp->getOperand(0);
4331 :
4332 : // Check that the registers and the constants have the correct values
4333 : // in both conditionals
4334 55 : if (!K1 || !K2 || *K1 == Op2 || *K2 != K2Tmp || V1Tmp != V2Tmp ||
4335 : V2TmpReg != V2)
4336 : return false;
4337 :
4338 : // Figure out which conditional is saturating the lower/upper bound.
4339 : const SDValue *LowerCheckOp =
4340 34 : isLowerSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
4341 34 : ? &Op
4342 16 : : isLowerSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
4343 16 : ? &Op2
4344 : : nullptr;
4345 : const SDValue *UpperCheckOp =
4346 34 : isUpperSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
4347 34 : ? &Op
4348 18 : : isUpperSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
4349 18 : ? &Op2
4350 : : nullptr;
4351 :
4352 34 : if (!UpperCheckOp || !LowerCheckOp || LowerCheckOp == UpperCheckOp)
4353 : return false;
4354 :
4355 : // Check that the constant in the lower-bound check is
4356 : // the opposite of the constant in the upper-bound check
4357 : // in 1's complement.
4358 28 : int64_t Val1 = cast<ConstantSDNode>(*K1)->getSExtValue();
4359 56 : int64_t Val2 = cast<ConstantSDNode>(*K2)->getSExtValue();
4360 28 : int64_t PosVal = std::max(Val1, Val2);
4361 28 : int64_t NegVal = std::min(Val1, Val2);
4362 :
4363 28 : if (((Val1 > Val2 && UpperCheckOp == &Op) ||
4364 43 : (Val1 < Val2 && UpperCheckOp == &Op2)) &&
4365 28 : isPowerOf2_64(PosVal + 1)) {
4366 :
4367 : // Handle the difference between USAT (unsigned) and SSAT (signed) saturation
4368 28 : if (Val1 == ~Val2)
4369 9 : usat = false;
4370 19 : else if (NegVal == 0)
4371 16 : usat = true;
4372 : else
4373 : return false;
4374 :
4375 25 : V = V2;
4376 25 : K = (uint64_t)PosVal; // At this point, PosVal is guaranteed to be positive
4377 :
4378 25 : return true;
4379 : }
4380 :
4381 : return false;
4382 : }
4383 :
4384 : // Check if a condition of the type x < k ? k : x can be converted into a
4385 : // bit operation instead of conditional moves.
4386 : // Currently this is allowed given:
4387 : // - The conditions and values match up
4388 : // - k is 0 or -1 (all ones)
4389 : // This function will not check the last condition; that's up to the caller.
4390 : // It returns true if the transformation can be made, and in that case
4391 : // returns x in V and k in SatK.
4392 1546 : static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V,
4393 : SDValue &SatK)
4394 : {
4395 1546 : SDValue LHS = Op.getOperand(0);
4396 1546 : SDValue RHS = Op.getOperand(1);
4397 1546 : ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4398 1546 : SDValue TrueVal = Op.getOperand(2);
4399 1546 : SDValue FalseVal = Op.getOperand(3);
4400 :
4401 : SDValue *K = isa<ConstantSDNode>(LHS) ? &LHS : isa<ConstantSDNode>(RHS)
4402 : ? &RHS
4403 : : nullptr;
4404 :
4405 : // No constant operation in comparison, early out
4406 : if (!K)
4407 : return false;
4408 :
4409 910 : SDValue KTmp = isa<ConstantSDNode>(TrueVal) ? TrueVal : FalseVal;
4410 910 : V = (KTmp == TrueVal) ? FalseVal : TrueVal;
4411 910 : SDValue VTmp = (K && *K == LHS) ? RHS : LHS;
4412 :
4413 : // If the constant on left and right side, or variable on left and right,
4414 : // does not match, early out
4415 : if (*K != KTmp || V != VTmp)
4416 : return false;
4417 :
4418 76 : if (isLowerSaturate(LHS, RHS, TrueVal, FalseVal, CC, *K)) {
4419 38 : SatK = *K;
4420 38 : return true;
4421 : }
4422 :
4423 : return false;
4424 : }
4425 :
4426 1873 : SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
4427 3746 : EVT VT = Op.getValueType();
4428 : SDLoc dl(Op);
4429 :
4430 : // Try to convert two saturating conditional selects into a single SSAT
4431 1873 : SDValue SatValue;
4432 : uint64_t SatConstant;
4433 : bool SatUSat;
4434 3160 : if (((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2()) &&
4435 1287 : isSaturatingConditional(Op, SatValue, SatConstant, SatUSat)) {
4436 25 : if (SatUSat)
4437 : return DAG.getNode(ARMISD::USAT, dl, VT, SatValue,
4438 32 : DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));
4439 : else
4440 : return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue,
4441 18 : DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));
4442 : }
4443 :
4444 : // Try to convert expressions of the form x < k ? k : x (and similar forms)
4445 : // into more efficient bit operations, which is possible when k is 0 or -1.
4446 : // On ARM and Thumb-2, which have a flexible second operand, this results in
4447 : // a single instruction. On Thumb-1 the shift and the bit operation take two
4448 : // instructions.
4449 : // Only allow this transformation on full-width (32-bit) operations.
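// For k == 0 (illustrative), "x < 0 ? 0 : x" is rewritten below as
//   x & ~(x >> 31)
// since the arithmetic shift is all ones exactly when x is negative; for
// k == -1, "x > -1 ? x : -1" becomes x | (x >> 31).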
4450 1848 : SDValue LowerSatConstant;
4451 1546 : if (VT == MVT::i32 &&
4452 1546 : isLowerSaturatingConditional(Op, SatValue, LowerSatConstant)) {
4453 : SDValue ShiftV = DAG.getNode(ISD::SRA, dl, VT, SatValue,
4454 38 : DAG.getConstant(31, dl, VT));
4455 38 : if (isNullConstant(LowerSatConstant)) {
4456 : SDValue NotShiftV = DAG.getNode(ISD::XOR, dl, VT, ShiftV,
4457 17 : DAG.getAllOnesConstant(dl, VT));
4458 17 : return DAG.getNode(ISD::AND, dl, VT, SatValue, NotShiftV);
4459 21 : } else if (isAllOnesConstant(LowerSatConstant))
4460 6 : return DAG.getNode(ISD::OR, dl, VT, SatValue, ShiftV);
4461 : }
4462 :
4463 1825 : SDValue LHS = Op.getOperand(0);
4464 1825 : SDValue RHS = Op.getOperand(1);
4465 1825 : ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4466 1825 : SDValue TrueVal = Op.getOperand(2);
4467 1825 : SDValue FalseVal = Op.getOperand(3);
4468 :
4469 1825 : if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
4470 56 : DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
4471 : dl);
4472 :
4473 : // If softenSetCCOperands only returned one value, we should compare it to
4474 : // zero.
4475 28 : if (!RHS.getNode()) {
4476 4 : RHS = DAG.getConstant(0, dl, LHS.getValueType());
4477 2 : CC = ISD::SETNE;
4478 : }
4479 : }
4480 :
4481 1825 : if (LHS.getValueType() == MVT::i32) {
4482 : // Try to generate VSEL on ARMv8.
4483 : // The VSEL instruction can't use all the usual ARM condition
4484 : // codes: it only has two bits to select the condition code, so it's
4485 : // constrained to use only GE, GT, VS and EQ.
4486 : //
4487 : // To implement all the various ISD::SETXXX opcodes, we sometimes need to
4488 : // swap the operands of the previous compare instruction (effectively
4489 : // inverting the compare condition, swapping 'less' and 'greater') and
4490 : // sometimes need to swap the operands to the VSEL (which inverts the
4491 : // condition in the sense of firing whenever the previous condition didn't)
4492 1306 : if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
4493 : TrueVal.getValueType() == MVT::f64)) {
4494 16 : ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
4495 16 : if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
4496 12 : CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
4497 6 : CC = ISD::getSetCCInverse(CC, true);
4498 : std::swap(TrueVal, FalseVal);
4499 : }
4500 : }
4501 :
4502 1306 : SDValue ARMcc;
4503 1306 : SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4504 1306 : SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
4505 1306 : return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
4506 : }
4507 :
4508 : ARMCC::CondCodes CondCode, CondCode2;
4509 : bool InvalidOnQNaN;
4510 519 : FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
4511 :
4512 : // Normalize the fp compare. If RHS is zero we keep it there so we match
4513 : // CMPFPw0 instead of CMPFP.
4514 519 : if (Subtarget->hasFPARMv8() && !isFloatingPointZero(RHS) &&
4515 : (TrueVal.getValueType() == MVT::f16 ||
4516 : TrueVal.getValueType() == MVT::f32 ||
4517 : TrueVal.getValueType() == MVT::f64)) {
4518 156 : bool swpCmpOps = false;
4519 156 : bool swpVselOps = false;
4520 156 : checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
4521 :
4522 156 : if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
4523 20 : CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
4524 156 : if (swpCmpOps)
4525 : std::swap(LHS, RHS);
4526 156 : if (swpVselOps)
4527 : std::swap(TrueVal, FalseVal);
4528 : }
4529 : }
4530 :
4531 519 : SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4532 519 : SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
4533 519 : SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4534 519 : SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
4535 519 : if (CondCode2 != ARMCC::AL) {
4536 29 : SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
4537 : // FIXME: Needs another CMP because flag can have but one use.
4538 29 : SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
4539 29 : Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
4540 : }
4541 519 : return Result;
4542 : }
4543 :
4544 : /// canChangeToInt - Given the fp compare operand, return true if it is suitable
4545 : /// to morph to an integer compare sequence.
4546 0 : static bool canChangeToInt(SDValue Op, bool &SeenZero,
4547 : const ARMSubtarget *Subtarget) {
4548 : SDNode *N = Op.getNode();
4549 : if (!N->hasOneUse())
4550 : // Otherwise it requires moving the value from fp to integer registers.
4551 0 : return false;
4552 0 : if (!N->getNumValues())
4553 0 : return false;
4554 0 : EVT VT = Op.getValueType();
4555 0 : if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
4556 : // f32 case is generally profitable. f64 case only makes sense when vcmpe +
4557 : // vmrs are very slow, e.g. cortex-a8.
4558 0 : return false;
4559 :
4560 0 : if (isFloatingPointZero(Op)) {
4561 0 : SeenZero = true;
4562 0 : return true;
4563 : }
4564 : return ISD::isNormalLoad(N);
4565 : }
4566 :
4567 0 : static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
4568 0 : if (isFloatingPointZero(Op))
4569 0 : return DAG.getConstant(0, SDLoc(Op), MVT::i32);
4570 :
4571 : if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
4572 0 : return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
4573 0 : Ld->getPointerInfo(), Ld->getAlignment(),
4574 0 : Ld->getMemOperand()->getFlags());
4575 :
4576 0 : llvm_unreachable("Unknown VFP cmp argument!");
4577 : }
4578 :
4579 0 : static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
4580 : SDValue &RetVal1, SDValue &RetVal2) {
4581 0 : SDLoc dl(Op);
4582 :
4583 0 : if (isFloatingPointZero(Op)) {
4584 0 : RetVal1 = DAG.getConstant(0, dl, MVT::i32);
4585 0 : RetVal2 = DAG.getConstant(0, dl, MVT::i32);
4586 0 : return;
4587 : }
4588 :
4589 : if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
4590 0 : SDValue Ptr = Ld->getBasePtr();
4591 0 : RetVal1 =
4592 0 : DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
4593 0 : Ld->getAlignment(), Ld->getMemOperand()->getFlags());
4594 :
4595 0 : EVT PtrType = Ptr.getValueType();
4596 0 : unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
4597 : SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
4598 0 : PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
4599 0 : RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
4600 : Ld->getPointerInfo().getWithOffset(4), NewAlign,
4601 0 : Ld->getMemOperand()->getFlags());
4602 0 : return;
4603 : }
4604 :
4605 0 : llvm_unreachable("Unknown VFP cmp argument!");
4606 : }
4607 :
4608 : /// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
4609 : /// f32 and even f64 comparisons to integer ones.
4610 : SDValue
4611 3 : ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
4612 3 : SDValue Chain = Op.getOperand(0);
4613 3 : ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
4614 3 : SDValue LHS = Op.getOperand(2);
4615 3 : SDValue RHS = Op.getOperand(3);
4616 3 : SDValue Dest = Op.getOperand(4);
4617 : SDLoc dl(Op);
4618 :
4619 3 : bool LHSSeenZero = false;
4620 3 : bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
4621 3 : bool RHSSeenZero = false;
4622 3 : bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
4623 3 : if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
4624 : // If unsafe fp math optimization is enabled and there are no other uses of
4625 : // the CMP operands, and the condition code is EQ or NE, we can optimize it
4626 : // to an integer comparison.
4627 2 : if (CC == ISD::SETOEQ)
4628 : CC = ISD::SETEQ;
4629 2 : else if (CC == ISD::SETUNE)
4630 : CC = ISD::SETNE;
4631 :
4632 2 : SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);
4633 2 : SDValue ARMcc;
4634 : if (LHS.getValueType() == MVT::f32) {
4635 1 : LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
4636 1 : bitcastf32Toi32(LHS, DAG), Mask);
4637 1 : RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
4638 1 : bitcastf32Toi32(RHS, DAG), Mask);
4639 1 : SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
4640 1 : SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4641 : return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
4642 1 : Chain, Dest, ARMcc, CCR, Cmp);
4643 : }
4644 :
4645 1 : SDValue LHS1, LHS2;
4646 1 : SDValue RHS1, RHS2;
4647 1 : expandf64Toi32(LHS, DAG, LHS1, LHS2);
4648 1 : expandf64Toi32(RHS, DAG, RHS1, RHS2);
4649 1 : LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
4650 1 : RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
4651 1 : ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
4652 1 : ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4653 1 : SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
4654 1 : SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
4655 1 : return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
4656 : }
4657 :
4658 1 : return SDValue();
4659 : }
4660 :
4661 281 : SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
4662 281 : SDValue Chain = Op.getOperand(0);
4663 281 : SDValue Cond = Op.getOperand(1);
4664 281 : SDValue Dest = Op.getOperand(2);
4665 : SDLoc dl(Op);
4666 :
4667 : // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
4668 : // instruction.
4669 : unsigned Opc = Cond.getOpcode();
4670 281 : bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
4671 0 : !Subtarget->isThumb1Only();
4672 281 : if (Cond.getResNo() == 1 &&
4673 6 : (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
4674 0 : Opc == ISD::USUBO || OptimizeMul)) {
4675 : // Only lower legal XALUO ops.
4676 6 : if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
4677 0 : return SDValue();
4678 :
4679 : // The actual operation with overflow check.
4680 : SDValue Value, OverflowCmp;
4681 6 : SDValue ARMcc;
4682 6 : std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
4683 :
4684 : // Reverse the condition code.
4685 : ARMCC::CondCodes CondCode =
4686 12 : (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
4687 : CondCode = ARMCC::getOppositeCondition(CondCode);
4688 6 : ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
4689 6 : SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4690 :
4691 : return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
4692 6 : OverflowCmp);
4693 : }
4694 :
4695 275 : return SDValue();
4696 : }
4697 :
4698 1955 : SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
4699 1955 : SDValue Chain = Op.getOperand(0);
4700 1955 : ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
4701 1955 : SDValue LHS = Op.getOperand(2);
4702 1955 : SDValue RHS = Op.getOperand(3);
4703 1955 : SDValue Dest = Op.getOperand(4);
4704 : SDLoc dl(Op);
4705 :
4706 1955 : if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
4707 0 : DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
4708 : dl);
4709 :
4710 : // If softenSetCCOperands only returned one value, we should compare it to
4711 : // zero.
4712 0 : if (!RHS.getNode()) {
4713 0 : RHS = DAG.getConstant(0, dl, LHS.getValueType());
4714 0 : CC = ISD::SETNE;
4715 : }
4716 : }
4717 :
4718 : // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
4719 : // instruction.
4720 1955 : unsigned Opc = LHS.getOpcode();
4721 1955 : bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
4722 4 : !Subtarget->isThumb1Only();
4723 14 : if (LHS.getResNo() == 1 && (isOneConstant(RHS) || isNullConstant(RHS)) &&
4724 14 : (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
4725 1959 : Opc == ISD::USUBO || OptimizeMul) &&
4726 12 : (CC == ISD::SETEQ || CC == ISD::SETNE)) {
4727 : // Only lower legal XALUO ops.
4728 12 : if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
4729 0 : return SDValue();
4730 :
4731 : // The actual operation with overflow check.
4732 : SDValue Value, OverflowCmp;
4733 12 : SDValue ARMcc;
4734 12 : std::tie(Value, OverflowCmp) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc);
4735 :
4736 12 : if ((CC == ISD::SETNE) != isOneConstant(RHS)) {
4737 : // Reverse the condition code.
4738 : ARMCC::CondCodes CondCode =
4739 0 : (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
4740 : CondCode = ARMCC::getOppositeCondition(CondCode);
4741 0 : ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
4742 : }
4743 12 : SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4744 :
4745 : return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
4746 12 : OverflowCmp);
4747 : }
4748 :
4749 1943 : if (LHS.getValueType() == MVT::i32) {
4750 1890 : SDValue ARMcc;
4751 1890 : SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
4752 1890 : SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4753 : return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
4754 1890 : Chain, Dest, ARMcc, CCR, Cmp);
4755 : }
4756 :
4757 109 : if (getTargetMachine().Options.UnsafeFPMath &&
4758 3 : (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
4759 55 : CC == ISD::SETNE || CC == ISD::SETUNE)) {
4760 3 : if (SDValue Result = OptimizeVFPBrcond(Op, DAG))
4761 2 : return Result;
4762 : }
4763 :
4764 : ARMCC::CondCodes CondCode, CondCode2;
4765 : bool InvalidOnQNaN;
4766 51 : FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
4767 :
4768 51 : SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4769 51 : SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
4770 51 : SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4771 51 : SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
4772 51 : SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
4773 51 : SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
4774 51 : if (CondCode2 != ARMCC::AL) {
4775 2 : ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
4776 2 : SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
4777 2 : Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
4778 : }
4779 51 : return Res;
4780 : }
4781 :
4782 38 : SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
4783 38 : SDValue Chain = Op.getOperand(0);
4784 38 : SDValue Table = Op.getOperand(1);
4785 38 : SDValue Index = Op.getOperand(2);
4786 : SDLoc dl(Op);
4787 :
4788 38 : EVT PTy = getPointerTy(DAG.getDataLayout());
4789 : JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
4790 38 : SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
4791 38 : Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
4792 38 : Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
4793 38 : SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Index);
4794 53 : if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
4795 : // Thumb2 and ARMv8-M use a two-level jump. That is, they jump into the jump
4796 : // table, which does another jump to the destination. This also makes it
4797 : // easier to translate to TBB / TBH later (Thumb2 only).
4798 : // FIXME: This might not work if the function is extremely large.
4799 : return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
4800 19 : Addr, Op.getOperand(2), JTI);
4801 : }
4802 19 : if (isPositionIndependent() || Subtarget->isROPI()) {
4803 11 : Addr =
4804 11 : DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
4805 11 : MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
4806 11 : Chain = Addr.getValue(1);
4807 11 : Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Addr);
4808 11 : return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
4809 : } else {
4810 8 : Addr =
4811 8 : DAG.getLoad(PTy, dl, Chain, Addr,
4812 8 : MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
4813 8 : Chain = Addr.getValue(1);
4814 8 : return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
4815 : }
4816 : }
4817 :
4818 91 : static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
4819 91 : EVT VT = Op.getValueType();
4820 : SDLoc dl(Op);
4821 :
4822 169 : if (Op.getValueType().getVectorElementType() == MVT::i32) {
4823 156 : if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
4824 72 : return Op;
4825 6 : return DAG.UnrollVectorOp(Op.getNode());
4826 : }
4827 :
4828 : const bool HasFullFP16 =
4829 13 : static_cast<const ARMSubtarget&>(DAG.getSubtarget()).hasFullFP16();
4830 :
4831 13 : EVT NewTy;
4832 13 : const EVT OpTy = Op.getOperand(0).getValueType();
4833 : if (OpTy == MVT::v4f32)
4834 1 : NewTy = MVT::v4i32;
4835 6 : else if (OpTy == MVT::v4f16 && HasFullFP16)
4836 6 : NewTy = MVT::v4i16;
4837 6 : else if (OpTy == MVT::v8f16 && HasFullFP16)
4838 6 : NewTy = MVT::v8i16;
4839 : else
4840 0 : llvm_unreachable("Invalid type for custom lowering!");
4841 :
4842 : if (VT != MVT::v4i16 && VT != MVT::v8i16)
4843 0 : return DAG.UnrollVectorOp(Op.getNode());
4844 :
4845 13 : Op = DAG.getNode(Op.getOpcode(), dl, NewTy, Op.getOperand(0));
4846 13 : return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
4847 : }
4848 :
4849 117 : SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
4850 117 : EVT VT = Op.getValueType();
4851 117 : if (VT.isVector())
4852 91 : return LowerVectorFP_TO_INT(Op, DAG);
4853 26 : if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) {
4854 : RTLIB::Libcall LC;
4855 10 : if (Op.getOpcode() == ISD::FP_TO_SINT)
4856 4 : LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(),
4857 : Op.getValueType());
4858 : else
4859 6 : LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(),
4860 : Op.getValueType());
4861 10 : return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
4862 10 : /*isSigned*/ false, SDLoc(Op)).first;
4863 : }
4864 :
4865 16 : return Op;
4866 : }
4867 :
4868 160 : static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
4869 160 : EVT VT = Op.getValueType();
4870 : SDLoc dl(Op);
4871 :
4872 320 : if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
4873 141 : if (VT.getVectorElementType() == MVT::f32)
4874 129 : return Op;
4875 12 : return DAG.UnrollVectorOp(Op.getNode());
4876 : }
4877 :
4878 : assert((Op.getOperand(0).getValueType() == MVT::v4i16 ||
4879 : Op.getOperand(0).getValueType() == MVT::v8i16) &&
4880 : "Invalid type for custom lowering!");
4881 :
4882 : const bool HasFullFP16 =
4883 19 : static_cast<const ARMSubtarget&>(DAG.getSubtarget()).hasFullFP16();
4884 :
4885 19 : EVT DestVecType;
4886 : if (VT == MVT::v4f32)
4887 7 : DestVecType = MVT::v4i32;
4888 6 : else if (VT == MVT::v4f16 && HasFullFP16)
4889 6 : DestVecType = MVT::v4i16;
4890 6 : else if (VT == MVT::v8f16 && HasFullFP16)
4891 6 : DestVecType = MVT::v8i16;
4892 : else
4893 0 : return DAG.UnrollVectorOp(Op.getNode());
4894 :
4895 : unsigned CastOpc;
4896 : unsigned Opc;
4897 19 : switch (Op.getOpcode()) {
4898 0 : default: llvm_unreachable("Invalid opcode!");
4899 : case ISD::SINT_TO_FP:
4900 : CastOpc = ISD::SIGN_EXTEND;
4901 : Opc = ISD::SINT_TO_FP;
4902 : break;
4903 10 : case ISD::UINT_TO_FP:
4904 : CastOpc = ISD::ZERO_EXTEND;
4905 : Opc = ISD::UINT_TO_FP;
4906 10 : break;
4907 : }
4908 :
4909 19 : Op = DAG.getNode(CastOpc, dl, DestVecType, Op.getOperand(0));
4910 19 : return DAG.getNode(Opc, dl, VT, Op);
4911 : }
4912 :
4913 186 : SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
4914 186 : EVT VT = Op.getValueType();
4915 186 : if (VT.isVector())
4916 160 : return LowerVectorINT_TO_FP(Op, DAG);
4917 26 : if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) {
4918 : RTLIB::Libcall LC;
4919 10 : if (Op.getOpcode() == ISD::SINT_TO_FP)
4920 12 : LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
4921 : Op.getValueType());
4922 : else
4923 8 : LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
4924 : Op.getValueType());
4925 10 : return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
4926 10 : /*isSigned*/ false, SDLoc(Op)).first;
4927 : }
4928 :
4929 16 : return Op;
4930 : }
4931 :
4932 23 : SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
4933 : // Implement fcopysign with a fabs and a conditional fneg.
4934 23 : SDValue Tmp0 = Op.getOperand(0);
4935 23 : SDValue Tmp1 = Op.getOperand(1);
4936 : SDLoc dl(Op);
4937 : EVT VT = Op.getValueType();
4938 : EVT SrcVT = Tmp1.getValueType();
4939 23 : bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
4940 : Tmp0.getOpcode() == ARMISD::VMOVDRR;
4941 21 : bool UseNEON = !InGPR && Subtarget->hasNEON();
4942 :
4943 : if (UseNEON) {
4944 : // Use VBSL to copy the sign bit.
4945 : unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
4946 : SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
4947 24 : DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
4948 : EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
4949 : if (VT == MVT::f64)
4950 5 : Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
4951 : DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
4952 5 : DAG.getConstant(32, dl, MVT::i32));
4953 : else /*if (VT == MVT::f32)*/
4954 7 : Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
4955 : if (SrcVT == MVT::f32) {
4956 5 : Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
4957 : if (VT == MVT::f64)
4958 0 : Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
4959 : DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
4960 0 : DAG.getConstant(32, dl, MVT::i32));
4961 : } else if (VT == MVT::f32)
4962 2 : Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
4963 : DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
4964 2 : DAG.getConstant(32, dl, MVT::i32));
4965 12 : Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
4966 12 : Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
4967 :
4968 : SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
4969 12 : dl, MVT::i32);
4970 12 : AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
4971 : SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
4972 12 : DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
4973 :
4974 : SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
4975 : DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
4976 12 : DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
4977 : if (VT == MVT::f32) {
4978 7 : Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
4979 7 : Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
4980 7 : DAG.getConstant(0, dl, MVT::i32));
4981 : } else {
4982 5 : Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
4983 : }
4984 :
4985 12 : return Res;
4986 : }
4987 :
4988 : // Bitcast operand 1 to i32.
4989 : if (SrcVT == MVT::f64)
4990 6 : Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
4991 6 : Tmp1).getValue(1);
4992 11 : Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
4993 :
4994 : // Or in the signbit with integer operations.
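// (For f32 this is just (bits(Tmp0) & 0x7fffffff) | (bits(Tmp1) & 0x80000000)
// on the raw bit patterns.)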
4995 11 : SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32);
4996 11 : SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32);
4997 11 : Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
4998 : if (VT == MVT::f32) {
4999 5 : Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
5000 5 : DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
5001 : return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
5002 5 : DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
5003 : }
5004 :
5005 : // f64: Or the high part with signbit and then combine two parts.
5006 6 : Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
5007 6 : Tmp0);
5008 6 : SDValue Lo = Tmp0.getValue(0);
5009 6 : SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
5010 6 : Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
5011 6 : return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
5012 : }
5013 :
5014 12 : SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
5015 12 : MachineFunction &MF = DAG.getMachineFunction();
5016 12 : MachineFrameInfo &MFI = MF.getFrameInfo();
5017 : MFI.setReturnAddressIsTaken(true);
5018 :
5019 12 : if (verifyReturnAddressArgumentIsConstant(Op, DAG))
5020 0 : return SDValue();
5021 :
5022 12 : EVT VT = Op.getValueType();
5023 : SDLoc dl(Op);
5024 12 : unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
5025 12 : if (Depth) {
5026 4 : SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
5027 4 : SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
5028 : return DAG.getLoad(VT, dl, DAG.getEntryNode(),
5029 : DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
5030 4 : MachinePointerInfo());
5031 : }
5032 :
5033 : // Return LR, which contains the return address. Mark it an implicit live-in.
5034 16 : unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
5035 8 : return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
5036 : }
5037 :
5038 48 : SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
5039 48 : const ARMBaseRegisterInfo &ARI =
5040 : *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
5041 48 : MachineFunction &MF = DAG.getMachineFunction();
5042 48 : MachineFrameInfo &MFI = MF.getFrameInfo();
5043 : MFI.setFrameAddressIsTaken(true);
5044 :
5045 48 : EVT VT = Op.getValueType();
5046 : SDLoc dl(Op); // FIXME probably not meaningful
5047 48 : unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
5048 48 : unsigned FrameReg = ARI.getFrameRegister(MF);
5049 48 : SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
5050 60 : while (Depth--)
5051 12 : FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
5052 12 : MachinePointerInfo());
5053 48 : return FrameAddr;
5054 : }
5055 :
5056 : // FIXME? Maybe this could be a TableGen attribute on some registers and
5057 : // this table could be generated automatically from RegInfo.
5058 13 : unsigned ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT,
5059 : SelectionDAG &DAG) const {
5060 9 : unsigned Reg = StringSwitch<unsigned>(RegName)
5061 13 : .Case("sp", ARM::SP)
5062 : .Default(0);
5063 4 : if (Reg)
5064 4 : return Reg;
5065 9 : report_fatal_error(Twine("Invalid register name \""
5066 : + StringRef(RegName) + "\"."));
5067 : }
5068 :
5069 : // Result is 64 bit value so split into two 32 bit values and return as a
5070 : // pair of values.
5071 2 : static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
5072 : SelectionDAG &DAG) {
5073 : SDLoc DL(N);
5074 :
5075 : // This function is only supposed to be called for i64 type destination.
5076 : assert(N->getValueType(0) == MVT::i64
5077 : && "ExpandREAD_REGISTER called for non-i64 type result.");
5078 :
5079 : SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL,
5080 : DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),
5081 : N->getOperand(0),
5082 4 : N->getOperand(1));
5083 :
5084 2 : Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0),
5085 4 : Read.getValue(1)));
5086 2 : Results.push_back(Read.getOperand(0));
5087 2 : }
5088 :
5089 : /// \p BC is a bitcast that is about to be turned into a VMOVDRR.
5090 : /// When \p DstVT, the destination type of \p BC, is on the vector
5091 : /// register bank and the source of bitcast, \p Op, operates on the same bank,
5092 : /// it might be possible to combine them, such that everything stays on the
5093 : /// vector register bank.
5094 : /// \return The node that would replace \p BC, if the combine
5095 : /// is possible.
5096 64 : static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC,
5097 : SelectionDAG &DAG) {
5098 64 : SDValue Op = BC->getOperand(0);
5099 128 : EVT DstVT = BC->getValueType(0);
5100 :
5101 : // The only vector instruction that can produce a scalar (remember,
5102 : // since the bitcast was about to be turned into VMOVDRR, the source
5103 : // type is i64) from a vector is EXTRACT_VECTOR_ELT.
5104 : // Moreover, we can do this combine only if there is one use.
5105 : // Finally, if the destination type is not a vector, there is not
5106 : // much point in forcing everything onto the vector bank.
5107 70 : if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
5108 : !Op.hasOneUse())
5109 59 : return SDValue();
5110 :
5111 : // If the index is not constant, we will introduce an additional
5112 : // multiply that will stick.
5113 : // Give up in that case.
5114 : ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));
5115 : if (!Index)
5116 1 : return SDValue();
5117 : unsigned DstNumElt = DstVT.getVectorNumElements();
5118 :
5119 : // Compute the new index.
5120 4 : const APInt &APIntIndex = Index->getAPIntValue();
5121 4 : APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);
5122 4 : NewIndex *= APIntIndex;
5123 : // Check if the new constant index fits into i32.
5124 4 : if (NewIndex.getBitWidth() > 32)
5125 0 : return SDValue();
5126 :
5127 : // vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
5128 : // vMTy extractsubvector (vNxMTy bitcast vNi64 src), i32 index*M
5129 : SDLoc dl(Op);
5130 4 : SDValue ExtractSrc = Op.getOperand(0);
5131 : EVT VecVT = EVT::getVectorVT(
5132 4 : *DAG.getContext(), DstVT.getScalarType(),
5133 8 : ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);
5134 4 : SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);
5135 : return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,
5136 4 : DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));
5137 : }
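// Illustrative example of the combine above (a name like %src is just a
// placeholder): with a v2i64 source, a constant index of 1, and DstVT ==
// v2f32, DstNumElt == 2 and NewIndex == 2, so
//   v2f32 (bitcast (i64 extractelt v2i64 %src, 1))
// becomes
//   v2f32 (extract_subvector (v4f32 bitcast %src), 2)
// and the value never has to leave the vector register bank.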
5138 :
5139 : /// ExpandBITCAST - If the target supports VFP, this function is called to
5140 : /// expand a bit convert where either the source or destination type is i64 to
5141 : /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
5142 : /// operand type is illegal (e.g., v2f32 for a target that doesn't support
5143 : /// vectors), since the legalizer won't know what to do with that.
5144 921 : static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
5145 : const ARMSubtarget *Subtarget) {
5146 921 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5147 : SDLoc dl(N);
5148 921 : SDValue Op = N->getOperand(0);
5149 :
5150 : // This function is only supposed to be called for i64 types, either as the
5151 : // source or destination of the bit convert.
5152 921 : EVT SrcVT = Op.getValueType();
5153 921 : EVT DstVT = N->getValueType(0);
5154 921 : const bool HasFullFP16 = Subtarget->hasFullFP16();
5155 :
5156 : if (SrcVT == MVT::f32 && DstVT == MVT::i32) {
5157 : // FullFP16: half values are passed in S-registers, and we don't
5158 : // need any of the bitcasts and moves:
5159 : //
5160 : // t2: f32,ch = CopyFromReg t0, Register:f32 %0
5161 : // t5: i32 = bitcast t2
5162 : // t18: f16 = ARMISD::VMOVhr t5
5163 178 : if (Op.getOpcode() != ISD::CopyFromReg ||
5164 : Op.getValueType() != MVT::f32)
5165 4 : return SDValue();
5166 :
5167 174 : auto Move = N->use_begin();
5168 174 : if (Move->getOpcode() != ARMISD::VMOVhr)
5169 0 : return SDValue();
5170 :
5171 174 : SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1) };
5172 348 : SDValue Copy = DAG.getNode(ISD::CopyFromReg, SDLoc(Op), MVT::f16, Ops);
5173 174 : DAG.ReplaceAllUsesWith(*Move, &Copy);
5174 174 : return Copy;
5175 : }
5176 :
5177 : if (SrcVT == MVT::i16 && DstVT == MVT::f16) {
5178 546 : if (!HasFullFP16)
5179 0 : return SDValue();
5180 : // SoftFP: read half-precision arguments:
5181 : //
5182 : // t2: i32,ch = ...
5183 : // t7: i16 = truncate t2 <~~~~ Op
5184 : // t8: f16 = bitcast t7 <~~~~ N
5185 : //
5186 546 : if (Op.getOperand(0).getValueType() == MVT::i32)
5187 546 : return DAG.getNode(ARMISD::VMOVhr, SDLoc(Op),
5188 1092 : MVT::f16, Op.getOperand(0));
5189 :
5190 0 : return SDValue();
5191 : }
5192 :
5193 : // Half-precision return values
5194 : if (SrcVT == MVT::f16 && DstVT == MVT::i16) {
5195 100 : if (!HasFullFP16)
5196 0 : return SDValue();
5197 : //
5198 : // t11: f16 = fadd t8, t10
5199 : // t12: i16 = bitcast t11 <~~~ SDNode N
5200 : // t13: i32 = zero_extend t12
5201 : // t16: ch,glue = CopyToReg t0, Register:i32 %r0, t13
5202 : // t17: ch = ARMISD::RET_FLAG t16, Register:i32 %r0, t16:1
5203 : //
5204 : // transform this into:
5205 : //
5206 : // t20: i32 = ARMISD::VMOVrh t11
5207 : // t16: ch,glue = CopyToReg t0, Register:i32 %r0, t20
5208 : //
5209 100 : auto ZeroExtend = N->use_begin();
5210 100 : if (N->use_size() != 1 || ZeroExtend->getOpcode() != ISD::ZERO_EXTEND ||
5211 100 : ZeroExtend->getValueType(0) != MVT::i32)
5212 0 : return SDValue();
5213 :
5214 100 : auto Copy = ZeroExtend->use_begin();
5215 100 : if (Copy->getOpcode() == ISD::CopyToReg &&
5216 200 : Copy->use_begin()->getOpcode() == ARMISD::RET_FLAG) {
5217 200 : SDValue Cvt = DAG.getNode(ARMISD::VMOVrh, SDLoc(Op), MVT::i32, Op);
5218 100 : DAG.ReplaceAllUsesWith(*ZeroExtend, &Cvt);
5219 100 : return Cvt;
5220 : }
5221 0 : return SDValue();
5222 : }
5223 :
5224 : if (!(SrcVT == MVT::i64 || DstVT == MVT::i64))
5225 0 : return SDValue();
5226 :
5227 : // Turn i64->f64 into VMOVDRR.
5228 : if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
5229 : // Do not force values to GPRs (this is what VMOVDRR does for the inputs)
5230 : // if we can combine the bitcast with its source.
5231 64 : if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG))
5232 4 : return Val;
5233 :
5234 : SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
5235 60 : DAG.getConstant(0, dl, MVT::i32));
5236 : SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
5237 60 : DAG.getConstant(1, dl, MVT::i32));
5238 : return DAG.getNode(ISD::BITCAST, dl, DstVT,
5239 60 : DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
5240 : }
5241 :
5242 : // Turn f64->i64 into VMOVRRD.
5243 : if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
5244 33 : SDValue Cvt;
5245 51 : if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() &&
5246 : SrcVT.getVectorNumElements() > 1)
5247 16 : Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
5248 : DAG.getVTList(MVT::i32, MVT::i32),
5249 16 : DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
5250 : else
5251 17 : Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
5252 17 : DAG.getVTList(MVT::i32, MVT::i32), Op);
5253 : // Merge the pieces into a single i64 value.
5254 33 : return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
5255 : }
5256 :
5257 0 : return SDValue();
5258 : }
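// As a rough sketch of the two legal-type cases above: (f64 (bitcast i64 %x))
// becomes (ARMISD::VMOVDRR lo32(%x), hi32(%x)), and (i64 (bitcast f64 %d))
// becomes a two-result ARMISD::VMOVRRD whose halves are merged back together
// with ISD::BUILD_PAIR (with an extra VREV64 for big-endian vector sources).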
5259 :
5260 : /// getZeroVector - Returns a vector of specified type with all zero elements.
5261 : /// Zero vectors are used to represent vector negation and in those cases
5262 : /// will be implemented with the NEON VNEG instruction. However, VNEG does
5263 : /// not support i64 elements, so sometimes the zero vectors will need to be
5264 : /// explicitly constructed. Regardless, use a canonical VMOV to create the
5265 : /// zero vector.
5266 71 : static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
5267 : assert(VT.isVector() && "Expected a vector type");
5268 : // The canonical modified immediate encoding of a zero vector is....0!
5269 71 : SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32);
5270 71 : EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
5271 71 : SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
5272 71 : return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
5273 : }
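// For example, requesting a v4i32 zero vector yields
//   (bitcast (ARMISD::VMOVIMM v4i32, 0))
// which selects to a single "vmov.i32 qN, #0" (qN is just an illustrative
// register name; the allocator picks the actual Q register).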
5274 :
5275 : /// LowerShiftRightParts - Lower SRA_PARTS, which returns two
5276 : /// i32 values and take a 2 x i32 value to shift plus a shift amount.
5277 6 : SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
5278 : SelectionDAG &DAG) const {
5279 : assert(Op.getNumOperands() == 3 && "Not a double-shift!");
5280 6 : EVT VT = Op.getValueType();
5281 6 : unsigned VTBits = VT.getSizeInBits();
5282 : SDLoc dl(Op);
5283 6 : SDValue ShOpLo = Op.getOperand(0);
5284 6 : SDValue ShOpHi = Op.getOperand(1);
5285 6 : SDValue ShAmt = Op.getOperand(2);
5286 6 : SDValue ARMcc;
5287 6 : SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5288 6 : unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
5289 :
5290 : assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
5291 :
5292 : SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
5293 6 : DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
5294 6 : SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
5295 : SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
5296 6 : DAG.getConstant(VTBits, dl, MVT::i32));
5297 6 : SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
5298 6 : SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
5299 6 : SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
5300 : SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
5301 6 : ISD::SETGE, ARMcc, DAG, dl);
5302 : SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift,
5303 6 : ARMcc, CCR, CmpLo);
5304 :
5305 6 : SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
5306 : SDValue HiBigShift = Opc == ISD::SRA
5307 : ? DAG.getNode(Opc, dl, VT, ShOpHi,
5308 6 : DAG.getConstant(VTBits - 1, dl, VT))
5309 6 : : DAG.getConstant(0, dl, VT);
5310 : SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
5311 6 : ISD::SETGE, ARMcc, DAG, dl);
5312 : SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
5313 6 : ARMcc, CCR, CmpHi);
5314 :
5315 6 : SDValue Ops[2] = { Lo, Hi };
5316 6 : return DAG.getMergeValues(Ops, dl);
5317 : }
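// Conceptually, for (srl i64 %x, n) with lo/hi the two 32-bit halves of %x:
//   n < 32 :  Lo = (lo >> n) | (hi << (32 - n)),  Hi = hi >> n
//   n >= 32:  Lo = hi >> (n - 32),                Hi = 0 (sign bits for sra)
// The two CMOVs above select between these cases by testing (n - 32) >= 0.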
5318 :
5319 : /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
5320 : /// i32 values and take a 2 x i32 value to shift plus a shift amount.
5321 3 : SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
5322 : SelectionDAG &DAG) const {
5323 : assert(Op.getNumOperands() == 3 && "Not a double-shift!");
5324 3 : EVT VT = Op.getValueType();
5325 3 : unsigned VTBits = VT.getSizeInBits();
5326 : SDLoc dl(Op);
5327 3 : SDValue ShOpLo = Op.getOperand(0);
5328 3 : SDValue ShOpHi = Op.getOperand(1);
5329 3 : SDValue ShAmt = Op.getOperand(2);
5330 3 : SDValue ARMcc;
5331 3 : SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5332 :
5333 : assert(Op.getOpcode() == ISD::SHL_PARTS);
5334 : SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
5335 3 : DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
5336 3 : SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
5337 3 : SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
5338 3 : SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
5339 :
5340 : SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
5341 3 : DAG.getConstant(VTBits, dl, MVT::i32));
5342 3 : SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
5343 : SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
5344 3 : ISD::SETGE, ARMcc, DAG, dl);
5345 : SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
5346 3 : ARMcc, CCR, CmpHi);
5347 :
5348 : SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
5349 3 : ISD::SETGE, ARMcc, DAG, dl);
5350 3 : SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
5351 : SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift,
5352 3 : DAG.getConstant(0, dl, VT), ARMcc, CCR, CmpLo);
5353 :
5354 3 : SDValue Ops[2] = { Lo, Hi };
5355 3 : return DAG.getMergeValues(Ops, dl);
5356 : }
5357 :
5358 3 : SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
5359 : SelectionDAG &DAG) const {
5360 : // The rounding mode is in bits 23:22 of the FPSCR.
5361 : // The mapping from the ARM rounding mode value to FLT_ROUNDS is 0->1, 1->2, 2->3, 3->0.
5362 : // The formula we use to implement this is ((FPSCR + (1 << 22)) >> 22) & 3,
5363 : // so that the shift and the AND get folded into a bitfield extract.
5364 : SDLoc dl(Op);
5365 3 : SDValue Ops[] = { DAG.getEntryNode(),
5366 3 : DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32) };
5367 :
5368 3 : SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, MVT::i32, Ops);
5369 : SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
5370 3 : DAG.getConstant(1U << 22, dl, MVT::i32));
5371 : SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
5372 3 : DAG.getConstant(22, dl, MVT::i32));
5373 : return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
5374 3 : DAG.getConstant(3, dl, MVT::i32));
5375 : }
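// Worked example of the formula: if FPSCR[23:22] == 0b10 (round towards
// minus infinity on ARM), then ((FPSCR + (1 << 22)) >> 22) & 3 == (2 + 1) & 3
// == 3, which is the FLT_ROUNDS encoding for round-towards-negative-infinity.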
5376 :
5377 48 : static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
5378 : const ARMSubtarget *ST) {
5379 : SDLoc dl(N);
5380 96 : EVT VT = N->getValueType(0);
5381 48 : if (VT.isVector()) {
5382 : assert(ST->hasNEON());
5383 :
5384 : // Compute the least significant set bit: LSB = X & -X
5385 22 : SDValue X = N->getOperand(0);
5386 22 : SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X);
5387 22 : SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX);
5388 :
5389 22 : EVT ElemTy = VT.getVectorElementType();
5390 :
5391 : if (ElemTy == MVT::i8) {
5392 : // Compute with: cttz(x) = ctpop(lsb - 1)
5393 : SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5394 4 : DAG.getTargetConstant(1, dl, ElemTy));
5395 4 : SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
5396 4 : return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
5397 : }
5398 :
5399 14 : if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&
5400 14 : (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {
5401 : // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0
5402 9 : unsigned NumBits = ElemTy.getSizeInBits();
5403 : SDValue WidthMinus1 =
5404 : DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5405 9 : DAG.getTargetConstant(NumBits - 1, dl, ElemTy));
5406 9 : SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB);
5407 9 : return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ);
5408 : }
5409 :
5410 : // Compute with: cttz(x) = ctpop(lsb - 1)
5411 :
5412 : // Compute LSB - 1.
5413 9 : SDValue Bits;
5414 : if (ElemTy == MVT::i64) {
5415 : // Load constant 0xffff'ffff'ffff'ffff to register.
5416 : SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5417 4 : DAG.getTargetConstant(0x1eff, dl, MVT::i32));
5418 4 : Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF);
5419 : } else {
5420 : SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5421 5 : DAG.getTargetConstant(1, dl, ElemTy));
5422 5 : Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
5423 : }
5424 9 : return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
5425 : }
5426 :
5427 26 : if (!ST->hasV6T2Ops())
5428 0 : return SDValue();
5429 :
5430 52 : SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
5431 26 : return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
5432 : }
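// In short: scalar cttz becomes ctlz(bitreverse(x)), i.e. RBIT + CLZ on
// v6t2 and later; vector cttz is derived from the least significant set bit,
// e.g. for i8 elements cttz(x) == ctpop((x & -x) - 1).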
5433 :
5434 0 : static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
5435 : const ARMSubtarget *ST) {
5436 0 : EVT VT = N->getValueType(0);
5437 : SDLoc DL(N);
5438 :
5439 : assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
5440 : assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
5441 : VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
5442 : "Unexpected type for custom ctpop lowering");
5443 :
5444 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5445 0 : EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
5446 0 : SDValue Res = DAG.getBitcast(VT8Bit, N->getOperand(0));
5447 0 : Res = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Res);
5448 :
5449 : // Widen the v8i8/v16i8 CTPOP result to VT with repeated widening pairwise additions (vpaddl).
5450 : unsigned EltSize = 8;
5451 0 : unsigned NumElts = VT.is64BitVector() ? 8 : 16;
5452 0 : while (EltSize != VT.getScalarSizeInBits()) {
5453 : SmallVector<SDValue, 8> Ops;
5454 0 : Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddlu, DL,
5455 0 : TLI.getPointerTy(DAG.getDataLayout())));
5456 0 : Ops.push_back(Res);
5457 :
5458 0 : EltSize *= 2;
5459 0 : NumElts /= 2;
5460 0 : MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
5461 0 : Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WidenVT, Ops);
5462 : }
5463 :
5464 0 : return Res;
5465 : }
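// For example, a v4i32 ctpop is computed as a v16i8 ctpop (VCNT) followed by
// two widening pairwise additions: vpaddl.u8 (v16i8 -> v8i16) and then
// vpaddl.u16 (v8i16 -> v4i32).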
5466 :
5467 0 : static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
5468 : const ARMSubtarget *ST) {
5469 0 : EVT VT = N->getValueType(0);
5470 : SDLoc dl(N);
5471 :
5472 0 : if (!VT.isVector())
5473 0 : return SDValue();
5474 :
5475 : // Lower vector shifts on NEON to use VSHL.
5476 : assert(ST->hasNEON() && "unexpected vector shift");
5477 :
5478 : // Left shifts translate directly to the vshiftu intrinsic.
5479 0 : if (N->getOpcode() == ISD::SHL)
5480 : return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
5481 : DAG.getConstant(Intrinsic::arm_neon_vshiftu, dl,
5482 : MVT::i32),
5483 0 : N->getOperand(0), N->getOperand(1));
5484 :
5485 : assert((N->getOpcode() == ISD::SRA ||
5486 : N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
5487 :
5488 : // NEON uses the same intrinsics for both left and right shifts. For
5489 : // right shifts, the shift amounts are negative, so negate the vector of
5490 : // shift amounts.
5491 0 : EVT ShiftVT = N->getOperand(1).getValueType();
5492 : SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
5493 : getZeroVector(ShiftVT, DAG, dl),
5494 0 : N->getOperand(1));
5495 0 : Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
5496 : Intrinsic::arm_neon_vshifts :
5497 : Intrinsic::arm_neon_vshiftu);
5498 : return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
5499 : DAG.getConstant(vshiftInt, dl, MVT::i32),
5500 0 : N->getOperand(0), NegatedCount);
5501 : }
5502 :
5503 855 : static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
5504 : const ARMSubtarget *ST) {
5505 855 : EVT VT = N->getValueType(0);
5506 : SDLoc dl(N);
5507 :
5508 : // We can get here for a node like i32 = ISD::SHL i32, i64
5509 : if (VT != MVT::i64)
5510 0 : return SDValue();
5511 :
5512 : assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
5513 : "Unknown shift to lower!");
5514 :
5515 : // We only lower SRA/SRL by 1 here; all others use the generic lowering.
5516 1710 : if (!isOneConstant(N->getOperand(1)))
5517 847 : return SDValue();
5518 :
5519 : // If we are in Thumb1 mode, we don't have RRX.
5520 9 : if (ST->isThumb1Only()) return SDValue();
5521 :
5522 : // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
5523 7 : SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
5524 7 : DAG.getConstant(0, dl, MVT::i32));
5525 7 : SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
5526 7 : DAG.getConstant(1, dl, MVT::i32));
5527 :
5528 : // First, build an SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
5529 : // captures the shifted-out bit in the carry flag.
5530 7 : unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
5531 7 : Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
5532 :
5533 : // The low part is an ARMISD::RRX operand, which shifts the carry in.
5534 7 : Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
5535 :
5536 : // Merge the pieces into a single i64 value.
5537 7 : return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
5538 : }
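// Sketch of the RRX expansion for (srl i64 %x, 1): the high word is shifted
// right by one with SRL_FLAG (an LSRS that moves the old bit 0 of hi into the
// carry), and the low word is rotated right with extend (RRX), which shifts
// the carry into bit 31 while shifting lo right by one.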
5539 :
5540 0 : static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
5541 0 : SDValue TmpOp0, TmpOp1;
5542 : bool Invert = false;
5543 : bool Swap = false;
5544 : unsigned Opc = 0;
5545 :
5546 0 : SDValue Op0 = Op.getOperand(0);
5547 0 : SDValue Op1 = Op.getOperand(1);
5548 0 : SDValue CC = Op.getOperand(2);
5549 0 : EVT CmpVT = Op0.getValueType().changeVectorElementTypeToInteger();
5550 0 : EVT VT = Op.getValueType();
5551 0 : ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
5552 0 : SDLoc dl(Op);
5553 :
5554 0 : if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
5555 0 : (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
5556 : // Special-case integer 64-bit equality comparisons. They aren't legal,
5557 : // but they can be lowered with a few vector instructions.
5558 0 : unsigned CmpElements = CmpVT.getVectorNumElements() * 2;
5559 0 : EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements);
5560 0 : SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0);
5561 0 : SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1);
5562 : SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1,
5563 0 : DAG.getCondCode(ISD::SETEQ));
5564 0 : SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp);
5565 0 : SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed);
5566 0 : Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged);
5567 0 : if (SetCCOpcode == ISD::SETNE)
5568 0 : Merged = DAG.getNOT(dl, Merged, CmpVT);
5569 0 : Merged = DAG.getSExtOrTrunc(Merged, dl, VT);
5570 0 : return Merged;
5571 : }
5572 :
5573 0 : if (CmpVT.getVectorElementType() == MVT::i64)
5574 : // 64-bit comparisons are not legal in general.
5575 0 : return SDValue();
5576 :
5577 0 : if (Op1.getValueType().isFloatingPoint()) {
5578 0 : switch (SetCCOpcode) {
5579 0 : default: llvm_unreachable("Illegal FP comparison");
5580 0 : case ISD::SETUNE:
5581 : case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH;
5582 : case ISD::SETOEQ:
5583 : case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
5584 0 : case ISD::SETOLT:
5585 : case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
5586 : case ISD::SETOGT:
5587 : case ISD::SETGT: Opc = ARMISD::VCGT; break;
5588 0 : case ISD::SETOLE:
5589 : case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH;
5590 : case ISD::SETOGE:
5591 : case ISD::SETGE: Opc = ARMISD::VCGE; break;
5592 0 : case ISD::SETUGE: Swap = true; LLVM_FALLTHROUGH;
5593 : case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
5594 0 : case ISD::SETUGT: Swap = true; LLVM_FALLTHROUGH;
5595 : case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
5596 0 : case ISD::SETUEQ: Invert = true; LLVM_FALLTHROUGH;
5597 0 : case ISD::SETONE:
5598 : // Expand this to (OLT | OGT).
5599 0 : TmpOp0 = Op0;
5600 0 : TmpOp1 = Op1;
5601 : Opc = ISD::OR;
5602 0 : Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
5603 0 : Op1 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp0, TmpOp1);
5604 0 : break;
5605 0 : case ISD::SETUO:
5606 : Invert = true;
5607 : LLVM_FALLTHROUGH;
5608 0 : case ISD::SETO:
5609 : // Expand this to (OLT | OGE).
5610 0 : TmpOp0 = Op0;
5611 0 : TmpOp1 = Op1;
5612 : Opc = ISD::OR;
5613 0 : Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
5614 0 : Op1 = DAG.getNode(ARMISD::VCGE, dl, CmpVT, TmpOp0, TmpOp1);
5615 0 : break;
5616 : }
5617 : } else {
5618 : // Integer comparisons.
5619 0 : switch (SetCCOpcode) {
5620 0 : default: llvm_unreachable("Illegal integer comparison");
5621 0 : case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH;
5622 : case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
5623 0 : case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
5624 : case ISD::SETGT: Opc = ARMISD::VCGT; break;
5625 0 : case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH;
5626 : case ISD::SETGE: Opc = ARMISD::VCGE; break;
5627 0 : case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH;
5628 : case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
5629 0 : case ISD::SETULE: Swap = true; LLVM_FALLTHROUGH;
5630 : case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
5631 : }
5632 :
5633 : // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
5634 0 : if (Opc == ARMISD::VCEQ) {
5635 : SDValue AndOp;
5636 0 : if (ISD::isBuildVectorAllZeros(Op1.getNode()))
5637 : AndOp = Op0;
5638 0 : else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
5639 : AndOp = Op1;
5640 :
5641 : // Ignore bitconvert.
5642 0 : if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
5643 0 : AndOp = AndOp.getOperand(0);
5644 :
5645 0 : if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
5646 : Opc = ARMISD::VTST;
5647 0 : Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
5648 0 : Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
5649 0 : Invert = !Invert;
5650 : }
5651 : }
5652 : }
5653 :
5654 0 : if (Swap)
5655 : std::swap(Op0, Op1);
5656 :
5657 : // If one of the operands is a constant vector zero, attempt to fold the
5658 : // comparison to a specialized compare-against-zero form.
5659 0 : SDValue SingleOp;
5660 0 : if (ISD::isBuildVectorAllZeros(Op1.getNode()))
5661 0 : SingleOp = Op0;
5662 0 : else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
5663 0 : if (Opc == ARMISD::VCGE)
5664 : Opc = ARMISD::VCLEZ;
5665 0 : else if (Opc == ARMISD::VCGT)
5666 : Opc = ARMISD::VCLTZ;
5667 0 : SingleOp = Op1;
5668 : }
5669 :
5670 : SDValue Result;
5671 0 : if (SingleOp.getNode()) {
5672 0 : switch (Opc) {
5673 : case ARMISD::VCEQ:
5674 0 : Result = DAG.getNode(ARMISD::VCEQZ, dl, CmpVT, SingleOp); break;
5675 : case ARMISD::VCGE:
5676 0 : Result = DAG.getNode(ARMISD::VCGEZ, dl, CmpVT, SingleOp); break;
5677 : case ARMISD::VCLEZ:
5678 0 : Result = DAG.getNode(ARMISD::VCLEZ, dl, CmpVT, SingleOp); break;
5679 : case ARMISD::VCGT:
5680 0 : Result = DAG.getNode(ARMISD::VCGTZ, dl, CmpVT, SingleOp); break;
5681 : case ARMISD::VCLTZ:
5682 0 : Result = DAG.getNode(ARMISD::VCLTZ, dl, CmpVT, SingleOp); break;
5683 : default:
5684 0 : Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
5685 : }
5686 : } else {
5687 0 : Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
5688 : }
5689 :
5690 0 : Result = DAG.getSExtOrTrunc(Result, dl, VT);
5691 :
5692 0 : if (Invert)
5693 0 : Result = DAG.getNOT(dl, Result, VT);
5694 :
5695 0 : return Result;
5696 : }
5697 :
5698 0 : static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) {
5699 0 : SDValue LHS = Op.getOperand(0);
5700 0 : SDValue RHS = Op.getOperand(1);
5701 0 : SDValue Carry = Op.getOperand(2);
5702 0 : SDValue Cond = Op.getOperand(3);
5703 0 : SDLoc DL(Op);
5704 :
5705 : assert(LHS.getSimpleValueType().isInteger() && "SETCCCARRY is integer only.");
5706 :
5707 : // ARMISD::SUBE expects a carry, whereas ISD::SUBCARRY provides a borrow, so
5708 : // we have to invert the carry first.
5709 0 : Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
5710 0 : DAG.getConstant(1, DL, MVT::i32), Carry);
5711 : // This converts the boolean value carry into the carry flag.
5712 0 : Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
5713 :
5714 0 : SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
5715 0 : SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry);
5716 :
5717 0 : SDValue FVal = DAG.getConstant(0, DL, MVT::i32);
5718 0 : SDValue TVal = DAG.getConstant(1, DL, MVT::i32);
5719 : SDValue ARMcc = DAG.getConstant(
5720 0 : IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);
5721 0 : SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5722 : SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM::CPSR,
5723 0 : Cmp.getValue(1), SDValue());
5724 : return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc,
5725 0 : CCR, Chain.getValue(1));
5726 : }
5727 :
5728 : /// isNEONModifiedImm - Check if the specified splat value corresponds to a
5729 : /// valid vector constant for a NEON instruction with a "modified immediate"
5730 : /// operand (e.g., VMOV). If so, return the encoded value.
5731 1035 : static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
5732 : unsigned SplatBitSize, SelectionDAG &DAG,
5733 : const SDLoc &dl, EVT &VT, bool is128Bits,
5734 : NEONModImmType type) {
5735 : unsigned OpCmode, Imm;
5736 :
5737 : // SplatBitSize is set to the smallest size that splats the vector, so a
5738 : // zero vector will always have SplatBitSize == 8. However, NEON modified
5739 : // immediate instructions other than VMOV do not support the 8-bit encoding
5740 : // of a zero vector, and the default encoding of zero is supposed to be the
5741 : // 32-bit version.
5742 1035 : if (SplatBits == 0)
5743 : SplatBitSize = 32;
5744 :
5745 953 : switch (SplatBitSize) {
5746 79 : case 8:
5747 79 : if (type != VMOVModImm)
5748 0 : return SDValue();
5749 : // Any 1-byte value is OK. Op=0, Cmode=1110.
5750 : assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
5751 : OpCmode = 0xe;
5752 79 : Imm = SplatBits;
5753 79 : VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
5754 79 : break;
5755 :
5756 34 : case 16:
5757 : // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
5758 34 : VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
5759 34 : if ((SplatBits & ~0xff) == 0) {
5760 : // Value = 0x00nn: Op=x, Cmode=100x.
5761 : OpCmode = 0x8;
5762 16 : Imm = SplatBits;
5763 16 : break;
5764 : }
5765 18 : if ((SplatBits & ~0xff00) == 0) {
5766 : // Value = 0xnn00: Op=x, Cmode=101x.
5767 : OpCmode = 0xa;
5768 6 : Imm = SplatBits >> 8;
5769 6 : break;
5770 : }
5771 12 : return SDValue();
5772 :
5773 826 : case 32:
5774 : // NEON's 32-bit VMOV supports splat values where:
5775 : // * only one byte is nonzero, or
5776 : // * the least significant byte is 0xff and the second byte is nonzero, or
5777 : // * the least significant 2 bytes are 0xff and the third is nonzero.
5778 826 : VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
5779 826 : if ((SplatBits & ~0xff) == 0) {
5780 : // Value = 0x000000nn: Op=x, Cmode=000x.
5781 : OpCmode = 0;
5782 108 : Imm = SplatBits;
5783 108 : break;
5784 : }
5785 718 : if ((SplatBits & ~0xff00) == 0) {
5786 : // Value = 0x0000nn00: Op=x, Cmode=001x.
5787 : OpCmode = 0x2;
5788 6 : Imm = SplatBits >> 8;
5789 6 : break;
5790 : }
5791 712 : if ((SplatBits & ~0xff0000) == 0) {
5792 : // Value = 0x00nn0000: Op=x, Cmode=010x.
5793 : OpCmode = 0x4;
5794 5 : Imm = SplatBits >> 16;
5795 5 : break;
5796 : }
5797 707 : if ((SplatBits & ~0xff000000) == 0) {
5798 : // Value = 0xnn000000: Op=x, Cmode=011x.
5799 : OpCmode = 0x6;
5800 15 : Imm = SplatBits >> 24;
5801 15 : break;
5802 : }
5803 :
5804 : // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
5805 692 : if (type == OtherModImm) return SDValue();
5806 :
5807 672 : if ((SplatBits & ~0xffff) == 0 &&
5808 5 : ((SplatBits | SplatUndef) & 0xff) == 0xff) {
5809 : // Value = 0x0000nnff: Op=x, Cmode=1100.
5810 : OpCmode = 0xc;
5811 3 : Imm = SplatBits >> 8;
5812 3 : break;
5813 : }
5814 :
5815 669 : if ((SplatBits & ~0xffffff) == 0 &&
5816 6 : ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
5817 : // Value = 0x00nnffff: Op=x, Cmode=1101.
5818 : OpCmode = 0xd;
5819 4 : Imm = SplatBits >> 16;
5820 4 : break;
5821 : }
5822 :
5823 : // Note: there are a few 32-bit splat values (specifically: 00ffff00,
5824 : // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
5825 : // VMOV.I32. A (very) minor optimization would be to replicate the value
5826 : // and fall through here to test for a valid 64-bit splat. But, then the
5827 : // caller would also need to check and handle the change in size.
5828 665 : return SDValue();
5829 :
5830 96 : case 64: {
5831 96 : if (type != VMOVModImm)
5832 47 : return SDValue();
5833 : // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
5834 : uint64_t BitMask = 0xff;
5835 : uint64_t Val = 0;
5836 : unsigned ImmMask = 1;
5837 : Imm = 0;
5838 102 : for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
5839 99 : if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
5840 : Val |= BitMask;
5841 23 : Imm |= ImmMask;
5842 76 : } else if ((SplatBits & BitMask) != 0) {
5843 46 : return SDValue();
5844 : }
5845 53 : BitMask <<= 8;
5846 53 : ImmMask <<= 1;
5847 : }
5848 :
5849 3 : if (DAG.getDataLayout().isBigEndian())
5850 : // Swap the higher and lower 32-bit words.
5851 0 : Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4);
5852 :
5853 : // Op=1, Cmode=1110.
5854 : OpCmode = 0x1e;
5855 3 : VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
5856 3 : break;
5857 : }
5858 :
5859 0 : default:
5860 0 : llvm_unreachable("unexpected size for isNEONModifiedImm");
5861 : }
5862 :
5863 : unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
5864 245 : return DAG.getTargetConstant(EncodedVal, dl, MVT::i32);
5865 : }
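// For example (assuming no undef bits), a 32-bit splat of 0x0000ff00 is
// encoded with OpCmode == 0x2 and Imm == 0xff (a VMOV.I32 of a byte shifted
// left by 8), while a splat of 0x00345678 has no modified-immediate encoding
// and the function returns an empty SDValue.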
5866 :
5867 1317 : SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
5868 : const ARMSubtarget *ST) const {
5869 1317 : EVT VT = Op.getValueType();
5870 : bool IsDouble = (VT == MVT::f64);
5871 : ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
5872 1317 : const APFloat &FPVal = CFP->getValueAPF();
5873 :
5874 : // Prevent floating-point constants from using literal loads
5875 : // when execute-only is enabled.
5876 1317 : if (ST->genExecuteOnly()) {
5877 : // If we can represent the constant as an immediate, don't lower it
5878 115 : if (isFPImmLegal(FPVal, VT))
5879 66 : return Op;
5880 : // Otherwise, construct it as an integer and move it to a float register.
5881 49 : APInt INTVal = FPVal.bitcastToAPInt();
5882 : SDLoc DL(CFP);
5883 49 : switch (VT.getSimpleVT().SimpleTy) {
5884 0 : default:
5885 0 : llvm_unreachable("Unknown floating point type!");
5886 : break;
5887 : case MVT::f64: {
5888 58 : SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
5889 58 : SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
5890 29 : if (!ST->isLittle())
5891 : std::swap(Lo, Hi);
5892 29 : return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);
5893 : }
5894 : case MVT::f32:
5895 : return DAG.getNode(ARMISD::VMOVSR, DL, VT,
5896 20 : DAG.getConstant(INTVal, DL, MVT::i32));
5897 : }
5898 : }
5899 :
5900 1202 : if (!ST->hasVFP3())
5901 28 : return SDValue();
5902 :
5903 : // Use the default (constant pool) lowering for double constants when we have
5904 : // an SP-only FPU
5905 1174 : if (IsDouble && Subtarget->isFPOnlySP())
5906 22 : return SDValue();
5907 :
5908 : // Try splatting with a VMOV.f32...
5909 1152 : int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
5910 :
5911 1152 : if (ImmVal != -1) {
5912 443 : if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
5913 : // We have code in place to select a valid ConstantFP already, no need to
5914 : // do any mangling.
5915 410 : return Op;
5916 : }
5917 :
5918 : // It's a float and we are trying to use NEON operations where
5919 : // possible. Lower it to a splat followed by an extract.
5920 : SDLoc DL(Op);
5921 33 : SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32);
5922 : SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
5923 33 : NewVal);
5924 : return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
5925 33 : DAG.getConstant(0, DL, MVT::i32));
5926 : }
5927 :
5928 : // The rest of our options are NEON-only; make sure that's allowed before
5929 : // proceeding.
5930 709 : if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
5931 304 : return SDValue();
5932 :
5933 405 : EVT VMovVT;
5934 405 : uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
5935 :
5936 : // It wouldn't really be worth bothering for doubles except for one very
5937 : // important value, which does happen to match: 0.0. So make sure we don't do
5938 : // anything stupid.
5939 405 : if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
5940 81 : return SDValue();
5941 :
5942 : // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
5943 324 : SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
5944 324 : VMovVT, false, VMOVModImm);
5945 : if (NewVal != SDValue()) {
5946 : SDLoc DL(Op);
5947 : SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
5948 19 : NewVal);
5949 19 : if (IsDouble)
5950 13 : return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
5951 :
5952 : // It's a float: cast and extract a vector element.
5953 : SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
5954 6 : VecConstant);
5955 : return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
5956 6 : DAG.getConstant(0, DL, MVT::i32));
5957 : }
5958 :
5959 : // Finally, try a VMVN.i32
5960 305 : NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
5961 305 : false, VMVNModImm);
5962 : if (NewVal != SDValue()) {
5963 : SDLoc DL(Op);
5964 3 : SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
5965 :
5966 3 : if (IsDouble)
5967 2 : return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
5968 :
5969 : // It's a float: cast and extract a vector element.
5970 : SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
5971 1 : VecConstant);
5972 : return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
5973 1 : DAG.getConstant(0, DL, MVT::i32));
5974 : }
5975 :
5976 302 : return SDValue();
5977 : }
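// For example, an f64 constant of +0.0 has no VMOV.F64 immediate encoding,
// but with NEON (and a non-SP-only FPU) it falls through to the VMOV.i32
// path above and is materialized as (f64 bitcast (ARMISD::VMOVIMM v2i32, 0)).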
5978 :
5979 : // Check if a VEXT instruction can handle the shuffle mask when the
5980 : // vector sources of the shuffle are the same.
5981 0 : static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
5982 : unsigned NumElts = VT.getVectorNumElements();
5983 :
5984 : // Assume that the first shuffle index is not UNDEF. Fail if it is.
5985 0 : if (M[0] < 0)
5986 0 : return false;
5987 :
5988 0 : Imm = M[0];
5989 :
5990 : // If this is a VEXT shuffle, the immediate value is the index of the first
5991 : // element. The other shuffle indices must be the successive elements after
5992 : // the first one.
5993 : unsigned ExpectedElt = Imm;
5994 0 : for (unsigned i = 1; i < NumElts; ++i) {
5995 : // Increment the expected index. If it wraps around, just follow it
5996 : // back to index zero and keep going.
5997 0 : ++ExpectedElt;
5998 0 : if (ExpectedElt == NumElts)
5999 : ExpectedElt = 0;
6000 :
6001 0 : if (M[i] < 0) continue; // ignore UNDEF indices
6002 0 : if (ExpectedElt != static_cast<unsigned>(M[i]))
6003 0 : return false;
6004 : }
6005 :
6006 : return true;
6007 : }
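// For example, for v4i32 the mask <1, 2, 3, 0> is accepted with Imm == 1:
// the result is the single source rotated down by one element.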
6008 :
6009 0 : static bool isVEXTMask(ArrayRef<int> M, EVT VT,
6010 : bool &ReverseVEXT, unsigned &Imm) {
6011 : unsigned NumElts = VT.getVectorNumElements();
6012 0 : ReverseVEXT = false;
6013 :
6014 : // Assume that the first shuffle index is not UNDEF. Fail if it is.
6015 0 : if (M[0] < 0)
6016 0 : return false;
6017 :
6018 0 : Imm = M[0];
6019 :
6020 : // If this is a VEXT shuffle, the immediate value is the index of the first
6021 : // element. The other shuffle indices must be the successive elements after
6022 : // the first one.
6023 : unsigned ExpectedElt = Imm;
6024 0 : for (unsigned i = 1; i < NumElts; ++i) {
6025 : // Increment the expected index. If it wraps around, it may still be
6026 : // a VEXT but the source vectors must be swapped.
6027 0 : ExpectedElt += 1;
6028 0 : if (ExpectedElt == NumElts * 2) {
6029 : ExpectedElt = 0;
6030 0 : ReverseVEXT = true;
6031 : }
6032 :
6033 0 : if (M[i] < 0) continue; // ignore UNDEF indices
6034 0 : if (ExpectedElt != static_cast<unsigned>(M[i]))
6035 0 : return false;
6036 : }
6037 :
6038 : // Adjust the index value if the source operands will be swapped.
6039 0 : if (ReverseVEXT)
6040 0 : Imm -= NumElts;
6041 :
6042 : return true;
6043 : }
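// For example, with two v4i32 sources the mask <2, 3, 4, 5> is a plain VEXT
// with Imm == 2, while <7, 0, 1, 2> wraps around, so ReverseVEXT is set and
// Imm is rebased to 3 after the operands are swapped.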
6044 :
6045 : /// isVREVMask - Check if a vector shuffle corresponds to a VREV
6046 : /// instruction with the specified blocksize. (The order of the elements
6047 : /// within each block of the vector is reversed.)
6048 0 : static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
6049 : assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
6050 : "Only possible block sizes for VREV are: 16, 32, 64");
6051 :
6052 : unsigned EltSz = VT.getScalarSizeInBits();
6053 0 : if (EltSz == 64)
6054 0 : return false;
6055 :
6056 : unsigned NumElts = VT.getVectorNumElements();
6057 0 : unsigned BlockElts = M[0] + 1;
6058 : // If the first shuffle index is UNDEF, be optimistic.
6059 0 : if (M[0] < 0)
6060 0 : BlockElts = BlockSize / EltSz;
6061 :
6062 0 : if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
6063 0 : return false;
6064 :
6065 0 : for (unsigned i = 0; i < NumElts; ++i) {
6066 0 : if (M[i] < 0) continue; // ignore UNDEF indices
6067 0 : if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
6068 0 : return false;
6069 : }
6070 :
6071 : return true;
6072 : }
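// For example, for v8i16 the mask <3, 2, 1, 0, 7, 6, 5, 4> matches a VREV64:
// each 64-bit block of four i16 elements is reversed.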
6073 :
6074 0 : static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
6075 : // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
6076 : // range, then 0 is placed into the resulting vector. So pretty much any mask
6077 : // of 8 elements can work here.
6078 8 : return VT == MVT::v8i8 && M.size() == 8;
6079 : }
6080 :
6081 : static unsigned SelectPairHalf(unsigned Elements, ArrayRef<int> Mask,
6082 : unsigned Index) {
6083 887 : if (Mask.size() == Elements * 2)
6084 130 : return Index / Elements;
6085 757 : return Mask[Index] == 0 ? 0 : 1;
6086 : }
6087 :
6088 : // Checks whether the shuffle mask represents a vector transpose (VTRN) by
6089 : // checking that pairs of elements in the shuffle mask represent the same index
6090 : // in each vector, incrementing the expected index by 2 at each step.
6091 : // e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6]
6092 : // v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g}
6093 : // v2={e,f,g,h}
6094 : // WhichResult gives the offset for each element in the mask based on which
6095 : // of the two results it belongs to.
6096 : //
6097 : // The transpose can be represented either as:
6098 : // result1 = shufflevector v1, v2, result1_shuffle_mask
6099 : // result2 = shufflevector v1, v2, result2_shuffle_mask
6100 : // where v1/v2 and the shuffle masks have the same number of elements
6101 : // (here WhichResult (see below) indicates which result is being checked)
6102 : //
6103 : // or as:
6104 : // results = shufflevector v1, v2, shuffle_mask
6105 : // where both results are returned in one vector and the shuffle mask has twice
6106 : // as many elements as v1/v2 (here WhichResult will always be 0 if the mask
6107 : // matches). In this case we check the low half and the high half of the
6108 : // shuffle mask as if each were a mask of the first form.
6109 255 : static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
6110 : unsigned EltSz = VT.getScalarSizeInBits();
6111 255 : if (EltSz == 64)
6112 : return false;
6113 :
6114 : unsigned NumElts = VT.getVectorNumElements();
6115 255 : if (M.size() != NumElts && M.size() != NumElts*2)
6116 : return false;
6117 :
6118 : // If the mask is twice as long as the input vector then we need to check the
6119 : // upper and lower parts of the mask with a matching value for WhichResult.
6120 : // FIXME: A mask with only even values will be rejected in case the first
6121 : // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
6122 : // M[0] is used to determine WhichResult
6123 320 : for (unsigned i = 0; i < M.size(); i += NumElts) {
6124 265 : WhichResult = SelectPairHalf(NumElts, M, i);
6125 494 : for (unsigned j = 0; j < NumElts; j += 2) {
6126 858 : if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
6127 708 : (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
6128 : return false;
6129 : }
6130 : }
6131 :
6132 55 : if (M.size() == NumElts*2)
6133 6 : WhichResult = 0;
6134 :
6135 : return true;
6136 : }
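// Continuing the v4i32 example above: <0, 4, 2, 6> matches with
// WhichResult == 0, and the companion mask <1, 5, 3, 7> matches with
// WhichResult == 1.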
6137 :
6138 : /// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
6139 : /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
6140 : /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
6141 92 : static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
6142 : unsigned EltSz = VT.getScalarSizeInBits();
6143 92 : if (EltSz == 64)
6144 : return false;
6145 :
6146 : unsigned NumElts = VT.getVectorNumElements();
6147 92 : if (M.size() != NumElts && M.size() != NumElts*2)
6148 : return false;
6149 :
6150 102 : for (unsigned i = 0; i < M.size(); i += NumElts) {
6151 98 : WhichResult = SelectPairHalf(NumElts, M, i);
6152 153 : for (unsigned j = 0; j < NumElts; j += 2) {
6153 286 : if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
6154 194 : (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
6155 : return false;
6156 : }
6157 : }
6158 :
6159 4 : if (M.size() == NumElts*2)
6160 0 : WhichResult = 0;
6161 :
6162 : return true;
6163 : }
6164 :
6165 : // Checks whether the shuffle mask represents a vector unzip (VUZP) by checking
6166 : // that the mask elements are either all even and in steps of size 2 or all odd
6167 : // and in steps of size 2.
6168 : // e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6]
6169 : // v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g}
6170 : // v2={e,f,g,h}
6171 : // Requires checks similar to those of isVTRNMask with
6172 : // respect to how the results are returned.
6173 200 : static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
6174 : unsigned EltSz = VT.getScalarSizeInBits();
6175 200 : if (EltSz == 64)
6176 : return false;
6177 :
6178 : unsigned NumElts = VT.getVectorNumElements();
6179 200 : if (M.size() != NumElts && M.size() != NumElts*2)
6180 : return false;
6181 :
6182 279 : for (unsigned i = 0; i < M.size(); i += NumElts) {
6183 206 : WhichResult = SelectPairHalf(NumElts, M, i);
6184 892 : for (unsigned j = 0; j < NumElts; ++j) {
6185 1626 : if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
6186 : return false;
6187 : }
6188 : }
6189 :
6190 73 : if (M.size() == NumElts*2)
6191 3 : WhichResult = 0;
6192 :
6193 : // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
6194 73 : if (VT.is64BitVector() && EltSz == 32)
6195 0 : return false;
6196 :
6197 : return true;
6198 : }
6199 :
6200 : /// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
6201 : /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
6202 : /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
6203 88 : static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
6204 : unsigned EltSz = VT.getScalarSizeInBits();
6205 88 : if (EltSz == 64)
6206 : return false;
6207 :
6208 : unsigned NumElts = VT.getVectorNumElements();
6209 88 : if (M.size() != NumElts && M.size() != NumElts*2)
6210 : return false;
6211 :
6212 88 : unsigned Half = NumElts / 2;
6213 97 : for (unsigned i = 0; i < M.size(); i += NumElts) {
6214 94 : WhichResult = SelectPairHalf(NumElts, M, i);
6215 133 : for (unsigned j = 0; j < NumElts; j += Half) {
6216 : unsigned Idx = WhichResult;
6217 282 : for (unsigned k = 0; k < Half; ++k) {
6218 243 : int MIdx = M[i + j + k];
6219 243 : if (MIdx >= 0 && (unsigned) MIdx != Idx)
6220 : return false;
6221 158 : Idx += 2;
6222 : }
6223 : }
6224 : }
6225 :
6226 3 : if (M.size() == NumElts*2)
6227 0 : WhichResult = 0;
6228 :
6229 : // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
6230 3 : if (VT.is64BitVector() && EltSz == 32)
6231 0 : return false;
6232 :
6233 : return true;
6234 : }
6235 :
6236 : // Checks whether the shuffle mask represents a vector zip (VZIP) by checking
6237 : // that pairs of elements of the shuffle mask represent the same index in each
6238 : // vector, incrementing sequentially through the vectors.
6239 : // e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5]
6240 : // v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f}
6241 : // v2={e,f,g,h}
6242 : // Requires checks similar to those of isVTRNMask with respect to how the
6243 : // results are returned.
6244 127 : static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
6245 : unsigned EltSz = VT.getScalarSizeInBits();
6246 127 : if (EltSz == 64)
6247 : return false;
6248 :
6249 : unsigned NumElts = VT.getVectorNumElements();
6250 127 : if (M.size() != NumElts && M.size() != NumElts*2)
6251 : return false;
6252 :
6253 168 : for (unsigned i = 0; i < M.size(); i += NumElts) {
6254 133 : WhichResult = SelectPairHalf(NumElts, M, i);
6255 133 : unsigned Idx = WhichResult * NumElts / 2;
6256 320 : for (unsigned j = 0; j < NumElts; j += 2) {
6257 558 : if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
6258 466 : (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts))
6259 : return false;
6260 187 : Idx += 1;
6261 : }
6262 : }
6263 :
6264 35 : if (M.size() == NumElts*2)
6265 4 : WhichResult = 0;
6266 :
6267 : // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
6268 35 : if (VT.is64BitVector() && EltSz == 32)
6269 0 : return false;
6270 :
6271 : return true;
6272 : }
6273 :
6274 : /// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
6275 : /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
6276 : /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
6277 85 : static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
6278 : unsigned EltSz = VT.getScalarSizeInBits();
6279 85 : if (EltSz == 64)
6280 : return false;
6281 :
6282 : unsigned NumElts = VT.getVectorNumElements();
6283 85 : if (M.size() != NumElts && M.size() != NumElts*2)
6284 : return false;
6285 :
6286 92 : for (unsigned i = 0; i < M.size(); i += NumElts) {
6287 91 : WhichResult = SelectPairHalf(NumElts, M, i);
6288 91 : unsigned Idx = WhichResult * NumElts / 2;
6289 129 : for (unsigned j = 0; j < NumElts; j += 2) {
6290 244 : if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
6291 156 : (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx))
6292 : return false;
6293 38 : Idx += 1;
6294 : }
6295 : }
6296 :
6297 1 : if (M.size() == NumElts*2)
6298 0 : WhichResult = 0;
6299 :
6300 : // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
6301 1 : if (VT.is64BitVector() && EltSz == 32)
6302 0 : return false;
6303 :
6304 : return true;
6305 : }
6306 :
6307 : /// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN),
6308 : /// and return the corresponding ARMISD opcode if it is, or 0 if it isn't.
6309 255 : static unsigned isNEONTwoResultShuffleMask(ArrayRef<int> ShuffleMask, EVT VT,
6310 : unsigned &WhichResult,
6311 : bool &isV_UNDEF) {
6312 255 : isV_UNDEF = false;
6313 255 : if (isVTRNMask(ShuffleMask, VT, WhichResult))
6314 : return ARMISD::VTRN;
6315 200 : if (isVUZPMask(ShuffleMask, VT, WhichResult))
6316 : return ARMISD::VUZP;
6317 127 : if (isVZIPMask(ShuffleMask, VT, WhichResult))
6318 : return ARMISD::VZIP;
6319 :
6320 92 : isV_UNDEF = true;
6321 92 : if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
6322 : return ARMISD::VTRN;
6323 88 : if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
6324 : return ARMISD::VUZP;
6325 85 : if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
6326 1 : return ARMISD::VZIP;
6327 :
6328 : return 0;
6329 : }
6330 :
6331 : /// \return true if this is a reverse operation on a vector.
6332 37 : static bool isReverseMask(ArrayRef<int> M, EVT VT) {
6333 : unsigned NumElts = VT.getVectorNumElements();
6334 : // Make sure the mask has the right size.
6335 37 : if (NumElts != M.size())
6336 : return false;
6337 :
6338 : // Look for <15, ..., 3, -1, 1, 0>.
6339 83 : for (unsigned i = 0; i != NumElts; ++i)
6340 162 : if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
6341 : return false;
6342 :
6343 : return true;
6344 : }
6345 :
6346 : // If N is an integer constant that can be moved into a register in one
6347 : // instruction, return an SDValue of such a constant (will become a MOV
6348 : // instruction). Otherwise return null.
6349 0 : static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
6350 : const ARMSubtarget *ST, const SDLoc &dl) {
6351 : uint64_t Val;
6352 : if (!isa<ConstantSDNode>(N))
6353 0 : return SDValue();
6354 0 : Val = cast<ConstantSDNode>(N)->getZExtValue();
6355 :
6356 0 : if (ST->isThumb1Only()) {
6357 0 : if (Val <= 255 || ~Val <= 255)
6358 0 : return DAG.getConstant(Val, dl, MVT::i32);
6359 : } else {
6360 0 : if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
6361 0 : return DAG.getConstant(Val, dl, MVT::i32);
6362 : }
6363 0 : return SDValue();
6364 : }
6365 :
6366 : // If this is a case we can't handle, return null and let the default
6367 : // expansion code take care of it.
6368 1164 : SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
6369 : const ARMSubtarget *ST) const {
6370 : BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
6371 : SDLoc dl(Op);
6372 1164 : EVT VT = Op.getValueType();
6373 :
6374 : APInt SplatBits, SplatUndef;
6375 : unsigned SplatBitSize;
6376 : bool HasAnyUndefs;
6377 1164 : if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
6378 461 : if (SplatUndef.isAllOnesValue())
6379 9 : return DAG.getUNDEF(VT);
6380 :
6381 452 : if (SplatBitSize <= 64) {
6382 : // Check if an immediate VMOV works.
6383 285 : EVT VmovVT;
6384 : SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
6385 : SplatUndef.getZExtValue(), SplatBitSize,
6386 285 : DAG, dl, VmovVT, VT.is128BitVector(),
6387 570 : VMOVModImm);
6388 285 : if (Val.getNode()) {
6389 200 : SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
6390 200 : return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
6391 : }
6392 :
6393 : // Try an immediate VMVN.
6394 85 : uint64_t NegatedImm = (~SplatBits).getZExtValue();
6395 85 : Val = isNEONModifiedImm(NegatedImm,
6396 : SplatUndef.getZExtValue(), SplatBitSize,
6397 85 : DAG, dl, VmovVT, VT.is128BitVector(),
6398 170 : VMVNModImm);
6399 85 : if (Val.getNode()) {
6400 10 : SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
6401 10 : return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
6402 : }
6403 :
6404 : // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
6405 17 : if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
6406 17 : int ImmVal = ARM_AM::getFP32Imm(SplatBits);
6407 17 : if (ImmVal != -1) {
6408 5 : SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32);
6409 5 : return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
6410 : }
6411 : }
6412 : }
6413 : }
6414 :
6415 : // Scan through the operands to see if only one value is used.
6416 : //
6417 : // As an optimisation, even if more than one value is used it may be more
6418 : // profitable to splat with one value and then change some lanes.
6419 : //
6420 : // Heuristically we decide to do this if the vector has a "dominant" value,
6421 : // defined as one splatted to more than half of the lanes.
6422 : unsigned NumElts = VT.getVectorNumElements();
6423 : bool isOnlyLowElement = true;
6424 : bool usesOnlyOneValue = true;
6425 : bool hasDominantValue = false;
6426 : bool isConstant = true;
6427 :
6428 : // Map of the number of times a particular SDValue appears in the
6429 : // element list.
6430 : DenseMap<SDValue, unsigned> ValueCounts;
6431 : SDValue Value;
6432 3982 : for (unsigned i = 0; i < NumElts; ++i) {
6433 3042 : SDValue V = Op.getOperand(i);
6434 6084 : if (V.isUndef())
6435 246 : continue;
6436 2796 : if (i > 0)
6437 : isOnlyLowElement = false;
6438 : if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
6439 : isConstant = false;
6440 :
6441 2796 : ValueCounts.insert(std::make_pair(V, 0));
6442 : unsigned &Count = ValueCounts[V];
6443 :
6444 : // Is this value dominant? (takes up more than half of the lanes)
6445 2796 : if (++Count > (NumElts / 2)) {
6446 : hasDominantValue = true;
6447 267 : Value = V;
6448 : }
6449 : }
6450 940 : if (ValueCounts.size() != 1)
6451 : usesOnlyOneValue = false;
6452 940 : if (!Value.getNode() && !ValueCounts.empty())
6453 816 : Value = ValueCounts.begin()->first;
6454 :
6455 940 : if (ValueCounts.empty())
6456 0 : return DAG.getUNDEF(VT);
6457 :
6458 : // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
6459 : // Keep going if we are hitting this case.
6460 940 : if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
6461 20 : return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
6462 :
6463 : unsigned EltSize = VT.getScalarSizeInBits();
6464 :
6465 : // Use VDUP for non-constant splats. For f32 constant splats, reduce to
6466 : // i32 and try again.
6467 920 : if (hasDominantValue && EltSize <= 32) {
6468 123 : if (!isConstant) {
6469 102 : SDValue N;
6470 :
6471 : // If we are VDUPing a value that comes directly from a vector, that will
6472 : // cause an unnecessary move to and from a GPR, where instead we could
6473 : // just use VDUPLANE. We can only do this if the lane being extracted
6474 : // is at a constant index, as the VDUP from lane instructions only have
6475 : // constant-index forms.
6476 : ConstantSDNode *constIndex;
6477 102 : if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6478 2 : (constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
6479 : // We need to create a new undef vector to use for the VDUPLANE if the
6480 : // size of the vector from which we get the value is different than the
6481 : // size of the vector that we need to create. We will insert the element
6482 : // such that the register coalescer will remove unnecessary copies.
6483 2 : if (VT != Value->getOperand(0).getValueType()) {
6484 0 : unsigned index = constIndex->getAPIntValue().getLimitedValue() %
6485 0 : VT.getVectorNumElements();
6486 0 : N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
6487 : DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
6488 : Value, DAG.getConstant(index, dl, MVT::i32)),
6489 0 : DAG.getConstant(index, dl, MVT::i32));
6490 : } else
6491 1 : N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
6492 1 : Value->getOperand(0), Value->getOperand(1));
6493 : } else
6494 101 : N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
6495 :
6496 102 : if (!usesOnlyOneValue) {
6497 : // The dominant value was splatted as 'N', but we now have to insert
6498 : // all differing elements.
6499 15 : for (unsigned I = 0; I < NumElts; ++I) {
6500 12 : if (Op.getOperand(I) == Value)
6501 9 : continue;
6502 : SmallVector<SDValue, 3> Ops;
6503 3 : Ops.push_back(N);
6504 3 : Ops.push_back(Op.getOperand(I));
6505 3 : Ops.push_back(DAG.getConstant(I, dl, MVT::i32));
6506 3 : N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
6507 : }
6508 : }
6509 102 : return N;
6510 : }
6511 21 : if (VT.getVectorElementType().isFloatingPoint()) {
6512 : SmallVector<SDValue, 8> Ops;
6513 38 : for (unsigned i = 0; i < NumElts; ++i)
6514 30 : Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
6515 60 : Op.getOperand(i)));
6516 8 : EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
6517 8 : SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
6518 8 : Val = LowerBUILD_VECTOR(Val, DAG, ST);
6519 8 : if (Val.getNode())
6520 0 : return DAG.getNode(ISD::BITCAST, dl, VT, Val);
6521 : }
6522 21 : if (usesOnlyOneValue) {
6523 19 : SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
6524 19 : if (isConstant && Val.getNode())
6525 0 : return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
6526 : }
6527 : }
6528 :
6529 : // If all elements are constants and the case above didn't get hit, fall back
6530 : // to the default expansion, which will generate a load from the constant
6531 : // pool.
6532 818 : if (isConstant)
6533 235 : return SDValue();
6534 :
6535 : // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
6536 583 : if (NumElts >= 4) {
6537 157 : SDValue shuffle = ReconstructShuffle(Op, DAG);
6538 : if (shuffle != SDValue())
6539 20 : return shuffle;
6540 : }
6541 :
6542 563 : if (VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
6543 : // If we haven't found an efficient lowering, try splitting a 128-bit vector
6544 : // into two 64-bit vectors; we might discover a better way to lower it.
6545 74 : SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts);
6546 74 : EVT ExtVT = VT.getVectorElementType();
6547 74 : EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElts / 2);
6548 : SDValue Lower =
6549 148 : DAG.getBuildVector(HVT, dl, makeArrayRef(&Ops[0], NumElts / 2));
6550 74 : if (Lower.getOpcode() == ISD::BUILD_VECTOR)
6551 74 : Lower = LowerBUILD_VECTOR(Lower, DAG, ST);
6552 : SDValue Upper = DAG.getBuildVector(
6553 74 : HVT, dl, makeArrayRef(&Ops[NumElts / 2], NumElts / 2));
6554 74 : if (Upper.getOpcode() == ISD::BUILD_VECTOR)
6555 74 : Upper = LowerBUILD_VECTOR(Upper, DAG, ST);
6556 74 : if (Lower && Upper)
6557 73 : return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lower, Upper);
6558 : }
6559 :
6560 : // Vectors with 32- or 64-bit elements can be built by directly assigning
6561 : // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
6562 : // will be legalized.
6563 490 : if (EltSize >= 32) {
6564 : // Do the expansion with floating-point types, since that is what the VFP
6565 : // registers are defined to use, and since i64 is not legal.
6566 483 : EVT EltVT = EVT::getFloatingPointVT(EltSize);
6567 483 : EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
6568 : SmallVector<SDValue, 8> Ops;
6569 1563 : for (unsigned i = 0; i < NumElts; ++i)
6570 1080 : Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
6571 483 : SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
6572 483 : return DAG.getNode(ISD::BITCAST, dl, VT, Val);
6573 : }
6574 :
6575 : // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
6576 : // know the default expansion would otherwise fall back on something even
6577 : // worse. For a vector with one or two non-undef values, that default is
6578 : // scalar_to_vector for the elements followed by a shuffle (provided the
6579 : // shuffle is valid for the target); for everything else, it is
6580 : // materialization element by element on the stack followed by a load.
6581 7 : if (!isConstant && !usesOnlyOneValue) {
6582 4 : SDValue Vec = DAG.getUNDEF(VT);
6583 24 : for (unsigned i = 0 ; i < NumElts; ++i) {
6584 20 : SDValue V = Op.getOperand(i);
6585 20 : if (V.isUndef())
6586 8 : continue;
6587 12 : SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32);
6588 12 : Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
6589 : }
6590 4 : return Vec;
6591 : }
6592 :
6593 3 : return SDValue();
6594 : }
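// A minimal standalone sketch of the dominant-value heuristic used by
// LowerBUILD_VECTOR above: a value is "dominant" once it covers more than
// half of all lanes. Plain ints stand in for SDValues and the helper name
// is hypothetical; this is illustration, not part of the lowering.
#include <map>
#include <optional>
#include <vector>

static std::optional<int>
findDominantValue(const std::vector<int> &NonUndefLanes, unsigned NumElts) {
  std::map<int, unsigned> Counts;
  for (int V : NonUndefLanes)
    if (++Counts[V] > NumElts / 2)
      return V;        // Splatted to more than half of the lanes.
  return std::nullopt; // No dominant value; other lowering paths apply.
}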
6595 :
6596 : // Gather data to see if the operation can be modelled as a
6597 : // shuffle in combination with VEXTs.
6598 157 : SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
6599 : SelectionDAG &DAG) const {
6600 : assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
6601 : SDLoc dl(Op);
6602 157 : EVT VT = Op.getValueType();
6603 : unsigned NumElts = VT.getVectorNumElements();
6604 :
6605 : struct ShuffleSourceInfo {
6606 : SDValue Vec;
6607 : unsigned MinElt = std::numeric_limits<unsigned>::max();
6608 : unsigned MaxElt = 0;
6609 :
6610 : // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
6611 : // be compatible with the shuffle we intend to construct. As a result
6612 : // ShuffleVec will be some sliding window into the original Vec.
6613 : SDValue ShuffleVec;
6614 :
6615 : // Code should guarantee that element i in Vec starts at element
6616 : // "WindowBase + i * WindowScale" in ShuffleVec.
6617 : int WindowBase = 0;
6618 : int WindowScale = 1;
6619 :
6620 56 : ShuffleSourceInfo(SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {}
6621 :
6622 0 : bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
6623 : };
6624 :
6625 : // First gather all vectors used as an immediate source for this BUILD_VECTOR
6626 : // node.
6627 157 : SmallVector<ShuffleSourceInfo, 2> Sources;
6628 407 : for (unsigned i = 0; i < NumElts; ++i) {
6629 376 : SDValue V = Op.getOperand(i);
6630 376 : if (V.isUndef())
6631 98 : continue;
6632 278 : else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
6633 : // A shuffle can only come from building a vector from various
6634 : // elements of other vectors.
6635 125 : return SDValue();
6636 : } else if (!isa<ConstantSDNode>(V.getOperand(1))) {
6637 : // Furthermore, shuffles require a constant mask, whereas extractelts
6638 : // accept variable indices.
6639 1 : return SDValue();
6640 : }
6641 :
6642 : // Add this element source to the list if it's not already there.
6643 152 : SDValue SourceVec = V.getOperand(0);
6644 152 : auto Source = llvm::find(Sources, SourceVec);
6645 152 : if (Source == Sources.end())
6646 56 : Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
6647 :
6648 : // Update the minimum and maximum lane number seen.
6649 152 : unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
6650 152 : Source->MinElt = std::min(Source->MinElt, EltNo);
6651 251 : Source->MaxElt = std::max(Source->MaxElt, EltNo);
6652 : }
6653 :
6654 : // Currently only do something sane when at most two source vectors
6655 : // are involved.
6656 31 : if (Sources.size() > 2)
6657 0 : return SDValue();
6658 :
6659 : // Find out the smallest element size among result and two sources, and use
6660 : // it as element size to build the shuffle_vector.
6661 31 : EVT SmallestEltTy = VT.getVectorElementType();
6662 84 : for (auto &Source : Sources) {
6663 106 : EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
6664 53 : if (SrcEltTy.bitsLT(SmallestEltTy))
6665 4 : SmallestEltTy = SrcEltTy;
6666 : }
6667 : unsigned ResMultiplier =
6668 31 : VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
6669 31 : NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
6670 31 : EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
6671 :
6672 : // If the source vector is too wide or too narrow, we may nevertheless be able
6673 : // to construct a compatible shuffle either by concatenating it with UNDEF or
6674 : // extracting a suitable range of elements.
6675 79 : for (auto &Src : Sources) {
6676 51 : EVT SrcVT = Src.ShuffleVec.getValueType();
6677 :
6678 51 : if (SrcVT.getSizeInBits() == VT.getSizeInBits())
6679 38 : continue;
6680 :
6681 : // This stage of the search produces a source with the same element type as
6682 : // the original, but with a total width matching the BUILD_VECTOR output.
6683 21 : EVT EltVT = SrcVT.getVectorElementType();
6684 21 : unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();
6685 21 : EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
6686 :
6687 21 : if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
6688 8 : if (2 * SrcVT.getSizeInBits() != VT.getSizeInBits())
6689 0 : return SDValue();
6690 : // We can pad out the smaller vector with UNDEF for free, so widen it
6691 : // here and let the shuffle use the concatenated value.
6692 8 : Src.ShuffleVec =
6693 8 : DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
6694 16 : DAG.getUNDEF(Src.ShuffleVec.getValueType()));
6695 8 : continue;
6696 : }
6697 :
6698 13 : if (SrcVT.getSizeInBits() != 2 * VT.getSizeInBits())
6699 0 : return SDValue();
6700 :
6701 13 : if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
6702 : // Span too large for a VEXT to cope
6703 3 : return SDValue();
6704 : }
6705 :
6706 10 : if (Src.MinElt >= NumSrcElts) {
6707 : // The extraction can just take the second half
6708 0 : Src.ShuffleVec =
6709 0 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6710 0 : DAG.getConstant(NumSrcElts, dl, MVT::i32));
6711 0 : Src.WindowBase = -NumSrcElts;
6712 10 : } else if (Src.MaxElt < NumSrcElts) {
6713 : // The extraction can just take the first half
6714 9 : Src.ShuffleVec =
6715 9 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6716 9 : DAG.getConstant(0, dl, MVT::i32));
6717 : } else {
6718 : // An actual VEXT is needed
6719 : SDValue VEXTSrc1 =
6720 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6721 1 : DAG.getConstant(0, dl, MVT::i32));
6722 : SDValue VEXTSrc2 =
6723 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6724 1 : DAG.getConstant(NumSrcElts, dl, MVT::i32));
6725 :
6726 1 : Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
6727 : VEXTSrc2,
6728 1 : DAG.getConstant(Src.MinElt, dl, MVT::i32));
6729 1 : Src.WindowBase = -Src.MinElt;
6730 : }
6731 : }
6732 :
6733 : // Another possible incompatibility occurs from the vector element types. We
6734 : // can fix this by bitcasting the source vectors to the same type we intend
6735 : // for the shuffle.
6736 76 : for (auto &Src : Sources) {
6737 96 : EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
6738 48 : if (SrcEltTy == SmallestEltTy)
6739 36 : continue;
6740 : assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
6741 12 : Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
6742 12 : Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
6743 12 : Src.WindowBase *= Src.WindowScale;
6744 : }
6745 :
6746 : // Final sanity check before we try to actually produce a shuffle.
6747 : LLVM_DEBUG(for (auto Src
6748 : : Sources)
6749 : assert(Src.ShuffleVec.getValueType() == ShuffleVT););
6750 :
6751 : // The stars all align; our next step is to produce the mask for the shuffle.
6752 56 : SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
6753 28 : int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
6754 396 : for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
6755 184 : SDValue Entry = Op.getOperand(i);
6756 184 : if (Entry.isUndef())
6757 52 : continue;
6758 :
6759 132 : auto Src = llvm::find(Sources, Entry.getOperand(0));
6760 132 : int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
6761 :
6762 : // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
6763 : // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
6764 : // segment.
6765 264 : EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
6766 396 : int BitsDefined = std::min(OrigEltTy.getSizeInBits(),
6767 132 : VT.getScalarSizeInBits());
6768 132 : int LanesDefined = BitsDefined / BitsPerShuffleLane;
6769 :
6770 : // This source is expected to fill ResMultiplier lanes of the final shuffle,
6771 : // starting at the appropriate offset.
6772 132 : int *LaneMask = &Mask[i * ResMultiplier];
6773 :
6774 132 : int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
6775 264 : ExtractBase += NumElts * (Src - Sources.begin());
6776 264 : for (int j = 0; j < LanesDefined; ++j)
6777 132 : LaneMask[j] = ExtractBase + j;
6778 : }
6779 :
6780 : // Final check before we try to produce nonsense...
6781 56 : if (!isShuffleMaskLegal(Mask, ShuffleVT))
6782 8 : return SDValue();
6783 :
6784 : // We can't handle more than two sources. This should have already
6785 : // been checked before this point.
6786 : assert(Sources.size() <= 2 && "Too many sources!");
6787 :
6788 20 : SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
6789 52 : for (unsigned i = 0; i < Sources.size(); ++i)
6790 32 : ShuffleOps[i] = Sources[i].ShuffleVec;
6791 :
6792 : SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
6793 20 : ShuffleOps[1], Mask);
6794 20 : return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
6795 : }
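// A small model of the mask arithmetic in ReconstructShuffle above: element
// EltNo extracted from source number SrcIdx (0 or 1) lands at shuffle lane
// EltNo * WindowScale + WindowBase + SrcIdx * NumShuffleLanes, and fills
// LanesDefined consecutive mask slots. All names here are hypothetical.
#include <vector>

static void fillShuffleMaskEntry(std::vector<int> &Mask, int OperandIdx,
                                 int ResMultiplier, int EltNo, int WindowScale,
                                 int WindowBase, int SrcIdx,
                                 int NumShuffleLanes, int LanesDefined) {
  int ExtractBase = EltNo * WindowScale + WindowBase + SrcIdx * NumShuffleLanes;
  for (int j = 0; j < LanesDefined; ++j)
    Mask[OperandIdx * ResMultiplier + j] = ExtractBase + j;
}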
6796 :
6797 : /// isShuffleMaskLegal - Targets can use this to indicate that they only
6798 : /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
6799 : /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
6800 : /// are assumed to be legal.
6801 93 : bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
6802 93 : if (VT.getVectorNumElements() == 4 &&
6803 25 : (VT.is128BitVector() || VT.is64BitVector())) {
6804 : unsigned PFIndexes[4];
6805 : for (unsigned i = 0; i != 4; ++i) {
6806 : if (M[i] < 0)
6807 : PFIndexes[i] = 8;
6808 : else
6809 : PFIndexes[i] = M[i];
6810 : }
6811 :
6812 : // Compute the index in the perfect shuffle table.
6813 : unsigned PFTableIndex =
6814 : PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
6815 : unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
6816 : unsigned Cost = (PFEntry >> 30);
6817 :
6818 : if (Cost <= 4)
6819 : return true;
6820 : }
6821 :
6822 : bool ReverseVEXT, isV_UNDEF;
6823 : unsigned Imm, WhichResult;
6824 :
6825 : unsigned EltSize = VT.getScalarSizeInBits();
6826 44 : return (EltSize >= 32 ||
6827 86 : ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
6828 84 : isVREVMask(M, VT, 64) ||
6829 83 : isVREVMask(M, VT, 32) ||
6830 80 : isVREVMask(M, VT, 16) ||
6831 39 : isVEXTMask(M, VT, ReverseVEXT, Imm) ||
6832 64 : isVTBLMask(M, VT) ||
6833 96 : isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF) ||
6834 28 : ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)));
6835 : }
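// A sketch of the perfect-shuffle table indexing used above: a 4-element
// mask is treated as a 4-digit base-9 number, where lane indices 0-7 map to
// digits 0-7 and an undef lane (negative) maps to digit 8. The helper name
// is hypothetical; the encoding matches the inline expression.
static unsigned perfectShuffleTableIndex(const int Mask[4]) {
  unsigned Index = 0;
  for (unsigned i = 0; i != 4; ++i) {
    unsigned Digit = Mask[i] < 0 ? 8u : (unsigned)Mask[i];
    Index = Index * 9 + Digit; // Same as M0*9*9*9 + M1*9*9 + M2*9 + M3.
  }
  return Index;
}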
6836 :
6837 : /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
6838 : /// the specified operations to build the shuffle.
6839 100 : static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
6840 : SDValue RHS, SelectionDAG &DAG,
6841 : const SDLoc &dl) {
6842 100 : unsigned OpNum = (PFEntry >> 26) & 0x0F;
6843 100 : unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
6844 100 : unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
6845 :
6846 : enum {
6847 : OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
6848 : OP_VREV,
6849 : OP_VDUP0,
6850 : OP_VDUP1,
6851 : OP_VDUP2,
6852 : OP_VDUP3,
6853 : OP_VEXT1,
6854 : OP_VEXT2,
6855 : OP_VEXT3,
6856 : OP_VUZPL, // VUZP, left result
6857 : OP_VUZPR, // VUZP, right result
6858 : OP_VZIPL, // VZIP, left result
6859 : OP_VZIPR, // VZIP, right result
6860 : OP_VTRNL, // VTRN, left result
6861 : OP_VTRNR // VTRN, right result
6862 : };
6863 :
6864 100 : if (OpNum == OP_COPY) {
6865 58 : if (LHSID == (1*9+2)*9+3) return LHS;
6866 : assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
6867 13 : return RHS;
6868 : }
6869 :
6870 : SDValue OpLHS, OpRHS;
6871 42 : OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
6872 42 : OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
6873 42 : EVT VT = OpLHS.getValueType();
6874 :
6875 42 : switch (OpNum) {
6876 0 : default: llvm_unreachable("Unknown shuffle opcode!");
6877 : case OP_VREV:
6878 : // VREV divides the vector in half and swaps within the half.
6879 3 : if (VT.getVectorElementType() == MVT::i32 ||
6880 4 : VT.getVectorElementType() == MVT::f32)
6881 3 : return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
6882 : // vrev <4 x i16> -> VREV32
6883 0 : if (VT.getVectorElementType() == MVT::i16)
6884 0 : return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
6885 : // vrev <4 x i8> -> VREV16
6886 : assert(VT.getVectorElementType() == MVT::i8);
6887 0 : return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
6888 : case OP_VDUP0:
6889 : case OP_VDUP1:
6890 : case OP_VDUP2:
6891 : case OP_VDUP3:
6892 : return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
6893 7 : OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32));
6894 : case OP_VEXT1:
6895 : case OP_VEXT2:
6896 : case OP_VEXT3:
6897 : return DAG.getNode(ARMISD::VEXT, dl, VT,
6898 : OpLHS, OpRHS,
6899 17 : DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32));
6900 6 : case OP_VUZPL:
6901 : case OP_VUZPR:
6902 6 : return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
6903 6 : OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
6904 8 : case OP_VZIPL:
6905 : case OP_VZIPR:
6906 8 : return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
6907 8 : OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
6908 1 : case OP_VTRNL:
6909 : case OP_VTRNR:
6910 1 : return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
6911 1 : OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
6912 : }
6913 : }
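// A sketch of how a perfect-shuffle table entry is packed, matching the
// shifts and masks decoded in GeneratePerfectShuffle and isShuffleMaskLegal.
// The struct and function names are hypothetical.
struct PerfectShuffleEntry {
  unsigned Cost;  // Bits 31-30: cost of the generated sequence.
  unsigned OpNum; // Bits 29-26: one of the OP_* opcodes above.
  unsigned LHSID; // Bits 25-13: identifier of the left sub-shuffle.
  unsigned RHSID; // Bits 12-0:  identifier of the right sub-shuffle.
};

static PerfectShuffleEntry decodePFEntry(unsigned PFEntry) {
  PerfectShuffleEntry E;
  E.Cost = PFEntry >> 30;
  E.OpNum = (PFEntry >> 26) & 0x0F;
  E.LHSID = (PFEntry >> 13) & ((1u << 13) - 1);
  E.RHSID = (PFEntry >> 0) & ((1u << 13) - 1);
  return E;
}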
6914 :
6915 0 : static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
6916 : ArrayRef<int> ShuffleMask,
6917 : SelectionDAG &DAG) {
6918 : // Check to see if we can use the VTBL instruction.
6919 0 : SDValue V1 = Op.getOperand(0);
6920 0 : SDValue V2 = Op.getOperand(1);
6921 0 : SDLoc DL(Op);
6922 :
6923 : SmallVector<SDValue, 8> VTBLMask;
6924 0 : for (ArrayRef<int>::iterator
6925 0 : I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
6926 0 : VTBLMask.push_back(DAG.getConstant(*I, DL, MVT::i32));
6927 :
6928 0 : if (V2.getNode()->isUndef())
6929 : return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
6930 0 : DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
6931 :
6932 : return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
6933 0 : DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
6934 : }
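// A scalar model of the VTBL semantics relied on above: each mask byte
// selects a byte from the table (one or two D registers for VTBL1/VTBL2),
// and out-of-range indices yield zero. Names are hypothetical.
#include <cstddef>
#include <cstdint>
#include <vector>

static std::vector<uint8_t> vtblModel(const std::vector<uint8_t> &Table,
                                      const std::vector<uint8_t> &Mask) {
  std::vector<uint8_t> Result(Mask.size());
  for (size_t i = 0; i != Mask.size(); ++i)
    Result[i] = Mask[i] < Table.size() ? Table[Mask[i]] : 0;
  return Result;
}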
6935 :
6936 0 : static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
6937 : SelectionDAG &DAG) {
6938 0 : SDLoc DL(Op);
6939 0 : SDValue OpLHS = Op.getOperand(0);
6940 0 : EVT VT = OpLHS.getValueType();
6941 :
6942 : assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
6943 : "Expect an v8i16/v16i8 type");
6944 0 : OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS);
6945 : // For a v16i8 type: after the VREV64 we have <7, ..., 0, 15, ..., 8>. A
6946 : // VEXT of 8 bytes then swaps the two double words, giving the fully
6947 : // reversed vector. The v8i16 case is similar, using a VEXT of 4 halfwords.
6948 0 : unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
6949 : return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
6950 0 : DAG.getConstant(ExtractNum, DL, MVT::i32));
6951 : }
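// A scalar model of the reverse lowering above for the 16-byte case: VREV64
// reverses bytes within each 64-bit half, and a VEXT of 8 bytes then swaps
// the two halves, producing the fully reversed vector. Lane-numbering
// endianness is glossed over; the function name is hypothetical.
#include <algorithm>
#include <array>
#include <cstdint>

static std::array<uint8_t, 16> reverseViaVrevVext(std::array<uint8_t, 16> V) {
  std::reverse(V.begin(), V.begin() + 8); // VREV64.8 on the low doubleword.
  std::reverse(V.begin() + 8, V.end());   // VREV64.8 on the high doubleword.
  std::rotate(V.begin(), V.begin() + 8, V.end()); // VEXT.8 #8: swap halves.
  return V;
}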
6952 :
6953 0 : static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
6954 0 : SDValue V1 = Op.getOperand(0);
6955 0 : SDValue V2 = Op.getOperand(1);
6956 0 : SDLoc dl(Op);
6957 0 : EVT VT = Op.getValueType();
6958 : ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
6959 :
6960 : // Convert shuffles that are directly supported on NEON to target-specific
6961 : // DAG nodes, instead of keeping them as shuffles and matching them again
6962 : // during code selection. This is more efficient and avoids the possibility
6963 : // of inconsistencies between legalization and selection.
6964 : // FIXME: floating-point vectors should be canonicalized to integer vectors
6965 : // of the same size so that they get CSEd properly.
6966 0 : ArrayRef<int> ShuffleMask = SVN->getMask();
6967 :
6968 : unsigned EltSize = VT.getScalarSizeInBits();
6969 0 : if (EltSize <= 32) {
6970 0 : if (SVN->isSplat()) {
6971 0 : int Lane = SVN->getSplatIndex();
6972 : // If this is an undef splat, generate it via "just" vdup, if possible.
6973 0 : if (Lane == -1) Lane = 0;
6974 :
6975 : // Test if V1 is a SCALAR_TO_VECTOR.
6976 0 : if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
6977 0 : return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
6978 : }
6979 : // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
6980 : // (and probably will turn into a SCALAR_TO_VECTOR once legalization
6981 : // reaches it).
6982 0 : if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
6983 : !isa<ConstantSDNode>(V1.getOperand(0))) {
6984 : bool IsScalarToVector = true;
6985 0 : for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
6986 0 : if (!V1.getOperand(i).isUndef()) {
6987 : IsScalarToVector = false;
6988 : break;
6989 : }
6990 0 : if (IsScalarToVector)
6991 0 : return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
6992 : }
6993 : return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
6994 0 : DAG.getConstant(Lane, dl, MVT::i32));
6995 : }
6996 :
6997 : bool ReverseVEXT;
6998 : unsigned Imm;
6999 0 : if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
7000 0 : if (ReverseVEXT)
7001 : std::swap(V1, V2);
7002 : return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
7003 0 : DAG.getConstant(Imm, dl, MVT::i32));
7004 : }
7005 :
7006 0 : if (isVREVMask(ShuffleMask, VT, 64))
7007 0 : return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
7008 0 : if (isVREVMask(ShuffleMask, VT, 32))
7009 0 : return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
7010 0 : if (isVREVMask(ShuffleMask, VT, 16))
7011 0 : return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
7012 :
7013 0 : if (V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
7014 : return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
7015 0 : DAG.getConstant(Imm, dl, MVT::i32));
7016 : }
7017 :
7018 : // Check for Neon shuffles that modify both input vectors in place.
7019 : // If both results are used, i.e., if there are two shuffles with the same
7020 : // source operands and with masks corresponding to both results of one of
7021 : // these operations, DAG memoization will ensure that a single node is
7022 : // used for both shuffles.
7023 : unsigned WhichResult;
7024 : bool isV_UNDEF;
7025 0 : if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
7026 : ShuffleMask, VT, WhichResult, isV_UNDEF)) {
7027 0 : if (isV_UNDEF)
7028 0 : V2 = V1;
7029 0 : return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
7030 0 : .getValue(WhichResult);
7031 : }
7032 :
7033 : // Also check for these shuffles through CONCAT_VECTORS: we canonicalize
7034 : // shuffles that produce a result larger than their operands with:
7035 : // shuffle(concat(v1, undef), concat(v2, undef))
7036 : // ->
7037 : // shuffle(concat(v1, v2), undef)
7038 : // because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).
7039 : //
7040 : // This is useful in the general case, but there are special cases where
7041 : // native shuffles produce larger results: the two-result ops.
7042 : //
7043 : // Look through the concat when lowering them:
7044 : // shuffle(concat(v1, v2), undef)
7045 : // ->
7046 : // concat(VZIP(v1, v2):0, :1)
7047 : //
7048 0 : if (V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
7049 0 : SDValue SubV1 = V1->getOperand(0);
7050 0 : SDValue SubV2 = V1->getOperand(1);
7051 0 : EVT SubVT = SubV1.getValueType();
7052 :
7053 : // We expect these to have been canonicalized to -1.
7054 : assert(llvm::all_of(ShuffleMask, [&](int i) {
7055 : return i < (int)VT.getVectorNumElements();
7056 : }) && "Unexpected shuffle index into UNDEF operand!");
7057 :
7058 0 : if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
7059 : ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
7060 0 : if (isV_UNDEF)
7061 0 : SubV2 = SubV1;
7062 : assert((WhichResult == 0) &&
7063 : "In-place shuffle of concat can only have one result!");
7064 : SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT),
7065 0 : SubV1, SubV2);
7066 : return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0),
7067 0 : Res.getValue(1));
7068 : }
7069 : }
7070 : }
7071 :
7072 : // If the shuffle is not directly supported and it has 4 elements, use
7073 : // the PerfectShuffle-generated table to synthesize it from other shuffles.
7074 : unsigned NumElts = VT.getVectorNumElements();
7075 0 : if (NumElts == 4) {
7076 : unsigned PFIndexes[4];
7077 0 : for (unsigned i = 0; i != 4; ++i) {
7078 0 : if (ShuffleMask[i] < 0)
7079 0 : PFIndexes[i] = 8;
7080 : else
7081 0 : PFIndexes[i] = ShuffleMask[i];
7082 : }
7083 :
7084 : // Compute the index in the perfect shuffle table.
7085 0 : unsigned PFTableIndex =
7086 0 : PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
7087 0 : unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
7088 : unsigned Cost = (PFEntry >> 30);
7089 :
7090 : if (Cost <= 4)
7091 0 : return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
7092 : }
7093 :
7094 : // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
7095 0 : if (EltSize >= 32) {
7096 : // Do the expansion with floating-point types, since that is what the VFP
7097 : // registers are defined to use, and since i64 is not legal.
7098 0 : EVT EltVT = EVT::getFloatingPointVT(EltSize);
7099 0 : EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
7100 0 : V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
7101 0 : V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
7102 : SmallVector<SDValue, 8> Ops;
7103 0 : for (unsigned i = 0; i < NumElts; ++i) {
7104 0 : if (ShuffleMask[i] < 0)
7105 0 : Ops.push_back(DAG.getUNDEF(EltVT));
7106 : else
7107 0 : Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
7108 0 : ShuffleMask[i] < (int)NumElts ? V1 : V2,
7109 0 : DAG.getConstant(ShuffleMask[i] & (NumElts-1),
7110 0 : dl, MVT::i32)));
7111 : }
7112 0 : SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
7113 0 : return DAG.getNode(ISD::BITCAST, dl, VT, Val);
7114 : }
7115 :
7116 0 : if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
7117 0 : return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
7118 :
7119 0 : if (VT == MVT::v8i8)
7120 0 : if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
7121 0 : return NewOp;
7122 :
7123 0 : return SDValue();
7124 : }
7125 :
7126 0 : static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
7127 : // INSERT_VECTOR_ELT is legal only for immediate indexes.
7128 938 : SDValue Lane = Op.getOperand(2);
7129 : if (!isa<ConstantSDNode>(Lane))
7130 0 : return SDValue();
7131 :
7132 0 : return Op;
7133 : }
7134 :
7135 5529 : static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
7136 : // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
7137 5529 : SDValue Lane = Op.getOperand(1);
7138 : if (!isa<ConstantSDNode>(Lane))
7139 8 : return SDValue();
7140 :
7141 5521 : SDValue Vec = Op.getOperand(0);
7142 847 : if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) {
7143 : SDLoc dl(Op);
7144 77 : return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
7145 : }
7146 :
7147 5444 : return Op;
7148 : }
7149 :
7150 0 : static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
7151 : // The only time a CONCAT_VECTORS operation can have legal types is when
7152 : // two 64-bit vectors are concatenated to a 128-bit vector.
7153 : assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
7154 : "unexpected CONCAT_VECTORS");
7155 0 : SDLoc dl(Op);
7156 0 : SDValue Val = DAG.getUNDEF(MVT::v2f64);
7157 0 : SDValue Op0 = Op.getOperand(0);
7158 0 : SDValue Op1 = Op.getOperand(1);
7159 0 : if (!Op0.isUndef())
7160 0 : Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
7161 : DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
7162 0 : DAG.getIntPtrConstant(0, dl));
7163 0 : if (!Op1.isUndef())
7164 0 : Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
7165 : DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
7166 0 : DAG.getIntPtrConstant(1, dl));
7167 0 : return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
7168 : }
7169 :
7170 : /// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
7171 : /// element has been zero/sign-extended, depending on the isSigned parameter,
7172 : /// from an integer type half its size.
7173 518 : static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
7174 : bool isSigned) {
7175 : // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
7176 1036 : EVT VT = N->getValueType(0);
7177 18 : if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
7178 5 : SDNode *BVN = N->getOperand(0).getNode();
7179 5 : if (BVN->getValueType(0) != MVT::v4i32 ||
7180 5 : BVN->getOpcode() != ISD::BUILD_VECTOR)
7181 : return false;
7182 5 : unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
7183 5 : unsigned HiElt = 1 - LoElt;
7184 5 : ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
7185 : ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
7186 5 : ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
7187 5 : ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
7188 5 : if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
7189 : return false;
7190 5 : if (isSigned) {
7191 12 : if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
7192 12 : Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
7193 : return true;
7194 : } else {
7195 3 : if (Hi0->isNullValue() && Hi1->isNullValue())
7196 : return true;
7197 : }
7198 0 : return false;
7199 : }
7200 :
7201 513 : if (N->getOpcode() != ISD::BUILD_VECTOR)
7202 : return false;
7203 :
7204 87 : for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
7205 156 : SDNode *Elt = N->getOperand(i).getNode();
7206 : if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
7207 : unsigned EltSize = VT.getScalarSizeInBits();
7208 66 : unsigned HalfSize = EltSize / 2;
7209 66 : if (isSigned) {
7210 34 : if (!isIntN(HalfSize, C->getSExtValue()))
7211 : return false;
7212 : } else {
7213 32 : if (!isUIntN(HalfSize, C->getZExtValue()))
7214 : return false;
7215 : }
7216 : continue;
7217 : }
7218 : return false;
7219 : }
7220 :
7221 : return true;
7222 : }
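// A plain-integer model of the half-width test above: a constant element
// could only have come from sign/zero extension of a type half its size if
// it fits in that narrower width. The real code uses isIntN / isUIntN on
// APInt values; the helper below is a hypothetical stand-in.
#include <cstdint>

static bool fitsInHalfWidth(int64_t V, unsigned EltBits, bool IsSigned) {
  unsigned HalfBits = EltBits / 2; // EltBits is at most 64 here.
  if (IsSigned) {
    int64_t Min = -(INT64_C(1) << (HalfBits - 1));
    int64_t Max = (INT64_C(1) << (HalfBits - 1)) - 1;
    return V >= Min && V <= Max;
  }
  uint64_t U = static_cast<uint64_t>(V);
  return U < (UINT64_C(1) << HalfBits);
}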
7223 :
7224 : /// isSignExtended - Check if a node is a vector value that is sign-extended
7225 : /// or a constant BUILD_VECTOR with sign-extended elements.
7226 354 : static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
7227 354 : if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
7228 : return true;
7229 274 : if (isExtendedBUILD_VECTOR(N, DAG, true))
7230 9 : return true;
7231 : return false;
7232 : }
7233 :
7234 : /// isZeroExtended - Check if a node is a vector value that is zero-extended
7235 : /// or a constant BUILD_VECTOR with zero-extended elements.
7236 292 : static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
7237 292 : if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N))
7238 : return true;
7239 244 : if (isExtendedBUILD_VECTOR(N, DAG, false))
7240 5 : return true;
7241 : return false;
7242 : }
7243 :
7244 59 : static EVT getExtensionTo64Bits(const EVT &OrigVT) {
7245 59 : if (OrigVT.getSizeInBits() >= 64)
7246 46 : return OrigVT;
7247 :
7248 : assert(OrigVT.isSimple() && "Expecting a simple value type");
7249 :
7250 : MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
7251 13 : switch (OrigSimpleTy) {
7252 0 : default: llvm_unreachable("Unexpected Vector Type");
7253 : case MVT::v2i8:
7254 : case MVT::v2i16:
7255 9 : return MVT::v2i32;
7256 : case MVT::v4i8:
7257 4 : return MVT::v4i16;
7258 : }
7259 : }
7260 :
7261 : /// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
7262 : /// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
7263 : /// We insert the required extension here to get the vector to fill a D register.
7264 0 : static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
7265 : const EVT &OrigTy,
7266 : const EVT &ExtTy,
7267 : unsigned ExtOpcode) {
7268 : // The vector originally had a size of OrigTy. It was then extended to ExtTy.
7269 : // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
7270 : // 64-bits we need to insert a new extension so that it will be 64-bits.
7271 : assert(ExtTy.is128BitVector() && "Unexpected extension size");
7272 0 : if (OrigTy.getSizeInBits() >= 64)
7273 0 : return N;
7274 :
7275 : // Must extend size to at least 64 bits to be used as an operand for VMULL.
7276 0 : EVT NewVT = getExtensionTo64Bits(OrigTy);
7277 :
7278 0 : return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
7279 : }
7280 :
7281 : /// SkipLoadExtensionForVMULL - return a load of the original vector size that
7282 : /// does not do any sign/zero extension. If the original vector is less
7283 : /// than 64 bits, an appropriate extension will be added after the load to
7284 : /// reach a total size of 64 bits. We have to add the extension separately
7285 : /// because ARM does not have a sign/zero extending load for vectors.
7286 59 : static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
7287 59 : EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
7288 :
7289 : // The load already has the right type.
7290 0 : if (ExtendedTy == LD->getMemoryVT())
7291 46 : return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
7292 46 : LD->getBasePtr(), LD->getPointerInfo(),
7293 138 : LD->getAlignment(), LD->getMemOperand()->getFlags());
7294 :
7295 : // We need to create a zextload/sextload. We cannot just create a load
7296 : // followed by a sext/zext node because LowerMUL is also run during normal
7297 : // operation legalization where we can't create illegal types.
7298 13 : return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
7299 13 : LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
7300 : LD->getMemoryVT(), LD->getAlignment(),
7301 39 : LD->getMemOperand()->getFlags());
7302 : }
7303 :
7304 : /// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
7305 : /// extending load, or BUILD_VECTOR with extended elements, return the
7306 : /// unextended value. The unextended vector should be 64 bits so that it can
7307 : /// be used as an operand to a VMULL instruction. If the original vector size
7308 : /// before extension is less than 64 bits, we add an extension to resize
7309 : /// the vector to 64 bits.
7310 106 : static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
7311 212 : if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
7312 : return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
7313 76 : N->getOperand(0)->getValueType(0),
7314 : N->getValueType(0),
7315 38 : N->getOpcode());
7316 :
7317 : if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
7318 : assert((ISD::isSEXTLoad(LD) || ISD::isZEXTLoad(LD)) &&
7319 : "Expected extending load");
7320 :
7321 59 : SDValue newLoad = SkipLoadExtensionForVMULL(LD, DAG);
7322 59 : DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), newLoad.getValue(1));
7323 : unsigned Opcode = ISD::isSEXTLoad(LD) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
7324 : SDValue extLoad =
7325 118 : DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad);
7326 59 : DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), extLoad);
7327 :
7328 59 : return newLoad;
7329 : }
7330 :
7331 : // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
7332 : // have been legalized as a BITCAST from v4i32.
7333 9 : if (N->getOpcode() == ISD::BITCAST) {
7334 4 : SDNode *BVN = N->getOperand(0).getNode();
7335 : assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
7336 : BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
7337 4 : unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
7338 : return DAG.getBuildVector(
7339 4 : MVT::v2i32, SDLoc(N),
7340 12 : {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)});
7341 : }
7342 : // Construct a new BUILD_VECTOR with elements truncated to half the size.
7343 : assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
7344 10 : EVT VT = N->getValueType(0);
7345 5 : unsigned EltSize = VT.getScalarSizeInBits() / 2;
7346 : unsigned NumElts = VT.getVectorNumElements();
7347 5 : MVT TruncVT = MVT::getIntegerVT(EltSize);
7348 : SmallVector<SDValue, 8> Ops;
7349 : SDLoc dl(N);
7350 33 : for (unsigned i = 0; i != NumElts; ++i) {
7351 28 : ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
7352 28 : const APInt &CInt = C->getAPIntValue();
7353 : // Element types smaller than 32 bits are not legal, so use i32 elements.
7354 : // The values are implicitly truncated so sext vs. zext doesn't matter.
7355 28 : Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
7356 : }
7357 5 : return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
7358 : }
7359 :
7360 16 : static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
7361 16 : unsigned Opcode = N->getOpcode();
7362 16 : if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
7363 0 : SDNode *N0 = N->getOperand(0).getNode();
7364 0 : SDNode *N1 = N->getOperand(1).getNode();
7365 0 : return N0->hasOneUse() && N1->hasOneUse() &&
7366 0 : isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
7367 : }
7368 : return false;
7369 : }
7370 :
7371 6 : static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
7372 6 : unsigned Opcode = N->getOpcode();
7373 6 : if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
7374 0 : SDNode *N0 = N->getOperand(0).getNode();
7375 0 : SDNode *N1 = N->getOperand(1).getNode();
7376 0 : return N0->hasOneUse() && N1->hasOneUse() &&
7377 0 : isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
7378 : }
7379 : return false;
7380 : }
7381 :
7382 177 : static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
7383 : // Multiplications are only custom-lowered for 128-bit vectors so that
7384 : // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
7385 177 : EVT VT = Op.getValueType();
7386 : assert(VT.is128BitVector() && VT.isInteger() &&
7387 : "unexpected type for custom-lowering ISD::MUL");
7388 177 : SDNode *N0 = Op.getOperand(0).getNode();
7389 177 : SDNode *N1 = Op.getOperand(1).getNode();
7390 : unsigned NewOpc = 0;
7391 : bool isMLA = false;
7392 177 : bool isN0SExt = isSignExtended(N0, DAG);
7393 177 : bool isN1SExt = isSignExtended(N1, DAG);
7394 177 : if (isN0SExt && isN1SExt)
7395 : NewOpc = ARMISD::VMULLs;
7396 : else {
7397 146 : bool isN0ZExt = isZeroExtended(N0, DAG);
7398 146 : bool isN1ZExt = isZeroExtended(N1, DAG);
7399 146 : if (isN0ZExt && isN1ZExt)
7400 : NewOpc = ARMISD::VMULLu;
7401 124 : else if (isN1SExt || isN1ZExt) {
7402 : // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
7403 : // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
7404 18 : if (isN1SExt && isAddSubSExt(N0, DAG)) {
7405 : NewOpc = ARMISD::VMULLs;
7406 : isMLA = true;
7407 18 : } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
7408 : NewOpc = ARMISD::VMULLu;
7409 : isMLA = true;
7410 18 : } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
7411 : std::swap(N0, N1);
7412 : NewOpc = ARMISD::VMULLu;
7413 : isMLA = true;
7414 : }
7415 : }
7416 :
7417 146 : if (!NewOpc) {
7418 : if (VT == MVT::v2i64)
7419 : // Fall through to expand this. It is not legal.
7420 0 : return SDValue();
7421 : else
7422 : // Other vector multiplications are legal.
7423 124 : return Op;
7424 : }
7425 : }
7426 :
7427 : // Legalize to a VMULL instruction.
7428 : SDLoc DL(Op);
7429 53 : SDValue Op0;
7430 53 : SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
7431 53 : if (!isMLA) {
7432 53 : Op0 = SkipExtensionForVMULL(N0, DAG);
7433 : assert(Op0.getValueType().is64BitVector() &&
7434 : Op1.getValueType().is64BitVector() &&
7435 : "unexpected types for extended operands to VMULL");
7436 53 : return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
7437 : }
7438 :
7439 : // Optimize (zext A + zext B) * C to (VMULL A, C) + (VMULL B, C) during
7440 : // isel lowering to take advantage of no-stall back-to-back vmul + vmla.
7441 : // vmull q0, d4, d6
7442 : // vmlal q0, d5, d6
7443 : // is faster than
7444 : // vaddl q0, d4, d5
7445 : // vmovl q1, d6
7446 : // vmul q0, q0, q1
7447 0 : SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
7448 0 : SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
7449 0 : EVT Op1VT = Op1.getValueType();
7450 : return DAG.getNode(N0->getOpcode(), DL, VT,
7451 : DAG.getNode(NewOpc, DL, VT,
7452 : DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
7453 : DAG.getNode(NewOpc, DL, VT,
7454 0 : DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
7455 : }
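// The MLA rewrite above relies on distributivity after widening:
//   (ext(A) + ext(B)) * ext(C) == ext(A)*ext(C) + ext(B)*ext(C)
// A scalar 16-to-32-bit model of the two instruction sequences compared in
// the comment above; the function names are hypothetical.
#include <cstdint>

static uint32_t addThenMul(uint16_t A, uint16_t B, uint16_t C) {
  // vaddl + vmovl + vmul: widen, add, then multiply.
  return (uint32_t(A) + uint32_t(B)) * uint32_t(C);
}

static uint32_t vmullThenVmlal(uint16_t A, uint16_t B, uint16_t C) {
  // vmull + vmlal: two widening multiplies accumulated, same result.
  return uint32_t(A) * uint32_t(C) + uint32_t(B) * uint32_t(C);
}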
7456 :
7457 4 : static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl,
7458 : SelectionDAG &DAG) {
7459 : // TODO: Should this propagate fast-math-flags?
7460 :
7461 : // Convert to float
7462 : // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
7463 : // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
7464 4 : X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
7465 4 : Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
7466 4 : X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
7467 4 : Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
7468 : // Get reciprocal estimate.
7469 : // float4 recip = vrecpeq_f32(yf);
7470 4 : Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7471 : DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
7472 4 : Y);
7473 : // Because char has a smaller range than uchar, we can actually get away
7474 : // without any newton steps. This requires that we use a weird bias
7475 : // of 0xb000, however (again, this has been exhaustively tested).
7476 : // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
7477 4 : X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
7478 4 : X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
7479 4 : Y = DAG.getConstant(0xb000, dl, MVT::v4i32);
7480 4 : X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
7481 4 : X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
7482 : // Convert back to short.
7483 4 : X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
7484 4 : X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
7485 4 : return X;
7486 : }
7487 :
7488 8 : static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl,
7489 : SelectionDAG &DAG) {
7490 : // TODO: Should this propagate fast-math-flags?
7491 :
7492 : SDValue N2;
7493 : // Convert to float.
7494 : // float4 yf = vcvt_f32_s32(vmovl_s16(y));
7495 : // float4 xf = vcvt_f32_s32(vmovl_s16(x));
7496 8 : N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
7497 8 : N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
7498 8 : N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
7499 8 : N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
7500 :
7501 : // Use reciprocal estimate and one refinement step.
7502 : // float4 recip = vrecpeq_f32(yf);
7503 : // recip *= vrecpsq_f32(yf, recip);
7504 8 : N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7505 : DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
7506 8 : N1);
7507 8 : N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7508 : DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
7509 8 : N1, N2);
7510 8 : N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
7511 : // Because short has a smaller range than ushort, we can actually get away
7512 : // with only a single newton step. This requires that we use a weird bias
7513 : // of 89, however (again, this has been exhaustively tested).
7514 : // float4 result = as_float4(as_int4(xf*recip) + 0x89);
7515 8 : N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
7516 8 : N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
7517 8 : N1 = DAG.getConstant(0x89, dl, MVT::v4i32);
7518 8 : N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
7519 8 : N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
7520 : // Convert back to integer and return.
7521 : // return vmovn_s32(vcvt_s32_f32(result));
7522 8 : N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
7523 8 : N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
7524 8 : return N0;
7525 : }
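// A scalar model of the reciprocal refinement used above: VRECPE gives a
// rough estimate R0 of 1/Y, and each VRECPS step computes (2 - Y*R), so
// multiplying by it performs one Newton-Raphson iteration. The empirical
// bias constants (0x89, 0xb000) are not modelled; names are hypothetical.
static float vrecpsStep(float Y, float R) {
  return 2.0f - Y * R; // What the vrecps intrinsic computes per lane.
}

static float refineReciprocal(float Y, float R0) {
  return R0 * vrecpsStep(Y, R0); // R1 = R0 * (2 - Y*R0), one Newton step.
}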
7526 :
7527 0 : static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
7528 : EVT VT = Op.getValueType();
7529 : assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
7530 : "unexpected type for custom-lowering ISD::SDIV");
7531 :
7532 0 : SDLoc dl(Op);
7533 0 : SDValue N0 = Op.getOperand(0);
7534 0 : SDValue N1 = Op.getOperand(1);
7535 0 : SDValue N2, N3;
7536 :
7537 0 : if (VT == MVT::v8i8) {
7538 0 : N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
7539 0 : N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
7540 :
7541 0 : N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7542 0 : DAG.getIntPtrConstant(4, dl));
7543 0 : N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7544 0 : DAG.getIntPtrConstant(4, dl));
7545 0 : N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7546 0 : DAG.getIntPtrConstant(0, dl));
7547 0 : N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7548 0 : DAG.getIntPtrConstant(0, dl));
7549 :
7550 0 : N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
7551 0 : N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
7552 :
7553 0 : N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
7554 0 : N0 = LowerCONCAT_VECTORS(N0, DAG);
7555 :
7556 0 : N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
7557 0 : return N0;
7558 : }
7559 0 : return LowerSDIV_v4i16(N0, N1, dl, DAG);
7560 : }
7561 :
7562 0 : static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
7563 : // TODO: Should this propagate fast-math-flags?
7564 : EVT VT = Op.getValueType();
7565 : assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
7566 : "unexpected type for custom-lowering ISD::UDIV");
7567 :
7568 0 : SDLoc dl(Op);
7569 0 : SDValue N0 = Op.getOperand(0);
7570 0 : SDValue N1 = Op.getOperand(1);
7571 0 : SDValue N2, N3;
7572 :
7573 0 : if (VT == MVT::v8i8) {
7574 0 : N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
7575 0 : N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
7576 :
7577 0 : N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7578 0 : DAG.getIntPtrConstant(4, dl));
7579 0 : N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7580 0 : DAG.getIntPtrConstant(4, dl));
7581 0 : N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7582 0 : DAG.getIntPtrConstant(0, dl));
7583 0 : N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7584 0 : DAG.getIntPtrConstant(0, dl));
7585 :
7586 0 : N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
7587 0 : N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
7588 :
7589 0 : N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
7590 0 : N0 = LowerCONCAT_VECTORS(N0, DAG);
7591 :
7592 0 : N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
7593 : DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl,
7594 : MVT::i32),
7595 0 : N0);
7596 0 : return N0;
7597 : }
7598 :
7599 : // v4i16 udiv ... Convert to float.
7600 : // float4 yf = vcvt_f32_s32(vmovl_u16(y));
7601 : // float4 xf = vcvt_f32_s32(vmovl_u16(x));
7602 0 : N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
7603 0 : N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
7604 0 : N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
7605 0 : SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
7606 :
7607 : // Use reciprocal estimate and two refinement steps.
7608 : // float4 recip = vrecpeq_f32(yf);
7609 : // recip *= vrecpsq_f32(yf, recip);
7610 : // recip *= vrecpsq_f32(yf, recip);
7611 0 : N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7612 : DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
7613 0 : BN1);
7614 0 : N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7615 : DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
7616 0 : BN1, N2);
7617 0 : N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
7618 0 : N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7619 : DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
7620 0 : BN1, N2);
7621 0 : N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
7622 : // Simply multiplying by the reciprocal estimate can leave us a few ulps
7623 : // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
7624 : // and that it will never cause us to return an answer too large).
7625 : // float4 result = as_float4(as_int4(xf*recip) + 2);
7626 0 : N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
7627 0 : N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
7628 0 : N1 = DAG.getConstant(2, dl, MVT::v4i32);
7629 0 : N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
7630 0 : N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
7631 : // Convert back to integer and return.
7632 : // return vmovn_u32(vcvt_s32_f32(result));
7633 0 : N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
7634 0 : N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
7635 0 : return N0;
7636 : }
7637 :
7638 0 : static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
7639 : SDNode *N = Op.getNode();
7640 0 : EVT VT = N->getValueType(0);
7641 0 : SDVTList VTs = DAG.getVTList(VT, MVT::i32);
7642 :
7643 0 : SDValue Carry = Op.getOperand(2);
7644 :
7645 0 : SDLoc DL(Op);
7646 :
7647 0 : SDValue Result;
7648 0 : if (Op.getOpcode() == ISD::ADDCARRY) {
7649 : // This converts the boolean value carry into the carry flag.
7650 0 : Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
7651 :
7652 : // Do the addition proper using the carry flag we wanted.
7653 0 : Result = DAG.getNode(ARMISD::ADDE, DL, VTs, Op.getOperand(0),
7654 0 : Op.getOperand(1), Carry);
7655 :
7656 : // Now convert the carry flag into a boolean value.
7657 0 : Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
7658 : } else {
7659 : // ARMISD::SUBE expects a carry, not a borrow like ISD::SUBCARRY, so we
7660 : // have to invert the incoming borrow first.
7661 0 : Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
7662 0 : DAG.getConstant(1, DL, MVT::i32), Carry);
7663 : // This converts the boolean value carry into the carry flag.
7664 0 : Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
7665 :
7666 : // Do the subtraction proper using the carry flag we wanted.
7667 0 : Result = DAG.getNode(ARMISD::SUBE, DL, VTs, Op.getOperand(0),
7668 0 : Op.getOperand(1), Carry);
7669 :
7670 : // Now convert the carry flag into a boolean value.
7671 0 : Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
7672 : // But the carry returned by ARMISD::SUBE is not a borrow as expected
7673 : // by ISD::SUBCARRY, so compute 1 - C.
7674 0 : Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
7675 0 : DAG.getConstant(1, DL, MVT::i32), Carry);
7676 : }
7677 :
7678 : // Return both values.
7679 0 : return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Carry);
7680 : }
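// A scalar model of the carry/borrow mismatch handled above: ISD::SUBCARRY
// consumes and produces a borrow, while the ARM-style ARMISD::SUBE works
// with a carry, and for subtraction carry == 1 - borrow in both directions.
// The function name is hypothetical.
#include <cstdint>

static uint32_t subWithBorrow(uint32_t A, uint32_t B, uint32_t BorrowIn,
                              uint32_t &BorrowOut) {
  uint32_t CarryIn = 1u - BorrowIn;                     // Borrow -> carry.
  uint64_t Wide = uint64_t(A) + uint64_t(~B) + CarryIn; // SBC-style A-B-borrow.
  BorrowOut = 1u - uint32_t(Wide >> 32);                // Carry -> borrow.
  return uint32_t(Wide);
}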
7681 :
7682 5 : SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
7683 : assert(Subtarget->isTargetDarwin());
7684 :
7685 : // For iOS, we want to call an alternative entry point: __sincos_stret;
7686 : // under the APCS ABI the return values are passed via sret.
7687 : SDLoc dl(Op);
7688 5 : SDValue Arg = Op.getOperand(0);
7689 5 : EVT ArgVT = Arg.getValueType();
7690 5 : Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
7691 5 : auto PtrVT = getPointerTy(DAG.getDataLayout());
7692 :
7693 5 : MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
7694 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7695 :
7696 : // Pair of floats / doubles used to pass the result.
7697 5 : Type *RetTy = StructType::get(ArgTy, ArgTy);
7698 5 : auto &DL = DAG.getDataLayout();
7699 :
7700 : ArgListTy Args;
7701 5 : bool ShouldUseSRet = Subtarget->isAPCS_ABI();
7702 : SDValue SRet;
7703 5 : if (ShouldUseSRet) {
7704 : // Create stack object for sret.
7705 2 : const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
7706 2 : const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy);
7707 2 : int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
7708 2 : SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL));
7709 :
7710 : ArgListEntry Entry;
7711 2 : Entry.Node = SRet;
7712 2 : Entry.Ty = RetTy->getPointerTo();
7713 2 : Entry.IsSExt = false;
7714 2 : Entry.IsZExt = false;
7715 2 : Entry.IsSRet = true;
7716 2 : Args.push_back(Entry);
7717 2 : RetTy = Type::getVoidTy(*DAG.getContext());
7718 : }
7719 :
7720 : ArgListEntry Entry;
7721 5 : Entry.Node = Arg;
7722 5 : Entry.Ty = ArgTy;
7723 : Entry.IsSExt = false;
7724 : Entry.IsZExt = false;
7725 5 : Args.push_back(Entry);
7726 :
7727 : RTLIB::Libcall LC =
7728 : (ArgVT == MVT::f64) ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32;
7729 : const char *LibcallName = getLibcallName(LC);
7730 : CallingConv::ID CC = getLibcallCallingConv(LC);
7731 5 : SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
7732 :
7733 10 : TargetLowering::CallLoweringInfo CLI(DAG);
7734 : CLI.setDebugLoc(dl)
7735 5 : .setChain(DAG.getEntryNode())
7736 5 : .setCallee(CC, RetTy, Callee, std::move(Args))
7737 : .setDiscardResult(ShouldUseSRet);
7738 5 : std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
7739 :
7740 5 : if (!ShouldUseSRet)
7741 3 : return CallResult.first;
7742 :
7743 : SDValue LoadSin =
7744 2 : DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo());
7745 :
7746 : // Address of cos field.
7747 : SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
7748 2 : DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
7749 : SDValue LoadCos =
7750 2 : DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo());
7751 :
7752 2 : SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
7753 : return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
7754 2 : LoadSin.getValue(0), LoadCos.getValue(0));
7755 : }
7756 :
7757 27 : SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
7758 : bool Signed,
7759 : SDValue &Chain) const {
7760 27 : EVT VT = Op.getValueType();
7761 : assert((VT == MVT::i32 || VT == MVT::i64) &&
7762 : "unexpected type for custom lowering DIV");
7763 : SDLoc dl(Op);
7764 :
7765 27 : const auto &DL = DAG.getDataLayout();
7766 : const auto &TLI = DAG.getTargetLoweringInfo();
7767 :
7768 : const char *Name = nullptr;
7769 27 : if (Signed)
7770 : Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64";
7771 : else
7772 : Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64";
7773 :
7774 27 : SDValue ES = DAG.getExternalSymbol(Name, TLI.getPointerTy(DL));
7775 :
7776 : ARMTargetLowering::ArgListTy Args;
7777 :
7778 81 : for (auto AI : {1, 0}) {
7779 : ArgListEntry Arg;
7780 54 : Arg.Node = Op.getOperand(AI);
7781 108 : Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext());
7782 54 : Args.push_back(Arg);
7783 : }
7784 :
7785 27 : CallLoweringInfo CLI(DAG);
7786 : CLI.setDebugLoc(dl)
7787 27 : .setChain(Chain)
7788 27 : .setCallee(CallingConv::ARM_AAPCS_VFP, VT.getTypeForEVT(*DAG.getContext()),
7789 27 : ES, std::move(Args));
7790 :
7791 27 : return LowerCallTo(CLI).first;
7792 : }
7793 :
7794 23 : SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
7795 : bool Signed) const {
7796 : assert(Op.getValueType() == MVT::i32 &&
7797 : "unexpected type for custom lowering DIV");
7798 : SDLoc dl(Op);
7799 :
7800 : SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
7801 23 : DAG.getEntryNode(), Op.getOperand(1));
7802 :
7803 23 : return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
7804 : }
7805 :
7806 45 : static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain) {
7807 : SDLoc DL(N);
7808 45 : SDValue Op = N->getOperand(1);
7809 45 : if (N->getValueType(0) == MVT::i32)
7810 39 : return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op);
7811 : SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
7812 6 : DAG.getConstant(0, DL, MVT::i32));
7813 : SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
7814 6 : DAG.getConstant(1, DL, MVT::i32));
7815 : return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain,
7816 6 : DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi));
7817 : }
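// A scalar model of the zero check fed to WIN__DBZCHK above: for the i64
// case only "is the denominator zero?" matters, so the low and high words
// are simply ORed together. The function name is hypothetical.
#include <cstdint>

static bool denominatorWouldTrap(uint64_t Denominator) {
  uint32_t Lo = uint32_t(Denominator);
  uint32_t Hi = uint32_t(Denominator >> 32);
  return (Lo | Hi) == 0; // Zero iff the full 64-bit value is zero.
}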
7818 :
7819 4 : void ARMTargetLowering::ExpandDIV_Windows(
7820 : SDValue Op, SelectionDAG &DAG, bool Signed,
7821 : SmallVectorImpl<SDValue> &Results) const {
7822 4 : const auto &DL = DAG.getDataLayout();
7823 : const auto &TLI = DAG.getTargetLoweringInfo();
7824 :
7825 : assert(Op.getValueType() == MVT::i64 &&
7826 : "unexpected type for custom lowering DIV");
7827 : SDLoc dl(Op);
7828 :
7829 4 : SDValue DBZCHK = WinDBZCheckDenominator(DAG, Op.getNode(), DAG.getEntryNode());
7830 :
7831 4 : SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
7832 :
7833 4 : SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);
7834 : SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,
7835 4 : DAG.getConstant(32, dl, TLI.getPointerTy(DL)));
7836 4 : Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);
7837 :
7838 4 : Results.push_back(Lower);
7839 4 : Results.push_back(Upper);
7840 4 : }
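
The i64 libcall result is handed back to the legalizer as two i32 values: a truncate for the low half and a 32-bit logical shift right (then truncate) for the high half. A plain C++ sketch of that split, assuming ordinary two's-complement integers:

#include <cassert>
#include <cstdint>
#include <utility>

// Lower = ISD::TRUNCATE of the result, Upper = ISD::SRL by 32 then TRUNCATE.
static std::pair<uint32_t, uint32_t> splitDivResult(uint64_t Result) {
  uint32_t Lower = static_cast<uint32_t>(Result);
  uint32_t Upper = static_cast<uint32_t>(Result >> 32);
  return {Lower, Upper};
}

int main() {
  auto Halves = splitDivResult(0x0000000500000007ull);
  assert(Halves.first == 7 && Halves.second == 5);
  return 0;
}
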
7841 :
7842 0 : static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
7843 24 : if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getOrdering()))
7844 : // Acquire/Release load/store is not legal for targets without a dmb or
7845 : // equivalent available.
7846 0 : return SDValue();
7847 :
7848 : // Monotonic load/store is legal for all targets.
7849 0 : return Op;
7850 : }
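
In source-level terms, the distinction made here is between relaxed ("monotonic" in LLVM IR) atomics, which can stay as plain loads and stores, and acquire/release ones, which need a dmb or equivalent and are therefore not claimed as legal by this hook. A hedged standard-C++ illustration of the two ordering classes:

#include <atomic>
#include <cassert>

int main() {
  std::atomic<int> X{0};
  X.store(1, std::memory_order_relaxed);      // "monotonic": plain str is enough
  int A = X.load(std::memory_order_relaxed);  // "monotonic": plain ldr is enough
  X.store(2, std::memory_order_release);      // stronger: needs a barrier
  int B = X.load(std::memory_order_acquire);  // stronger: needs a barrier
  assert(A == 1 && B == 2);
  return 0;
}
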
7851 :
7852 0 : static void ReplaceREADCYCLECOUNTER(SDNode *N,
7853 : SmallVectorImpl<SDValue> &Results,
7854 : SelectionDAG &DAG,
7855 : const ARMSubtarget *Subtarget) {
7856 : SDLoc DL(N);
7857 : // Under Power Management extensions, the cycle-count is:
7858 : // mrc p15, #0, <Rt>, c9, c13, #0
7859 0 : SDValue Ops[] = { N->getOperand(0), // Chain
7860 0 : DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
7861 0 : DAG.getConstant(15, DL, MVT::i32),
7862 0 : DAG.getConstant(0, DL, MVT::i32),
7863 0 : DAG.getConstant(9, DL, MVT::i32),
7864 0 : DAG.getConstant(13, DL, MVT::i32),
7865 0 : DAG.getConstant(0, DL, MVT::i32)
7866 0 : };
7867 :
7868 : SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
7869 0 : DAG.getVTList(MVT::i32, MVT::Other), Ops);
7870 0 : Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
7871 0 : DAG.getConstant(0, DL, MVT::i32)));
7872 0 : Results.push_back(Cycles32.getValue(1));
7873 0 : }
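
The replacement reads the 32-bit PMU cycle counter with the coprocessor access shown in the comment and widens it to i64 with a zero upper half. Expressed as host code, the equivalent read looks roughly like the sketch below; the inline-asm syntax is GCC/Clang specific and assumes an ARMv7 target on which user-mode access to the counter has been enabled.

#include <cstdint>

// Sketch of the lowered sequence: mrc p15, #0, <Rt>, c9, c13, #0 reads the
// 32-bit cycle count, and BUILD_PAIR with a zero constant widens it to i64.
static uint64_t readCycleCounterSketch() {
#if defined(__arm__)
  uint32_t Cycles32;
  asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(Cycles32));
  return static_cast<uint64_t>(Cycles32); // upper 32 bits are zero
#else
  return 0; // placeholder when not compiling for 32-bit ARM
#endif
}

int main() {
  volatile uint64_t Cycles = readCycleCounterSketch();
  (void)Cycles;
  return 0;
}
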
7874 :
7875 14 : static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
7876 14 : SDLoc dl(V.getNode());
7877 14 : SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i32);
7878 : SDValue VHi = DAG.getAnyExtOrTrunc(
7879 : DAG.getNode(ISD::SRL, dl, MVT::i64, V, DAG.getConstant(32, dl, MVT::i32)),
7880 14 : dl, MVT::i32);
7881 14 : bool isBigEndian = DAG.getDataLayout().isBigEndian();
7882 14 : if (isBigEndian)
7883 : std::swap (VLo, VHi);
7884 : SDValue RegClass =
7885 14 : DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
7886 14 : SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
7887 14 : SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
7888 14 : const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
7889 : return SDValue(
7890 14 : DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
7891 : }
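
The pair is always built as (gsub_0, gsub_1), so the low and high halves are swapped on big-endian targets to keep the architectural register order. A host-side sketch of that half ordering (illustration only):

#include <cassert>
#include <cstdint>
#include <utility>

// Returns the (gsub_0, gsub_1) halves the REG_SEQUENCE above would receive.
static std::pair<uint32_t, uint32_t> gprPairHalves(uint64_t V, bool IsBigEndian) {
  uint32_t Lo = static_cast<uint32_t>(V);        // any-extend/truncate of V
  uint32_t Hi = static_cast<uint32_t>(V >> 32);  // srl 32, then truncate
  if (IsBigEndian)
    std::swap(Lo, Hi);                           // gsub_0 holds the high half
  return {Lo, Hi};
}

int main() {
  assert(gprPairHalves(0x1122334455667788ull, /*IsBigEndian=*/false).first == 0x55667788u);
  assert(gprPairHalves(0x1122334455667788ull, /*IsBigEndian=*/true).first == 0x11223344u);
  return 0;
}
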
7892 :
7893 7 : static void ReplaceCMP_SWAP_64Results(SDNode *N,
7894 : SmallVectorImpl<SDValue> & Results,
7895 : SelectionDAG &DAG) {
7896 : assert(N->getValueType(0) == MVT::i64 &&
7897 : "AtomicCmpSwap on types less than 64 should be legal");
7898 7 : SDValue Ops[] = {N->getOperand(1),
7899 7 : createGPRPairNode(DAG, N->getOperand(2)),
7900 14 : createGPRPairNode(DAG, N->getOperand(3)),
7901 14 : N->getOperand(0)};
7902 14 : SDNode *CmpSwap = DAG.getMachineNode(
7903 7 : ARM::CMP_SWAP_64, SDLoc(N),
7904 : DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops);
7905 :
7906 7 : MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
7907 14 : DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
7908 :
7909 7 : bool isBigEndian = DAG.getDataLayout().isBigEndian();
7910 :
7911 14 : Results.push_back(
7912 7 : DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_1 : ARM::gsub_0,
7913 20 : SDLoc(N), MVT::i32, SDValue(CmpSwap, 0)));
7914 7 : Results.push_back(
7915 7 : DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_0 : ARM::gsub_1,
7916 20 : SDLoc(N), MVT::i32, SDValue(CmpSwap, 0)));
7917 7 : Results.push_back(SDValue(CmpSwap, 2));
7918 7 : }
7919 :
7920 0 : static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget,
7921 : SelectionDAG &DAG) {
7922 0 : const auto &TLI = DAG.getTargetLoweringInfo();
7923 :
7924 : assert(Subtarget.getTargetTriple().isOSMSVCRT() &&
7925 : "Custom lowering is MSVCRT specific!");
7926 :
7927 0 : SDLoc dl(Op);
7928 0 : SDValue Val = Op.getOperand(0);
7929 : MVT Ty = Val->getSimpleValueType(0);
7930 0 : SDValue Exponent = DAG.getNode(ISD::SINT_TO_FP, dl, Ty, Op.getOperand(1));
7931 : SDValue Callee = DAG.getExternalSymbol(Ty == MVT::f32 ? "powf" : "pow",
7932 0 : TLI.getPointerTy(DAG.getDataLayout()));
7933 :
7934 : TargetLowering::ArgListTy Args;
7935 : TargetLowering::ArgListEntry Entry;
7936 :
7937 0 : Entry.Node = Val;
7938 0 : Entry.Ty = Val.getValueType().getTypeForEVT(*DAG.getContext());
7939 0 : Entry.IsZExt = true;
7940 0 : Args.push_back(Entry);
7941 :
7942 0 : Entry.Node = Exponent;
7943 0 : Entry.Ty = Exponent.getValueType().getTypeForEVT(*DAG.getContext());
7944 0 : Entry.IsZExt = true;
7945 0 : Args.push_back(Entry);
7946 :
7947 0 : Type *LCRTy = Val.getValueType().getTypeForEVT(*DAG.getContext());
7948 :
7949 : // The in-chain to the call is the entry node. If we are emitting a
7950 : // tail call, the chain will be mutated if the node has a non-entry input
7951 : // chain.
7952 : SDValue InChain = DAG.getEntryNode();
7953 0 : SDValue TCChain = InChain;
7954 :
7955 0 : const Function &F = DAG.getMachineFunction().getFunction();
7956 0 : bool IsTC = TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
7957 : F.getReturnType() == LCRTy;
7958 : if (IsTC)
7959 0 : InChain = TCChain;
7960 :
7961 0 : TargetLowering::CallLoweringInfo CLI(DAG);
7962 : CLI.setDebugLoc(dl)
7963 0 : .setChain(InChain)
7964 0 : .setCallee(CallingConv::ARM_AAPCS_VFP, LCRTy, Callee, std::move(Args))
7965 : .setTailCall(IsTC);
7966 0 : std::pair<SDValue, SDValue> CI = TLI.LowerCallTo(CLI);
7967 :
7968 : // Return the chain (the DAG root) if it is a tail call
7969 0 : return !CI.second.getNode() ? DAG.getRoot() : CI.first;
7970 : }
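
On MSVCRT targets there is no powi routine, so FPOWI is lowered by converting the integer exponent to floating point and calling the ordinary pow/powf libcall. A trivial sketch of the resulting computation:

#include <cassert>
#include <cmath>

// fpowiSketch mirrors the emitted call: SINT_TO_FP on the exponent, then the
// "pow" libcall ("powf" would be used for f32 values).
static double fpowiSketch(double Val, int Exponent) {
  return std::pow(Val, static_cast<double>(Exponent));
}

int main() {
  assert(std::fabs(fpowiSketch(2.0, 10) - 1024.0) < 1e-9);
  return 0;
}
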
7971 :
7972 23027 : SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
7973 : LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump());
7974 23027 : switch (Op.getOpcode()) {
7975 0 : default: llvm_unreachable("Don't know how to custom lower this!");
7976 2 : case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
7977 1266 : case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
7978 37 : case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
7979 2179 : case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
7980 219 : case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
7981 140 : case ISD::SELECT: return LowerSELECT(Op, DAG);
7982 1873 : case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
7983 281 : case ISD::BRCOND: return LowerBRCOND(Op, DAG);
7984 1955 : case ISD::BR_CC: return LowerBR_CC(Op, DAG);
7985 38 : case ISD::BR_JT: return LowerBR_JT(Op, DAG);
7986 34 : case ISD::VASTART: return LowerVASTART(Op, DAG);
7987 21 : case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
7988 32 : case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
7989 186 : case ISD::SINT_TO_FP:
7990 186 : case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
7991 117 : case ISD::FP_TO_SINT:
7992 117 : case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
7993 23 : case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
7994 12 : case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
7995 44 : case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
7996 8 : case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
7997 9 : case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
7998 32 : case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
7999 2477 : case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
8000 2477 : Subtarget);
8001 788 : case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG, Subtarget);
8002 81 : case ISD::SHL:
8003 : case ISD::SRL:
8004 81 : case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
8005 0 : case ISD::SREM: return LowerREM(Op.getNode(), DAG);
8006 0 : case ISD::UREM: return LowerREM(Op.getNode(), DAG);
8007 3 : case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
8008 6 : case ISD::SRL_PARTS:
8009 6 : case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
8010 48 : case ISD::CTTZ:
8011 48 : case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
8012 19 : case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
8013 146 : case ISD::SETCC: return LowerVSETCC(Op, DAG);
8014 87 : case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG);
8015 1317 : case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
8016 1008 : case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
8017 375 : case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
8018 1876 : case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
8019 5529 : case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
8020 0 : case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
8021 3 : case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
8022 177 : case ISD::MUL: return LowerMUL(Op, DAG);
8023 25 : case ISD::SDIV:
8024 71 : if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
8025 19 : return LowerDIV_Windows(Op, DAG, /* Signed */ true);
8026 6 : return LowerSDIV(Op, DAG);
8027 9 : case ISD::UDIV:
8028 24 : if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
8029 4 : return LowerDIV_Windows(Op, DAG, /* Signed */ false);
8030 5 : return LowerUDIV(Op, DAG);
8031 565 : case ISD::ADDCARRY:
8032 565 : case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG);
8033 18 : case ISD::SADDO:
8034 : case ISD::SSUBO:
8035 18 : return LowerSignedALUO(Op, DAG);
8036 653 : case ISD::UADDO:
8037 : case ISD::USUBO:
8038 653 : return LowerUnsignedALUO(Op, DAG);
8039 24 : case ISD::ATOMIC_LOAD:
8040 24 : case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
8041 5 : case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
8042 191 : case ISD::SDIVREM:
8043 191 : case ISD::UDIVREM: return LowerDivRem(Op, DAG);
8044 8 : case ISD::DYNAMIC_STACKALLOC:
8045 16 : if (Subtarget->isTargetWindows())
8046 8 : return LowerDYNAMIC_STACKALLOC(Op, DAG);
8047 0 : llvm_unreachable("Don't know how to custom lower this!");
8048 6 : case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
8049 9 : case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
8050 4 : case ISD::FPOWI: return LowerFPOWI(Op, *Subtarget, DAG);
8051 0 : case ARMISD::WIN__DBZCHK: return SDValue();
8052 : }
8053 : }
8054 :
8055 24 : static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl<SDValue> &Results,
8056 : SelectionDAG &DAG) {
8057 48 : unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
8058 : unsigned Opc = 0;
8059 24 : if (IntNo == Intrinsic::arm_smlald)
8060 : Opc = ARMISD::SMLALD;
8061 18 : else if (IntNo == Intrinsic::arm_smlaldx)
8062 : Opc = ARMISD::SMLALDX;
8063 12 : else if (IntNo == Intrinsic::arm_smlsld)
8064 : Opc = ARMISD::SMLSLD;
8065 6 : else if (IntNo == Intrinsic::arm_smlsldx)
8066 : Opc = ARMISD::SMLSLDX;
8067 : else
8068 0 : return;
8069 :
8070 : SDLoc dl(N);
8071 : SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
8072 24 : N->getOperand(3),
8073 24 : DAG.getConstant(0, dl, MVT::i32));
8074 : SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
8075 24 : N->getOperand(3),
8076 24 : DAG.getConstant(1, dl, MVT::i32));
8077 :
8078 : SDValue LongMul = DAG.getNode(Opc, dl,
8079 : DAG.getVTList(MVT::i32, MVT::i32),
8080 24 : N->getOperand(1), N->getOperand(2),
8081 24 : Lo, Hi);
8082 24 : Results.push_back(LongMul.getValue(0));
8083 24 : Results.push_back(LongMul.getValue(1));
8084 : }
8085 :
8086 : /// ReplaceNodeResults - Replace the results of node with an illegal result
8087 : /// type with new values built out of custom code.
8088 1045 : void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
8089 : SmallVectorImpl<SDValue> &Results,
8090 : SelectionDAG &DAG) const {
8091 1045 : SDValue Res;
8092 2090 : switch (N->getOpcode()) {
8093 0 : default:
8094 0 : llvm_unreachable("Don't know how to custom expand this!");
8095 2 : case ISD::READ_REGISTER:
8096 2 : ExpandREAD_REGISTER(N, Results, DAG);
8097 2 : break;
8098 133 : case ISD::BITCAST:
8099 133 : Res = ExpandBITCAST(N, DAG, Subtarget);
8100 133 : break;
8101 855 : case ISD::SRL:
8102 : case ISD::SRA:
8103 855 : Res = Expand64BitShift(N, DAG, Subtarget);
8104 855 : break;
8105 4 : case ISD::SREM:
8106 : case ISD::UREM:
8107 4 : Res = LowerREM(N, DAG);
8108 4 : break;
8109 : case ISD::SDIVREM:
8110 : case ISD::UDIVREM:
8111 14 : Res = LowerDivRem(SDValue(N, 0), DAG);
8112 : assert(Res.getNumOperands() == 2 && "DivRem needs two values");
8113 14 : Results.push_back(Res.getValue(0));
8114 14 : Results.push_back(Res.getValue(1));
8115 51 : return;
8116 2 : case ISD::READCYCLECOUNTER:
8117 2 : ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
8118 2 : return;
8119 4 : case ISD::UDIV:
8120 : case ISD::SDIV:
8121 : assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
8122 8 : return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
8123 4 : Results);
8124 7 : case ISD::ATOMIC_CMP_SWAP:
8125 7 : ReplaceCMP_SWAP_64Results(N, Results, DAG);
8126 7 : return;
8127 24 : case ISD::INTRINSIC_WO_CHAIN:
8128 24 : return ReplaceLongIntrinsic(N, Results, DAG);
8129 : }
8130 994 : if (Res.getNode())
8131 144 : Results.push_back(Res);
8132 : }
8133 :
8134 : //===----------------------------------------------------------------------===//
8135 : // ARM Scheduler Hooks
8136 : //===----------------------------------------------------------------------===//
8137 :
8138 : /// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
8139 : /// registers the function context.
8140 32 : void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
8141 : MachineBasicBlock *MBB,
8142 : MachineBasicBlock *DispatchBB,
8143 : int FI) const {
8144 : assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
8145 : "ROPI/RWPI not currently supported with SjLj");
8146 32 : const TargetInstrInfo *TII = Subtarget->getInstrInfo();
8147 : DebugLoc dl = MI.getDebugLoc();
8148 32 : MachineFunction *MF = MBB->getParent();
8149 32 : MachineRegisterInfo *MRI = &MF->getRegInfo();
8150 32 : MachineConstantPool *MCP = MF->getConstantPool();
8151 32 : ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
8152 32 : const Function &F = MF->getFunction();
8153 :
8154 32 : bool isThumb = Subtarget->isThumb();
8155 : bool isThumb2 = Subtarget->isThumb2();
8156 :
8157 : unsigned PCLabelId = AFI->createPICLabelUId();
8158 32 : unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
8159 : ARMConstantPoolValue *CPV =
8160 32 : ARMConstantPoolMBB::Create(F.getContext(), DispatchBB, PCLabelId, PCAdj);
8161 32 : unsigned CPI = MCP->getConstantPoolIndex(CPV, 4);
8162 :
8163 32 : const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass
8164 : : &ARM::GPRRegClass;
8165 :
8166 : // Grab constant pool and fixed stack memory operands.
8167 : MachineMemOperand *CPMMO =
8168 32 : MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
8169 : MachineMemOperand::MOLoad, 4, 4);
8170 :
8171 : MachineMemOperand *FIMMOSt =
8172 32 : MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
8173 : MachineMemOperand::MOStore, 4, 4);
8174 :
8175 : // Load the address of the dispatch MBB into the jump buffer.
8176 32 : if (isThumb2) {
8177 : // Incoming value: jbuf
8178 : // ldr.n r5, LCPI1_1
8179 : // orr r5, r5, #1
8180 : // add r5, pc
8181 : // str r5, [$jbuf, #+4] ; &jbuf[1]
8182 11 : unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
8183 22 : BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
8184 : .addConstantPoolIndex(CPI)
8185 : .addMemOperand(CPMMO)
8186 11 : .add(predOps(ARMCC::AL));
8187 : // Set the low bit because of thumb mode.
8188 11 : unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
8189 22 : BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
8190 11 : .addReg(NewVReg1, RegState::Kill)
8191 : .addImm(0x01)
8192 11 : .add(predOps(ARMCC::AL))
8193 11 : .add(condCodeOp());
8194 11 : unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
8195 22 : BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
8196 11 : .addReg(NewVReg2, RegState::Kill)
8197 11 : .addImm(PCLabelId);
8198 22 : BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
8199 11 : .addReg(NewVReg3, RegState::Kill)
8200 : .addFrameIndex(FI)
8201 : .addImm(36) // &jbuf[1] :: pc
8202 : .addMemOperand(FIMMOSt)
8203 11 : .add(predOps(ARMCC::AL));
8204 21 : } else if (isThumb) {
8205 : // Incoming value: jbuf
8206 : // ldr.n r1, LCPI1_4
8207 : // add r1, pc
8208 : // mov r2, #1
8209 : // orrs r1, r2
8210 : // add r2, $jbuf, #+4 ; &jbuf[1]
8211 : // str r1, [r2]
8212 4 : unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
8213 8 : BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
8214 : .addConstantPoolIndex(CPI)
8215 : .addMemOperand(CPMMO)
8216 4 : .add(predOps(ARMCC::AL));
8217 4 : unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
8218 12 : BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
8219 4 : .addReg(NewVReg1, RegState::Kill)
8220 4 : .addImm(PCLabelId);
8221 : // Set the low bit because of thumb mode.
8222 4 : unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
8223 8 : BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
8224 4 : .addReg(ARM::CPSR, RegState::Define)
8225 : .addImm(1)
8226 4 : .add(predOps(ARMCC::AL));
8227 4 : unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
8228 8 : BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
8229 4 : .addReg(ARM::CPSR, RegState::Define)
8230 4 : .addReg(NewVReg2, RegState::Kill)
8231 4 : .addReg(NewVReg3, RegState::Kill)
8232 4 : .add(predOps(ARMCC::AL));
8233 4 : unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
8234 8 : BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
8235 : .addFrameIndex(FI)
8236 : .addImm(36); // &jbuf[1] :: pc
8237 8 : BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
8238 4 : .addReg(NewVReg4, RegState::Kill)
8239 4 : .addReg(NewVReg5, RegState::Kill)
8240 : .addImm(0)
8241 : .addMemOperand(FIMMOSt)
8242 4 : .add(predOps(ARMCC::AL));
8243 : } else {
8244 : // Incoming value: jbuf
8245 : // ldr r1, LCPI1_1
8246 : // add r1, pc, r1
8247 : // str r1, [$jbuf, #+4] ; &jbuf[1]
8248 17 : unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
8249 34 : BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
8250 : .addConstantPoolIndex(CPI)
8251 : .addImm(0)
8252 : .addMemOperand(CPMMO)
8253 17 : .add(predOps(ARMCC::AL));
8254 17 : unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
8255 34 : BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
8256 17 : .addReg(NewVReg1, RegState::Kill)
8257 17 : .addImm(PCLabelId)
8258 17 : .add(predOps(ARMCC::AL));
8259 34 : BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
8260 17 : .addReg(NewVReg2, RegState::Kill)
8261 : .addFrameIndex(FI)
8262 : .addImm(36) // &jbuf[1] :: pc
8263 : .addMemOperand(FIMMOSt)
8264 17 : .add(predOps(ARMCC::AL));
8265 : }
8266 32 : }
8267 :
8268 32 : void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
8269 : MachineBasicBlock *MBB) const {
8270 32 : const TargetInstrInfo *TII = Subtarget->getInstrInfo();
8271 : DebugLoc dl = MI.getDebugLoc();
8272 32 : MachineFunction *MF = MBB->getParent();
8273 32 : MachineRegisterInfo *MRI = &MF->getRegInfo();
8274 32 : MachineFrameInfo &MFI = MF->getFrameInfo();
8275 32 : int FI = MFI.getFunctionContextIndex();
8276 :
8277 32 : const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
8278 : : &ARM::GPRnopcRegClass;
8279 :
8280 : // Get a mapping of the call site numbers to all of the landing pads they're
8281 : // associated with.
8282 : DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2>> CallSiteNumToLPad;
8283 32 : unsigned MaxCSNum = 0;
8284 390 : for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
8285 : ++BB) {
8286 358 : if (!BB->isEHPad()) continue;
8287 :
8288 : // FIXME: We should assert that the EH_LABEL is the first MI in the landing
8289 : // pad.
8290 : for (MachineBasicBlock::iterator
8291 124 : II = BB->begin(), IE = BB->end(); II != IE; ++II) {
8292 124 : if (!II->isEHLabel()) continue;
8293 :
8294 124 : MCSymbol *Sym = II->getOperand(0).getMCSymbol();
8295 124 : if (!MF->hasCallSiteLandingPad(Sym)) continue;
8296 :
8297 124 : SmallVectorImpl<unsigned> &CallSiteIdxs = MF->getCallSiteLandingPad(Sym);
8298 171 : for (SmallVectorImpl<unsigned>::iterator
8299 : CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
8300 295 : CSI != CSE; ++CSI) {
8301 171 : CallSiteNumToLPad[*CSI].push_back(&*BB);
8302 171 : MaxCSNum = std::max(MaxCSNum, *CSI);
8303 : }
8304 : break;
8305 : }
8306 : }
8307 :
8308 : // Get an ordered list of the machine basic blocks for the jump table.
8309 : std::vector<MachineBasicBlock*> LPadList;
8310 : SmallPtrSet<MachineBasicBlock*, 32> InvokeBBs;
8311 32 : LPadList.reserve(CallSiteNumToLPad.size());
8312 203 : for (unsigned I = 1; I <= MaxCSNum; ++I) {
8313 : SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
8314 171 : for (SmallVectorImpl<MachineBasicBlock*>::iterator
8315 342 : II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) {
8316 171 : LPadList.push_back(*II);
8317 171 : InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end());
8318 : }
8319 : }
8320 :
8321 : assert(!LPadList.empty() &&
8322 : "No landing pad destinations for the dispatch jump table!");
8323 :
8324 : // Create the jump table and associated information.
8325 : MachineJumpTableInfo *JTI =
8326 32 : MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
8327 32 : unsigned MJTI = JTI->createJumpTableIndex(LPadList);
8328 :
8329 : // Create the MBBs for the dispatch code.
8330 :
8331 : // Shove the dispatch's address into the return slot in the function context.
8332 32 : MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
8333 : DispatchBB->setIsEHPad();
8334 :
8335 32 : MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
8336 : unsigned trap_opcode;
8337 32 : if (Subtarget->isThumb())
8338 : trap_opcode = ARM::tTRAP;
8339 : else
8340 17 : trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
8341 :
8342 32 : BuildMI(TrapBB, dl, TII->get(trap_opcode));
8343 32 : DispatchBB->addSuccessor(TrapBB);
8344 :
8345 32 : MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
8346 32 : DispatchBB->addSuccessor(DispContBB);
8347 :
8348 : // Insert the MBBs into the function.
8349 : MF->insert(MF->end(), DispatchBB);
8350 : MF->insert(MF->end(), DispContBB);
8351 : MF->insert(MF->end(), TrapBB);
8352 :
8353 : // Insert code into the entry block that creates and registers the function
8354 : // context.
8355 32 : SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
8356 :
8357 32 : MachineMemOperand *FIMMOLd = MF->getMachineMemOperand(
8358 : MachinePointerInfo::getFixedStack(*MF, FI),
8359 : MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 4, 4);
8360 :
8361 : MachineInstrBuilder MIB;
8362 32 : MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
8363 :
8364 : const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
8365 32 : const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
8366 :
8367 : // Add a register mask with no preserved registers. This results in all
8368 : // registers being marked as clobbered. This can't work if the dispatch block
8369 : // is in a Thumb1 function and is linked with ARM code which uses the FP
8370 : // registers, as there is no way to preserve the FP registers in Thumb1 mode.
8371 32 : MIB.addRegMask(RI.getSjLjDispatchPreservedMask(*MF));
8372 :
8373 32 : bool IsPositionIndependent = isPositionIndependent();
8374 32 : unsigned NumLPads = LPadList.size();
8375 32 : if (Subtarget->isThumb2()) {
8376 11 : unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
8377 11 : BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
8378 : .addFrameIndex(FI)
8379 : .addImm(4)
8380 : .addMemOperand(FIMMOLd)
8381 11 : .add(predOps(ARMCC::AL));
8382 :
8383 11 : if (NumLPads < 256) {
8384 11 : BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
8385 11 : .addReg(NewVReg1)
8386 11 : .addImm(LPadList.size())
8387 11 : .add(predOps(ARMCC::AL));
8388 : } else {
8389 0 : unsigned VReg1 = MRI->createVirtualRegister(TRC);
8390 0 : BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
8391 0 : .addImm(NumLPads & 0xFFFF)
8392 0 : .add(predOps(ARMCC::AL));
8393 :
8394 : unsigned VReg2 = VReg1;
8395 0 : if ((NumLPads & 0xFFFF0000) != 0) {
8396 0 : VReg2 = MRI->createVirtualRegister(TRC);
8397 0 : BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
8398 0 : .addReg(VReg1)
8399 0 : .addImm(NumLPads >> 16)
8400 0 : .add(predOps(ARMCC::AL));
8401 : }
8402 :
8403 0 : BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
8404 0 : .addReg(NewVReg1)
8405 0 : .addReg(VReg2)
8406 0 : .add(predOps(ARMCC::AL));
8407 : }
8408 :
8409 22 : BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
8410 : .addMBB(TrapBB)
8411 : .addImm(ARMCC::HI)
8412 11 : .addReg(ARM::CPSR);
8413 :
8414 11 : unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
8415 11 : BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT), NewVReg3)
8416 : .addJumpTableIndex(MJTI)
8417 11 : .add(predOps(ARMCC::AL));
8418 :
8419 11 : unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
8420 11 : BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
8421 11 : .addReg(NewVReg3, RegState::Kill)
8422 11 : .addReg(NewVReg1)
8423 : .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))
8424 11 : .add(predOps(ARMCC::AL))
8425 11 : .add(condCodeOp());
8426 :
8427 22 : BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
8428 11 : .addReg(NewVReg4, RegState::Kill)
8429 11 : .addReg(NewVReg1)
8430 : .addJumpTableIndex(MJTI);
8431 21 : } else if (Subtarget->isThumb()) {
8432 4 : unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
8433 4 : BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
8434 : .addFrameIndex(FI)
8435 : .addImm(1)
8436 : .addMemOperand(FIMMOLd)
8437 4 : .add(predOps(ARMCC::AL));
8438 :
8439 4 : if (NumLPads < 256) {
8440 4 : BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
8441 4 : .addReg(NewVReg1)
8442 : .addImm(NumLPads)
8443 4 : .add(predOps(ARMCC::AL));
8444 : } else {
8445 0 : MachineConstantPool *ConstantPool = MF->getConstantPool();
8446 0 : Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
8447 0 : const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
8448 :
8449 : // MachineConstantPool wants an explicit alignment.
8450 0 : unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
8451 0 : if (Align == 0)
8452 0 : Align = MF->getDataLayout().getTypeAllocSize(C->getType());
8453 0 : unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
8454 :
8455 0 : unsigned VReg1 = MRI->createVirtualRegister(TRC);
8456 0 : BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
8457 0 : .addReg(VReg1, RegState::Define)
8458 : .addConstantPoolIndex(Idx)
8459 0 : .add(predOps(ARMCC::AL));
8460 0 : BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
8461 0 : .addReg(NewVReg1)
8462 0 : .addReg(VReg1)
8463 0 : .add(predOps(ARMCC::AL));
8464 : }
8465 :
8466 8 : BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
8467 : .addMBB(TrapBB)
8468 : .addImm(ARMCC::HI)
8469 4 : .addReg(ARM::CPSR);
8470 :
8471 4 : unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
8472 4 : BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
8473 4 : .addReg(ARM::CPSR, RegState::Define)
8474 4 : .addReg(NewVReg1)
8475 : .addImm(2)
8476 4 : .add(predOps(ARMCC::AL));
8477 :
8478 4 : unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
8479 4 : BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
8480 : .addJumpTableIndex(MJTI)
8481 4 : .add(predOps(ARMCC::AL));
8482 :
8483 4 : unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
8484 4 : BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
8485 4 : .addReg(ARM::CPSR, RegState::Define)
8486 4 : .addReg(NewVReg2, RegState::Kill)
8487 4 : .addReg(NewVReg3)
8488 4 : .add(predOps(ARMCC::AL));
8489 :
8490 4 : MachineMemOperand *JTMMOLd = MF->getMachineMemOperand(
8491 : MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4);
8492 :
8493 4 : unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
8494 4 : BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
8495 4 : .addReg(NewVReg4, RegState::Kill)
8496 : .addImm(0)
8497 : .addMemOperand(JTMMOLd)
8498 4 : .add(predOps(ARMCC::AL));
8499 :
8500 : unsigned NewVReg6 = NewVReg5;
8501 4 : if (IsPositionIndependent) {
8502 2 : NewVReg6 = MRI->createVirtualRegister(TRC);
8503 2 : BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
8504 2 : .addReg(ARM::CPSR, RegState::Define)
8505 2 : .addReg(NewVReg5, RegState::Kill)
8506 2 : .addReg(NewVReg3)
8507 2 : .add(predOps(ARMCC::AL));
8508 : }
8509 :
8510 8 : BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
8511 4 : .addReg(NewVReg6, RegState::Kill)
8512 : .addJumpTableIndex(MJTI);
8513 : } else {
8514 17 : unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
8515 17 : BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
8516 : .addFrameIndex(FI)
8517 : .addImm(4)
8518 : .addMemOperand(FIMMOLd)
8519 17 : .add(predOps(ARMCC::AL));
8520 :
8521 17 : if (NumLPads < 256) {
8522 17 : BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
8523 17 : .addReg(NewVReg1)
8524 : .addImm(NumLPads)
8525 17 : .add(predOps(ARMCC::AL));
8526 0 : } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
8527 0 : unsigned VReg1 = MRI->createVirtualRegister(TRC);
8528 0 : BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
8529 0 : .addImm(NumLPads & 0xFFFF)
8530 0 : .add(predOps(ARMCC::AL));
8531 :
8532 : unsigned VReg2 = VReg1;
8533 0 : if ((NumLPads & 0xFFFF0000) != 0) {
8534 0 : VReg2 = MRI->createVirtualRegister(TRC);
8535 0 : BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
8536 0 : .addReg(VReg1)
8537 0 : .addImm(NumLPads >> 16)
8538 0 : .add(predOps(ARMCC::AL));
8539 : }
8540 :
8541 0 : BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
8542 0 : .addReg(NewVReg1)
8543 0 : .addReg(VReg2)
8544 0 : .add(predOps(ARMCC::AL));
8545 : } else {
8546 0 : MachineConstantPool *ConstantPool = MF->getConstantPool();
8547 0 : Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
8548 0 : const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
8549 :
8550 : // MachineConstantPool wants an explicit alignment.
8551 0 : unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
8552 0 : if (Align == 0)
8553 0 : Align = MF->getDataLayout().getTypeAllocSize(C->getType());
8554 0 : unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
8555 :
8556 0 : unsigned VReg1 = MRI->createVirtualRegister(TRC);
8557 0 : BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
8558 0 : .addReg(VReg1, RegState::Define)
8559 : .addConstantPoolIndex(Idx)
8560 : .addImm(0)
8561 0 : .add(predOps(ARMCC::AL));
8562 0 : BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
8563 0 : .addReg(NewVReg1)
8564 0 : .addReg(VReg1, RegState::Kill)
8565 0 : .add(predOps(ARMCC::AL));
8566 : }
8567 :
8568 34 : BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
8569 : .addMBB(TrapBB)
8570 : .addImm(ARMCC::HI)
8571 17 : .addReg(ARM::CPSR);
8572 :
8573 17 : unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
8574 17 : BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
8575 17 : .addReg(NewVReg1)
8576 : .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))
8577 17 : .add(predOps(ARMCC::AL))
8578 17 : .add(condCodeOp());
8579 17 : unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
8580 17 : BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
8581 : .addJumpTableIndex(MJTI)
8582 17 : .add(predOps(ARMCC::AL));
8583 :
8584 17 : MachineMemOperand *JTMMOLd = MF->getMachineMemOperand(
8585 : MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4);
8586 17 : unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
8587 17 : BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
8588 17 : .addReg(NewVReg3, RegState::Kill)
8589 17 : .addReg(NewVReg4)
8590 : .addImm(0)
8591 : .addMemOperand(JTMMOLd)
8592 17 : .add(predOps(ARMCC::AL));
8593 :
8594 17 : if (IsPositionIndependent) {
8595 26 : BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
8596 13 : .addReg(NewVReg5, RegState::Kill)
8597 13 : .addReg(NewVReg4)
8598 : .addJumpTableIndex(MJTI);
8599 : } else {
8600 8 : BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
8601 4 : .addReg(NewVReg5, RegState::Kill)
8602 : .addJumpTableIndex(MJTI);
8603 : }
8604 : }
8605 :
8606 : // Add the jump table entries as successors to the MBB.
8607 : SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs;
8608 : for (std::vector<MachineBasicBlock*>::iterator
8609 203 : I = LPadList.begin(), E = LPadList.end(); I != E; ++I) {
8610 171 : MachineBasicBlock *CurMBB = *I;
8611 171 : if (SeenMBBs.insert(CurMBB).second)
8612 124 : DispContBB->addSuccessor(CurMBB);
8613 : }
8614 :
8615 : // N.B. the order the invoke BBs are processed in doesn't matter here.
8616 32 : const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);
8617 : SmallVector<MachineBasicBlock*, 64> MBBLPads;
8618 203 : for (MachineBasicBlock *BB : InvokeBBs) {
8619 :
8620 : // Remove the landing pad successor from the invoke block and replace it
8621 : // with the new dispatch block.
8622 : SmallVector<MachineBasicBlock*, 4> Successors(BB->succ_begin(),
8623 171 : BB->succ_end());
8624 513 : while (!Successors.empty()) {
8625 342 : MachineBasicBlock *SMBB = Successors.pop_back_val();
8626 342 : if (SMBB->isEHPad()) {
8627 171 : BB->removeSuccessor(SMBB);
8628 171 : MBBLPads.push_back(SMBB);
8629 : }
8630 : }
8631 :
8632 171 : BB->addSuccessor(DispatchBB, BranchProbability::getZero());
8633 : BB->normalizeSuccProbs();
8634 :
8635 : // Find the invoke call and mark all of the callee-saved registers as
8636 : // 'implicit defined' so that they're spilled. This prevents later passes
8637 : // from moving instructions to before the EH block, where they would never
8638 : // be executed.
8639 : for (MachineBasicBlock::reverse_iterator
8640 779 : II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
8641 779 : if (!II->isCall()) continue;
8642 :
8643 : DenseMap<unsigned, bool> DefRegs;
8644 1510 : for (MachineInstr::mop_iterator
8645 171 : OI = II->operands_begin(), OE = II->operands_end();
8646 1681 : OI != OE; ++OI) {
8647 1510 : if (!OI->isReg()) continue;
8648 1079 : DefRegs[OI->getReg()] = true;
8649 : }
8650 :
8651 : MachineInstrBuilder MIB(*MF, &*II);
8652 :
8653 2910 : for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
8654 2739 : unsigned Reg = SavedRegs[i];
8655 4132 : if (Subtarget->isThumb2() &&
8656 2438 : !ARM::tGPRRegClass.contains(Reg) &&
8657 1045 : !ARM::hGPRRegClass.contains(Reg))
8658 1384 : continue;
8659 64 : if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
8660 : continue;
8661 1995 : if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
8662 : continue;
8663 1355 : if (!DefRegs[Reg])
8664 1188 : MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
8665 : }
8666 :
8667 : break;
8668 : }
8669 : }
8670 :
8671 : // Mark all former landing pads as non-landing pads. The dispatch is the only
8672 : // landing pad now.
8673 171 : for (SmallVectorImpl<MachineBasicBlock*>::iterator
8674 203 : I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I)
8675 171 : (*I)->setIsEHPad(false);
8676 :
8677 : // The instruction is gone now.
8678 32 : MI.eraseFromParent();
8679 32 : }
8680 :
8681 : static
8682 : MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
8683 : for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
8684 1 : E = MBB->succ_end(); I != E; ++I)
8685 1 : if (*I != Succ)
8686 : return *I;
8687 0 : llvm_unreachable("Expecting a BB with two successors!");
8688 : }
8689 :
8690 : /// Return the load opcode for a given load size. If load size >= 8,
8691 : /// a NEON opcode will be returned.
8692 2532 : static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
8693 2532 : if (LdSize >= 8)
8694 83 : return LdSize == 16 ? ARM::VLD1q32wb_fixed
8695 83 : : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
8696 2449 : if (IsThumb1)
8697 921 : return LdSize == 4 ? ARM::tLDRi
8698 : : LdSize == 2 ? ARM::tLDRHi
8699 921 : : LdSize == 1 ? ARM::tLDRBi : 0;
8700 1528 : if (IsThumb2)
8701 512 : return LdSize == 4 ? ARM::t2LDR_POST
8702 : : LdSize == 2 ? ARM::t2LDRH_POST
8703 512 : : LdSize == 1 ? ARM::t2LDRB_POST : 0;
8704 1016 : return LdSize == 4 ? ARM::LDR_POST_IMM
8705 : : LdSize == 2 ? ARM::LDRH_POST
8706 : : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
8707 : }
8708 :
8709 : /// Return the store opcode for a given store size. If store size >= 8,
8710 : /// a NEON opcode will be returned.
8711 2532 : static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
8712 2532 : if (StSize >= 8)
8713 83 : return StSize == 16 ? ARM::VST1q32wb_fixed
8714 83 : : StSize == 8 ? ARM::VST1d32wb_fixed : 0;
8715 2449 : if (IsThumb1)
8716 921 : return StSize == 4 ? ARM::tSTRi
8717 : : StSize == 2 ? ARM::tSTRHi
8718 921 : : StSize == 1 ? ARM::tSTRBi : 0;
8719 1528 : if (IsThumb2)
8720 512 : return StSize == 4 ? ARM::t2STR_POST
8721 : : StSize == 2 ? ARM::t2STRH_POST
8722 512 : : StSize == 1 ? ARM::t2STRB_POST : 0;
8723 1016 : return StSize == 4 ? ARM::STR_POST_IMM
8724 : : StSize == 2 ? ARM::STRH_POST
8725 : : StSize == 1 ? ARM::STRB_POST_IMM : 0;
8726 : }
8727 :
8728 : /// Emit a post-increment load operation with the given size. The instructions
8729 : /// will be added to BB at Pos.
8730 2532 : static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
8731 : const TargetInstrInfo *TII, const DebugLoc &dl,
8732 : unsigned LdSize, unsigned Data, unsigned AddrIn,
8733 : unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
8734 2532 : unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
8735 : assert(LdOpc != 0 && "Should have a load opcode");
8736 2532 : if (LdSize >= 8) {
8737 166 : BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
8738 83 : .addReg(AddrOut, RegState::Define)
8739 83 : .addReg(AddrIn)
8740 : .addImm(0)
8741 83 : .add(predOps(ARMCC::AL));
8742 2449 : } else if (IsThumb1) {
8743 : // load + update AddrIn
8744 1842 : BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
8745 921 : .addReg(AddrIn)
8746 : .addImm(0)
8747 921 : .add(predOps(ARMCC::AL));
8748 1842 : BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
8749 921 : .add(t1CondCodeOp())
8750 921 : .addReg(AddrIn)
8751 921 : .addImm(LdSize)
8752 921 : .add(predOps(ARMCC::AL));
8753 1528 : } else if (IsThumb2) {
8754 1024 : BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
8755 512 : .addReg(AddrOut, RegState::Define)
8756 512 : .addReg(AddrIn)
8757 512 : .addImm(LdSize)
8758 512 : .add(predOps(ARMCC::AL));
8759 : } else { // arm
8760 2032 : BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
8761 1016 : .addReg(AddrOut, RegState::Define)
8762 1016 : .addReg(AddrIn)
8763 1016 : .addReg(0)
8764 1016 : .addImm(LdSize)
8765 1016 : .add(predOps(ARMCC::AL));
8766 : }
8767 2532 : }
8768 :
8769 : /// Emit a post-increment store operation with the given size. The instructions
8770 : /// will be added to BB at Pos.
8771 2532 : static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
8772 : const TargetInstrInfo *TII, const DebugLoc &dl,
8773 : unsigned StSize, unsigned Data, unsigned AddrIn,
8774 : unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
8775 2532 : unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
8776 : assert(StOpc != 0 && "Should have a store opcode");
8777 2532 : if (StSize >= 8) {
8778 166 : BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
8779 83 : .addReg(AddrIn)
8780 : .addImm(0)
8781 83 : .addReg(Data)
8782 83 : .add(predOps(ARMCC::AL));
8783 2449 : } else if (IsThumb1) {
8784 : // store + update AddrIn
8785 1842 : BuildMI(*BB, Pos, dl, TII->get(StOpc))
8786 921 : .addReg(Data)
8787 921 : .addReg(AddrIn)
8788 : .addImm(0)
8789 921 : .add(predOps(ARMCC::AL));
8790 1842 : BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
8791 921 : .add(t1CondCodeOp())
8792 921 : .addReg(AddrIn)
8793 921 : .addImm(StSize)
8794 921 : .add(predOps(ARMCC::AL));
8795 1528 : } else if (IsThumb2) {
8796 1024 : BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
8797 512 : .addReg(Data)
8798 512 : .addReg(AddrIn)
8799 512 : .addImm(StSize)
8800 512 : .add(predOps(ARMCC::AL));
8801 : } else { // arm
8802 2032 : BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
8803 1016 : .addReg(Data)
8804 1016 : .addReg(AddrIn)
8805 1016 : .addReg(0)
8806 1016 : .addImm(StSize)
8807 1016 : .add(predOps(ARMCC::AL));
8808 : }
8809 2532 : }
8810 :
8811 : MachineBasicBlock *
8812 360 : ARMTargetLowering::EmitStructByval(MachineInstr &MI,
8813 : MachineBasicBlock *BB) const {
8814 : // This pseudo instruction has 4 operands: dst, src, size, alignment.
8815 : // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
8816 : // Otherwise, we will generate unrolled scalar copies.
8817 360 : const TargetInstrInfo *TII = Subtarget->getInstrInfo();
8818 360 : const BasicBlock *LLVM_BB = BB->getBasicBlock();
8819 360 : MachineFunction::iterator It = ++BB->getIterator();
8820 :
8821 360 : unsigned dest = MI.getOperand(0).getReg();
8822 360 : unsigned src = MI.getOperand(1).getReg();
8823 360 : unsigned SizeVal = MI.getOperand(2).getImm();
8824 360 : unsigned Align = MI.getOperand(3).getImm();
8825 : DebugLoc dl = MI.getDebugLoc();
8826 :
8827 360 : MachineFunction *MF = BB->getParent();
8828 360 : MachineRegisterInfo &MRI = MF->getRegInfo();
8829 : unsigned UnitSize = 0;
8830 : const TargetRegisterClass *TRC = nullptr;
8831 : const TargetRegisterClass *VecTRC = nullptr;
8832 :
8833 360 : bool IsThumb1 = Subtarget->isThumb1Only();
8834 : bool IsThumb2 = Subtarget->isThumb2();
8835 : bool IsThumb = Subtarget->isThumb();
8836 :
8837 360 : if (Align & 1) {
8838 : UnitSize = 1;
8839 287 : } else if (Align & 2) {
8840 : UnitSize = 2;
8841 : } else {
8842 : // Check whether we can use NEON instructions.
8843 225 : if (!MF->getFunction().hasFnAttribute(Attribute::NoImplicitFloat) &&
8844 225 : Subtarget->hasNEON()) {
8845 102 : if ((Align % 16 == 0) && SizeVal >= 16)
8846 : UnitSize = 16;
8847 71 : else if ((Align % 8 == 0) && SizeVal >= 8)
8848 : UnitSize = 8;
8849 : }
8850 : // Can't use NEON instructions.
8851 : if (UnitSize == 0)
8852 : UnitSize = 4;
8853 : }
8854 :
8855 : // Select the correct opcode and register class for unit size load/store
8856 : bool IsNeon = UnitSize >= 8;
8857 360 : TRC = IsThumb ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
8858 360 : if (IsNeon)
8859 57 : VecTRC = UnitSize == 16 ? &ARM::DPairRegClass
8860 26 : : UnitSize == 8 ? &ARM::DPRRegClass
8861 : : nullptr;
8862 :
8863 360 : unsigned BytesLeft = SizeVal % UnitSize;
8864 360 : unsigned LoopSize = SizeVal - BytesLeft;
8865 :
8866 360 : if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
8867 : // Use LDR and STR to copy.
8868 : // [scratch, srcOut] = LDR_POST(srcIn, UnitSize)
8869 : // [destOut] = STR_POST(scratch, destIn, UnitSize)
8870 : unsigned srcIn = src;
8871 : unsigned destIn = dest;
8872 2186 : for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
8873 2011 : unsigned srcOut = MRI.createVirtualRegister(TRC);
8874 2011 : unsigned destOut = MRI.createVirtualRegister(TRC);
8875 3966 : unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
8876 4022 : emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut,
8877 : IsThumb1, IsThumb2);
8878 2011 : emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut,
8879 : IsThumb1, IsThumb2);
8880 : srcIn = srcOut;
8881 : destIn = destOut;
8882 : }
8883 :
8884 : // Handle the leftover bytes with LDRB and STRB.
8885 : // [scratch, srcOut] = LDRB_POST(srcIn, 1)
8886 : // [destOut] = STRB_POST(scratch, destIn, 1)
8887 409 : for (unsigned i = 0; i < BytesLeft; i++) {
8888 234 : unsigned srcOut = MRI.createVirtualRegister(TRC);
8889 234 : unsigned destOut = MRI.createVirtualRegister(TRC);
8890 234 : unsigned scratch = MRI.createVirtualRegister(TRC);
8891 468 : emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut,
8892 : IsThumb1, IsThumb2);
8893 234 : emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut,
8894 : IsThumb1, IsThumb2);
8895 : srcIn = srcOut;
8896 : destIn = destOut;
8897 : }
8898 175 : MI.eraseFromParent(); // The instruction is gone now.
8899 175 : return BB;
8900 : }
8901 :
8902 : // Expand the pseudo op to a loop.
8903 : // thisMBB:
8904 : // ...
8905 : // movw varEnd, # --> with thumb2
8906 : // movt varEnd, #
8907 : // ldrcp varEnd, idx --> without thumb2
8908 : // fallthrough --> loopMBB
8909 : // loopMBB:
8910 : // PHI varPhi, varEnd, varLoop
8911 : // PHI srcPhi, src, srcLoop
8912 : // PHI destPhi, dst, destLoop
8913 : // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
8914 : // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
8915 : // subs varLoop, varPhi, #UnitSize
8916 : // bne loopMBB
8917 : // fallthrough --> exitMBB
8918 : // exitMBB:
8919 : // epilogue to handle left-over bytes
8920 : // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
8921 : // [destOut] = STRB_POST(scratch, destLoop, 1)
8922 185 : MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
8923 185 : MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
8924 : MF->insert(It, loopMBB);
8925 : MF->insert(It, exitMBB);
8926 :
8927 : // Transfer the remainder of BB and its successor edges to exitMBB.
8928 185 : exitMBB->splice(exitMBB->begin(), BB,
8929 : std::next(MachineBasicBlock::iterator(MI)), BB->end());
8930 185 : exitMBB->transferSuccessorsAndUpdatePHIs(BB);
8931 :
8932 : // Load an immediate to varEnd.
8933 185 : unsigned varEnd = MRI.createVirtualRegister(TRC);
8934 185 : if (Subtarget->useMovt(*MF)) {
8935 : unsigned Vtmp = varEnd;
8936 138 : if ((LoopSize & 0xFFFF0000) != 0)
8937 0 : Vtmp = MRI.createVirtualRegister(TRC);
8938 209 : BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVi16 : ARM::MOVi16), Vtmp)
8939 138 : .addImm(LoopSize & 0xFFFF)
8940 138 : .add(predOps(ARMCC::AL));
8941 :
8942 138 : if ((LoopSize & 0xFFFF0000) != 0)
8943 0 : BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVTi16 : ARM::MOVTi16), varEnd)
8944 0 : .addReg(Vtmp)
8945 0 : .addImm(LoopSize >> 16)
8946 0 : .add(predOps(ARMCC::AL));
8947 : } else {
8948 47 : MachineConstantPool *ConstantPool = MF->getConstantPool();
8949 47 : Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
8950 47 : const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
8951 :
8952 : // MachineConstantPool wants an explicit alignment.
8953 47 : unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
8954 47 : if (Align == 0)
8955 0 : Align = MF->getDataLayout().getTypeAllocSize(C->getType());
8956 47 : unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
8957 :
8958 47 : if (IsThumb)
8959 88 : BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci))
8960 44 : .addReg(varEnd, RegState::Define)
8961 : .addConstantPoolIndex(Idx)
8962 44 : .add(predOps(ARMCC::AL));
8963 : else
8964 6 : BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp))
8965 3 : .addReg(varEnd, RegState::Define)
8966 : .addConstantPoolIndex(Idx)
8967 : .addImm(0)
8968 3 : .add(predOps(ARMCC::AL));
8969 : }
8970 185 : BB->addSuccessor(loopMBB);
8971 :
8972 : // Generate the loop body:
8973 : // varPhi = PHI(varLoop, varEnd)
8974 : // srcPhi = PHI(srcLoop, src)
8975 : // destPhi = PHI(destLoop, dst)
8976 : MachineBasicBlock *entryBB = BB;
8977 : BB = loopMBB;
8978 185 : unsigned varLoop = MRI.createVirtualRegister(TRC);
8979 185 : unsigned varPhi = MRI.createVirtualRegister(TRC);
8980 185 : unsigned srcLoop = MRI.createVirtualRegister(TRC);
8981 185 : unsigned srcPhi = MRI.createVirtualRegister(TRC);
8982 185 : unsigned destLoop = MRI.createVirtualRegister(TRC);
8983 185 : unsigned destPhi = MRI.createVirtualRegister(TRC);
8984 :
8985 370 : BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
8986 185 : .addReg(varLoop).addMBB(loopMBB)
8987 185 : .addReg(varEnd).addMBB(entryBB);
8988 185 : BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
8989 185 : .addReg(srcLoop).addMBB(loopMBB)
8990 185 : .addReg(src).addMBB(entryBB);
8991 370 : BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
8992 185 : .addReg(destLoop).addMBB(loopMBB)
8993 185 : .addReg(dest).addMBB(entryBB);
8994 :
8995 : // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
8996 : // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
8997 343 : unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
8998 185 : emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,
8999 : IsThumb1, IsThumb2);
9000 185 : emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,
9001 : IsThumb1, IsThumb2);
9002 :
9003 : // Decrement loop variable by UnitSize.
9004 185 : if (IsThumb1) {
9005 150 : BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop)
9006 75 : .add(t1CondCodeOp())
9007 75 : .addReg(varPhi)
9008 75 : .addImm(UnitSize)
9009 75 : .add(predOps(ARMCC::AL));
9010 : } else {
9011 : MachineInstrBuilder MIB =
9012 : BuildMI(*BB, BB->end(), dl,
9013 258 : TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
9014 110 : MIB.addReg(varPhi)
9015 110 : .addImm(UnitSize)
9016 110 : .add(predOps(ARMCC::AL))
9017 110 : .add(condCodeOp());
9018 220 : MIB->getOperand(5).setReg(ARM::CPSR);
9019 220 : MIB->getOperand(5).setIsDef(true);
9020 : }
9021 185 : BuildMI(*BB, BB->end(), dl,
9022 333 : TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
9023 185 : .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
9024 :
9025 : // loopMBB can loop back to loopMBB or fall through to exitMBB.
9026 185 : BB->addSuccessor(loopMBB);
9027 185 : BB->addSuccessor(exitMBB);
9028 :
9029 : // Add epilogue to handle BytesLeft.
9030 : BB = exitMBB;
9031 185 : auto StartOfExit = exitMBB->begin();
9032 :
9033 : // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
9034 : // [destOut] = STRB_POST(scratch, destLoop, 1)
9035 : unsigned srcIn = srcLoop;
9036 : unsigned destIn = destLoop;
9037 287 : for (unsigned i = 0; i < BytesLeft; i++) {
9038 102 : unsigned srcOut = MRI.createVirtualRegister(TRC);
9039 102 : unsigned destOut = MRI.createVirtualRegister(TRC);
9040 102 : unsigned scratch = MRI.createVirtualRegister(TRC);
9041 102 : emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,
9042 : IsThumb1, IsThumb2);
9043 102 : emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,
9044 : IsThumb1, IsThumb2);
9045 : srcIn = srcOut;
9046 : destIn = destOut;
9047 : }
9048 :
9049 185 : MI.eraseFromParent(); // The instruction is gone now.
9050 185 : return BB;
9051 : }
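
Either way the pseudo is expanded, the copy is decomposed the same way: LoopSize = SizeVal - SizeVal % UnitSize bytes are moved in UnitSize-wide chunks (fully unrolled below the inline threshold, otherwise via the loop above), and the remaining BytesLeft bytes are moved one at a time. A host-side sketch of that decomposition, using plain memory copies in place of the post-increment load/store pairs:

#include <cassert>
#include <cstring>

static void structByvalCopySketch(char *Dst, const char *Src,
                                  unsigned SizeVal, unsigned UnitSize) {
  unsigned BytesLeft = SizeVal % UnitSize;
  unsigned LoopSize = SizeVal - BytesLeft;
  for (unsigned I = 0; I < LoopSize; I += UnitSize) // LDR_POST / STR_POST chunks
    std::memcpy(Dst + I, Src + I, UnitSize);
  for (unsigned I = LoopSize; I < SizeVal; ++I)     // LDRB_POST / STRB_POST tail
    Dst[I] = Src[I];
}

int main() {
  char Src[10] = {'0','1','2','3','4','5','6','7','8','9'};
  char Dst[10] = {};
  structByvalCopySketch(Dst, Src, 10, 4); // two 4-byte chunks + 2 leftover bytes
  assert(std::memcmp(Dst, Src, 10) == 0);
  return 0;
}
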
9052 :
9053 : MachineBasicBlock *
9054 7 : ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
9055 : MachineBasicBlock *MBB) const {
9056 7 : const TargetMachine &TM = getTargetMachine();
9057 7 : const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
9058 : DebugLoc DL = MI.getDebugLoc();
9059 :
9060 : assert(Subtarget->isTargetWindows() &&
9061 : "__chkstk is only supported on Windows");
9062 : assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
9063 :
9064 : // __chkstk takes the number of words to allocate on the stack in R4, and
9065 : // returns the stack adjustment in number of bytes in R4. This will not
9066 : // clober any other registers (other than the obvious lr).
9067 : //
9068 : // Although, technically, IP should be considered a register which may be
9069 : // clobbered, the call itself will not touch it. Windows on ARM is a pure
9070 : // thumb-2 environment, so there is no interworking required. As a result, we
9071 : // do not expect a veneer to be emitted by the linker, clobbering IP.
9072 : //
9073 : // Each module receives its own copy of __chkstk, so no import thunk is
9074 : // required, again, ensuring that IP is not clobbered.
9075 : //
9076 : // Finally, although some linkers may theoretically provide a trampoline for
9077 : // out of range calls (which is quite common due to a 32M range limitation of
9078 : // branches for Thumb), we can generate the long-call version via
9079 : // -mcmodel=large, alleviating the need for the trampoline which may clobber
9080 : // IP.
9081 :
9082 7 : switch (TM.getCodeModel()) {
9083 : case CodeModel::Tiny:
9084 : llvm_unreachable("Tiny code model not available on ARM.");
9085 6 : case CodeModel::Small:
9086 : case CodeModel::Medium:
9087 : case CodeModel::Kernel:
9088 12 : BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
9089 6 : .add(predOps(ARMCC::AL))
9090 : .addExternalSymbol("__chkstk")
9091 6 : .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
9092 6 : .addReg(ARM::R4, RegState::Implicit | RegState::Define)
9093 : .addReg(ARM::R12,
9094 6 : RegState::Implicit | RegState::Define | RegState::Dead)
9095 : .addReg(ARM::CPSR,
9096 6 : RegState::Implicit | RegState::Define | RegState::Dead);
9097 6 : break;
9098 1 : case CodeModel::Large: {
9099 1 : MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
9100 1 : unsigned Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
9101 :
9102 2 : BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
9103 : .addExternalSymbol("__chkstk");
9104 2 : BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr))
9105 1 : .add(predOps(ARMCC::AL))
9106 1 : .addReg(Reg, RegState::Kill)
9107 1 : .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
9108 1 : .addReg(ARM::R4, RegState::Implicit | RegState::Define)
9109 : .addReg(ARM::R12,
9110 1 : RegState::Implicit | RegState::Define | RegState::Dead)
9111 : .addReg(ARM::CPSR,
9112 1 : RegState::Implicit | RegState::Define | RegState::Dead);
9113 1 : break;
9114 : }
9115 : }
9116 :
9117 14 : BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), ARM::SP)
9118 7 : .addReg(ARM::SP, RegState::Kill)
9119 7 : .addReg(ARM::R4, RegState::Kill)
9120 : .setMIFlags(MachineInstr::FrameSetup)
9121 7 : .add(predOps(ARMCC::AL))
9122 7 : .add(condCodeOp());
9123 :
9124 7 : MI.eraseFromParent();
9125 7 : return MBB;
9126 : }
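
As the comments above spell out, the contract is: the word count goes to __chkstk in R4, the probed byte adjustment comes back in R4, and the final t2SUBrr applies it to SP. A minimal arithmetic sketch of that contract, based on my reading of the comments; the page probing performed by the real helper is elided.

#include <cassert>
#include <cstdint>

// Stand-in for __chkstk: takes a word count, returns the byte adjustment.
// The real helper also touches each page of the newly allocated stack region.
static uint32_t chkstkSketch(uint32_t R4Words) {
  return R4Words * 4;
}

int main() {
  uint32_t SP = 0x10000;
  uint32_t R4 = 16;      // request 16 words
  R4 = chkstkSketch(R4); // bl __chkstk (R4 now holds 64 bytes)
  SP -= R4;              // t2SUBrr SP, SP, R4
  assert(SP == 0x10000 - 64);
  return 0;
}
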
9127 :
9128 : MachineBasicBlock *
9129 58 : ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
9130 : MachineBasicBlock *MBB) const {
9131 : DebugLoc DL = MI.getDebugLoc();
9132 58 : MachineFunction *MF = MBB->getParent();
9133 58 : const TargetInstrInfo *TII = Subtarget->getInstrInfo();
9134 :
9135 58 : MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock();
9136 58 : MF->insert(++MBB->getIterator(), ContBB);
9137 58 : ContBB->splice(ContBB->begin(), MBB,
9138 : std::next(MachineBasicBlock::iterator(MI)), MBB->end());
9139 58 : ContBB->transferSuccessorsAndUpdatePHIs(MBB);
9140 58 : MBB->addSuccessor(ContBB);
9141 :
9142 58 : MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
9143 58 : BuildMI(TrapBB, DL, TII->get(ARM::t__brkdiv0));
9144 : MF->push_back(TrapBB);
9145 58 : MBB->addSuccessor(TrapBB);
9146 :
9147 116 : BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8))
9148 58 : .addReg(MI.getOperand(0).getReg())
9149 : .addImm(0)
9150 58 : .add(predOps(ARMCC::AL));
9151 116 : BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc))
9152 : .addMBB(TrapBB)
9153 : .addImm(ARMCC::EQ)
9154 58 : .addReg(ARM::CPSR);
9155 :
9156 58 : MI.eraseFromParent();
9157 58 : return ContBB;
9158 : }
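
The expansion compares the checked value against zero and branches into a trap block that executes the Windows divide-by-zero break (t__brkdiv0); otherwise control falls through into the division libcall. A conceptual sketch of that shape; checkedSDivSketch and brkdiv0Sketch are hypothetical names used only for illustration.

#include <cstdint>
#include <cstdlib>

[[noreturn]] static void brkdiv0Sketch() { // stands in for ARM::t__brkdiv0
  std::abort();
}

static int32_t checkedSDivSketch(int32_t Num, int32_t Denom) {
  if (Denom == 0)       // tCMPi8 Denom, #0  +  t2Bcc EQ -> TrapBB
    brkdiv0Sketch();    // TrapBB
  return Num / Denom;   // ContBB: the division (a __rt_sdiv call on Windows)
}

int main() {
  return checkedSDivSketch(12, 3) == 4 ? 0 : 1;
}
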
9159 :
9160 : MachineBasicBlock *
9161 589 : ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
9162 : MachineBasicBlock *BB) const {
9163 589 : const TargetInstrInfo *TII = Subtarget->getInstrInfo();
9164 : DebugLoc dl = MI.getDebugLoc();
9165 589 : bool isThumb2 = Subtarget->isThumb2();
9166 1178 : switch (MI.getOpcode()) {
9167 0 : default: {
9168 0 : MI.print(errs());
9169 0 : llvm_unreachable("Unexpected instr type to insert");
9170 : }
9171 :
9172 : // Thumb1 post-indexed loads are really just single-register LDMs.
9173 3 : case ARM::tLDR_postidx: {
9174 3 : MachineOperand Def(MI.getOperand(1));
9175 6 : BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))
9176 : .add(Def) // Rn_wb
9177 3 : .add(MI.getOperand(2)) // Rn
9178 3 : .add(MI.getOperand(3)) // PredImm
9179 3 : .add(MI.getOperand(4)) // PredReg
9180 3 : .add(MI.getOperand(0)); // Rt
9181 3 : MI.eraseFromParent();
9182 : return BB;
9183 : }
9184 :
9185 : // The Thumb2 pre-indexed stores have the same MI operands; they are just
9186 : // defined differently in the .td files than in the isel patterns, so
9187 : // they need pseudos.
9188 4 : case ARM::t2STR_preidx:
9189 4 : MI.setDesc(TII->get(ARM::t2STR_PRE));
9190 4 : return BB;
9191 0 : case ARM::t2STRB_preidx:
9192 0 : MI.setDesc(TII->get(ARM::t2STRB_PRE));
9193 0 : return BB;
9194 1 : case ARM::t2STRH_preidx:
9195 1 : MI.setDesc(TII->get(ARM::t2STRH_PRE));
9196 1 : return BB;
9197 :
9198 10 : case ARM::STRi_preidx:
9199 : case ARM::STRBi_preidx: {
9200 10 : unsigned NewOpc = MI.getOpcode() == ARM::STRi_preidx ? ARM::STR_PRE_IMM
9201 : : ARM::STRB_PRE_IMM;
9202 : // Decode the offset.
9203 10 : unsigned Offset = MI.getOperand(4).getImm();
9204 : bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
9205 : Offset = ARM_AM::getAM2Offset(Offset);
9206 : if (isSub)
9207 0 : Offset = -Offset;
9208 :
9209 10 : MachineMemOperand *MMO = *MI.memoperands_begin();
9210 20 : BuildMI(*BB, MI, dl, TII->get(NewOpc))
9211 10 : .add(MI.getOperand(0)) // Rn_wb
9212 10 : .add(MI.getOperand(1)) // Rt
9213 10 : .add(MI.getOperand(2)) // Rn
9214 10 : .addImm(Offset) // offset (skip GPR==zero_reg)
9215 10 : .add(MI.getOperand(5)) // pred
9216 10 : .add(MI.getOperand(6))
9217 : .addMemOperand(MMO);
9218 10 : MI.eraseFromParent();
9219 10 : return BB;
9220 : }
9221 3 : case ARM::STRr_preidx:
9222 : case ARM::STRBr_preidx:
9223 : case ARM::STRH_preidx: {
9224 : unsigned NewOpc;
9225 : switch (MI.getOpcode()) {
9226 0 : default: llvm_unreachable("unexpected opcode!");
9227 : case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
9228 0 : case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
9229 1 : case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
9230 : }
9231 6 : MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
9232 24 : for (unsigned i = 0; i < MI.getNumOperands(); ++i)
9233 21 : MIB.add(MI.getOperand(i));
9234 3 : MI.eraseFromParent();
9235 : return BB;
9236 : }
9237 :
9238 94 : case ARM::tMOVCCr_pseudo: {
9239 : // To "insert" a SELECT_CC instruction, we actually have to insert the
9240 : // diamond control-flow pattern. The incoming instruction knows the
9241 : // destination vreg to set, the condition code register to branch on, the
9242 : // true/false values to select between, and a branch opcode to use.
9243 94 : const BasicBlock *LLVM_BB = BB->getBasicBlock();
9244 94 : MachineFunction::iterator It = ++BB->getIterator();
9245 :
9246 : // thisMBB:
9247 : // ...
9248 : // TrueVal = ...
9249 : // cmpTY ccX, r1, r2
9250 : // bCC copy1MBB
9251 : // fallthrough --> copy0MBB
9252 : MachineBasicBlock *thisMBB = BB;
9253 94 : MachineFunction *F = BB->getParent();
9254 94 : MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
9255 94 : MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
9256 : F->insert(It, copy0MBB);
9257 : F->insert(It, sinkMBB);
9258 :
9259 : // Transfer the remainder of BB and its successor edges to sinkMBB.
9260 94 : sinkMBB->splice(sinkMBB->begin(), BB,
9261 : std::next(MachineBasicBlock::iterator(MI)), BB->end());
9262 94 : sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
9263 :
9264 94 : BB->addSuccessor(copy0MBB);
9265 94 : BB->addSuccessor(sinkMBB);
9266 :
9267 94 : BuildMI(BB, dl, TII->get(ARM::tBcc))
9268 : .addMBB(sinkMBB)
9269 94 : .addImm(MI.getOperand(3).getImm())
9270 94 : .addReg(MI.getOperand(4).getReg());
9271 :
9272 : // copy0MBB:
9273 : // %FalseValue = ...
9274 : // # fallthrough to sinkMBB
9275 : BB = copy0MBB;
9276 :
9277 : // Update machine-CFG edges
9278 94 : BB->addSuccessor(sinkMBB);
9279 :
9280 : // sinkMBB:
9281 : // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
9282 : // ...
9283 : BB = sinkMBB;
9284 188 : BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), MI.getOperand(0).getReg())
9285 94 : .addReg(MI.getOperand(1).getReg())
9286 : .addMBB(copy0MBB)
9287 94 : .addReg(MI.getOperand(2).getReg())
9288 : .addMBB(thisMBB);
9289 :
9290 94 : MI.eraseFromParent(); // The pseudo instruction is gone now.
9291 : return BB;
9292 : }
9293 :
9294 : case ARM::BCCi64:
9295 : case ARM::BCCZi64: {
9296 : // If there is an unconditional branch to the other successor, remove it.
9297 1 : BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end());
9298 :
9299 : // Compare both parts that make up the double comparison separately for
9300 : // equality.
9301 1 : bool RHSisZero = MI.getOpcode() == ARM::BCCZi64;
9302 :
9303 1 : unsigned LHS1 = MI.getOperand(1).getReg();
9304 1 : unsigned LHS2 = MI.getOperand(2).getReg();
9305 1 : if (RHSisZero) {
9306 2 : BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
9307 1 : .addReg(LHS1)
9308 : .addImm(0)
9309 1 : .add(predOps(ARMCC::AL));
9310 2 : BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
9311 1 : .addReg(LHS2).addImm(0)
9312 1 : .addImm(ARMCC::EQ).addReg(ARM::CPSR);
9313 : } else {
9314 0 : unsigned RHS1 = MI.getOperand(3).getReg();
9315 0 : unsigned RHS2 = MI.getOperand(4).getReg();
9316 0 : BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
9317 0 : .addReg(LHS1)
9318 0 : .addReg(RHS1)
9319 0 : .add(predOps(ARMCC::AL));
9320 0 : BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
9321 0 : .addReg(LHS2).addReg(RHS2)
9322 0 : .addImm(ARMCC::EQ).addReg(ARM::CPSR);
9323 : }
9324 :
9325 1 : MachineBasicBlock *destMBB = MI.getOperand(RHSisZero ? 3 : 5).getMBB();
9326 : MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
9327 1 : if (MI.getOperand(0).getImm() == ARMCC::NE)
9328 : std::swap(destMBB, exitMBB);
9329 :
9330 2 : BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
9331 1 : .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
9332 1 : if (isThumb2)
9333 0 : BuildMI(BB, dl, TII->get(ARM::t2B))
9334 : .addMBB(exitMBB)
9335 0 : .add(predOps(ARMCC::AL));
9336 : else
9337 1 : BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB);
9338 :
9339 1 : MI.eraseFromParent(); // The pseudo instruction is gone now.
9340 : return BB;
9341 : }
9342 :
9343 : case ARM::Int_eh_sjlj_setjmp:
9344 : case ARM::Int_eh_sjlj_setjmp_nofp:
9345 : case ARM::tInt_eh_sjlj_setjmp:
9346 : case ARM::t2Int_eh_sjlj_setjmp:
9347 : case ARM::t2Int_eh_sjlj_setjmp_nofp:
9348 : return BB;
9349 :
9350 32 : case ARM::Int_eh_sjlj_setup_dispatch:
9351 32 : EmitSjLjDispatchBlock(MI, BB);
9352 32 : return BB;
9353 :
9354 8 : case ARM::ABS:
9355 : case ARM::t2ABS: {
9356 :     // To insert an ABS instruction, we have to insert the
9357 :     // diamond control-flow pattern. The incoming instruction knows only the
9358 :     // source vreg to test against 0 and the destination vreg to set.
9361 : // It transforms
9362 : // V1 = ABS V0
9363 : // into
9364 : // V2 = MOVS V0
9365 : // BCC (branch to SinkBB if V0 >= 0)
9366 : // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
9367 : // SinkBB: V1 = PHI(V2, V3)
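     :     // For example, with V0 = -5 the CMP sets the N flag, the branch to SinkBB
     :     // is not taken, RSBri computes V3 = 0 - (-5) = 5, and the PHI yields
     :     // V1 = 5.  For V0 >= 0 the branch is taken and V1 = V0.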
9368 8 : const BasicBlock *LLVM_BB = BB->getBasicBlock();
9369 8 : MachineFunction::iterator BBI = ++BB->getIterator();
9370 8 : MachineFunction *Fn = BB->getParent();
9371 8 : MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
9372 8 : MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
9373 : Fn->insert(BBI, RSBBB);
9374 : Fn->insert(BBI, SinkBB);
9375 :
9376 8 : unsigned int ABSSrcReg = MI.getOperand(1).getReg();
9377 8 : unsigned int ABSDstReg = MI.getOperand(0).getReg();
9378 : bool ABSSrcKIll = MI.getOperand(1).isKill();
9379 8 : bool isThumb2 = Subtarget->isThumb2();
9380 8 : MachineRegisterInfo &MRI = Fn->getRegInfo();
9381 :     // In Thumb mode, S must not be specified if the source register is SP or
9382 :     // PC, or if the destination register is SP, so restrict the register class.
9383 : unsigned NewRsbDstReg =
9384 11 : MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
9385 :
9386 : // Transfer the remainder of BB and its successor edges to sinkMBB.
9387 8 : SinkBB->splice(SinkBB->begin(), BB,
9388 : std::next(MachineBasicBlock::iterator(MI)), BB->end());
9389 8 : SinkBB->transferSuccessorsAndUpdatePHIs(BB);
9390 :
9391 8 : BB->addSuccessor(RSBBB);
9392 8 : BB->addSuccessor(SinkBB);
9393 :
9394 : // fall through to SinkMBB
9395 8 : RSBBB->addSuccessor(SinkBB);
9396 :
9397 : // insert a cmp at the end of BB
9398 11 : BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
9399 8 : .addReg(ABSSrcReg)
9400 : .addImm(0)
9401 8 : .add(predOps(ARMCC::AL));
9402 :
9403 : // insert a bcc with opposite CC to ARMCC::MI at the end of BB
9404 8 : BuildMI(BB, dl,
9405 11 : TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
9406 8 : .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
9407 :
9408 : // insert rsbri in RSBBB
9409 : // Note: BCC and rsbri will be converted into predicated rsbmi
9410 : // by if-conversion pass
9411 8 : BuildMI(*RSBBB, RSBBB->begin(), dl,
9412 14 : TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
9413 15 : .addReg(ABSSrcReg, ABSSrcKIll ? RegState::Kill : 0)
9414 : .addImm(0)
9415 8 : .add(predOps(ARMCC::AL))
9416 8 : .add(condCodeOp());
9417 :
9418 : // insert PHI in SinkBB,
9419 : // reuse ABSDstReg to not change uses of ABS instruction
9420 8 : BuildMI(*SinkBB, SinkBB->begin(), dl,
9421 16 : TII->get(ARM::PHI), ABSDstReg)
9422 8 : .addReg(NewRsbDstReg).addMBB(RSBBB)
9423 8 : .addReg(ABSSrcReg).addMBB(BB);
9424 :
9425 : // remove ABS instruction
9426 8 : MI.eraseFromParent();
9427 :
9428 : // return last added BB
9429 : return SinkBB;
9430 : }
9431 : case ARM::COPY_STRUCT_BYVAL_I32:
9432 : ++NumLoopByVals;
9433 360 : return EmitStructByval(MI, BB);
9434 7 : case ARM::WIN__CHKSTK:
9435 7 : return EmitLowered__chkstk(MI, BB);
9436 58 : case ARM::WIN__DBZCHK:
9437 58 : return EmitLowered__dbzchk(MI, BB);
9438 : }
9439 : }
9440 :
9441 : /// Attaches vregs to MEMCPY that it will use as scratch registers
9442 : /// when it is expanded into LDM/STM. This is done as a post-isel lowering
9443 : /// instead of as a custom inserter because we need the use list from the SDNode.
9444 56 : static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget,
9445 : MachineInstr &MI, const SDNode *Node) {
9446 56 : bool isThumb1 = Subtarget->isThumb1Only();
9447 :
9448 : DebugLoc DL = MI.getDebugLoc();
9449 56 : MachineFunction *MF = MI.getParent()->getParent();
9450 56 : MachineRegisterInfo &MRI = MF->getRegInfo();
9451 : MachineInstrBuilder MIB(*MF, MI);
9452 :
9453 :   // If the new dst/src is unused, mark it as dead.
9454 56 : if (!Node->hasAnyUseOfValue(0)) {
9455 26 : MI.getOperand(0).setIsDead(true);
9456 : }
9457 56 : if (!Node->hasAnyUseOfValue(1)) {
9458 26 : MI.getOperand(1).setIsDead(true);
9459 : }
9460 :
9461 : // The MEMCPY both defines and kills the scratch registers.
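     :   // Each scratch register is added as an immediately-dead def operand; the
     :   // LDM/STM expansion only needs them as a transient register list.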
9462 285 : for (unsigned I = 0; I != MI.getOperand(4).getImm(); ++I) {
9463 362 : unsigned TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass
9464 : : &ARM::GPRRegClass);
9465 229 : MIB.addReg(TmpReg, RegState::Define|RegState::Dead);
9466 : }
9467 56 : }
9468 :
9469 1105 : void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
9470 : SDNode *Node) const {
9471 2210 : if (MI.getOpcode() == ARM::MEMCPY) {
9472 56 : attachMEMCPYScratchRegs(Subtarget, MI, Node);
9473 56 : return;
9474 : }
9475 :
9476 : const MCInstrDesc *MCID = &MI.getDesc();
9477 : // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
9478 : // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
9479 : // operand is still set to noreg. If needed, set the optional operand's
9480 : // register to CPSR, and remove the redundant implicit def.
9481 : //
9482 : // e.g. ADCS (..., implicit-def CPSR) -> ADC (... opt:def CPSR).
9483 :
9484 : // Rename pseudo opcodes.
9485 1049 : unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode());
9486 : unsigned ccOutIdx;
9487 1049 : if (NewOpc) {
9488 653 : const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
9489 653 : MCID = &TII->get(NewOpc);
9490 :
9491 : assert(MCID->getNumOperands() ==
9492 : MI.getDesc().getNumOperands() + 5 - MI.getDesc().getSize()
9493 : && "converted opcode should be the same except for cc_out"
9494 : " (and, on Thumb1, pred)");
9495 :
9496 : MI.setDesc(*MCID);
9497 :
9498 : // Add the optional cc_out operand
9499 653 : MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
9500 :
9501 : // On Thumb1, move all input operands to the end, then add the predicate
9502 653 : if (Subtarget->isThumb1Only()) {
9503 1180 : for (unsigned c = MCID->getNumOperands() - 4; c--;) {
9504 1180 : MI.addOperand(MI.getOperand(1));
9505 590 : MI.RemoveOperand(1);
9506 : }
9507 :
9508 : // Restore the ties
9509 1921 : for (unsigned i = MI.getNumOperands(); i--;) {
9510 1626 : const MachineOperand& op = MI.getOperand(i);
9511 1626 : if (op.isReg() && op.isUse()) {
9512 707 : int DefIdx = MCID->getOperandConstraint(i, MCOI::TIED_TO);
9513 : if (DefIdx != -1)
9514 163 : MI.tieOperands(DefIdx, i);
9515 : }
9516 : }
9517 :
9518 295 : MI.addOperand(MachineOperand::CreateImm(ARMCC::AL));
9519 295 : MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/false));
9520 : ccOutIdx = 1;
9521 : } else
9522 716 : ccOutIdx = MCID->getNumOperands() - 1;
9523 : } else
9524 792 : ccOutIdx = MCID->getNumOperands() - 1;
9525 :
9526 : // Any ARM instruction that sets the 's' bit should specify an optional
9527 : // "cc_out" operand in the last operand position.
9528 1049 : if (!MI.hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
9529 : assert(!NewOpc && "Optional cc_out operand required");
9530 : return;
9531 : }
9532 : // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it
9533 : // since we already have an optional CPSR def.
9534 : bool definesCPSR = false;
9535 : bool deadCPSR = false;
9536 2098 : for (unsigned i = MCID->getNumOperands(), e = MI.getNumOperands(); i != e;
9537 : ++i) {
9538 1049 : const MachineOperand &MO = MI.getOperand(i);
9539 1049 : if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
9540 : definesCPSR = true;
9541 1049 : if (MO.isDead())
9542 : deadCPSR = true;
9543 1049 : MI.RemoveOperand(i);
9544 1049 : break;
9545 : }
9546 : }
9547 1049 : if (!definesCPSR) {
9548 : assert(!NewOpc && "Optional cc_out operand required");
9549 : return;
9550 : }
9551 : assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
9552 1049 : if (deadCPSR) {
9553 : assert(!MI.getOperand(ccOutIdx).getReg() &&
9554 : "expect uninitialized optional cc_out operand");
9555 : // Thumb1 instructions must have the S bit even if the CPSR is dead.
9556 418 : if (!Subtarget->isThumb1Only())
9557 : return;
9558 : }
9559 :
9560 : // If this instruction was defined with an optional CPSR def and its dag node
9561 : // had a live implicit CPSR def, then activate the optional CPSR def.
9562 769 : MachineOperand &MO = MI.getOperand(ccOutIdx);
9563 769 : MO.setReg(ARM::CPSR);
9564 769 : MO.setIsDef(true);
9565 : }
9566 :
9567 : //===----------------------------------------------------------------------===//
9568 : // ARM Optimization Hooks
9569 : //===----------------------------------------------------------------------===//
9570 :
9571 : // Helper function that checks if N is a null or all ones constant.
9572 : static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
9573 245 : return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
9574 : }
9575 :
9576 : // Return true if N is conditionally 0 or all ones.
9577 : // Detects these expressions where cc is an i1 value:
9578 : //
9579 : // (select cc 0, y) [AllOnes=0]
9580 : // (select cc y, 0) [AllOnes=0]
9581 : // (zext cc) [AllOnes=0]
9582 : // (sext cc) [AllOnes=0/1]
9583 : // (select cc -1, y) [AllOnes=1]
9584 : // (select cc y, -1) [AllOnes=1]
9585 : //
9586 : // Invert is set when N is the null/all ones constant in the CC == false case.
9587 : // OtherOp is set to the alternative value of N.
9588 34014 : static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
9589 : SDValue &CC, bool &Invert,
9590 : SDValue &OtherOp,
9591 : SelectionDAG &DAG) {
9592 68028 : switch (N->getOpcode()) {
9593 : default: return false;
9594 130 : case ISD::SELECT: {
9595 130 : CC = N->getOperand(0);
9596 130 : SDValue N1 = N->getOperand(1);
9597 130 : SDValue N2 = N->getOperand(2);
9598 130 : if (isZeroOrAllOnes(N1, AllOnes)) {
9599 15 : Invert = false;
9600 15 : OtherOp = N2;
9601 15 : return true;
9602 : }
9603 115 : if (isZeroOrAllOnes(N2, AllOnes)) {
9604 24 : Invert = true;
9605 24 : OtherOp = N1;
9606 24 : return true;
9607 : }
9608 : return false;
9609 : }
9610 287 : case ISD::ZERO_EXTEND:
9611 : // (zext cc) can never be the all ones value.
9612 287 : if (AllOnes)
9613 : return false;
9614 : LLVM_FALLTHROUGH;
9615 : case ISD::SIGN_EXTEND: {
9616 : SDLoc dl(N);
9617 468 : EVT VT = N->getValueType(0);
9618 468 : CC = N->getOperand(0);
9619 468 : if (CC.getValueType() != MVT::i1 || CC.getOpcode() != ISD::SETCC)
9620 : return false;
9621 13 : Invert = !AllOnes;
9622 13 : if (AllOnes)
9623 : // When looking for an AllOnes constant, N is an sext, and the 'other'
9624 : // value is 0.
9625 0 : OtherOp = DAG.getConstant(0, dl, VT);
9626 13 : else if (N->getOpcode() == ISD::ZERO_EXTEND)
9627 : // When looking for a 0 constant, N can be zext or sext.
9628 13 : OtherOp = DAG.getConstant(1, dl, VT);
9629 : else
9630 0 : OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl,
9631 0 : VT);
9632 : return true;
9633 : }
9634 : }
9635 : }
9636 :
9637 : // Combine a constant select operand into its use:
9638 : //
9639 : // (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
9640 : // (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
9641 : // (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1]
9642 : // (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
9643 : // (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
9644 : //
9645 : // The transform is rejected if the select doesn't have a constant operand that
9646 : // is null, or all ones when AllOnes is set.
9647 : //
9648 : // Also recognize sext/zext from i1:
9649 : //
9650 : // (add (zext cc), x) -> (select cc (add x, 1), x)
9651 : // (add (sext cc), x) -> (select cc (add x, -1), x)
9652 : //
9653 : // These transformations eventually create predicated instructions.
9654 : //
9655 : // @param N The node to transform.
9656 : // @param Slct The N operand that is a select.
9657 : // @param OtherOp The other N operand (x above).
9658 : // @param DCI Context.
9659 : // @param AllOnes Require the select constant to be all ones instead of null.
9660 : // @returns The new node, or SDValue() on failure.
9661 : static
9662 34014 : SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
9663 : TargetLowering::DAGCombinerInfo &DCI,
9664 : bool AllOnes = false) {
9665 34014 : SelectionDAG &DAG = DCI.DAG;
9666 34014 : EVT VT = N->getValueType(0);
9667 34014 : SDValue NonConstantVal;
9668 34014 : SDValue CCOp;
9669 : bool SwapSelectOps;
9670 34014 : if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
9671 : NonConstantVal, DAG))
9672 33962 : return SDValue();
9673 :
9674 :   // Slct is now known to be the desired identity constant when CC is true.
9675 52 : SDValue TrueVal = OtherOp;
9676 52 : SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
9677 104 : OtherOp, NonConstantVal);
9678 : // Unless SwapSelectOps says CC should be false.
9679 52 : if (SwapSelectOps)
9680 : std::swap(TrueVal, FalseVal);
9681 :
9682 52 : return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
9683 104 : CCOp, TrueVal, FalseVal);
9684 : }
9685 :
9686 : // Attempt combineSelectAndUse on each operand of a commutative operator N.
9687 : static
9688 7518 : SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,
9689 : TargetLowering::DAGCombinerInfo &DCI) {
9690 7518 : SDValue N0 = N->getOperand(0);
9691 7518 : SDValue N1 = N->getOperand(1);
9692 : if (N0.getNode()->hasOneUse())
9693 5684 : if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes))
9694 12 : return Result;
9695 : if (N1.getNode()->hasOneUse())
9696 5654 : if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes))
9697 2 : return Result;
9698 7504 : return SDValue();
9699 : }
9700 :
9701 : static bool IsVUZPShuffleNode(SDNode *N) {
9702 : // VUZP shuffle node.
9703 0 : if (N->getOpcode() == ARMISD::VUZP)
9704 : return true;
9705 :
9706 : // "VUZP" on i32 is an alias for VTRN.
9707 0 : if (N->getOpcode() == ARMISD::VTRN && N->getValueType(0) == MVT::v2i32)
9708 : return true;
9709 :
9710 : return false;
9711 : }
9712 :
9713 0 : static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1,
9714 : TargetLowering::DAGCombinerInfo &DCI,
9715 : const ARMSubtarget *Subtarget) {
9716 : // Look for ADD(VUZP.0, VUZP.1).
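     :   // (vpadd sums adjacent element pairs across its two D-register inputs, which
     :   // equals the lane-wise add of the even (VUZP.0) and odd (VUZP.1) results of
     :   // unzipping those same inputs.)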
9717 0 : if (!IsVUZPShuffleNode(N0.getNode()) || N0.getNode() != N1.getNode() ||
9718 : N0 == N1)
9719 0 : return SDValue();
9720 :
9721 : // Make sure the ADD is a 64-bit add; there is no 128-bit VPADD.
9722 0 : if (!N->getValueType(0).is64BitVector())
9723 0 : return SDValue();
9724 :
9725 : // Generate vpadd.
9726 0 : SelectionDAG &DAG = DCI.DAG;
9727 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9728 : SDLoc dl(N);
9729 : SDNode *Unzip = N0.getNode();
9730 0 : EVT VT = N->getValueType(0);
9731 :
9732 : SmallVector<SDValue, 8> Ops;
9733 0 : Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpadd, dl,
9734 0 : TLI.getPointerTy(DAG.getDataLayout())));
9735 0 : Ops.push_back(Unzip->getOperand(0));
9736 0 : Ops.push_back(Unzip->getOperand(1));
9737 :
9738 0 : return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
9739 : }
9740 :
9741 0 : static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1,
9742 : TargetLowering::DAGCombinerInfo &DCI,
9743 : const ARMSubtarget *Subtarget) {
9744 : // Check for two extended operands.
9745 0 : if (!(N0.getOpcode() == ISD::SIGN_EXTEND &&
9746 0 : N1.getOpcode() == ISD::SIGN_EXTEND) &&
9747 0 : !(N0.getOpcode() == ISD::ZERO_EXTEND &&
9748 : N1.getOpcode() == ISD::ZERO_EXTEND))
9749 0 : return SDValue();
9750 :
9751 0 : SDValue N00 = N0.getOperand(0);
9752 0 : SDValue N10 = N1.getOperand(0);
9753 :
9754 : // Look for ADD(SEXT(VUZP.0), SEXT(VUZP.1))
9755 0 : if (!IsVUZPShuffleNode(N00.getNode()) || N00.getNode() != N10.getNode() ||
9756 : N00 == N10)
9757 0 : return SDValue();
9758 :
9759 : // We only recognize Q register paddl here; this can't be reached until
9760 : // after type legalization.
9761 0 : if (!N00.getValueType().is64BitVector() ||
9762 0 : !N0.getValueType().is128BitVector())
9763 0 : return SDValue();
9764 :
9765 : // Generate vpaddl.
9766 0 : SelectionDAG &DAG = DCI.DAG;
9767 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9768 : SDLoc dl(N);
9769 0 : EVT VT = N->getValueType(0);
9770 :
9771 : SmallVector<SDValue, 8> Ops;
9772 : // Form vpaddl.sN or vpaddl.uN depending on the kind of extension.
9773 : unsigned Opcode;
9774 0 : if (N0.getOpcode() == ISD::SIGN_EXTEND)
9775 : Opcode = Intrinsic::arm_neon_vpaddls;
9776 : else
9777 : Opcode = Intrinsic::arm_neon_vpaddlu;
9778 0 : Ops.push_back(DAG.getConstant(Opcode, dl,
9779 0 : TLI.getPointerTy(DAG.getDataLayout())));
9780 0 : EVT ElemTy = N00.getValueType().getVectorElementType();
9781 : unsigned NumElts = VT.getVectorNumElements();
9782 0 : EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), ElemTy, NumElts * 2);
9783 0 : SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), ConcatVT,
9784 0 : N00.getOperand(0), N00.getOperand(1));
9785 0 : Ops.push_back(Concat);
9786 :
9787 0 : return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
9788 : }
9789 :
9790 : // FIXME: This function shouldn't be necessary; if we lower BUILD_VECTOR in
9791 : // an appropriate manner, we end up with ADD(VUZP(ZEXT(N))), which is
9792 : // much easier to match.
9793 : static SDValue
9794 0 : AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1,
9795 : TargetLowering::DAGCombinerInfo &DCI,
9796 : const ARMSubtarget *Subtarget) {
9797 :   // Only perform the optimization after legalization and if NEON is available.
9798 :   // We also expect both operands to be BUILD_VECTORs.
9799 0 : if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
9800 0 : || N0.getOpcode() != ISD::BUILD_VECTOR
9801 0 : || N1.getOpcode() != ISD::BUILD_VECTOR)
9802 0 : return SDValue();
9803 :
9804 : // Check output type since VPADDL operand elements can only be 8, 16, or 32.
9805 0 : EVT VT = N->getValueType(0);
9806 0 : if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
9807 0 : return SDValue();
9808 :
9809 : // Check that the vector operands are of the right form.
9810 :   // N0 and N1 are BUILD_VECTOR nodes with N EXTRACT_VECTOR_ELT operands,
9811 :   // where N is the number of elements in the formed vector.
9812 :   // Each EXTRACT_VECTOR_ELT should use the same input vector and an odd or
9813 :   // even index such that we have a pairwise add pattern.
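     :   // For example, with a v4i16 Vec: N0 = (Vec[0], Vec[2]) and
     :   // N1 = (Vec[1], Vec[3]); adding them lane-wise is a pairwise add of Vec,
     :   // i.e. a vpaddl.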
9814 :
9815 : // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
9816 0 : if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
9817 0 : return SDValue();
9818 0 : SDValue Vec = N0->getOperand(0)->getOperand(0);
9819 0 : SDNode *V = Vec.getNode();
9820 : unsigned nextIndex = 0;
9821 :
9822 :   // For each operand of the ADD that is a BUILD_VECTOR, check that each of
9823 :   // its operands is an EXTRACT_VECTOR_ELT of the same vector with the
9824 :   // appropriate index.
9825 0 : for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
9826 0 : if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
9827 0 : && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
9828 :
9829 : SDValue ExtVec0 = N0->getOperand(i);
9830 : SDValue ExtVec1 = N1->getOperand(i);
9831 :
9832 :       // First operand is the vector; verify it's the same.
9833 0 : if (V != ExtVec0->getOperand(0).getNode() ||
9834 0 : V != ExtVec1->getOperand(0).getNode())
9835 0 : return SDValue();
9836 :
9837 :       // Second is the constant; verify it's correct.
9838 : ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
9839 : ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
9840 :
9841 :       // For the constants, N0 should use the even indices and N1 the odd ones.
9842 0 : if (!C0 || !C1 || C0->getZExtValue() != nextIndex
9843 0 : || C1->getZExtValue() != nextIndex+1)
9844 0 : return SDValue();
9845 :
9846 : // Increment index.
9847 0 : nextIndex+=2;
9848 : } else
9849 0 : return SDValue();
9850 : }
9851 :
9852 : // Don't generate vpaddl+vmovn; we'll match it to vpadd later. Also make sure
9853 : // we're using the entire input vector, otherwise there's a size/legality
9854 : // mismatch somewhere.
9855 0 : if (nextIndex != Vec.getValueType().getVectorNumElements() ||
9856 0 : Vec.getValueType().getVectorElementType() == VT.getVectorElementType())
9857 0 : return SDValue();
9858 :
9859 : // Create VPADDL node.
9860 0 : SelectionDAG &DAG = DCI.DAG;
9861 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9862 :
9863 : SDLoc dl(N);
9864 :
9865 : // Build operand list.
9866 : SmallVector<SDValue, 8> Ops;
9867 0 : Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl,
9868 0 : TLI.getPointerTy(DAG.getDataLayout())));
9869 :
9870 : // Input is the vector.
9871 0 : Ops.push_back(Vec);
9872 :
9873 : // Get widened type and narrowed type.
9874 : MVT widenType;
9875 : unsigned numElem = VT.getVectorNumElements();
9876 :
9877 0 : EVT inputLaneType = Vec.getValueType().getVectorElementType();
9878 0 : switch (inputLaneType.getSimpleVT().SimpleTy) {
9879 0 : case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
9880 0 : case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
9881 0 : case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
9882 0 : default:
9883 0 : llvm_unreachable("Invalid vector element type for padd optimization.");
9884 : }
9885 :
9886 0 : SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, widenType, Ops);
9887 0 : unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE;
9888 0 : return DAG.getNode(ExtOp, dl, VT, tmp);
9889 : }
9890 :
9891 : static SDValue findMUL_LOHI(SDValue V) {
9892 426 : if (V->getOpcode() == ISD::UMUL_LOHI ||
9893 : V->getOpcode() == ISD::SMUL_LOHI)
9894 : return V;
9895 : return SDValue();
9896 : }
9897 :
9898 0 : static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode,
9899 : TargetLowering::DAGCombinerInfo &DCI,
9900 : const ARMSubtarget *Subtarget) {
9901 0 : if (Subtarget->isThumb()) {
9902 0 : if (!Subtarget->hasDSP())
9903 0 : return SDValue();
9904 0 : } else if (!Subtarget->hasV5TEOps())
9905 0 : return SDValue();
9906 :
9907 : // SMLALBB, SMLALBT, SMLALTB, SMLALTT multiply two 16-bit values and
9908 :   // SMLALBB, SMLALBT, SMLALTB, SMLALTT multiply two 16-bit values and
9909 :   // accumulate the product into a 64-bit value. The 16-bit values will
9910 :   // be sign-extended somehow or SRA'd into 32-bit values:
9911 0 : SDValue Mul = AddcNode->getOperand(0);
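     :   // (SMLALBB, for instance, multiplies the bottom 16 bits of each source
     :   // register and accumulates the signed product into RdHi:RdLo; the B/T
     :   // suffixes select the bottom or top halfword of each operand.)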
9912 0 : SDValue Lo = AddcNode->getOperand(1);
9913 0 : if (Mul.getOpcode() != ISD::MUL) {
9914 0 : Lo = AddcNode->getOperand(0);
9915 0 : Mul = AddcNode->getOperand(1);
9916 0 : if (Mul.getOpcode() != ISD::MUL)
9917 0 : return SDValue();
9918 : }
9919 :
9920 0 : SDValue SRA = AddeNode->getOperand(0);
9921 0 : SDValue Hi = AddeNode->getOperand(1);
9922 0 : if (SRA.getOpcode() != ISD::SRA) {
9923 0 : SRA = AddeNode->getOperand(1);
9924 0 : Hi = AddeNode->getOperand(0);
9925 0 : if (SRA.getOpcode() != ISD::SRA)
9926 0 : return SDValue();
9927 : }
9928 : if (auto Const = dyn_cast<ConstantSDNode>(SRA.getOperand(1))) {
9929 0 : if (Const->getZExtValue() != 31)
9930 0 : return SDValue();
9931 : } else
9932 0 : return SDValue();
9933 :
9934 : if (SRA.getOperand(0) != Mul)
9935 0 : return SDValue();
9936 :
9937 0 : SelectionDAG &DAG = DCI.DAG;
9938 : SDLoc dl(AddcNode);
9939 : unsigned Opcode = 0;
9940 0 : SDValue Op0;
9941 0 : SDValue Op1;
9942 :
9943 0 : if (isS16(Mul.getOperand(0), DAG) && isS16(Mul.getOperand(1), DAG)) {
9944 : Opcode = ARMISD::SMLALBB;
9945 0 : Op0 = Mul.getOperand(0);
9946 0 : Op1 = Mul.getOperand(1);
9947 0 : } else if (isS16(Mul.getOperand(0), DAG) && isSRA16(Mul.getOperand(1))) {
9948 : Opcode = ARMISD::SMLALBT;
9949 0 : Op0 = Mul.getOperand(0);
9950 0 : Op1 = Mul.getOperand(1).getOperand(0);
9951 0 : } else if (isSRA16(Mul.getOperand(0)) && isS16(Mul.getOperand(1), DAG)) {
9952 : Opcode = ARMISD::SMLALTB;
9953 0 : Op0 = Mul.getOperand(0).getOperand(0);
9954 0 : Op1 = Mul.getOperand(1);
9955 0 : } else if (isSRA16(Mul.getOperand(0)) && isSRA16(Mul.getOperand(1))) {
9956 : Opcode = ARMISD::SMLALTT;
9957 0 : Op0 = Mul->getOperand(0).getOperand(0);
9958 0 : Op1 = Mul->getOperand(1).getOperand(0);
9959 : }
9960 :
9961 0 : if (!Op0 || !Op1)
9962 0 : return SDValue();
9963 :
9964 : SDValue SMLAL = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
9965 0 : Op0, Op1, Lo, Hi);
9966 :   // Replace the uses of the ADD nodes with the SMLAL node's values.
9967 : SDValue HiMLALResult(SMLAL.getNode(), 1);
9968 : SDValue LoMLALResult(SMLAL.getNode(), 0);
9969 :
9970 0 : DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
9971 0 : DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
9972 :
9973 : // Return original node to notify the driver to stop replacing.
9974 : SDValue resNode(AddcNode, 0);
9975 0 : return resNode;
9976 : }
9977 :
9978 1098 : static SDValue AddCombineTo64bitMLAL(SDNode *AddeSubeNode,
9979 : TargetLowering::DAGCombinerInfo &DCI,
9980 : const ARMSubtarget *Subtarget) {
9981 : // Look for multiply add opportunities.
9982 :   // The pattern is an ISD::UMUL_LOHI followed by two add nodes, where
9983 :   // each add node consumes a value from ISD::UMUL_LOHI and there is
9984 :   // a glue link from the first add to the second add.
9985 :   // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE
9986 :   // with a single S/UMLAL instruction.
9987 : // UMUL_LOHI
9988 : // / :lo \ :hi
9989 : // V \ [no multiline comment]
9990 : // loAdd -> ADDC |
9991 : // \ :carry /
9992 : // V V
9993 : // ADDE <- hiAdd
9994 : //
9995 : // In the special case where only the higher part of a signed result is used
9996 : // and the add to the low part of the result of ISD::UMUL_LOHI adds or subtracts
9997 : // a constant with the exact value of 0x80000000, we recognize we are dealing
9998 : // with a "rounded multiply and add" (or subtract) and transform it into
9999 :   // either an ARMISD::SMMLAR or an ARMISD::SMMLSR, respectively.
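     :   // (For reference, SMLAL/UMLAL compute RdHi:RdLo = Rn * Rm + RdHi:RdLo using
     :   // a signed or unsigned 32x32->64-bit multiply, respectively.)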
10000 :
10001 : assert((AddeSubeNode->getOpcode() == ARMISD::ADDE ||
10002 : AddeSubeNode->getOpcode() == ARMISD::SUBE) &&
10003 : "Expect an ADDE or SUBE");
10004 :
10005 : assert(AddeSubeNode->getNumOperands() == 3 &&
10006 : AddeSubeNode->getOperand(2).getValueType() == MVT::i32 &&
10007 : "ADDE node has the wrong inputs");
10008 :
10009 : // Check that we are chained to the right ADDC or SUBC node.
10010 1098 : SDNode *AddcSubcNode = AddeSubeNode->getOperand(2).getNode();
10011 1098 : if ((AddeSubeNode->getOpcode() == ARMISD::ADDE &&
10012 1098 : AddcSubcNode->getOpcode() != ARMISD::ADDC) ||
10013 40 : (AddeSubeNode->getOpcode() == ARMISD::SUBE &&
10014 40 : AddcSubcNode->getOpcode() != ARMISD::SUBC))
10015 315 : return SDValue();
10016 :
10017 783 : SDValue AddcSubcOp0 = AddcSubcNode->getOperand(0);
10018 783 : SDValue AddcSubcOp1 = AddcSubcNode->getOperand(1);
10019 :
10020 : // Check if the two operands are from the same mul_lohi node.
10021 783 : if (AddcSubcOp0.getNode() == AddcSubcOp1.getNode())
10022 116 : return SDValue();
10023 :
10024 : assert(AddcSubcNode->getNumValues() == 2 &&
10025 : AddcSubcNode->getValueType(0) == MVT::i32 &&
10026 : "Expect ADDC with two result values. First: i32");
10027 :
10028 : // Check that the ADDC adds the low result of the S/UMUL_LOHI. If not, it
10029 :   // may be an SMLAL, which multiplies two 16-bit values.
10030 627 : if (AddeSubeNode->getOpcode() == ARMISD::ADDE &&
10031 627 : AddcSubcOp0->getOpcode() != ISD::UMUL_LOHI &&
10032 418 : AddcSubcOp0->getOpcode() != ISD::SMUL_LOHI &&
10033 1085 : AddcSubcOp1->getOpcode() != ISD::UMUL_LOHI &&
10034 : AddcSubcOp1->getOpcode() != ISD::SMUL_LOHI)
10035 398 : return AddCombineTo64BitSMLAL16(AddcSubcNode, AddeSubeNode, DCI, Subtarget);
10036 :
10037 : // Check for the triangle shape.
10038 269 : SDValue AddeSubeOp0 = AddeSubeNode->getOperand(0);
10039 269 : SDValue AddeSubeOp1 = AddeSubeNode->getOperand(1);
10040 :
10041 : // Make sure that the ADDE/SUBE operands are not coming from the same node.
10042 269 : if (AddeSubeOp0.getNode() == AddeSubeOp1.getNode())
10043 119 : return SDValue();
10044 :
10045 : // Find the MUL_LOHI node walking up ADDE/SUBE's operands.
10046 : bool IsLeftOperandMUL = false;
10047 : SDValue MULOp = findMUL_LOHI(AddeSubeOp0);
10048 : if (MULOp == SDValue())
10049 : MULOp = findMUL_LOHI(AddeSubeOp1);
10050 : else
10051 : IsLeftOperandMUL = true;
10052 : if (MULOp == SDValue())
10053 8 : return SDValue();
10054 :
10055 : // Figure out the right opcode.
10056 142 : unsigned Opc = MULOp->getOpcode();
10057 142 : unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
10058 :
10059 : // Figure out the high and low input values to the MLAL node.
10060 : SDValue *HiAddSub = nullptr;
10061 : SDValue *LoMul = nullptr;
10062 : SDValue *LowAddSub = nullptr;
10063 :
10064 : // Ensure that ADDE/SUBE is from high result of ISD::xMUL_LOHI.
10065 : if ((AddeSubeOp0 != MULOp.getValue(1)) && (AddeSubeOp1 != MULOp.getValue(1)))
10066 10 : return SDValue();
10067 :
10068 132 : if (IsLeftOperandMUL)
10069 : HiAddSub = &AddeSubeOp1;
10070 : else
10071 : HiAddSub = &AddeSubeOp0;
10072 :
10073 :   // Ensure that LoMul and LowAddSub are taken from the correct ISD::SMUL_LOHI
10074 : // whose low result is fed to the ADDC/SUBC we are checking.
10075 :
10076 : if (AddcSubcOp0 == MULOp.getValue(0)) {
10077 : LoMul = &AddcSubcOp0;
10078 : LowAddSub = &AddcSubcOp1;
10079 : }
10080 : if (AddcSubcOp1 == MULOp.getValue(0)) {
10081 : LoMul = &AddcSubcOp1;
10082 : LowAddSub = &AddcSubcOp0;
10083 : }
10084 :
10085 132 : if (!LoMul)
10086 20 : return SDValue();
10087 :
10088 :   // If HiAddSub is the same node as ADDC/SUBC or is a predecessor of
10089 :   // ADDC/SUBC, the replacement below will create a cycle.
10090 223 : if (AddcSubcNode == HiAddSub->getNode() ||
10091 : AddcSubcNode->isPredecessorOf(HiAddSub->getNode()))
10092 2 : return SDValue();
10093 :
10094 : // Create the merged node.
10095 110 : SelectionDAG &DAG = DCI.DAG;
10096 :
10097 : // Start building operand list.
10098 : SmallVector<SDValue, 8> Ops;
10099 220 : Ops.push_back(LoMul->getOperand(0));
10100 220 : Ops.push_back(LoMul->getOperand(1));
10101 :
10102 : // Check whether we can use SMMLAR, SMMLSR or SMMULR instead. For this to be
10103 : // the case, we must be doing signed multiplication and only use the higher
10104 :   // part of the result of the MLAL; furthermore, the LowAddSub must be a
10105 :   // constant addend or subtrahend with the exact value 0x80000000.
10106 87 : if (Subtarget->hasV6Ops() && Subtarget->hasDSP() && Subtarget->useMulOps() &&
10107 44 : FinalOpc == ARMISD::SMLAL && !AddeSubeNode->hasAnyUseOfValue(1) &&
10108 168 : LowAddSub->getNode()->getOpcode() == ISD::Constant &&
10109 19 : static_cast<ConstantSDNode *>(LowAddSub->getNode())->getZExtValue() ==
10110 : 0x80000000) {
10111 12 : Ops.push_back(*HiAddSub);
10112 12 : if (AddcSubcNode->getOpcode() == ARMISD::SUBC) {
10113 : FinalOpc = ARMISD::SMMLSR;
10114 : } else {
10115 : FinalOpc = ARMISD::SMMLAR;
10116 : }
10117 24 : SDValue NewNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode), MVT::i32, Ops);
10118 12 : DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), NewNode);
10119 :
10120 12 : return SDValue(AddeSubeNode, 0);
10121 98 : } else if (AddcSubcNode->getOpcode() == ARMISD::SUBC)
10122 : // SMMLS is generated during instruction selection and the rest of this
10123 :     // function cannot handle the case where AddcSubcNode is a SUBC.
10124 18 : return SDValue();
10125 :
10126 : // Finish building the operand list for {U/S}MLAL
10127 80 : Ops.push_back(*LowAddSub);
10128 80 : Ops.push_back(*HiAddSub);
10129 :
10130 80 : SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode),
10131 160 : DAG.getVTList(MVT::i32, MVT::i32), Ops);
10132 :
10133 :   // Replace the uses of the ADD nodes with the MLAL node's values.
10134 : SDValue HiMLALResult(MLALNode.getNode(), 1);
10135 80 : DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), HiMLALResult);
10136 :
10137 : SDValue LoMLALResult(MLALNode.getNode(), 0);
10138 80 : DAG.ReplaceAllUsesOfValueWith(SDValue(AddcSubcNode, 0), LoMLALResult);
10139 :
10140 : // Return original node to notify the driver to stop replacing.
10141 80 : return SDValue(AddeSubeNode, 0);
10142 : }
10143 :
10144 1422 : static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode,
10145 : TargetLowering::DAGCombinerInfo &DCI,
10146 : const ARMSubtarget *Subtarget) {
10147 : // UMAAL is similar to UMLAL except that it adds two unsigned values.
10148 : // While trying to combine for the other MLAL nodes, first search for the
10149 : // chance to use UMAAL. Check if Addc uses a node which has already
10150 : // been combined into a UMLAL. The other pattern is UMLAL using Addc/Adde
10151 : // as the addend, and it's handled in PerformUMLALCombine.
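     :   // (For reference, UMAAL computes RdHi:RdLo = Rn * Rm + RdHi + RdLo, adding
     :   // two independent 32-bit values rather than one 64-bit accumulator.)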
10152 :
10153 1422 : if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
10154 616 : return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
10155 :
10156 : // Check that we have a glued ADDC node.
10157 806 : SDNode* AddcNode = AddeNode->getOperand(2).getNode();
10158 806 : if (AddcNode->getOpcode() != ARMISD::ADDC)
10159 358 : return SDValue();
10160 :
10161 : // Find the converted UMAAL or quit if it doesn't exist.
10162 : SDNode *UmlalNode = nullptr;
10163 : SDValue AddHi;
10164 896 : if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) {
10165 : UmlalNode = AddcNode->getOperand(0).getNode();
10166 6 : AddHi = AddcNode->getOperand(1);
10167 884 : } else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) {
10168 : UmlalNode = AddcNode->getOperand(1).getNode();
10169 0 : AddHi = AddcNode->getOperand(0);
10170 : } else {
10171 442 : return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
10172 : }
10173 :
10174 :   // The ADDC should be glued to an ADDE node, which uses the same UMLAL as
10175 :   // the ADDC together with a zero constant.
10176 12 : if (!isNullConstant(UmlalNode->getOperand(3)))
10177 0 : return SDValue();
10178 :
10179 12 : if ((isNullConstant(AddeNode->getOperand(0)) &&
10180 6 : AddeNode->getOperand(1).getNode() == UmlalNode) ||
10181 12 : (AddeNode->getOperand(0).getNode() == UmlalNode &&
10182 6 : isNullConstant(AddeNode->getOperand(1)))) {
10183 6 : SelectionDAG &DAG = DCI.DAG;
10184 6 : SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),
10185 6 : UmlalNode->getOperand(2), AddHi };
10186 6 : SDValue UMAAL = DAG.getNode(ARMISD::UMAAL, SDLoc(AddcNode),
10187 12 : DAG.getVTList(MVT::i32, MVT::i32), Ops);
10188 :
10189 :     // Replace the uses of the ADD nodes with the UMAAL node's values.
10190 6 : DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), SDValue(UMAAL.getNode(), 1));
10191 6 : DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0));
10192 :
10193 : // Return original node to notify the driver to stop replacing.
10194 6 : return SDValue(AddeNode, 0);
10195 : }
10196 0 : return SDValue();
10197 : }
10198 :
10199 44 : static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG,
10200 : const ARMSubtarget *Subtarget) {
10201 44 : if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
10202 16 : return SDValue();
10203 :
10204 : // Check that we have a pair of ADDC and ADDE as operands.
10205 : // Both addends of the ADDE must be zero.
10206 28 : SDNode* AddcNode = N->getOperand(2).getNode();
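     :   // In that shape, operands 2 and 3 carry ADDC(x, y) and its carry, i.e. the
     :   // 64-bit sum x + y, so the UMLAL accumulates a*b + x + y and can be folded
     :   // to UMAAL(a, b, x, y).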
10207 28 : SDNode* AddeNode = N->getOperand(3).getNode();
10208 36 : if ((AddcNode->getOpcode() == ARMISD::ADDC) &&
10209 14 : (AddeNode->getOpcode() == ARMISD::ADDE) &&
10210 18 : isNullConstant(AddeNode->getOperand(0)) &&
10211 40 : isNullConstant(AddeNode->getOperand(1)) &&
10212 6 : (AddeNode->getOperand(2).getNode() == AddcNode))
10213 6 : return DAG.getNode(ARMISD::UMAAL, SDLoc(N),
10214 : DAG.getVTList(MVT::i32, MVT::i32),
10215 6 : {N->getOperand(0), N->getOperand(1),
10216 24 : AddcNode->getOperand(0), AddcNode->getOperand(1)});
10217 : else
10218 22 : return SDValue();
10219 : }
10220 :
10221 2289 : static SDValue PerformAddcSubcCombine(SDNode *N,
10222 : TargetLowering::DAGCombinerInfo &DCI,
10223 : const ARMSubtarget *Subtarget) {
10224 2289 : SelectionDAG &DAG(DCI.DAG);
10225 :
10226 2289 : if (N->getOpcode() == ARMISD::SUBC) {
10227 : // (SUBC (ADDE 0, 0, C), 1) -> C
10228 1436 : SDValue LHS = N->getOperand(0);
10229 1436 : SDValue RHS = N->getOperand(1);
10230 647 : if (LHS->getOpcode() == ARMISD::ADDE &&
10231 1941 : isNullConstant(LHS->getOperand(0)) &&
10232 3377 : isNullConstant(LHS->getOperand(1)) && isOneConstant(RHS)) {
10233 1294 : return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
10234 : }
10235 : }
10236 :
10237 1642 : if (Subtarget->isThumb1Only()) {
10238 404 : SDValue RHS = N->getOperand(1);
10239 : if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
10240 164 : int32_t imm = C->getSExtValue();
10241 164 : if (imm < 0 && imm > std::numeric_limits<int>::min()) {
10242 : SDLoc DL(N);
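     :         // For example, (ADDC x, -4) becomes (SUBC x, 4), avoiding the negative
     :         // immediate.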
10243 9 : RHS = DAG.getConstant(-imm, DL, MVT::i32);
10244 9 : unsigned Opcode = (N->getOpcode() == ARMISD::ADDC) ? ARMISD::SUBC
10245 : : ARMISD::ADDC;
10246 27 : return DAG.getNode(Opcode, DL, N->getVTList(), N->getOperand(0), RHS);
10247 : }
10248 : }
10249 : }
10250 :
10251 1633 : return SDValue();
10252 : }
10253 :
10254 845 : static SDValue PerformAddeSubeCombine(SDNode *N,
10255 : TargetLowering::DAGCombinerInfo &DCI,
10256 : const ARMSubtarget *Subtarget) {
10257 845 : if (Subtarget->isThumb1Only()) {
10258 609 : SelectionDAG &DAG = DCI.DAG;
10259 609 : SDValue RHS = N->getOperand(1);
10260 : if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
10261 291 : int64_t imm = C->getSExtValue();
10262 291 : if (imm < 0) {
10263 : SDLoc DL(N);
10264 :
10265 : // The with-carry-in form matches bitwise not instead of the negation.
10266 : // Effectively, the inverse interpretation of the carry flag already
10267 : // accounts for part of the negation.
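     :         // For example, with imm = -4: ~imm == 3, and
     :         // x - 3 - (1 - carry) == x + (-4) + carry, so SUBE with 3 matches ADDE
     :         // with -4.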
10268 9 : RHS = DAG.getConstant(~imm, DL, MVT::i32);
10269 :
10270 9 : unsigned Opcode = (N->getOpcode() == ARMISD::ADDE) ? ARMISD::SUBE
10271 : : ARMISD::ADDE;
10272 : return DAG.getNode(Opcode, DL, N->getVTList(),
10273 27 : N->getOperand(0), RHS, N->getOperand(2));
10274 : }
10275 : }
10276 236 : } else if (N->getOperand(1)->getOpcode() == ISD::SMUL_LOHI) {
10277 40 : return AddCombineTo64bitMLAL(N, DCI, Subtarget);
10278 : }
10279 796 : return SDValue();
10280 : }
10281 :
10282 : /// PerformADDECombine - Target-specific dag combine transform from
10283 : /// ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or
10284 : /// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
10285 1955 : static SDValue PerformADDECombine(SDNode *N,
10286 : TargetLowering::DAGCombinerInfo &DCI,
10287 : const ARMSubtarget *Subtarget) {
10288 : // Only ARM and Thumb2 support UMLAL/SMLAL.
10289 1955 : if (Subtarget->isThumb1Only())
10290 533 : return PerformAddeSubeCombine(N, DCI, Subtarget);
10291 :
10292 : // Only perform the checks after legalize when the pattern is available.
10293 1422 : if (DCI.isBeforeLegalize()) return SDValue();
10294 :
10295 1422 : return AddCombineTo64bitUMAAL(N, DCI, Subtarget);
10296 : }
10297 :
10298 : /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
10299 : /// operands N0 and N1. This is a helper for PerformADDCombine that is
10300 : /// called with the default operands, and if that fails, with commuted
10301 : /// operands.
10302 42152 : static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
10303 : TargetLowering::DAGCombinerInfo &DCI,
10304 : const ARMSubtarget *Subtarget){
10305 : // Attempt to create vpadd for this add.
10306 42152 : if (SDValue Result = AddCombineToVPADD(N, N0, N1, DCI, Subtarget))
10307 4 : return Result;
10308 :
10309 : // Attempt to create vpaddl for this add.
10310 42148 : if (SDValue Result = AddCombineVUZPToVPADDL(N, N0, N1, DCI, Subtarget))
10311 6 : return Result;
10312 42142 : if (SDValue Result = AddCombineBUILD_VECTORToVPADDL(N, N0, N1, DCI,
10313 42142 : Subtarget))
10314 2 : return Result;
10315 :
10316 : // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
10317 : if (N0.getNode()->hasOneUse())
10318 21422 : if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI))
10319 31 : return Result;
10320 42109 : return SDValue();
10321 : }
10322 :
10323 : bool
10324 114 : ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
10325 : CombineLevel Level) const {
10326 114 : if (Level == BeforeLegalizeTypes)
10327 : return true;
10328 :
10329 81 : if (Subtarget->isThumb() && Subtarget->isThumb1Only())
10330 : return true;
10331 :
10332 81 : if (N->getOpcode() != ISD::SHL)
10333 7 : return true;
10334 :
10335 : // Turn off commute-with-shift transform after legalization, so it doesn't
10336 : // conflict with PerformSHLSimplify. (We could try to detect when
10337 : // PerformSHLSimplify would trigger more precisely, but it isn't
10338 : // really necessary.)
10339 : return false;
10340 : }
10341 :
10342 29129 : static SDValue PerformSHLSimplify(SDNode *N,
10343 : TargetLowering::DAGCombinerInfo &DCI,
10344 : const ARMSubtarget *ST) {
10345 : // Allow the generic combiner to identify potential bswaps.
10346 29129 : if (DCI.isBeforeLegalize())
10347 11348 : return SDValue();
10348 :
10349 : // DAG combiner will fold:
10350 : // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
10351 :   //      (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
10352 :   // Other code patterns that can also be modified have the following form:
10353 : // b + ((a << 1) | 510)
10354 : // b + ((a << 1) & 510)
10355 : // b + ((a << 1) ^ 510)
10356 : // b + ((a << 1) + 510)
10357 :
10358 :   // Many instructions can perform the shift for free, but it requires both
10359 :   // operands to be registers. If c1 << c2 is too large, a mov immediate
10360 :   // instruction will be needed. So, unfold back to the original pattern if:
10361 :   // - c1 and c2 are small enough that they don't require mov imms, and
10362 :   // - the user(s) of the node can perform the shl themselves.
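     :   // For example, (add (shl x, 1), 510) is unfolded back to
     :   // (shl (add x, 255), 1): 510 >> 1 == 255 fits in a rotated 8-bit immediate,
     :   // and the users checked below can fold the remaining shl as a shifted operand.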
10363 :
10364 : // No shifted operands for 16-bit instructions.
10365 17781 : if (ST->isThumb() && ST->isThumb1Only())
10366 1964 : return SDValue();
10367 :
10368 : // Check that all the users could perform the shl themselves.
10369 17452 : for (auto U : N->uses()) {
10370 31868 : switch(U->getOpcode()) {
10371 13445 : default:
10372 13445 : return SDValue();
10373 2489 : case ISD::SUB:
10374 : case ISD::ADD:
10375 : case ISD::AND:
10376 : case ISD::OR:
10377 : case ISD::XOR:
10378 : case ISD::SETCC:
10379 : case ARMISD::CMP:
10380 : // Check that the user isn't already using a constant because there
10381 : // aren't any instructions that support an immediate operand and a
10382 : // shifted operand.
10383 2489 : if (isa<ConstantSDNode>(U->getOperand(0)) ||
10384 : isa<ConstantSDNode>(U->getOperand(1)))
10385 759 : return SDValue();
10386 :
10387 : // Check that it's not already using a shift.
10388 1730 : if (U->getOperand(0).getOpcode() == ISD::SHL ||
10389 : U->getOperand(1).getOpcode() == ISD::SHL)
10390 95 : return SDValue();
10391 : break;
10392 : }
10393 : }
10394 :
10395 1518 : if (N->getOpcode() != ISD::ADD && N->getOpcode() != ISD::OR &&
10396 2160 : N->getOpcode() != ISD::XOR && N->getOpcode() != ISD::AND)
10397 0 : return SDValue();
10398 :
10399 3036 : if (N->getOperand(0).getOpcode() != ISD::SHL)
10400 1423 : return SDValue();
10401 :
10402 : SDValue SHL = N->getOperand(0);
10403 :
10404 : auto *C1ShlC2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
10405 : auto *C2 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
10406 95 : if (!C1ShlC2 || !C2)
10407 16 : return SDValue();
10408 :
10409 79 : APInt C2Int = C2->getAPIntValue();
10410 79 : APInt C1Int = C1ShlC2->getAPIntValue();
10411 :
10412 : // Check that performing a lshr will not lose any information.
10413 : APInt Mask = APInt::getHighBitsSet(C2Int.getBitWidth(),
10414 158 : C2Int.getBitWidth() - C2->getZExtValue());
10415 79 : if ((C1Int & Mask) != C1Int)
10416 3 : return SDValue();
10417 :
10418 : // Shift the first constant.
10419 76 : C1Int.lshrInPlace(C2Int);
10420 :
10421 : // The immediates are encoded as an 8-bit value that can be rotated.
10422 : auto LargeImm = [](const APInt &Imm) {
10423 : unsigned Zeros = Imm.countLeadingZeros() + Imm.countTrailingZeros();
10424 : return Imm.getBitWidth() - Zeros > 8;
10425 : };
10426 :
10427 76 : if (LargeImm(C1Int) || LargeImm(C2Int))
10428 5 : return SDValue();
10429 :
10430 71 : SelectionDAG &DAG = DCI.DAG;
10431 : SDLoc dl(N);
10432 71 : SDValue X = SHL.getOperand(0);
10433 : SDValue BinOp = DAG.getNode(N->getOpcode(), dl, MVT::i32, X,
10434 71 : DAG.getConstant(C1Int, dl, MVT::i32));
10435 : // Shift left to compensate for the lshr of C1Int.
10436 71 : SDValue Res = DAG.getNode(ISD::SHL, dl, MVT::i32, BinOp, SHL.getOperand(1));
10437 :
10438 : LLVM_DEBUG(dbgs() << "Simplify shl use:\n"; SHL.getOperand(0).dump();
10439 : SHL.dump(); N->dump());
10440 : LLVM_DEBUG(dbgs() << "Into:\n"; X.dump(); BinOp.dump(); Res.dump());
10441 71 : return Res;
10442 : }
10443 :
10444 :
10445 : /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
10446 : ///
10447 21114 : static SDValue PerformADDCombine(SDNode *N,
10448 : TargetLowering::DAGCombinerInfo &DCI,
10449 : const ARMSubtarget *Subtarget) {
10450 21114 : SDValue N0 = N->getOperand(0);
10451 21114 : SDValue N1 = N->getOperand(1);
10452 :
10453 : // Only works one way, because it needs an immediate operand.
10454 21114 : if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
10455 28 : return Result;
10456 :
10457 : // First try with the default operand order.
10458 21086 : if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget))
10459 20 : return Result;
10460 :
10461 : // If that didn't work, try again with the operands commuted.
10462 21066 : return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
10463 : }
10464 :
10465 : /// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
10466 : ///
10467 1528 : static SDValue PerformSUBCombine(SDNode *N,
10468 : TargetLowering::DAGCombinerInfo &DCI) {
10469 1528 : SDValue N0 = N->getOperand(0);
10470 1528 : SDValue N1 = N->getOperand(1);
10471 :
10472 : // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
10473 : if (N1.getNode()->hasOneUse())
10474 1254 : if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI))
10475 7 : return Result;
10476 :
10477 1521 : return SDValue();
10478 : }
10479 :
10480 : /// PerformVMULCombine
10481 : /// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
10482 : /// special multiplier accumulator forwarding.
10483 : /// vmul d3, d0, d2
10484 : /// vmla d3, d1, d2
10485 : /// is faster than
10486 : /// vadd d3, d0, d1
10487 : /// vmul d3, d3, d2
10488 : // However, for (A + B) * (A + B),
10489 : // vadd d2, d0, d1
10490 : // vmul d3, d0, d2
10491 : // vmla d3, d1, d2
10492 : // is slower than
10493 : // vadd d2, d0, d1
10494 : // vmul d3, d2, d2
10495 0 : static SDValue PerformVMULCombine(SDNode *N,
10496 : TargetLowering::DAGCombinerInfo &DCI,
10497 : const ARMSubtarget *Subtarget) {
10498 0 : if (!Subtarget->hasVMLxForwarding())
10499 0 : return SDValue();
10500 :
10501 0 : SelectionDAG &DAG = DCI.DAG;
10502 0 : SDValue N0 = N->getOperand(0);
10503 0 : SDValue N1 = N->getOperand(1);
10504 : unsigned Opcode = N0.getOpcode();
10505 0 : if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
10506 0 : Opcode != ISD::FADD && Opcode != ISD::FSUB) {
10507 : Opcode = N1.getOpcode();
10508 0 : if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
10509 0 : Opcode != ISD::FADD && Opcode != ISD::FSUB)
10510 0 : return SDValue();
10511 : std::swap(N0, N1);
10512 : }
10513 :
10514 : if (N0 == N1)
10515 0 : return SDValue();
10516 :
10517 0 : EVT VT = N->getValueType(0);
10518 : SDLoc DL(N);
10519 0 : SDValue N00 = N0->getOperand(0);
10520 0 : SDValue N01 = N0->getOperand(1);
10521 : return DAG.getNode(Opcode, DL, VT,
10522 : DAG.getNode(ISD::MUL, DL, VT, N00, N1),
10523 0 : DAG.getNode(ISD::MUL, DL, VT, N01, N1));
10524 : }
10525 :
10526 1753 : static SDValue PerformMULCombine(SDNode *N,
10527 : TargetLowering::DAGCombinerInfo &DCI,
10528 : const ARMSubtarget *Subtarget) {
10529 1753 : SelectionDAG &DAG = DCI.DAG;
10530 :
10531 1753 : if (Subtarget->isThumb1Only())
10532 183 : return SDValue();
10533 :
10534 1570 : if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
10535 961 : return SDValue();
10536 :
10537 609 : EVT VT = N->getValueType(0);
10538 609 : if (VT.is64BitVector() || VT.is128BitVector())
10539 108 : return PerformVMULCombine(N, DCI, Subtarget);
10540 : if (VT != MVT::i32)
10541 0 : return SDValue();
10542 :
10543 501 : ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
10544 : if (!C)
10545 338 : return SDValue();
10546 :
10547 163 : int64_t MulAmt = C->getSExtValue();
10548 163 : unsigned ShiftAmt = countTrailingZeros<uint64_t>(MulAmt);
10549 :
10550 163 : ShiftAmt = ShiftAmt & (32 - 1);
10551 163 : SDValue V = N->getOperand(0);
10552 : SDLoc DL(N);
10553 :
10554 163 : SDValue Res;
10555 163 : MulAmt >>= ShiftAmt;
10556 :
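     :   // For example, for (mul x, 10): ShiftAmt == 1, MulAmt becomes 5 == 2^2 + 1,
     :   // so Res = (add x, (shl x, 2)) and the final shl by ShiftAmt restores x * 10.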
10557 163 : if (MulAmt >= 0) {
10558 149 : if (isPowerOf2_32(MulAmt - 1)) {
10559 : // (mul x, 2^N + 1) => (add (shl x, N), x)
10560 53 : Res = DAG.getNode(ISD::ADD, DL, VT,
10561 : V,
10562 : DAG.getNode(ISD::SHL, DL, VT,
10563 : V,
10564 : DAG.getConstant(Log2_32(MulAmt - 1), DL,
10565 53 : MVT::i32)));
10566 96 : } else if (isPowerOf2_32(MulAmt + 1)) {
10567 : // (mul x, 2^N - 1) => (sub (shl x, N), x)
10568 6 : Res = DAG.getNode(ISD::SUB, DL, VT,
10569 : DAG.getNode(ISD::SHL, DL, VT,
10570 : V,
10571 : DAG.getConstant(Log2_32(MulAmt + 1), DL,
10572 : MVT::i32)),
10573 6 : V);
10574 : } else
10575 90 : return SDValue();
10576 : } else {
10577 14 : uint64_t MulAmtAbs = -MulAmt;
10578 14 : if (isPowerOf2_32(MulAmtAbs + 1)) {
10579 : // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
10580 6 : Res = DAG.getNode(ISD::SUB, DL, VT,
10581 : V,
10582 : DAG.getNode(ISD::SHL, DL, VT,
10583 : V,
10584 : DAG.getConstant(Log2_32(MulAmtAbs + 1), DL,
10585 6 : MVT::i32)));
10586 8 : } else if (isPowerOf2_32(MulAmtAbs - 1)) {
10587 : // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
10588 2 : Res = DAG.getNode(ISD::ADD, DL, VT,
10589 : V,
10590 : DAG.getNode(ISD::SHL, DL, VT,
10591 : V,
10592 : DAG.getConstant(Log2_32(MulAmtAbs - 1), DL,
10593 2 : MVT::i32)));
10594 2 : Res = DAG.getNode(ISD::SUB, DL, VT,
10595 2 : DAG.getConstant(0, DL, MVT::i32), Res);
10596 : } else
10597 6 : return SDValue();
10598 : }
10599 :
10600 67 : if (ShiftAmt != 0)
10601 38 : Res = DAG.getNode(ISD::SHL, DL, VT,
10602 38 : Res, DAG.getConstant(ShiftAmt, DL, MVT::i32));
10603 :
10604 : // Do not add new nodes to DAG combiner worklist.
10605 67 : DCI.CombineTo(N, Res, false);
10606 67 : return SDValue();
10607 : }
10608 :
10609 0 : static SDValue CombineANDShift(SDNode *N,
10610 : TargetLowering::DAGCombinerInfo &DCI,
10611 : const ARMSubtarget *Subtarget) {
10612 : // Allow DAGCombine to pattern-match before we touch the canonical form.
10613 0 : if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
10614 0 : return SDValue();
10615 :
10616 0 : if (N->getValueType(0) != MVT::i32)
10617 0 : return SDValue();
10618 :
10619 0 : ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
10620 : if (!N1C)
10621 0 : return SDValue();
10622 :
10623 0 : uint32_t C1 = (uint32_t)N1C->getZExtValue();
10624 : // Don't transform uxtb/uxth.
10625 0 : if (C1 == 255 || C1 == 65535)
10626 0 : return SDValue();
10627 :
10628 0 : SDNode *N0 = N->getOperand(0).getNode();
10629 : if (!N0->hasOneUse())
10630 0 : return SDValue();
10631 :
10632 0 : if (N0->getOpcode() != ISD::SHL && N0->getOpcode() != ISD::SRL)
10633 0 : return SDValue();
10634 :
10635 : bool LeftShift = N0->getOpcode() == ISD::SHL;
10636 :
10637 0 : ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
10638 : if (!N01C)
10639 0 : return SDValue();
10640 :
10641 0 : uint32_t C2 = (uint32_t)N01C->getZExtValue();
10642 0 : if (!C2 || C2 >= 32)
10643 0 : return SDValue();
10644 :
10645 0 : SelectionDAG &DAG = DCI.DAG;
10646 : SDLoc DL(N);
10647 :
10648 : // We have a pattern of the form "(and (shl x, c2) c1)" or
10649 : // "(and (srl x, c2) c1)", where c1 is a shifted mask. Try to
10650 : // transform to a pair of shifts, to save materializing c1.
10651 :
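       : // For illustration (the constants are arbitrary), this rewrites e.g.:
       : //   (and (srl x, 2), 0x7FF)   -> (srl (shl x, 19), 21)
       : //   (and (shl x, 4), 0x0FF0)  -> (srl (shl x, 24), 20)
       : // so the mask constant never has to be materialized in a register.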
10652 : // First pattern: right shift, and c1+1 is a power of two.
10653 : // FIXME: Also check reversed pattern (left shift, and ~c1+1 is a power
10654 : // of two).
10655 : // FIXME: Use demanded bits?
10656 0 : if (!LeftShift && isMask_32(C1)) {
10657 0 : uint32_t C3 = countLeadingZeros(C1);
10658 0 : if (C2 < C3) {
10659 0 : SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
10660 0 : DAG.getConstant(C3 - C2, DL, MVT::i32));
10661 : return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
10662 0 : DAG.getConstant(C3, DL, MVT::i32));
10663 : }
10664 : }
10665 :
10666 : // Second pattern: left shift, and (c1>>c2)+1 is a power of two.
10667 : // FIXME: Also check reversed pattern (right shift, and ~(c1<<c2)+1
10668 : // is a power of two).
10669 : // FIXME: Use demanded bits?
10670 0 : if (LeftShift && isShiftedMask_32(C1)) {
10671 0 : uint32_t C3 = countLeadingZeros(C1);
10672 0 : if (C2 + C3 < 32 && C1 == ((-1U << (C2 + C3)) >> C3)) {
10673 0 : SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
10674 0 : DAG.getConstant(C2 + C3, DL, MVT::i32));
10675 : return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
10676 0 : DAG.getConstant(C3, DL, MVT::i32));
10677 : }
10678 : }
10679 :
10680 : // FIXME: Transform "(and (shl x, c2) c1)" ->
10681 : // "(shl (and x, c1>>c2), c2)" if "c1 >> c2" is a cheaper immediate than
10682 : // c1.
10683 0 : return SDValue();
10684 : }
10685 :
10686 5014 : static SDValue PerformANDCombine(SDNode *N,
10687 : TargetLowering::DAGCombinerInfo &DCI,
10688 : const ARMSubtarget *Subtarget) {
10689 : // Attempt to use immediate-form VBIC
10690 5014 : BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
10691 : SDLoc dl(N);
10692 5014 : EVT VT = N->getValueType(0);
10693 5014 : SelectionDAG &DAG = DCI.DAG;
10694 :
10695 5014 : if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
10696 131 : return SDValue();
10697 :
10698 : APInt SplatBits, SplatUndef;
10699 : unsigned SplatBitSize;
10700 : bool HasAnyUndefs;
10701 4927 : if (BVN &&
10702 44 : BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
10703 27 : if (SplatBitSize <= 64) {
10704 27 : EVT VbicVT;
10705 27 : SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(),
10706 : SplatUndef.getZExtValue(), SplatBitSize,
10707 27 : DAG, dl, VbicVT, VT.is128BitVector(),
10708 54 : OtherModImm);
10709 27 : if (Val.getNode()) {
10710 : SDValue Input =
10711 10 : DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
10712 5 : SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
10713 5 : return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
10714 : }
10715 : }
10716 : }
10717 :
10718 4878 : if (!Subtarget->isThumb1Only()) {
10719 : // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))
10720 4196 : if (SDValue Result = combineSelectAndUseCommutative(N, true, DCI))
10721 4 : return Result;
10722 :
10723 4192 : if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
10724 7 : return Result;
10725 : }
10726 :
10727 4867 : if (Subtarget->isThumb1Only())
10728 682 : if (SDValue Result = CombineANDShift(N, DCI, Subtarget))
10729 4 : return Result;
10730 :
10731 4863 : return SDValue();
10732 : }
10733 :
10734 : // Try combining OR nodes to SMULWB, SMULWT.
10735 0 : static SDValue PerformORCombineToSMULWBT(SDNode *OR,
10736 : TargetLowering::DAGCombinerInfo &DCI,
10737 : const ARMSubtarget *Subtarget) {
10738 0 : if (!Subtarget->hasV6Ops() ||
10739 0 : (Subtarget->isThumb() &&
10740 0 : (!Subtarget->hasThumb2() || !Subtarget->hasDSP())))
10741 0 : return SDValue();
10742 :
10743 0 : SDValue SRL = OR->getOperand(0);
10744 0 : SDValue SHL = OR->getOperand(1);
10745 :
10746 0 : if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
10747 0 : SRL = OR->getOperand(1);
10748 0 : SHL = OR->getOperand(0);
10749 : }
10750 0 : if (!isSRL16(SRL) || !isSHL16(SHL))
10751 0 : return SDValue();
10752 :
10753 : // The first operands to the shifts need to be the two results from the
10754 : // same smul_lohi node.
10755 0 : if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
10756 : SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
10757 0 : return SDValue();
10758 :
10759 : SDNode *SMULLOHI = SRL.getOperand(0).getNode();
10760 0 : if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
10761 0 : SHL.getOperand(0) != SDValue(SMULLOHI, 1))
10762 0 : return SDValue();
10763 :
10764 : // Now we have:
10765 : // (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16))
10766 : // For SMULW[B|T], smul_lohi takes a 32-bit and a 16-bit argument.
10767 : // For SMULWB the 16-bit value will be sign extended somehow.
10768 : // For SMULWT only the SRA is required.
10769 : // Check both sides of SMUL_LOHI
10770 0 : SDValue OpS16 = SMULLOHI->getOperand(0);
10771 0 : SDValue OpS32 = SMULLOHI->getOperand(1);
10772 :
10773 0 : SelectionDAG &DAG = DCI.DAG;
10774 0 : if (!isS16(OpS16, DAG) && !isSRA16(OpS16)) {
10775 0 : OpS16 = OpS32;
10776 0 : OpS32 = SMULLOHI->getOperand(0);
10777 : }
10778 :
10779 : SDLoc dl(OR);
10780 : unsigned Opcode = 0;
10781 0 : if (isS16(OpS16, DAG))
10782 : Opcode = ARMISD::SMULWB;
10783 0 : else if (isSRA16(OpS16)) {
10784 : Opcode = ARMISD::SMULWT;
10785 0 : OpS16 = OpS16->getOperand(0);
10786 : }
10787 : else
10788 0 : return SDValue();
10789 :
10790 0 : SDValue Res = DAG.getNode(Opcode, dl, MVT::i32, OpS32, OpS16);
10791 0 : DAG.ReplaceAllUsesOfValueWith(SDValue(OR, 0), Res);
10792 0 : return SDValue(OR, 0);
10793 : }
10794 :
10795 420 : static SDValue PerformORCombineToBFI(SDNode *N,
10796 : TargetLowering::DAGCombinerInfo &DCI,
10797 : const ARMSubtarget *Subtarget) {
10798 : // BFI is only available on V6T2+
10799 499 : if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
10800 287 : return SDValue();
10801 :
10802 133 : EVT VT = N->getValueType(0);
10803 133 : SDValue N0 = N->getOperand(0);
10804 133 : SDValue N1 = N->getOperand(1);
10805 133 : SelectionDAG &DAG = DCI.DAG;
10806 : SDLoc DL(N);
10807 : // 1) or (and A, mask), val => ARMbfi A, val, mask
10808 : // iff (val & ~mask) == val
10809 : //
10810 : // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
10811 : // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
10812 : // && mask == ~mask2
10813 : // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
10814 : // && ~mask == mask2
10815 : // (i.e., copy a bitfield value into another bitfield of the same width)
10816 :
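       : // For illustration (arbitrary constants), the patterns above yield e.g.:
       : //   1)  (or (and A, 0xFFFFFF00), 0x2A)
       : //         -> (ARMbfi A, 0x2A, 0xFFFFFF00)        ; insert 0x2A into bits [7:0]
       : //   2a) (or (and A, 0xFFFF00FF), (and B, 0x0000FF00))
       : //         -> (ARMbfi A, (lsr B, 8), 0xFFFF00FF)  ; copy bits [15:8] of B into A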
10817 : if (VT != MVT::i32)
10818 18 : return SDValue();
10819 :
10820 115 : SDValue N00 = N0.getOperand(0);
10821 :
10822 : // The value and the mask need to be constants so we can verify this is
10823 : // actually a bitfield set. If the mask is 0xffff, we can do better
10824 : // via a movt instruction, so don't use BFI in that case.
10825 115 : SDValue MaskOp = N0.getOperand(1);
10826 : ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
10827 : if (!MaskC)
10828 16 : return SDValue();
10829 99 : unsigned Mask = MaskC->getZExtValue();
10830 99 : if (Mask == 0xffff)
10831 21 : return SDValue();
10832 78 : SDValue Res;
10833 : // Case (1): or (and A, mask), val => ARMbfi A, val, mask
10834 : ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
10835 : if (N1C) {
10836 22 : unsigned Val = N1C->getZExtValue();
10837 22 : if ((Val & ~Mask) != Val)
10838 0 : return SDValue();
10839 :
10840 22 : if (ARM::isBitFieldInvertedMask(Mask)) {
10841 8 : Val >>= countTrailingZeros(~Mask);
10842 :
10843 8 : Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
10844 : DAG.getConstant(Val, DL, MVT::i32),
10845 8 : DAG.getConstant(Mask, DL, MVT::i32));
10846 :
10847 8 : DCI.CombineTo(N, Res, false);
10848 : // Return value from the original node to inform the combiner that N is
10849 : // now dead.
10850 8 : return SDValue(N, 0);
10851 : }
10852 56 : } else if (N1.getOpcode() == ISD::AND) {
10853 : // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
10854 : ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
10855 : if (!N11C)
10856 0 : return SDValue();
10857 40 : unsigned Mask2 = N11C->getZExtValue();
10858 :
10859 : // Mask and ~Mask2 (or the reverse) must be equivalent for the BFI
10860 : // pattern as-is to match.
10861 40 : if (ARM::isBitFieldInvertedMask(Mask) &&
10862 23 : (Mask == ~Mask2)) {
10863 : // The pack halfword instruction works better for masks that fit it,
10864 : // so use that when it's available.
10865 19 : if (Subtarget->hasDSP() &&
10866 15 : (Mask == 0xffff || Mask == 0xffff0000))
10867 4 : return SDValue();
10868 : // 2a
10869 : unsigned amt = countTrailingZeros(Mask2);
10870 15 : Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
10871 15 : DAG.getConstant(amt, DL, MVT::i32));
10872 15 : Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
10873 15 : DAG.getConstant(Mask, DL, MVT::i32));
10874 15 : DCI.CombineTo(N, Res, false);
10875 : // Return value from the original node to inform the combiner that N is
10876 : // now dead.
10877 15 : return SDValue(N, 0);
10878 21 : } else if (ARM::isBitFieldInvertedMask(~Mask) &&
10879 : (~Mask == Mask2)) {
10880 : // The pack halfword instruction works better for masks that fit it,
10881 : // so use that when it's available.
10882 4 : if (Subtarget->hasDSP() &&
10883 0 : (Mask2 == 0xffff || Mask2 == 0xffff0000))
10884 0 : return SDValue();
10885 : // 2b
10886 : unsigned lsb = countTrailingZeros(Mask);
10887 4 : Res = DAG.getNode(ISD::SRL, DL, VT, N00,
10888 4 : DAG.getConstant(lsb, DL, MVT::i32));
10889 4 : Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
10890 4 : DAG.getConstant(Mask2, DL, MVT::i32));
10891 4 : DCI.CombineTo(N, Res, false);
10892 : // Return value from the original node to inform the combiner that N is
10893 : // now dead.
10894 4 : return SDValue(N, 0);
10895 : }
10896 : }
10897 :
10898 131 : if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
10899 54 : N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
10900 7 : ARM::isBitFieldInvertedMask(~Mask)) {
10901 : // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
10902 : // where lsb(mask) == #shamt and masked bits of B are known zero.
10903 5 : SDValue ShAmt = N00.getOperand(1);
10904 10 : unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();
10905 5 : unsigned LSB = countTrailingZeros(Mask);
10906 5 : if (ShAmtC != LSB)
10907 2 : return SDValue();
10908 :
10909 3 : Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
10910 3 : DAG.getConstant(~Mask, DL, MVT::i32));
10911 :
10912 3 : DCI.CombineTo(N, Res, false);
10913 : // Return value from the original node to inform the combiner than N is
10914 : // now dead.
10915 3 : return SDValue(N, 0);
10916 : }
10917 :
10918 42 : return SDValue();
10919 : }
10920 :
10921 : /// PerformORCombine - Target-specific dag combine xforms for ISD::OR
10922 2981 : static SDValue PerformORCombine(SDNode *N,
10923 : TargetLowering::DAGCombinerInfo &DCI,
10924 : const ARMSubtarget *Subtarget) {
10925 : // Attempt to use immediate-form VORR
10926 2981 : BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
10927 : SDLoc dl(N);
10928 2981 : EVT VT = N->getValueType(0);
10929 2981 : SelectionDAG &DAG = DCI.DAG;
10930 :
10931 2981 : if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
10932 231 : return SDValue();
10933 :
10934 : APInt SplatBits, SplatUndef;
10935 : unsigned SplatBitSize;
10936 : bool HasAnyUndefs;
10937 2760 : if (BVN && Subtarget->hasNEON() &&
10938 10 : BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
10939 10 : if (SplatBitSize <= 64) {
10940 9 : EVT VorrVT;
10941 : SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
10942 : SplatUndef.getZExtValue(), SplatBitSize,
10943 9 : DAG, dl, VorrVT, VT.is128BitVector(),
10944 18 : OtherModImm);
10945 9 : if (Val.getNode()) {
10946 : SDValue Input =
10947 16 : DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
10948 8 : SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
10949 8 : return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
10950 : }
10951 : }
10952 : }
10953 :
10954 2742 : if (!Subtarget->isThumb1Only()) {
10955 : // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
10956 2162 : if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
10957 8 : return Result;
10958 2154 : if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget))
10959 24 : return Result;
10960 : }
10961 :
10962 2710 : SDValue N0 = N->getOperand(0);
10963 2710 : SDValue N1 = N->getOperand(1);
10964 :
10965 : // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
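       : // For example, with a v4i32 splat A = <0x0000FFFF, ...> this selects the low
       : // halfword of each lane from B and the high halfword from C in one vbsl.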
10966 2842 : if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
10967 116 : DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
10968 :
10969 : // The code below optimizes (or (and X, Y), Z).
10970 : // The AND operand needs to have a single user to make these optimizations
10971 : // profitable.
10972 225 : if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
10973 7 : return SDValue();
10974 :
10975 : APInt SplatUndef;
10976 : unsigned SplatBitSize;
10977 : bool HasAnyUndefs;
10978 :
10979 : APInt SplatBits0, SplatBits1;
10980 109 : BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
10981 109 : BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
10982 : // Ensure that the second operand of both ands are constants
10983 9 : if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
10984 118 : HasAnyUndefs) && !HasAnyUndefs) {
10985 9 : if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
10986 18 : HasAnyUndefs) && !HasAnyUndefs) {
10987 : // Ensure that the bit width of the constants are the same and that
10988 : // the splat arguments are logical inverses as per the pattern we
10989 : // are trying to simplify.
10990 17 : if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
10991 25 : SplatBits0 == ~SplatBits1) {
10992 : // Canonicalize the vector type to make instruction selection
10993 : // simpler.
10994 8 : EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
10995 : SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
10996 : N0->getOperand(1),
10997 8 : N0->getOperand(0),
10998 16 : N1->getOperand(0));
10999 8 : return DAG.getNode(ISD::BITCAST, dl, VT, Result);
11000 : }
11001 : }
11002 : }
11003 : }
11004 :
11005 : // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
11006 : // reasonable.
11007 3145 : if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
11008 420 : if (SDValue Res = PerformORCombineToBFI(N, DCI, Subtarget))
11009 30 : return Res;
11010 : }
11011 :
11012 2665 : if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
11013 30 : return Result;
11014 :
11015 2635 : return SDValue();
11016 : }
11017 :
11018 1514 : static SDValue PerformXORCombine(SDNode *N,
11019 : TargetLowering::DAGCombinerInfo &DCI,
11020 : const ARMSubtarget *Subtarget) {
11021 1514 : EVT VT = N->getValueType(0);
11022 1514 : SelectionDAG &DAG = DCI.DAG;
11023 :
11024 1514 : if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
11025 67 : return SDValue();
11026 :
11027 1447 : if (!Subtarget->isThumb1Only()) {
11028 : // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
11029 1160 : if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
11030 2 : return Result;
11031 :
11032 1158 : if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
11033 6 : return Result;
11034 : }
11035 :
11036 1439 : return SDValue();
11037 : }
11038 :
11039 : // ParseBFI - given a BFI instruction in N, extract the "from" value (Rn) and return it,
11040 : // and fill in FromMask and ToMask with (consecutive) bits in "from" to be extracted and
11041 : // their position in "to" (Rd).
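       : // For example, for (ARMISD::BFI To, (srl From, 8), 0xFF00FFFF) this returns the
       : // base of the srl with ToMask = 0x00FF0000 and FromMask = 0x0000FF00, i.e. bits
       : // [15:8] of "from" are written to bits [23:16] of "to".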
11042 22 : static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {
11043 : assert(N->getOpcode() == ARMISD::BFI);
11044 :
11045 22 : SDValue From = N->getOperand(1);
11046 44 : ToMask = ~cast<ConstantSDNode>(N->getOperand(2))->getAPIntValue();
11047 22 : FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.countPopulation());
11048 :
11049 : // If the "from" value came from a SRL #C, the extracted bits really start at
11050 : // bit #C of the SRL's operand, so shift FromMask up accordingly.
11051 22 : if (From->getOpcode() == ISD::SRL &&
11052 14 : isa<ConstantSDNode>(From->getOperand(1))) {
11053 14 : APInt Shift = cast<ConstantSDNode>(From->getOperand(1))->getAPIntValue();
11054 : assert(Shift.getLimitedValue() < 32 && "Shift too large!");
11055 14 : FromMask <<= Shift.getLimitedValue(31);
11056 14 : From = From->getOperand(0);
11057 : }
11058 :
11059 22 : return From;
11060 : }
11061 :
11062 : // If A and B each contain one contiguous run of set bits, does A | B == A . B
11063 : // (i.e. A concatenated with B)?
11064 : // Neither A nor B may be zero.
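       : // For example, A = 0b110000 and B = 0b001100 concatenate properly (the lowest
       : // set bit of A sits one above the highest set bit of B), while A = 0b110000
       : // and B = 0b000011 do not.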
11065 11 : static bool BitsProperlyConcatenate(const APInt &A, const APInt &B) {
11066 11 : unsigned LastActiveBitInA = A.countTrailingZeros();
11067 11 : unsigned FirstActiveBitInB = B.getBitWidth() - B.countLeadingZeros() - 1;
11068 11 : return LastActiveBitInA - 1 == FirstActiveBitInB;
11069 : }
11070 :
11071 8 : static SDValue FindBFIToCombineWith(SDNode *N) {
11072 : // We have a BFI in N. Follow a possible chain of BFIs and find a BFI it can combine with,
11073 : // if one exists.
11074 : APInt ToMask, FromMask;
11075 8 : SDValue From = ParseBFI(N, ToMask, FromMask);
11076 8 : SDValue To = N->getOperand(0);
11077 :
11078 : // Now check for a compatible BFI to merge with. We can pass through BFIs that
11079 : // aren't compatible, but not if they set the same bit in their destination as
11080 : // we do (or that of any BFI we're going to combine with).
11081 : SDValue V = To;
11082 : APInt CombinedToMask = ToMask;
11083 13 : while (V.getOpcode() == ARMISD::BFI) {
11084 : APInt NewToMask, NewFromMask;
11085 8 : SDValue NewFrom = ParseBFI(V.getNode(), NewToMask, NewFromMask);
11086 : if (NewFrom != From) {
11087 : // This BFI has a different base. Keep going.
11088 : CombinedToMask |= NewToMask;
11089 4 : V = V.getOperand(0);
11090 : continue;
11091 : }
11092 :
11093 : // Do the written bits conflict with any we've seen so far?
11094 4 : if ((NewToMask & CombinedToMask).getBoolValue())
11095 : // Conflicting bits - bail out because going further is unsafe.
11096 0 : return SDValue();
11097 :
11098 : // Are the new bits contiguous when combined with the old bits?
11099 4 : if (BitsProperlyConcatenate(ToMask, NewToMask) &&
11100 2 : BitsProperlyConcatenate(FromMask, NewFromMask))
11101 1 : return V;
11102 3 : if (BitsProperlyConcatenate(NewToMask, ToMask) &&
11103 2 : BitsProperlyConcatenate(NewFromMask, FromMask))
11104 2 : return V;
11105 :
11106 : // We've seen a write to some bits, so track it.
11107 : CombinedToMask |= NewToMask;
11108 : // Keep going...
11109 1 : V = V.getOperand(0);
11110 : }
11111 :
11112 5 : return SDValue();
11113 : }
11114 :
11115 0 : static SDValue PerformBFICombine(SDNode *N,
11116 : TargetLowering::DAGCombinerInfo &DCI) {
11117 0 : SDValue N1 = N->getOperand(1);
11118 0 : if (N1.getOpcode() == ISD::AND) {
11119 : // (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
11120 : // the bits being cleared by the AND are not demanded by the BFI.
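       : // For example, if the BFI writes an 8-bit field (operand 2 == 0xFFFF00FF) it
       : // only reads the low 8 bits of the inserted value, so an (and B, 0xFF) -- or
       : // any mask with the low 8 bits set -- can be dropped.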
11121 : ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
11122 : if (!N11C)
11123 0 : return SDValue();
11124 0 : unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
11125 0 : unsigned LSB = countTrailingZeros(~InvMask);
11126 0 : unsigned Width = (32 - countLeadingZeros(~InvMask)) - LSB;
11127 : assert(Width <
11128 : static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
11129 : "undefined behavior");
11130 : unsigned Mask = (1u << Width) - 1;
11131 0 : unsigned Mask2 = N11C->getZExtValue();
11132 0 : if ((Mask & (~Mask2)) == 0)
11133 0 : return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
11134 : N->getOperand(0), N1.getOperand(0),
11135 0 : N->getOperand(2));
11136 0 : } else if (N->getOperand(0).getOpcode() == ARMISD::BFI) {
11137 : // We have a BFI of a BFI. Walk up the BFI chain to see how long it goes.
11138 : // Keep track of any consecutive bits set that all come from the same base
11139 : // value. We can combine these together into a single BFI.
11140 0 : SDValue CombineBFI = FindBFIToCombineWith(N);
11141 0 : if (CombineBFI == SDValue())
11142 0 : return SDValue();
11143 :
11144 : // We've found a BFI.
11145 : APInt ToMask1, FromMask1;
11146 0 : SDValue From1 = ParseBFI(N, ToMask1, FromMask1);
11147 :
11148 : APInt ToMask2, FromMask2;
11149 0 : SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2);
11150 : assert(From1 == From2);
11151 : (void)From2;
11152 :
11153 : // First, unlink CombineBFI.
11154 0 : DCI.DAG.ReplaceAllUsesWith(CombineBFI, CombineBFI.getOperand(0));
11155 : // Then create a new BFI, combining the two together.
11156 0 : APInt NewFromMask = FromMask1 | FromMask2;
11157 0 : APInt NewToMask = ToMask1 | ToMask2;
11158 :
11159 0 : EVT VT = N->getValueType(0);
11160 : SDLoc dl(N);
11161 :
11162 0 : if (NewFromMask[0] == 0)
11163 0 : From1 = DCI.DAG.getNode(
11164 : ISD::SRL, dl, VT, From1,
11165 0 : DCI.DAG.getConstant(NewFromMask.countTrailingZeros(), dl, VT));
11166 0 : return DCI.DAG.getNode(ARMISD::BFI, dl, VT, N->getOperand(0), From1,
11167 0 : DCI.DAG.getConstant(~NewToMask, dl, VT));
11168 : }
11169 0 : return SDValue();
11170 : }
11171 :
11172 : /// PerformVMOVRRDCombine - Target-specific dag combine xforms for
11173 : /// ARMISD::VMOVRRD.
11174 6799 : static SDValue PerformVMOVRRDCombine(SDNode *N,
11175 : TargetLowering::DAGCombinerInfo &DCI,
11176 : const ARMSubtarget *Subtarget) {
11177 : // vmovrrd(vmovdrr x, y) -> x,y
11178 6799 : SDValue InDouble = N->getOperand(0);
11179 6799 : if (InDouble.getOpcode() == ARMISD::VMOVDRR && !Subtarget->isFPOnlySP())
11180 110 : return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
11181 :
11182 : // vmovrrd(load f64) -> (load i32), (load i32)
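       : // For example, a VMOVRRD of an f64 frame-index load is rewritten as two i32
       : // loads at [fi] and [fi+4], so the value never passes through a VFP register.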
11183 : SDNode *InNode = InDouble.getNode();
11184 : if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
11185 54 : InNode->getValueType(0) == MVT::f64 &&
11186 54 : InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
11187 17 : !cast<LoadSDNode>(InNode)->isVolatile()) {
11188 : // TODO: Should this be done for non-FrameIndex operands?
11189 : LoadSDNode *LD = cast<LoadSDNode>(InNode);
11190 :
11191 17 : SelectionDAG &DAG = DCI.DAG;
11192 : SDLoc DL(LD);
11193 17 : SDValue BasePtr = LD->getBasePtr();
11194 : SDValue NewLD1 =
11195 17 : DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(),
11196 17 : LD->getAlignment(), LD->getMemOperand()->getFlags());
11197 :
11198 : SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
11199 17 : DAG.getConstant(4, DL, MVT::i32));
11200 : SDValue NewLD2 = DAG.getLoad(
11201 17 : MVT::i32, DL, NewLD1.getValue(1), OffsetPtr, LD->getPointerInfo(),
11202 17 : std::min(4U, LD->getAlignment() / 2), LD->getMemOperand()->getFlags());
11203 :
11204 17 : DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
11205 17 : if (DCI.DAG.getDataLayout().isBigEndian())
11206 : std::swap (NewLD1, NewLD2);
11207 17 : SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
11208 17 : return Result;
11209 : }
11210 :
11211 6672 : return SDValue();
11212 : }
11213 :
11214 : /// PerformVMOVDRRCombine - Target-specific dag combine xforms for
11215 : /// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands.
11216 3484 : static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
11217 : // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
11218 3484 : SDValue Op0 = N->getOperand(0);
11219 3484 : SDValue Op1 = N->getOperand(1);
11220 3484 : if (Op0.getOpcode() == ISD::BITCAST)
11221 5 : Op0 = Op0.getOperand(0);
11222 3484 : if (Op1.getOpcode() == ISD::BITCAST)
11223 8 : Op1 = Op1.getOperand(0);
11224 6 : if (Op0.getOpcode() == ARMISD::VMOVRRD &&
11225 0 : Op0.getNode() == Op1.getNode() &&
11226 3484 : Op0.getResNo() == 0 && Op1.getResNo() == 1)
11227 0 : return DAG.getNode(ISD::BITCAST, SDLoc(N),
11228 0 : N->getValueType(0), Op0.getOperand(0));
11229 3484 : return SDValue();
11230 : }
11231 :
11232 : /// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
11233 : /// are normal, non-volatile loads. If so, it is profitable to bitcast an
11234 : /// i64 vector to have f64 elements, since the value can then be loaded
11235 : /// directly into a VFP register.
11236 48 : static bool hasNormalLoadOperand(SDNode *N) {
11237 144 : unsigned NumElts = N->getValueType(0).getVectorNumElements();
11238 411 : for (unsigned i = 0; i < NumElts; ++i) {
11239 732 : SDNode *Elt = N->getOperand(i).getNode();
11240 3 : if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
11241 : return true;
11242 : }
11243 : return false;
11244 : }
11245 :
11246 : /// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
11247 : /// ISD::BUILD_VECTOR.
11248 0 : static SDValue PerformBUILD_VECTORCombine(SDNode *N,
11249 : TargetLowering::DAGCombinerInfo &DCI,
11250 : const ARMSubtarget *Subtarget) {
11251 : // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
11252 : // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
11253 : // into a pair of GPRs, which is fine when the value is used as a scalar,
11254 : // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
11255 0 : SelectionDAG &DAG = DCI.DAG;
11256 0 : if (N->getNumOperands() == 2)
11257 0 : if (SDValue RV = PerformVMOVDRRCombine(N, DAG))
11258 0 : return RV;
11259 :
11260 : // Load i64 elements as f64 values so that type legalization does not split
11261 : // them up into i32 values.
11262 0 : EVT VT = N->getValueType(0);
11263 0 : if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
11264 0 : return SDValue();
11265 : SDLoc dl(N);
11266 : SmallVector<SDValue, 8> Ops;
11267 : unsigned NumElts = VT.getVectorNumElements();
11268 0 : for (unsigned i = 0; i < NumElts; ++i) {
11269 0 : SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
11270 0 : Ops.push_back(V);
11271 : // Make the DAGCombiner fold the bitcast.
11272 0 : DCI.AddToWorklist(V.getNode());
11273 : }
11274 0 : EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
11275 0 : SDValue BV = DAG.getBuildVector(FloatVT, dl, Ops);
11276 0 : return DAG.getNode(ISD::BITCAST, dl, VT, BV);
11277 : }
11278 :
11279 : /// Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
11280 : static SDValue
11281 542 : PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
11282 : // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR.
11283 : // At that time, we may have inserted bitcasts from integer to float.
11284 : // If these bitcasts have survived DAGCombine, change the lowering of this
11285 : // BUILD_VECTOR into something more vector friendly, i.e., something that
11286 : // does not force the use of floating point types.
11287 :
11288 : // Make sure we can change the type of the vector.
11289 : // This is possible iff:
11290 : // 1. The vector is only used in a bitcast to an integer type. I.e.,
11291 : // 1.1. Vector is used only once.
11292 : // 1.2. Use is a bit convert to an integer type.
11293 : // 2. Its operands are 32 bits in size (64-bit operands are not legal).
11294 542 : EVT VT = N->getValueType(0);
11295 542 : EVT EltVT = VT.getVectorElementType();
11296 :
11297 : // Check 1.1. and 2.
11298 542 : if (EltVT.getSizeInBits() != 32 || !N->hasOneUse())
11299 362 : return SDValue();
11300 :
11301 : // By construction, the input type must be float.
11302 : assert(EltVT == MVT::f32 && "Unexpected type!");
11303 :
11304 : // Check 1.2.
11305 : SDNode *Use = *N->use_begin();
11306 180 : if (Use->getOpcode() != ISD::BITCAST ||
11307 340 : Use->getValueType(0).isFloatingPoint())
11308 76 : return SDValue();
11309 :
11310 : // Check profitability.
11311 : // The model is: if more than half of the relevant operands are bitcast from
11312 : // i32, turn the build_vector into a sequence of insert_vector_elt.
11313 : // Relevant operands are everything that is not statically
11314 : // (i.e., at compile time) bitcasted.
11315 : unsigned NumOfBitCastedElts = 0;
11316 : unsigned NumElts = VT.getVectorNumElements();
11317 : unsigned NumOfRelevantElts = NumElts;
11318 312 : for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
11319 208 : SDValue Elt = N->getOperand(Idx);
11320 416 : if (Elt->getOpcode() == ISD::BITCAST) {
11321 : // Assume only bit cast to i32 will go away.
11322 202 : if (Elt->getOperand(0).getValueType() == MVT::i32)
11323 202 : ++NumOfBitCastedElts;
11324 6 : } else if (Elt.isUndef() || isa<ConstantSDNode>(Elt))
11325 : // Constants are statically casted, thus do not count them as
11326 : // relevant operands.
11327 4 : --NumOfRelevantElts;
11328 : }
11329 :
11330 : // Check if more than half of the elements require a non-free bitcast.
11331 104 : if (NumOfBitCastedElts <= NumOfRelevantElts / 2)
11332 2 : return SDValue();
11333 :
11334 102 : SelectionDAG &DAG = DCI.DAG;
11335 : // Create the new vector type.
11336 102 : EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
11337 : // Check if the type is legal.
11338 102 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11339 102 : if (!TLI.isTypeLegal(VecVT))
11340 0 : return SDValue();
11341 :
11342 : // Combine:
11343 : // ARMISD::BUILD_VECTOR E1, E2, ..., EN.
11344 : // => BITCAST INSERT_VECTOR_ELT
11345 : // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1),
11346 : // (BITCAST EN), N.
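       : // For example, for a two-element vector built from i32 values a and b:
       : //   ARMISD::BUILD_VECTOR (bitcast a to f32), (bitcast b to f32)
       : //     -> (bitcast (insert_vector_elt (insert_vector_elt undef:v2i32, a, 0),
       : //                                    b, 1) to v2f32)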
11347 102 : SDValue Vec = DAG.getUNDEF(VecVT);
11348 : SDLoc dl(N);
11349 306 : for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) {
11350 408 : SDValue V = N->getOperand(Idx);
11351 204 : if (V.isUndef())
11352 4 : continue;
11353 200 : if (V.getOpcode() == ISD::BITCAST &&
11354 200 : V->getOperand(0).getValueType() == MVT::i32)
11355 : // Fold obvious case.
11356 200 : V = V.getOperand(0);
11357 : else {
11358 0 : V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
11359 : // Make the DAGCombiner fold the bitcasts.
11360 0 : DCI.AddToWorklist(V.getNode());
11361 : }
11362 200 : SDValue LaneIdx = DAG.getConstant(Idx, dl, MVT::i32);
11363 200 : Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx);
11364 : }
11365 102 : Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec);
11366 : // Make the DAGCombiner fold the bitcasts.
11367 102 : DCI.AddToWorklist(Vec.getNode());
11368 102 : return Vec;
11369 : }
11370 :
11371 : /// PerformInsertEltCombine - Target-specific dag combine xforms for
11372 : /// ISD::INSERT_VECTOR_ELT.
11373 1319 : static SDValue PerformInsertEltCombine(SDNode *N,
11374 : TargetLowering::DAGCombinerInfo &DCI) {
11375 : // Bitcast an i64 load inserted into a vector to f64.
11376 : // Otherwise, the i64 value will be legalized to a pair of i32 values.
11377 1319 : EVT VT = N->getValueType(0);
11378 1319 : SDNode *Elt = N->getOperand(1).getNode();
11379 1319 : if (VT.getVectorElementType() != MVT::i64 ||
11380 6 : !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
11381 1313 : return SDValue();
11382 :
11383 6 : SelectionDAG &DAG = DCI.DAG;
11384 : SDLoc dl(N);
11385 6 : EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
11386 6 : VT.getVectorNumElements());
11387 12 : SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
11388 12 : SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
11389 : // Make the DAGCombiner fold the bitcasts.
11390 6 : DCI.AddToWorklist(Vec.getNode());
11391 6 : DCI.AddToWorklist(V.getNode());
11392 : SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
11393 12 : Vec, V, N->getOperand(2));
11394 6 : return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
11395 : }
11396 :
11397 : /// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
11398 : /// ISD::VECTOR_SHUFFLE.
11399 565 : static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
11400 : // The LLVM shufflevector instruction does not require the shuffle mask
11401 : // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
11402 : // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the
11403 : // operands do not match the mask length, they are extended by concatenating
11404 : // them with undef vectors. That is probably the right thing for other
11405 : // targets, but for NEON it is better to concatenate two double-register
11406 : // size vector operands into a single quad-register size vector. Do that
11407 : // transformation here:
11408 : // shuffle(concat(v1, undef), concat(v2, undef)) ->
11409 : // shuffle(concat(v1, v2), undef)
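       : // For example, with v4i16 operands v1 and v2 (so each concat is v8i16):
       : //   shuffle(concat(v1, undef), concat(v2, undef), <0,1,8,9,2,3,10,11>)
       : //     -> shuffle(concat(v1, v2), undef, <0,1,4,5,2,3,6,7>)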
11410 565 : SDValue Op0 = N->getOperand(0);
11411 565 : SDValue Op1 = N->getOperand(1);
11412 92 : if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
11413 37 : Op1.getOpcode() != ISD::CONCAT_VECTORS ||
11414 602 : Op0.getNumOperands() != 2 ||
11415 : Op1.getNumOperands() != 2)
11416 528 : return SDValue();
11417 37 : SDValue Concat0Op1 = Op0.getOperand(1);
11418 37 : SDValue Concat1Op1 = Op1.getOperand(1);
11419 37 : if (!Concat0Op1.isUndef() || !Concat1Op1.isUndef())
11420 0 : return SDValue();
11421 : // Skip the transformation if any of the types are illegal.
11422 37 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11423 74 : EVT VT = N->getValueType(0);
11424 : if (!TLI.isTypeLegal(VT) ||
11425 : !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
11426 : !TLI.isTypeLegal(Concat1Op1.getValueType()))
11427 17 : return SDValue();
11428 :
11429 20 : SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
11430 40 : Op0.getOperand(0), Op1.getOperand(0));
11431 : // Translate the shuffle mask.
11432 : SmallVector<int, 16> NewMask;
11433 : unsigned NumElts = VT.getVectorNumElements();
11434 20 : unsigned HalfElts = NumElts/2;
11435 : ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
11436 208 : for (unsigned n = 0; n < NumElts; ++n) {
11437 188 : int MaskElt = SVN->getMaskElt(n);
11438 188 : int NewElt = -1;
11439 188 : if (MaskElt < (int)HalfElts)
11440 111 : NewElt = MaskElt;
11441 77 : else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
11442 77 : NewElt = HalfElts + MaskElt - NumElts;
11443 188 : NewMask.push_back(NewElt);
11444 : }
11445 20 : return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat,
11446 60 : DAG.getUNDEF(VT), NewMask);
11447 : }
11448 :
11449 : /// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
11450 : /// NEON load/store intrinsics, and generic vector load/stores, to merge
11451 : /// base address updates.
11452 : /// For generic load/stores, the memory type is assumed to be a vector.
11453 : /// The caller is assumed to have checked legality.
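       : /// As a rough sketch, a DAG such as
       : ///   t1: v4i32,ch = load t0, t2
       : ///   t3: i32 = add t2, Constant:i32<16>
       : /// is folded into a single VLD1_UPD node producing both the loaded value and
       : /// the updated address, provided the add is independent of the load.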
11454 11108 : static SDValue CombineBaseUpdate(SDNode *N,
11455 : TargetLowering::DAGCombinerInfo &DCI) {
11456 11108 : SelectionDAG &DAG = DCI.DAG;
11457 22216 : const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
11458 : N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
11459 11108 : const bool isStore = N->getOpcode() == ISD::STORE;
11460 11108 : const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
11461 22216 : SDValue Addr = N->getOperand(AddrOpIdx);
11462 : MemSDNode *MemN = cast<MemSDNode>(N);
11463 : SDLoc dl(N);
11464 :
11465 : // Search for a use of the address operand that is an increment.
11466 11108 : for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
11467 8413692 : UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
11468 : SDNode *User = *UI;
11469 8402890 : if (User->getOpcode() != ISD::ADD ||
11470 : UI.getUse().getResNo() != Addr.getResNo())
11471 8402584 : continue;
11472 :
11473 : // Check that the add is independent of the load/store. Otherwise, folding
11474 : // it would create a cycle. We can avoid searching through Addr as it's a
11475 : // predecessor to both.
11476 : SmallPtrSet<const SDNode *, 32> Visited;
11477 : SmallVector<const SDNode *, 16> Worklist;
11478 330 : Visited.insert(Addr.getNode());
11479 330 : Worklist.push_back(N);
11480 330 : Worklist.push_back(User);
11481 660 : if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
11482 330 : SDNode::hasPredecessorHelper(User, Visited, Worklist))
11483 19 : continue;
11484 :
11485 : // Find the new opcode for the updating load/store.
11486 : bool isLoadOp = true;
11487 : bool isLaneOp = false;
11488 : unsigned NewOpc = 0;
11489 : unsigned NumVecs = 0;
11490 311 : if (isIntrinsic) {
11491 164 : unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
11492 82 : switch (IntNo) {
11493 0 : default: llvm_unreachable("unexpected intrinsic for Neon base update");
11494 : case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD;
11495 : NumVecs = 1; break;
11496 7 : case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD;
11497 7 : NumVecs = 2; break;
11498 11 : case Intrinsic::arm_neon_vld3: NewOpc = ARMISD::VLD3_UPD;
11499 11 : NumVecs = 3; break;
11500 4 : case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD;
11501 4 : NumVecs = 4; break;
11502 : case Intrinsic::arm_neon_vld2dup:
11503 : case Intrinsic::arm_neon_vld3dup:
11504 : case Intrinsic::arm_neon_vld4dup:
11505 : // TODO: Support updating VLDxDUP nodes. For now, we just skip
11506 : // combining base updates for such intrinsics.
11507 : continue;
11508 4 : case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;
11509 4 : NumVecs = 2; isLaneOp = true; break;
11510 2 : case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD;
11511 2 : NumVecs = 3; isLaneOp = true; break;
11512 2 : case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD;
11513 2 : NumVecs = 4; isLaneOp = true; break;
11514 9 : case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD;
11515 9 : NumVecs = 1; isLoadOp = false; break;
11516 5 : case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD;
11517 5 : NumVecs = 2; isLoadOp = false; break;
11518 4 : case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD;
11519 4 : NumVecs = 3; isLoadOp = false; break;
11520 4 : case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD;
11521 4 : NumVecs = 4; isLoadOp = false; break;
11522 1 : case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD;
11523 1 : NumVecs = 2; isLoadOp = false; isLaneOp = true; break;
11524 1 : case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD;
11525 1 : NumVecs = 3; isLoadOp = false; isLaneOp = true; break;
11526 1 : case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD;
11527 1 : NumVecs = 4; isLoadOp = false; isLaneOp = true; break;
11528 : }
11529 : } else {
11530 : isLaneOp = true;
11531 458 : switch (N->getOpcode()) {
11532 0 : default: llvm_unreachable("unexpected opcode for Neon base update");
11533 : case ARMISD::VLD1DUP: NewOpc = ARMISD::VLD1DUP_UPD; NumVecs = 1; break;
11534 4 : case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
11535 1 : case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
11536 1 : case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
11537 88 : case ISD::LOAD: NewOpc = ARMISD::VLD1_UPD;
11538 88 : NumVecs = 1; isLaneOp = false; break;
11539 129 : case ISD::STORE: NewOpc = ARMISD::VST1_UPD;
11540 129 : NumVecs = 1; isLaneOp = false; isLoadOp = false; break;
11541 : }
11542 : }
11543 :
11544 : // Find the size of memory referenced by the load/store.
11545 308 : EVT VecTy;
11546 308 : if (isLoadOp) {
11547 308 : VecTy = N->getValueType(0);
11548 154 : } else if (isIntrinsic) {
11549 75 : VecTy = N->getOperand(AddrOpIdx+1).getValueType();
11550 : } else {
11551 : assert(isStore && "Node has to be a load, a store, or an intrinsic!");
11552 258 : VecTy = N->getOperand(1).getValueType();
11553 : }
11554 :
11555 308 : unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
11556 308 : if (isLaneOp)
11557 23 : NumBytes /= VecTy.getVectorNumElements();
11558 :
11559 : // If the increment is a constant, it must match the memory ref size.
11560 308 : SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
11561 308 : ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
11562 314 : if (NumBytes >= 3 * 16 && (!CInc || CInc->getZExtValue() != NumBytes)) {
11563 : // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
11564 : // separate instructions that make it harder to use a non-constant update.
11565 : continue;
11566 : }
11567 :
11568 : // OK, we found an ADD we can fold into the base update.
11569 : // Now, create a _UPD node, taking care of not breaking alignment.
11570 :
11571 306 : EVT AlignedVecTy = VecTy;
11572 306 : unsigned Alignment = MemN->getAlignment();
11573 :
11574 : // If this is a less-than-standard-aligned load/store, change the type to
11575 : // match the standard alignment.
11576 : // The alignment is overlooked when selecting _UPD variants; and it's
11577 : // easier to introduce bitcasts here than fix that.
11578 : // There are 3 ways to get to this base-update combine:
11579 : // - intrinsics: they are assumed to be properly aligned (to the standard
11580 : // alignment of the memory type), so we don't need to do anything.
11581 : // - ARMISD::VLDx nodes: they are only generated from the aforementioned
11582 : // intrinsics, so, likewise, there's nothing to do.
11583 : // - generic load/store instructions: the alignment is specified as an
11584 : // explicit operand, rather than implicitly as the standard alignment
11585 : // of the memory type (like the intrisics). We need to change the
11586 : // memory type to match the explicit alignment. That way, we don't
11587 : // generate non-standard-aligned ARMISD::VLDx nodes.
11588 : if (isa<LSBaseSDNode>(N)) {
11589 217 : if (Alignment == 0)
11590 : Alignment = 1;
11591 217 : if (Alignment < VecTy.getScalarSizeInBits() / 8) {
11592 46 : MVT EltTy = MVT::getIntegerVT(Alignment * 8);
11593 : assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
11594 : assert(!isLaneOp && "Unexpected generic load/store lane.");
11595 46 : unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
11596 46 : AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
11597 : }
11598 : // Don't set an explicit alignment on regular load/stores that we want
11599 : // to transform to VLD/VST 1_UPD nodes.
11600 : // This matches the behavior of regular load/stores, which only get an
11601 : // explicit alignment if the MMO alignment is larger than the standard
11602 : // alignment of the memory type.
11603 : // Intrinsics, however, always get an explicit alignment, set to the
11604 : // alignment of the MMO.
11605 : Alignment = 1;
11606 : }
11607 :
11608 : // Create the new updating load/store node.
11609 : // First, create an SDVTList for the new updating node's results.
11610 306 : EVT Tys[6];
11611 306 : unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
11612 : unsigned n;
11613 518 : for (n = 0; n < NumResultVecs; ++n)
11614 212 : Tys[n] = AlignedVecTy;
11615 306 : Tys[n++] = MVT::i32;
11616 306 : Tys[n] = MVT::Other;
11617 306 : SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2));
11618 :
11619 : // Then, gather the new node's operands.
11620 : SmallVector<SDValue, 8> Ops;
11621 612 : Ops.push_back(N->getOperand(0)); // incoming chain
11622 612 : Ops.push_back(N->getOperand(AddrOpIdx));
11623 306 : Ops.push_back(Inc);
11624 :
11625 : if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
11626 : // Try to match the intrinsic's signature
11627 129 : Ops.push_back(StN->getValue());
11628 : } else {
11629 : // Loads (and of course intrinsics) match the intrinsics' signature,
11630 : // so just add all but the alignment operand.
11631 266 : for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands() - 1; ++i)
11632 178 : Ops.push_back(N->getOperand(i));
11633 : }
11634 :
11635 : // For all node types, the alignment operand is always the last one.
11636 306 : Ops.push_back(DAG.getConstant(Alignment, dl, MVT::i32));
11637 :
11638 : // If this is a non-standard-aligned STORE, the penultimate operand is the
11639 : // stored value. Bitcast it to the aligned type.
11640 306 : if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
11641 40 : SDValue &StVal = Ops[Ops.size()-2];
11642 20 : StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal);
11643 : }
11644 :
11645 306 : EVT LoadVT = isLaneOp ? VecTy.getVectorElementType() : AlignedVecTy;
11646 : SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, LoadVT,
11647 612 : MemN->getMemOperand());
11648 :
11649 : // Update the uses.
11650 : SmallVector<SDValue, 5> NewResults;
11651 518 : for (unsigned i = 0; i < NumResultVecs; ++i)
11652 212 : NewResults.push_back(SDValue(UpdN.getNode(), i));
11653 :
11654 : // If this is an non-standard-aligned LOAD, the first result is the loaded
11655 : // value. Bitcast it to the expected result type.
11656 306 : if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
11657 : SDValue &LdVal = NewResults[0];
11658 26 : LdVal = DAG.getNode(ISD::BITCAST, dl, VecTy, LdVal);
11659 : }
11660 :
11661 612 : NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
11662 306 : DCI.CombineTo(N, NewResults);
11663 306 : DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
11664 :
11665 : break;
11666 : }
11667 11108 : return SDValue();
11668 : }
11669 :
11670 : static SDValue PerformVLDCombine(SDNode *N,
11671 : TargetLowering::DAGCombinerInfo &DCI) {
11672 1068 : if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
11673 : return SDValue();
11674 :
11675 546 : return CombineBaseUpdate(N, DCI);
11676 : }
11677 :
11678 : /// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
11679 : /// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
11680 : /// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
11681 : /// return true.
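       : /// For example, if every lane result of a vld2lane is only used by VDUPLANEs
       : /// of the loaded lane, the group is rewritten as a single VLD2DUP (roughly
       : /// "vld2.32 {d0[], d1[]}, [r0]"), which loads and duplicates the lane at once.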
11682 103 : static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
11683 103 : SelectionDAG &DAG = DCI.DAG;
11684 103 : EVT VT = N->getValueType(0);
11685 : // vldN-dup instructions only support 64-bit vectors for N > 1.
11686 103 : if (!VT.is64BitVector())
11687 : return false;
11688 :
11689 : // Check if the VDUPLANE operand is a vldN-dup intrinsic.
11690 60 : SDNode *VLD = N->getOperand(0).getNode();
11691 60 : if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
11692 : return false;
11693 : unsigned NumVecs = 0;
11694 : unsigned NewOpc = 0;
11695 24 : unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
11696 12 : if (IntNo == Intrinsic::arm_neon_vld2lane) {
11697 : NumVecs = 2;
11698 : NewOpc = ARMISD::VLD2DUP;
11699 4 : } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
11700 : NumVecs = 3;
11701 : NewOpc = ARMISD::VLD3DUP;
11702 2 : } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
11703 : NumVecs = 4;
11704 : NewOpc = ARMISD::VLD4DUP;
11705 : } else {
11706 : return false;
11707 : }
11708 :
11709 : // First check that all the vldN-lane uses are VDUPLANEs and that the lane
11710 : // numbers match the load.
11711 : unsigned VLDLaneNo =
11712 24 : cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue();
11713 12 : for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
11714 49 : UI != UE; ++UI) {
11715 : // Ignore uses of the chain result.
11716 37 : if (UI.getUse().getResNo() == NumVecs)
11717 : continue;
11718 : SDNode *User = *UI;
11719 60 : if (User->getOpcode() != ARMISD::VDUPLANE ||
11720 60 : VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
11721 : return false;
11722 : }
11723 :
11724 : // Create the vldN-dup node.
11725 12 : EVT Tys[5];
11726 : unsigned n;
11727 42 : for (n = 0; n < NumVecs; ++n)
11728 30 : Tys[n] = VT;
11729 12 : Tys[n] = MVT::Other;
11730 12 : SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumVecs+1));
11731 12 : SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
11732 : MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
11733 12 : SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,
11734 : Ops, VLDMemInt->getMemoryVT(),
11735 12 : VLDMemInt->getMemOperand());
11736 :
11737 : // Update the uses.
11738 12 : for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
11739 49 : UI != UE; ++UI) {
11740 : unsigned ResNo = UI.getUse().getResNo();
11741 : // Ignore uses of the chain result.
11742 37 : if (ResNo == NumVecs)
11743 : continue;
11744 : SDNode *User = *UI;
11745 30 : DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
11746 : }
11747 :
11748 : // Now the vldN-lane intrinsic is dead except for its chain result.
11749 : // Update uses of the chain.
11750 : std::vector<SDValue> VLDDupResults;
11751 42 : for (unsigned n = 0; n < NumVecs; ++n)
11752 30 : VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
11753 12 : VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
11754 12 : DCI.CombineTo(VLD, VLDDupResults);
11755 :
11756 : return true;
11757 : }
11758 :
11759 : /// PerformVDUPLANECombine - Target-specific dag combine xforms for
11760 : /// ARMISD::VDUPLANE.
11761 103 : static SDValue PerformVDUPLANECombine(SDNode *N,
11762 : TargetLowering::DAGCombinerInfo &DCI) {
11763 103 : SDValue Op = N->getOperand(0);
11764 :
11765 : // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
11766 : // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
11767 103 : if (CombineVLDDUP(N, DCI))
11768 12 : return SDValue(N, 0);
11769 :
11770 : // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
11771 : // redundant. Ignore bit_converts for now; element sizes are checked below.
11772 318 : while (Op.getOpcode() == ISD::BITCAST)
11773 68 : Op = Op.getOperand(0);
11774 91 : if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
11775 90 : return SDValue();
11776 :
11777 : // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
11778 1 : unsigned EltSize = Op.getScalarValueSizeInBits();
11779 : // The canonical VMOV for a zero vector uses a 32-bit element size.
11780 2 : unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
11781 : unsigned EltBits;
11782 1 : if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
11783 : EltSize = 8;
11784 2 : EVT VT = N->getValueType(0);
11785 1 : if (EltSize > VT.getScalarSizeInBits())
11786 0 : return SDValue();
11787 :
11788 2 : return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
11789 : }
11790 :
11791 : /// PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
11792 0 : static SDValue PerformVDUPCombine(SDNode *N,
11793 : TargetLowering::DAGCombinerInfo &DCI) {
11794 0 : SelectionDAG &DAG = DCI.DAG;
11795 0 : SDValue Op = N->getOperand(0);
11796 :
11797 : // Match VDUP(LOAD) -> VLD1DUP.
11798 : // We match this pattern here rather than waiting for isel because the
11799 : // transform is only legal for unindexed loads.
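       : // For example, (ARMISD::VDUP (load i32 [r0])) becomes a VLD1DUP node, which
       : // can then select to something like "vld1.32 {d0[]}, [r0]" instead of a
       : // separate load and vdup.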
11800 : LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode());
11801 0 : if (LD && Op.hasOneUse() && LD->isUnindexed() &&
11802 0 : LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) {
11803 0 : SDValue Ops[] = { LD->getOperand(0), LD->getOperand(1),
11804 0 : DAG.getConstant(LD->getAlignment(), SDLoc(N), MVT::i32) };
11805 0 : SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other);
11806 0 : SDValue VLDDup = DAG.getMemIntrinsicNode(ARMISD::VLD1DUP, SDLoc(N), SDTys,
11807 : Ops, LD->getMemoryVT(),
11808 0 : LD->getMemOperand());
11809 0 : DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), VLDDup.getValue(1));
11810 0 : return VLDDup;
11811 : }
11812 :
11813 0 : return SDValue();
11814 : }
11815 :
11816 20782 : static SDValue PerformLOADCombine(SDNode *N,
11817 : TargetLowering::DAGCombinerInfo &DCI) {
11818 41564 : EVT VT = N->getValueType(0);
11819 :
11820 : // If this is a legal vector load, try to combine it into a VLD1_UPD.
11821 18882 : if (ISD::isNormalLoad(N) && VT.isVector() &&
11822 6901 : DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
11823 6835 : return CombineBaseUpdate(N, DCI);
11824 :
11825 13947 : return SDValue();
11826 : }
11827 :
11828 : /// PerformSTORECombine - Target-specific dag combine xforms for
11829 : /// ISD::STORE.
11830 19167 : static SDValue PerformSTORECombine(SDNode *N,
11831 : TargetLowering::DAGCombinerInfo &DCI) {
11832 : StoreSDNode *St = cast<StoreSDNode>(N);
11833 19167 : if (St->isVolatile())
11834 2832 : return SDValue();
11835 :
11836 : // Optimize trunc store (of multiple scalars) to shuffle and store. First,
11837 : // pack all of the elements in one place. Next, store to memory in fewer
11838 : // chunks.
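       : // As a rough sketch, a truncating store of v4i32 to v4i8 on a little-endian
       : // target becomes a v16i8 shuffle that packs lanes <0,4,8,12> to the bottom of
       : // the register, followed by a single i32 store of the low element.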
11839 16335 : SDValue StVal = St->getValue();
11840 16335 : EVT VT = StVal.getValueType();
11841 17514 : if (St->isTruncatingStore() && VT.isVector()) {
11842 23 : SelectionDAG &DAG = DCI.DAG;
11843 23 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11844 23 : EVT StVT = St->getMemoryVT();
11845 : unsigned NumElems = VT.getVectorNumElements();
11846 : assert(StVT != VT && "Cannot truncate to the same type");
11847 : unsigned FromEltSz = VT.getScalarSizeInBits();
11848 : unsigned ToEltSz = StVT.getScalarSizeInBits();
11849 :
11850 : // From, To sizes and ElemCount must be pow of two
11851 23 : if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue();
11852 :
11853 : // We are going to use the original vector elt for storing.
11854 : // Accumulated smaller vector elements must be a multiple of the store size.
11855 23 : if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue();
11856 :
11857 23 : unsigned SizeRatio = FromEltSz / ToEltSz;
11858 : assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
11859 :
11860 : // Create a type on which we perform the shuffle.
11861 23 : EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
11862 23 : NumElems*SizeRatio);
11863 : assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
11864 :
11865 : SDLoc DL(St);
11866 23 : SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
11867 23 : SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
11868 91 : for (unsigned i = 0; i < NumElems; ++i)
11869 204 : ShuffleVec[i] = DAG.getDataLayout().isBigEndian()
11870 68 : ? (i + 1) * SizeRatio - 1
11871 60 : : i * SizeRatio;
11872 :
11873 : // Can't shuffle using an illegal type.
11874 24 : if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
11875 :
11876 : SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec,
11877 : DAG.getUNDEF(WideVec.getValueType()),
11878 44 : ShuffleVec);
11879 : // At this point all of the data is stored at the bottom of the
11880 : // register. We now need to save it to mem.
11881 :
11882 : // Find the largest store unit
11883 22 : MVT StoreType = MVT::i8;
11884 154 : for (MVT Tp : MVT::integer_valuetypes()) {
11885 22 : if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
11886 18 : StoreType = Tp;
11887 : }
11888 : // Didn't find a legal store type.
11889 : if (!TLI.isTypeLegal(StoreType))
11890 4 : return SDValue();
11891 :
11892 : // Bitcast the original vector into a vector of store-size units
11893 18 : EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
11894 54 : StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits());
11895 : assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
11896 18 : SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
11897 : SmallVector<SDValue, 8> Chains;
11898 18 : SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL,
11899 36 : TLI.getPointerTy(DAG.getDataLayout()));
11900 18 : SDValue BasePtr = St->getBasePtr();
11901 :
11902 : // Perform one or more big stores into memory.
11903 18 : unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits();
11904 36 : for (unsigned I = 0; I < E; I++) {
11905 : SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
11906 : StoreType, ShuffWide,
11907 18 : DAG.getIntPtrConstant(I, DL));
11908 : SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr,
11909 18 : St->getPointerInfo(), St->getAlignment(),
11910 18 : St->getMemOperand()->getFlags());
11911 18 : BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
11912 18 : Increment);
11913 18 : Chains.push_back(Ch);
11914 : }
11915 18 : return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
11916 : }
11917 :
11918 : if (!ISD::isNormalStore(St))
11919 1173 : return SDValue();
11920 :
11921 : // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
11922 : // ARM stores of arguments in the same cache line.
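 : // Roughly:  store (VMOVDRR lo, hi), ptr
 : //       ->  store lo, ptr ; store hi, ptr+4   (halves swapped on big-endian)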
11923 30278 : if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
11924 : StVal.getNode()->hasOneUse()) {
11925 15 : SelectionDAG &DAG = DCI.DAG;
11926 15 : bool isBigEndian = DAG.getDataLayout().isBigEndian();
11927 : SDLoc DL(St);
11928 15 : SDValue BasePtr = St->getBasePtr();
11929 : SDValue NewST1 = DAG.getStore(
11930 27 : St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 1 : 0),
11931 15 : BasePtr, St->getPointerInfo(), St->getAlignment(),
11932 30 : St->getMemOperand()->getFlags());
11933 :
11934 : SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
11935 15 : DAG.getConstant(4, DL, MVT::i32));
11936 : return DAG.getStore(NewST1.getValue(0), DL,
11937 27 : StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
11938 15 : OffsetPtr, St->getPointerInfo(),
11939 15 : std::min(4U, St->getAlignment() / 2),
11940 42 : St->getMemOperand()->getFlags());
11941 : }
11942 :
11943 85 : if (StVal.getValueType() == MVT::i64 &&
11944 : StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
11945 :
11946 : // Bitcast an i64 store extracted from a vector to f64.
11947 : // Otherwise, the i64 value will be legalized to a pair of i32 values.
11948 1 : SelectionDAG &DAG = DCI.DAG;
11949 : SDLoc dl(StVal);
11950 1 : SDValue IntVec = StVal.getOperand(0);
11951 1 : EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
11952 2 : IntVec.getValueType().getVectorNumElements());
11953 1 : SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
11954 : SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
11955 1 : Vec, StVal.getOperand(1));
11956 1 : dl = SDLoc(N);
11957 1 : SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
11958 : // Make the DAGCombiner fold the bitcasts.
11959 1 : DCI.AddToWorklist(Vec.getNode());
11960 1 : DCI.AddToWorklist(ExtElt.getNode());
11961 1 : DCI.AddToWorklist(V.getNode());
11962 : return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
11963 1 : St->getPointerInfo(), St->getAlignment(),
11964 2 : St->getMemOperand()->getFlags(), St->getAAInfo());
11965 : }
11966 :
11967 : // If this is a legal vector store, try to combine it into a VST1_UPD.
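 : // (VST1_UPD is the post-increment form, e.g. "vst1.32 {d16}, [r0]!", which
 : // also writes the updated base address back into the address register.)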
11968 15123 : if (ISD::isNormalStore(N) && VT.isVector() &&
11969 3828 : DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
11970 3727 : return CombineBaseUpdate(N, DCI);
11971 :
11972 11396 : return SDValue();
11973 : }
11974 :
11975 : /// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
11976 : /// can replace combinations of VMUL and VCVT (floating-point to integer)
11977 : /// when the VMUL has a constant operand that is a power of 2.
11978 : ///
11979 : /// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
11980 : /// vmul.f32 d16, d17, d16
11981 : /// vcvt.s32.f32 d16, d16
11982 : /// becomes:
11983 : /// vcvt.s32.f32 d16, d16, #3
11984 352 : static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,
11985 : const ARMSubtarget *Subtarget) {
11986 352 : if (!Subtarget->hasNEON())
11987 0 : return SDValue();
11988 :
11989 352 : SDValue Op = N->getOperand(0);
11990 704 : if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
11991 : Op.getOpcode() != ISD::FMUL)
11992 333 : return SDValue();
11993 :
11994 19 : SDValue ConstVec = Op->getOperand(1);
11995 19 : if (!isa<BuildVectorSDNode>(ConstVec))
11996 2 : return SDValue();
11997 :
11998 17 : MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
11999 17 : uint32_t FloatBits = FloatTy.getSizeInBits();
12000 17 : MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
12001 17 : uint32_t IntBits = IntTy.getSizeInBits();
12002 17 : unsigned NumLanes = Op.getValueType().getVectorNumElements();
12003 17 : if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) {
12004 : // These instructions only exist converting from f32 to i32. We can handle
12005 : // smaller integers by generating an extra truncate, but larger ones would
12006 : // be lossy. We also can't handle more than 4 lanes, since these instructions
12007 : // only support v2i32/v4i32 types.
12008 7 : return SDValue();
12009 : }
12010 :
12011 : BitVector UndefElements;
12012 : BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
12013 10 : int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
12014 10 : if (C == -1 || C == 0 || C > 32)
12015 2 : return SDValue();
12016 :
12017 : SDLoc dl(N);
12018 8 : bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
12019 8 : unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
12020 : Intrinsic::arm_neon_vcvtfp2fxu;
12021 : SDValue FixConv = DAG.getNode(
12022 : ISD::INTRINSIC_WO_CHAIN, dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
12023 8 : DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0),
12024 13 : DAG.getConstant(C, dl, MVT::i32));
12025 :
12026 8 : if (IntBits < FloatBits)
12027 6 : FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);
12028 :
12029 8 : return FixConv;
12030 : }
12031 :
12032 : /// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
12033 : /// can replace combinations of VCVT (integer to floating-point) and VDIV
12034 : /// when the VDIV has a constant operand that is a power of 2.
12035 : ///
12036 : /// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
12037 : /// vcvt.f32.s32 d16, d16
12038 : /// vdiv.f32 d16, d17, d16
12039 : /// becomes:
12040 : /// vcvt.f32.s32 d16, d16, #3
12041 367 : static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG,
12042 : const ARMSubtarget *Subtarget) {
12043 367 : if (!Subtarget->hasNEON())
12044 0 : return SDValue();
12045 :
12046 367 : SDValue Op = N->getOperand(0);
12047 367 : unsigned OpOpcode = Op.getNode()->getOpcode();
12048 1101 : if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple() ||
12049 19 : (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
12050 352 : return SDValue();
12051 :
12052 15 : SDValue ConstVec = N->getOperand(1);
12053 15 : if (!isa<BuildVectorSDNode>(ConstVec))
12054 0 : return SDValue();
12055 :
12056 15 : MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
12057 15 : uint32_t FloatBits = FloatTy.getSizeInBits();
12058 15 : MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
12059 15 : uint32_t IntBits = IntTy.getSizeInBits();
12060 15 : unsigned NumLanes = Op.getValueType().getVectorNumElements();
12061 15 : if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) {
12062 : // These instructions only exist converting from i32 to f32. We can handle
12063 : // smaller integers by generating an extra extend, but larger ones would
12064 : // be lossy. We also can't handle more than 4 lanes, since these instructions
12065 : // only support v2i32/v4i32 types.
12066 3 : return SDValue();
12067 : }
12068 :
12069 : BitVector UndefElements;
12070 : BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
12071 12 : int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
12072 12 : if (C == -1 || C == 0 || C > 32)
12073 2 : return SDValue();
12074 :
12075 : SDLoc dl(N);
12076 : bool isSigned = OpOpcode == ISD::SINT_TO_FP;
12077 10 : SDValue ConvInput = Op.getOperand(0);
12078 10 : if (IntBits < FloatBits)
12079 2 : ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
12080 : dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
12081 5 : ConvInput);
12082 :
12083 10 : unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
12084 : Intrinsic::arm_neon_vcvtfxu2fp;
12085 : return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl,
12086 : Op.getValueType(),
12087 : DAG.getConstant(IntrinsicOpcode, dl, MVT::i32),
12088 10 : ConvInput, DAG.getConstant(C, dl, MVT::i32));
12089 : }
12090 :
12091 : /// getVShiftImm - Check if this is a valid build_vector for the immediate
12092 : /// operand of a vector shift operation, where all the elements of the
12093 : /// build_vector must have the same constant integer value.
12094 593 : static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
12095 : // Ignore bit_converts.
12096 689 : while (Op.getOpcode() == ISD::BITCAST)
12097 96 : Op = Op.getOperand(0);
12098 : BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
12099 : APInt SplatBits, SplatUndef;
12100 : unsigned SplatBitSize;
12101 : bool HasAnyUndefs;
12102 234 : if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
12103 593 : HasAnyUndefs, ElementBits) ||
12104 234 : SplatBitSize > ElementBits)
12105 359 : return false;
12106 234 : Cnt = SplatBits.getSExtValue();
12107 234 : return true;
12108 : }
12109 :
12110 : /// isVShiftLImm - Check if this is a valid build_vector for the immediate
12111 : /// operand of a vector shift left operation. That value must be in the range:
12112 : /// 0 <= Value < ElementBits for a left shift; or
12113 : /// 0 <= Value <= ElementBits for a long left shift.
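 : ///
 : /// For example, for a v4i16 shift a splatted build_vector of 3 gives Cnt = 3,
 : /// which is valid since 0 <= 3 < 16.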
12114 271 : static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
12115 : assert(VT.isVector() && "vector shift count is not a vector type");
12116 271 : int64_t ElementBits = VT.getScalarSizeInBits();
12117 271 : if (! getVShiftImm(Op, ElementBits, Cnt))
12118 : return false;
12119 97 : return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
12120 : }
12121 :
12122 : /// isVShiftRImm - Check if this is a valid build_vector for the immediate
12123 : /// operand of a vector shift right operation. For a shift opcode, the value
12124 : /// is positive, but for an intrinsic the value count must be negative. The
12125 : /// absolute value must be in the range:
12126 : /// 1 <= |Value| <= ElementBits for a right shift; or
12127 : /// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
12128 322 : static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
12129 : int64_t &Cnt) {
12130 : assert(VT.isVector() && "vector shift count is not a vector type");
12131 322 : int64_t ElementBits = VT.getScalarSizeInBits();
12132 322 : if (! getVShiftImm(Op, ElementBits, Cnt))
12133 : return false;
12134 137 : if (!isIntrinsic)
12135 53 : return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
12136 84 : if (Cnt >= -(isNarrow ? ElementBits/2 : ElementBits) && Cnt <= -1) {
12137 80 : Cnt = -Cnt;
12138 80 : return true;
12139 : }
12140 : return false;
12141 : }
12142 :
12143 : /// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
12144 2077 : static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
12145 6231 : unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
12146 : switch (IntNo) {
12147 : default:
12148 : // Don't do anything for most intrinsics.
12149 : break;
12150 :
12151 : // Vector shifts: check for immediate versions and lower them.
12152 : // Note: This is done during DAG combining instead of DAG legalizing because
12153 : // the build_vectors for 64-bit vector element shift counts are generally
12154 : // not legal, and it is hard to see their values after they get legalized to
12155 : // loads from a constant pool.
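 : // For example, @llvm.arm.neon.vshifts.v4i32(<4 x i32> %x,
 : //   <4 x i32> <i32 1, i32 1, i32 1, i32 1>) becomes an ARMISD::VSHL node with
 : // an immediate count of 1, which typically selects to "vshl.i32 q0, q0, #1".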
12156 314 : case Intrinsic::arm_neon_vshifts:
12157 : case Intrinsic::arm_neon_vshiftu:
12158 : case Intrinsic::arm_neon_vrshifts:
12159 : case Intrinsic::arm_neon_vrshiftu:
12160 : case Intrinsic::arm_neon_vrshiftn:
12161 : case Intrinsic::arm_neon_vqshifts:
12162 : case Intrinsic::arm_neon_vqshiftu:
12163 : case Intrinsic::arm_neon_vqshiftsu:
12164 : case Intrinsic::arm_neon_vqshiftns:
12165 : case Intrinsic::arm_neon_vqshiftnu:
12166 : case Intrinsic::arm_neon_vqshiftnsu:
12167 : case Intrinsic::arm_neon_vqrshiftns:
12168 : case Intrinsic::arm_neon_vqrshiftnu:
12169 : case Intrinsic::arm_neon_vqrshiftnsu: {
12170 628 : EVT VT = N->getOperand(1).getValueType();
12171 : int64_t Cnt;
12172 : unsigned VShiftOpc = 0;
12173 :
12174 : switch (IntNo) {
12175 170 : case Intrinsic::arm_neon_vshifts:
12176 : case Intrinsic::arm_neon_vshiftu:
12177 170 : if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
12178 : VShiftOpc = ARMISD::VSHL;
12179 : break;
12180 : }
12181 308 : if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
12182 16 : VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
12183 : ARMISD::VSHRs : ARMISD::VSHRu);
12184 : break;
12185 : }
12186 138 : return SDValue();
12187 :
12188 64 : case Intrinsic::arm_neon_vrshifts:
12189 : case Intrinsic::arm_neon_vrshiftu:
12190 64 : if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
12191 : break;
12192 32 : return SDValue();
12193 :
12194 48 : case Intrinsic::arm_neon_vqshifts:
12195 : case Intrinsic::arm_neon_vqshiftu:
12196 48 : if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
12197 : break;
12198 32 : return SDValue();
12199 :
12200 8 : case Intrinsic::arm_neon_vqshiftsu:
12201 8 : if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
12202 : break;
12203 0 : llvm_unreachable("invalid shift count for vqshlu intrinsic");
12204 :
12205 24 : case Intrinsic::arm_neon_vrshiftn:
12206 : case Intrinsic::arm_neon_vqshiftns:
12207 : case Intrinsic::arm_neon_vqshiftnu:
12208 : case Intrinsic::arm_neon_vqshiftnsu:
12209 : case Intrinsic::arm_neon_vqrshiftns:
12210 : case Intrinsic::arm_neon_vqrshiftnu:
12211 : case Intrinsic::arm_neon_vqrshiftnsu:
12212 : // Narrowing shifts require an immediate right shift.
12213 24 : if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
12214 : break;
12215 0 : llvm_unreachable("invalid shift count for narrowing vector shift "
12216 : "intrinsic");
12217 :
12218 0 : default:
12219 0 : llvm_unreachable("unhandled vector shift");
12220 : }
12221 :
12222 : switch (IntNo) {
12223 : case Intrinsic::arm_neon_vshifts:
12224 : case Intrinsic::arm_neon_vshiftu:
12225 : // Opcode already set above.
12226 : break;
12227 16 : case Intrinsic::arm_neon_vrshifts:
12228 16 : VShiftOpc = ARMISD::VRSHRs; break;
12229 16 : case Intrinsic::arm_neon_vrshiftu:
12230 16 : VShiftOpc = ARMISD::VRSHRu; break;
12231 3 : case Intrinsic::arm_neon_vrshiftn:
12232 3 : VShiftOpc = ARMISD::VRSHRN; break;
12233 8 : case Intrinsic::arm_neon_vqshifts:
12234 8 : VShiftOpc = ARMISD::VQSHLs; break;
12235 8 : case Intrinsic::arm_neon_vqshiftu:
12236 8 : VShiftOpc = ARMISD::VQSHLu; break;
12237 8 : case Intrinsic::arm_neon_vqshiftsu:
12238 8 : VShiftOpc = ARMISD::VQSHLsu; break;
12239 3 : case Intrinsic::arm_neon_vqshiftns:
12240 3 : VShiftOpc = ARMISD::VQSHRNs; break;
12241 4 : case Intrinsic::arm_neon_vqshiftnu:
12242 4 : VShiftOpc = ARMISD::VQSHRNu; break;
12243 3 : case Intrinsic::arm_neon_vqshiftnsu:
12244 3 : VShiftOpc = ARMISD::VQSHRNsu; break;
12245 4 : case Intrinsic::arm_neon_vqrshiftns:
12246 4 : VShiftOpc = ARMISD::VQRSHRNs; break;
12247 4 : case Intrinsic::arm_neon_vqrshiftnu:
12248 4 : VShiftOpc = ARMISD::VQRSHRNu; break;
12249 3 : case Intrinsic::arm_neon_vqrshiftnsu:
12250 3 : VShiftOpc = ARMISD::VQRSHRNsu; break;
12251 : }
12252 :
12253 : SDLoc dl(N);
12254 : return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
12255 112 : N->getOperand(1), DAG.getConstant(Cnt, dl, MVT::i32));
12256 : }
12257 :
12258 16 : case Intrinsic::arm_neon_vshiftins: {
12259 32 : EVT VT = N->getOperand(1).getValueType();
12260 : int64_t Cnt;
12261 : unsigned VShiftOpc = 0;
12262 :
12263 16 : if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
12264 : VShiftOpc = ARMISD::VSLI;
12265 16 : else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
12266 : VShiftOpc = ARMISD::VSRI;
12267 : else {
12268 0 : llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
12269 : }
12270 :
12271 : SDLoc dl(N);
12272 : return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
12273 16 : N->getOperand(1), N->getOperand(2),
12274 16 : DAG.getConstant(Cnt, dl, MVT::i32));
12275 : }
12276 :
12277 : case Intrinsic::arm_neon_vqrshifts:
12278 : case Intrinsic::arm_neon_vqrshiftu:
12279 : // No immediate versions of these to check for.
12280 : break;
12281 : }
12282 :
12283 1747 : return SDValue();
12284 : }
12285 :
12286 : /// PerformShiftCombine - Checks for immediate versions of vector shifts and
12287 : /// lowers them. As with the vector shift intrinsics, this is done during DAG
12288 : /// combining instead of DAG legalizing because the build_vectors for 64-bit
12289 : /// vector element shift counts are generally not legal, and it is hard to see
12290 : /// their values after they get legalized to loads from a constant pool.
12291 2647 : static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
12292 : const ARMSubtarget *ST) {
12293 2647 : EVT VT = N->getValueType(0);
12294 3865 : if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) {
12295 : // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high
12296 : // 16 bits of x are zero. This optimizes rev + lsr #16 into rev16.
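 : // Illustrative check: with x = 0x0000AABB (high 16 bits zero),
 : //   bswap(x)            = 0xBBAA0000
 : //   srl (bswap x), 16   = 0x0000BBAA
 : //   rotr (bswap x), 16  = 0x0000BBAA
 : // so the two forms agree, and the rotr form can then match the REV16 pattern.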
12297 1218 : SDValue N1 = N->getOperand(1);
12298 : if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
12299 1200 : SDValue N0 = N->getOperand(0);
12300 2412 : if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP &&
12301 12 : DAG.MaskedValueIsZero(N0.getOperand(0),
12302 1221 : APInt::getHighBitsSet(32, 16)))
12303 6 : return DAG.getNode(ISD::ROTR, SDLoc(N), VT, N0, N1);
12304 : }
12305 : }
12306 :
12307 : // Nothing to be done for scalar shifts.
12308 2644 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12309 2644 : if (!VT.isVector() || !TLI.isTypeLegal(VT))
12310 2543 : return SDValue();
12311 :
12312 : assert(ST->hasNEON() && "unexpected vector shift");
12313 : int64_t Cnt;
12314 :
12315 202 : switch (N->getOpcode()) {
12316 0 : default: llvm_unreachable("unexpected shift opcode");
12317 :
12318 29 : case ISD::SHL:
12319 58 : if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) {
12320 : SDLoc dl(N);
12321 21 : return DAG.getNode(ARMISD::VSHL, dl, VT, N->getOperand(0),
12322 21 : DAG.getConstant(Cnt, dl, MVT::i32));
12323 8 : }
12324 : break;
12325 :
12326 72 : case ISD::SRA:
12327 : case ISD::SRL:
12328 144 : if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
12329 53 : unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
12330 : ARMISD::VSHRs : ARMISD::VSHRu);
12331 : SDLoc dl(N);
12332 53 : return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
12333 53 : DAG.getConstant(Cnt, dl, MVT::i32));
12334 19 : }
12335 : }
12336 27 : return SDValue();
12337 : }
12338 :
12339 : /// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
12340 : /// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
12341 1432 : static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
12342 : const ARMSubtarget *ST) {
12343 1432 : SDValue N0 = N->getOperand(0);
12344 :
12345 : // Check for sign- and zero-extensions of vector extract operations of 8-
12346 : // and 16-bit vector elements. NEON supports these directly. They are
12347 : // handled during DAG combining because type legalization will promote them
12348 : // to 32-bit types and it is messy to recognize the operations after that.
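 : // For example, (zext (extract_vector_elt v8i16 V, 1)) to i32 becomes an
 : // ARMISD::VGETLANEu node, which can be selected to "vmov.u16 r0, d0[1]".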
12349 1432 : if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
12350 20 : SDValue Vec = N0.getOperand(0);
12351 20 : SDValue Lane = N0.getOperand(1);
12352 40 : EVT VT = N->getValueType(0);
12353 : EVT EltVT = N0.getValueType();
12354 20 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12355 :
12356 : if (VT == MVT::i32 &&
12357 : (EltVT == MVT::i8 || EltVT == MVT::i16) &&
12358 : TLI.isTypeLegal(Vec.getValueType()) &&
12359 : isa<ConstantSDNode>(Lane)) {
12360 :
12361 : unsigned Opc = 0;
12362 18 : switch (N->getOpcode()) {
12363 0 : default: llvm_unreachable("unexpected opcode");
12364 : case ISD::SIGN_EXTEND:
12365 : Opc = ARMISD::VGETLANEs;
12366 : break;
12367 5 : case ISD::ZERO_EXTEND:
12368 : case ISD::ANY_EXTEND:
12369 : Opc = ARMISD::VGETLANEu;
12370 5 : break;
12371 : }
12372 18 : return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane);
12373 : }
12374 : }
12375 :
12376 1423 : return SDValue();
12377 : }
12378 :
12379 0 : static const APInt *isPowerOf2Constant(SDValue V) {
12380 : ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
12381 : if (!C)
12382 0 : return nullptr;
12383 169 : const APInt *CV = &C->getAPIntValue();
12384 169 : return CV->isPowerOf2() ? CV : nullptr;
12385 : }
12386 :
12387 593 : SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
12388 : // If we have a CMOV, OR and AND combination such as:
12389 : // if (x & CN)
12390 : // y |= CM;
12391 : //
12392 : // And:
12393 : // * CN is a single bit;
12394 : // * All bits covered by CM are known zero in y
12395 : //
12396 : // Then we can convert this into a sequence of BFI instructions. This will
12397 : // always be a win if CM is a single bit, will always be no worse than the
12398 : // TST&OR sequence if CM is two bits, and for Thumb will be no worse if CM is
12399 : // three bits (due to the extra IT instruction).
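 : //
 : // Roughly, with CN == (1 << 2) and CM == (1 << 4) the result is:
 : //   lsr tmp, x, #2
 : //   bfi y,  tmp, #4, #1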
12400 :
12401 593 : SDValue Op0 = CMOV->getOperand(0);
12402 593 : SDValue Op1 = CMOV->getOperand(1);
12403 : auto CCNode = cast<ConstantSDNode>(CMOV->getOperand(2));
12404 593 : auto CC = CCNode->getAPIntValue().getLimitedValue();
12405 593 : SDValue CmpZ = CMOV->getOperand(4);
12406 :
12407 : // The compare must be against zero.
12408 1186 : if (!isNullConstant(CmpZ->getOperand(1)))
12409 170 : return SDValue();
12410 :
12411 : assert(CmpZ->getOpcode() == ARMISD::CMPZ);
12412 423 : SDValue And = CmpZ->getOperand(0);
12413 423 : if (And->getOpcode() != ISD::AND)
12414 269 : return SDValue();
12415 154 : const APInt *AndC = isPowerOf2Constant(And->getOperand(1));
12416 109 : if (!AndC)
12417 45 : return SDValue();
12418 109 : SDValue X = And->getOperand(0);
12419 :
12420 109 : if (CC == ARMCC::EQ) {
12421 : // We're performing an "equal to zero" compare. Swap the operands so we
12422 : // canonicalize on a "not equal to zero" compare.
12423 : std::swap(Op0, Op1);
12424 : } else {
12425 : assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?");
12426 : }
12427 :
12428 109 : if (Op1->getOpcode() != ISD::OR)
12429 97 : return SDValue();
12430 :
12431 12 : ConstantSDNode *OrC = dyn_cast<ConstantSDNode>(Op1->getOperand(1));
12432 : if (!OrC)
12433 0 : return SDValue();
12434 12 : SDValue Y = Op1->getOperand(0);
12435 :
12436 : if (Op0 != Y)
12437 0 : return SDValue();
12438 :
12439 : // Now, is it profitable to continue?
12440 12 : APInt OrCI = OrC->getAPIntValue();
12441 12 : unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
12442 12 : if (OrCI.countPopulation() > Heuristic)
12443 3 : return SDValue();
12444 :
12445 : // Lastly, can we determine that the bits defined by OrCI
12446 : // are zero in Y?
12447 9 : KnownBits Known;
12448 9 : DAG.computeKnownBits(Y, Known);
12449 9 : if ((OrCI & Known.Zero) != OrCI)
12450 1 : return SDValue();
12451 :
12452 : // OK, we can do the combine.
12453 8 : SDValue V = Y;
12454 : SDLoc dl(X);
12455 8 : EVT VT = X.getValueType();
12456 : unsigned BitInX = AndC->logBase2();
12457 :
12458 8 : if (BitInX != 0) {
12459 : // We must shift X first.
12460 8 : X = DAG.getNode(ISD::SRL, dl, VT, X,
12461 8 : DAG.getConstant(BitInX, dl, VT));
12462 : }
12463 :
12464 45 : for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits();
12465 53 : BitInY < NumActiveBits; ++BitInY) {
12466 45 : if (OrCI[BitInY] == 0)
12467 36 : continue;
12468 9 : APInt Mask(VT.getSizeInBits(), 0);
12469 : Mask.setBit(BitInY);
12470 9 : V = DAG.getNode(ARMISD::BFI, dl, VT, V, X,
12471 : // Confusingly, the operand is an *inverted* mask.
12472 18 : DAG.getConstant(~Mask, dl, VT));
12473 : }
12474 :
12475 8 : return V;
12476 : }
12477 :
12478 : /// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
12479 : SDValue
12480 1983 : ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
12481 1983 : SDValue Cmp = N->getOperand(4);
12482 1983 : if (Cmp.getOpcode() != ARMISD::CMPZ)
12483 : // Only looking at NE cases.
12484 361 : return SDValue();
12485 :
12486 3244 : EVT VT = N->getValueType(0);
12487 : SDLoc dl(N);
12488 1622 : SDValue LHS = Cmp.getOperand(0);
12489 1622 : SDValue RHS = Cmp.getOperand(1);
12490 1622 : SDValue Chain = N->getOperand(0);
12491 1622 : SDValue BB = N->getOperand(1);
12492 1622 : SDValue ARMcc = N->getOperand(2);
12493 : ARMCC::CondCodes CC =
12494 1622 : (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
12495 :
12496 : // (brcond Chain BB ne CPSR (cmpz (and (cmov 0 1 CC CPSR Cmp) 1) 0))
12497 : // -> (brcond Chain BB CC CPSR Cmp)
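 : // i.e. when a boolean has been materialized with a 0/1 CMOV only to be
 : // tested against zero and branched on, branch on the original condition
 : // flags directly instead.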
12498 1043 : if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() &&
12499 1693 : LHS->getOperand(0)->getOpcode() == ARMISD::CMOV &&
12500 : LHS->getOperand(0)->hasOneUse()) {
12501 18 : auto *LHS00C = dyn_cast<ConstantSDNode>(LHS->getOperand(0)->getOperand(0));
12502 : auto *LHS01C = dyn_cast<ConstantSDNode>(LHS->getOperand(0)->getOperand(1));
12503 : auto *LHS1C = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
12504 : auto *RHSC = dyn_cast<ConstantSDNode>(RHS);
12505 36 : if ((LHS00C && LHS00C->getZExtValue() == 0) &&
12506 36 : (LHS01C && LHS01C->getZExtValue() == 1) &&
12507 54 : (LHS1C && LHS1C->getZExtValue() == 1) &&
12508 36 : (RHSC && RHSC->getZExtValue() == 0)) {
12509 : return DAG.getNode(
12510 : ARMISD::BRCOND, dl, VT, Chain, BB, LHS->getOperand(0)->getOperand(2),
12511 18 : LHS->getOperand(0)->getOperand(3), LHS->getOperand(0)->getOperand(4));
12512 : }
12513 : }
12514 :
12515 1604 : return SDValue();
12516 : }
12517 :
12518 : /// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
12519 : SDValue
12520 2310 : ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
12521 2310 : SDValue Cmp = N->getOperand(4);
12522 2310 : if (Cmp.getOpcode() != ARMISD::CMPZ)
12523 : // Only looking at EQ and NE cases.
12524 1298 : return SDValue();
12525 :
12526 2024 : EVT VT = N->getValueType(0);
12527 : SDLoc dl(N);
12528 1012 : SDValue LHS = Cmp.getOperand(0);
12529 1012 : SDValue RHS = Cmp.getOperand(1);
12530 1012 : SDValue FalseVal = N->getOperand(0);
12531 1012 : SDValue TrueVal = N->getOperand(1);
12532 1012 : SDValue ARMcc = N->getOperand(2);
12533 : ARMCC::CondCodes CC =
12534 1012 : (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
12535 :
12536 : // BFI is only available on V6T2+.
12537 1404 : if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
12538 593 : SDValue R = PerformCMOVToBFICombine(N, DAG);
12539 593 : if (R)
12540 8 : return R;
12541 : }
12542 :
12543 : // Simplify
12544 : // mov r1, r0
12545 : // cmp r1, x
12546 : // mov r0, y
12547 : // moveq r0, x
12548 : // to
12549 : // cmp r0, x
12550 : // movne r0, y
12551 : //
12552 : // mov r1, r0
12553 : // cmp r1, x
12554 : // mov r0, x
12555 : // movne r0, y
12556 : // to
12557 : // cmp r0, x
12558 : // movne r0, y
12559 : /// FIXME: Turn this into a target neutral optimization?
12560 : SDValue Res;
12561 1004 : if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
12562 186 : Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
12563 372 : N->getOperand(3), Cmp);
12564 818 : } else if (CC == ARMCC::EQ && TrueVal == RHS) {
12565 8 : SDValue ARMcc;
12566 8 : SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
12567 8 : Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
12568 16 : N->getOperand(3), NewCmp);
12569 : }
12570 :
12571 : // (cmov F T ne CPSR (cmpz (cmov 0 1 CC CPSR Cmp) 0))
12572 : // -> (cmov F T CC CPSR Cmp)
12573 1004 : if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse()) {
12574 6 : auto *LHS0C = dyn_cast<ConstantSDNode>(LHS->getOperand(0));
12575 : auto *LHS1C = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
12576 : auto *RHSC = dyn_cast<ConstantSDNode>(RHS);
12577 12 : if ((LHS0C && LHS0C->getZExtValue() == 0) &&
12578 18 : (LHS1C && LHS1C->getZExtValue() == 1) &&
12579 12 : (RHSC && RHSC->getZExtValue() == 0)) {
12580 : return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
12581 : LHS->getOperand(2), LHS->getOperand(3),
12582 6 : LHS->getOperand(4));
12583 : }
12584 : }
12585 :
12586 998 : if (!VT.isInteger())
12587 15 : return SDValue();
12588 :
12589 : // Materialize a boolean comparison for integers so we can avoid branching.
12590 983 : if (isNullConstant(FalseVal)) {
12591 394 : if (CC == ARMCC::EQ && isOneConstant(TrueVal)) {
12592 250 : if (!Subtarget->isThumb1Only() && Subtarget->hasV5TOps()) {
12593 : // If x == y then x - y == 0 and ARM's CLZ will return 32, shifting it
12594 : // right 5 bits will make that 32 be 1, otherwise it will be 0.
12595 : // CMOV 0, 1, ==, (CMPZ x, y) -> SRL (CTLZ (SUB x, y)), 5
12596 122 : SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
12597 122 : Res = DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::CTLZ, dl, VT, Sub),
12598 122 : DAG.getConstant(5, dl, MVT::i32));
12599 : } else {
12600 : // CMOV 0, 1, ==, (CMPZ x, y) ->
12601 : // (ADDCARRY (SUB x, y), t:0, t:1)
12602 : // where t = (SUBCARRY 0, (SUB x, y), 0)
12603 : //
12604 : // The SUBCARRY computes 0 - (x - y) and this will give a borrow when
12605 : // x != y. In other words, a carry C == 1 when x == y, C == 0
12606 : // otherwise.
12607 : // The final ADDCARRY computes
12608 : // x - y + (0 - (x - y)) + C == C
12609 54 : SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
12610 54 : SDVTList VTs = DAG.getVTList(VT, MVT::i32);
12611 54 : SDValue Neg = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, Sub);
12612 : // ISD::SUBCARRY returns a borrow, but what we actually want
12613 : // here is the carry.
12614 : SDValue Carry =
12615 : DAG.getNode(ISD::SUB, dl, MVT::i32,
12616 108 : DAG.getConstant(1, dl, MVT::i32), Neg.getValue(1));
12617 54 : Res = DAG.getNode(ISD::ADDCARRY, dl, VTs, Sub, Neg, Carry);
12618 : }
12619 218 : } else if (CC == ARMCC::NE && LHS != RHS &&
12620 238 : (!Subtarget->isThumb1Only() || isPowerOf2Constant(TrueVal))) {
12621 : // This seems pointless but will allow us to combine it further below.
12622 : // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUB x, y), z, !=, (CMPZ x, y)
12623 214 : SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
12624 214 : Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc,
12625 428 : N->getOperand(3), Cmp);
12626 : }
12627 589 : } else if (isNullConstant(TrueVal)) {
12628 14 : if (CC == ARMCC::EQ && LHS != RHS &&
12629 9 : (!Subtarget->isThumb1Only() || isPowerOf2Constant(FalseVal))) {
12630 : // This seems pointless but will allow us to combine it further below
12631 : // Note that we change == for != as this is the dual for the case above.
12632 : // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUB x, y), z, !=, (CMPZ x, y)
12633 8 : SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
12634 8 : Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal,
12635 : DAG.getConstant(ARMCC::NE, dl, MVT::i32),
12636 16 : N->getOperand(3), Cmp);
12637 : }
12638 : }
12639 :
12640 : // On Thumb1, the DAG above may be further combined if z is a power of 2
12641 : // (z == 2 ^ K).
12642 : // CMOV (SUB x, y), z, !=, (CMPZ x, y) ->
12643 : // merge t3, t4
12644 : // where t1 = (SUBCARRY (SUB x, y), z, 0)
12645 : // t2 = (SUBCARRY (SUB x, y), t1:0, t1:1)
12646 : // t3 = if K != 0 then (SHL t2:0, K) else t2:0
12647 : // t4 = (SUB 1, t2:1) [ we want a carry, not a borrow ]
12648 : const APInt *TrueConst;
12649 1106 : if (Subtarget->isThumb1Only() && CC == ARMCC::NE &&
12650 8 : (FalseVal.getOpcode() == ISD::SUB) && (FalseVal.getOperand(0) == LHS) &&
12651 16 : (FalseVal.getOperand(1) == RHS) &&
12652 : (TrueConst = isPowerOf2Constant(TrueVal))) {
12653 8 : SDVTList VTs = DAG.getVTList(VT, MVT::i32);
12654 : unsigned ShiftAmount = TrueConst->logBase2();
12655 8 : if (ShiftAmount)
12656 1 : TrueVal = DAG.getConstant(1, dl, VT);
12657 8 : SDValue Subc = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, TrueVal);
12658 8 : Res = DAG.getNode(ISD::SUBCARRY, dl, VTs, FalseVal, Subc, Subc.getValue(1));
12659 : // Make it a carry, not a borrow.
12660 : SDValue Carry = DAG.getNode(
12661 8 : ISD::SUB, dl, VT, DAG.getConstant(1, dl, MVT::i32), Res.getValue(1));
12662 8 : Res = DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Res, Carry);
12663 :
12664 8 : if (ShiftAmount)
12665 1 : Res = DAG.getNode(ISD::SHL, dl, VT, Res,
12666 1 : DAG.getConstant(ShiftAmount, dl, MVT::i32));
12667 : }
12668 :
12669 983 : if (Res.getNode()) {
12670 407 : KnownBits Known;
12671 407 : DAG.computeKnownBits(SDValue(N,0), Known);
12672 : // Capture demanded bits information that would be otherwise lost.
12673 407 : if (Known.Zero == 0xfffffffe)
12674 352 : Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
12675 352 : DAG.getValueType(MVT::i1));
12676 55 : else if (Known.Zero == 0xffffff00)
12677 2 : Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
12678 2 : DAG.getValueType(MVT::i8));
12679 53 : else if (Known.Zero == 0xffff0000)
12680 2 : Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
12681 2 : DAG.getValueType(MVT::i16));
12682 : }
12683 :
12684 983 : return Res;
12685 : }
12686 :
12687 165647 : SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
12688 : DAGCombinerInfo &DCI) const {
12689 331294 : switch (N->getOpcode()) {
12690 : default: break;
12691 1955 : case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
12692 44 : case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
12693 21114 : case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
12694 1528 : case ISD::SUB: return PerformSUBCombine(N, DCI);
12695 1753 : case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
12696 2981 : case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
12697 1514 : case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
12698 5014 : case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
12699 2289 : case ARMISD::ADDC:
12700 2289 : case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI, Subtarget);
12701 312 : case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI, Subtarget);
12702 50 : case ARMISD::BFI: return PerformBFICombine(N, DCI);
12703 6799 : case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
12704 2878 : case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
12705 19167 : case ISD::STORE: return PerformSTORECombine(N, DCI);
12706 1492 : case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
12707 1319 : case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
12708 565 : case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
12709 103 : case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
12710 103 : case ARMISD::VDUP: return PerformVDUPCombine(N, DCI);
12711 352 : case ISD::FP_TO_SINT:
12712 : case ISD::FP_TO_UINT:
12713 352 : return PerformVCVTCombine(N, DCI.DAG, Subtarget);
12714 367 : case ISD::FDIV:
12715 367 : return PerformVDIVCombine(N, DCI.DAG, Subtarget);
12716 2077 : case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
12717 2647 : case ISD::SHL:
12718 : case ISD::SRA:
12719 2647 : case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget);
12720 1432 : case ISD::SIGN_EXTEND:
12721 : case ISD::ZERO_EXTEND:
12722 1432 : case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
12723 2310 : case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
12724 1983 : case ARMISD::BRCOND: return PerformBRCONDCombine(N, DCI.DAG);
12725 20782 : case ISD::LOAD: return PerformLOADCombine(N, DCI);
12726 : case ARMISD::VLD1DUP:
12727 : case ARMISD::VLD2DUP:
12728 : case ARMISD::VLD3DUP:
12729 : case ARMISD::VLD4DUP:
12730 : return PerformVLDCombine(N, DCI);
12731 542 : case ARMISD::BUILD_VECTOR:
12732 542 : return PerformARMBUILD_VECTORCombine(N, DCI);
12733 42 : case ARMISD::SMULWB: {
12734 42 : unsigned BitWidth = N->getValueType(0).getSizeInBits();
12735 42 : APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
12736 84 : if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
12737 6 : return SDValue();
12738 : break;
12739 : }
12740 12 : case ARMISD::SMULWT: {
12741 12 : unsigned BitWidth = N->getValueType(0).getSizeInBits();
12742 12 : APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
12743 24 : if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
12744 0 : return SDValue();
12745 : break;
12746 : }
12747 23 : case ARMISD::SMLALBB: {
12748 23 : unsigned BitWidth = N->getValueType(0).getSizeInBits();
12749 23 : APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
12750 61 : if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
12751 30 : (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
12752 16 : return SDValue();
12753 : break;
12754 : }
12755 22 : case ARMISD::SMLALBT: {
12756 22 : unsigned LowWidth = N->getOperand(0).getValueType().getSizeInBits();
12757 22 : APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
12758 22 : unsigned HighWidth = N->getOperand(1).getValueType().getSizeInBits();
12759 22 : APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
12760 58 : if ((SimplifyDemandedBits(N->getOperand(0), LowMask, DCI)) ||
12761 28 : (SimplifyDemandedBits(N->getOperand(1), HighMask, DCI)))
12762 8 : return SDValue();
12763 : break;
12764 : }
12765 22 : case ARMISD::SMLALTB: {
12766 22 : unsigned HighWidth = N->getOperand(0).getValueType().getSizeInBits();
12767 22 : APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
12768 22 : unsigned LowWidth = N->getOperand(1).getValueType().getSizeInBits();
12769 22 : APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
12770 66 : if ((SimplifyDemandedBits(N->getOperand(0), HighMask, DCI)) ||
12771 44 : (SimplifyDemandedBits(N->getOperand(1), LowMask, DCI)))
12772 8 : return SDValue();
12773 : break;
12774 : }
12775 7 : case ARMISD::SMLALTT: {
12776 7 : unsigned BitWidth = N->getValueType(0).getSizeInBits();
12777 7 : APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
12778 21 : if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
12779 14 : (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
12780 0 : return SDValue();
12781 : break;
12782 : }
12783 3806 : case ISD::INTRINSIC_VOID:
12784 : case ISD::INTRINSIC_W_CHAIN:
12785 7612 : switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12786 : case Intrinsic::arm_neon_vld1:
12787 : case Intrinsic::arm_neon_vld1x2:
12788 : case Intrinsic::arm_neon_vld1x3:
12789 : case Intrinsic::arm_neon_vld1x4:
12790 : case Intrinsic::arm_neon_vld2:
12791 : case Intrinsic::arm_neon_vld3:
12792 : case Intrinsic::arm_neon_vld4:
12793 : case Intrinsic::arm_neon_vld2lane:
12794 : case Intrinsic::arm_neon_vld3lane:
12795 : case Intrinsic::arm_neon_vld4lane:
12796 : case Intrinsic::arm_neon_vld2dup:
12797 : case Intrinsic::arm_neon_vld3dup:
12798 : case Intrinsic::arm_neon_vld4dup:
12799 : case Intrinsic::arm_neon_vst1:
12800 : case Intrinsic::arm_neon_vst1x2:
12801 : case Intrinsic::arm_neon_vst1x3:
12802 : case Intrinsic::arm_neon_vst1x4:
12803 : case Intrinsic::arm_neon_vst2:
12804 : case Intrinsic::arm_neon_vst3:
12805 : case Intrinsic::arm_neon_vst4:
12806 : case Intrinsic::arm_neon_vst2lane:
12807 : case Intrinsic::arm_neon_vst3lane:
12808 : case Intrinsic::arm_neon_vst4lane:
12809 : return PerformVLDCombine(N, DCI);
12810 : default: break;
12811 : }
12812 : break;
12813 : }
12814 61069 : return SDValue();
12815 : }
12816 :
12817 6 : bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
12818 : EVT VT) const {
12819 6 : return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
12820 : }
12821 :
12822 3390 : bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
12823 : unsigned,
12824 : unsigned,
12825 : bool *Fast) const {
12826 : // Depends what it gets converted into if the type is weird.
12827 3390 : if (!VT.isSimple())
12828 : return false;
12829 :
12830 : // The AllowsUnaligned flag models the SCTLR.A setting in ARM CPUs.
12831 3389 : bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
12832 :
12833 3389 : switch (VT.getSimpleVT().SimpleTy) {
12834 : default:
12835 : return false;
12836 2361 : case MVT::i8:
12837 : case MVT::i16:
12838 : case MVT::i32: {
12839 : // Unaligned access can use (for example) LDRB, LDRH, LDR
12840 2361 : if (AllowsUnaligned) {
12841 1997 : if (Fast)
12842 1313 : *Fast = Subtarget->hasV7Ops();
12843 1997 : return true;
12844 : }
12845 : return false;
12846 : }
12847 674 : case MVT::f64:
12848 : case MVT::v2f64: {
12849 : // For any little-endian targets with neon, we can support unaligned ld/st
12850 : // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
12851 : // A big-endian target may also explicitly support unaligned accesses
12852 674 : if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) {
12853 670 : if (Fast)
12854 68 : *Fast = true;
12855 670 : return true;
12856 : }
12857 : return false;
12858 : }
12859 : }
12860 : }
12861 :
12862 : static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
12863 : unsigned AlignCheck) {
12864 68 : return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
12865 19 : (DstAlign == 0 || DstAlign % AlignCheck == 0));
12866 : }
12867 :
12868 387 : EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
12869 : unsigned DstAlign, unsigned SrcAlign,
12870 : bool IsMemset, bool ZeroMemset,
12871 : bool MemcpyStrSrc,
12872 : MachineFunction &MF) const {
12873 387 : const Function &F = MF.getFunction();
12874 :
12875 : // See if we can use NEON instructions for this...
12876 472 : if ((!IsMemset || ZeroMemset) && Subtarget->hasNEON() &&
12877 : !F.hasFnAttribute(Attribute::NoImplicitFloat)) {
12878 : bool Fast;
12879 85 : if (Size >= 16 &&
12880 46 : (memOpAlign(SrcAlign, DstAlign, 16) ||
12881 138 : (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1, &Fast) && Fast))) {
12882 48 : return MVT::v2f64;
12883 37 : } else if (Size >= 8 &&
12884 7 : (memOpAlign(SrcAlign, DstAlign, 8) ||
12885 38 : (allowsMisalignedMemoryAccesses(MVT::f64, 0, 1, &Fast) &&
12886 : Fast))) {
12887 20 : return MVT::f64;
12888 : }
12889 : }
12890 :
12891 : // Let the target-independent logic figure it out.
12892 319 : return MVT::Other;
12893 : }
12894 :
12895 : // 64-bit integers are split into their high and low parts and held in two
12896 : // different registers, so the trunc is free since the low register can just
12897 : // be used.
12898 575 : bool ARMTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
12899 575 : if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
12900 : return false;
12901 565 : unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
12902 565 : unsigned DestBits = DstTy->getPrimitiveSizeInBits();
12903 565 : return (SrcBits == 64 && DestBits == 32);
12904 : }
12905 :
12906 4893 : bool ARMTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
12907 9785 : if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
12908 4884 : !DstVT.isInteger())
12909 : return false;
12910 4884 : unsigned SrcBits = SrcVT.getSizeInBits();
12911 4884 : unsigned DestBits = DstVT.getSizeInBits();
12912 4884 : return (SrcBits == 64 && DestBits == 32);
12913 : }
12914 :
12915 3931 : bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
12916 3931 : if (Val.getOpcode() != ISD::LOAD)
12917 : return false;
12918 :
12919 460 : EVT VT1 = Val.getValueType();
12920 460 : if (!VT1.isSimple() || !VT1.isInteger() ||
12921 799 : !VT2.isSimple() || !VT2.isInteger())
12922 : return false;
12923 :
12924 339 : switch (VT1.getSimpleVT().SimpleTy) {
12925 : default: break;
12926 : case MVT::i1:
12927 : case MVT::i8:
12928 : case MVT::i16:
12929 : // 8-bit and 16-bit loads implicitly zero-extend to 32 bits.
12930 : return true;
12931 : }
12932 :
12933 314 : return false;
12934 : }
12935 :
12936 437 : bool ARMTargetLowering::isFNegFree(EVT VT) const {
12937 437 : if (!VT.isSimple())
12938 : return false;
12939 :
12940 : // There are quite a few FP16 instructions (e.g. VNMLA, VNMLS, etc.) that
12941 : // negate values directly (fneg is free). So, we don't want to let the DAG
12942 : // combiner rewrite fneg into xors and some other instructions. For f16 and
12943 : // FullFP16 argument passing, some bitcast nodes may be introduced,
12944 : // triggering this DAG combine rewrite, so we are avoiding that with this.
12945 437 : switch (VT.getSimpleVT().SimpleTy) {
12946 : default: break;
12947 144 : case MVT::f16:
12948 144 : return Subtarget->hasFullFP16();
12949 : }
12950 :
12951 : return false;
12952 : }
12953 :
12954 198 : bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
12955 198 : EVT VT = ExtVal.getValueType();
12956 :
12957 : if (!isTypeLegal(VT))
12958 : return false;
12959 :
12960 : // Don't create a loadext if we can fold the extension into a wide/long
12961 : // instruction.
12962 : // If there's more than one user instruction, the loadext is desirable no
12963 : // matter what. There can be two uses by the same instruction.
12964 388 : if (ExtVal->use_empty() ||
12965 194 : !ExtVal->use_begin()->isOnlyUserOf(ExtVal.getNode()))
12966 13 : return true;
12967 :
12968 181 : SDNode *U = *ExtVal->use_begin();
12969 181 : if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
12970 281 : U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHL))
12971 93 : return false;
12972 :
12973 : return true;
12974 : }
12975 :
12976 7 : bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
12977 7 : if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
12978 : return false;
12979 :
12980 7 : if (!isTypeLegal(EVT::getEVT(Ty1)))
12981 1 : return false;
12982 :
12983 : assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
12984 :
12985 : // Assuming the caller doesn't have a zeroext or signext return parameter,
12986 : // truncation all the way down to i1 is valid.
12987 : return true;
12988 : }
12989 :
12990 4630 : int ARMTargetLowering::getScalingFactorCost(const DataLayout &DL,
12991 : const AddrMode &AM, Type *Ty,
12992 : unsigned AS) const {
12993 4630 : if (isLegalAddressingMode(DL, AM, Ty, AS)) {
12994 4630 : if (Subtarget->hasFPAO())
12995 120 : return AM.Scale < 0 ? 1 : 0; // positive offsets execute faster
12996 : return 0;
12997 : }
12998 : return -1;
12999 : }
13000 :
13001 0 : static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
13002 0 : if (V < 0)
13003 0 : return false;
13004 :
13005 : unsigned Scale = 1;
13006 0 : switch (VT.getSimpleVT().SimpleTy) {
13007 : default: return false;
13008 : case MVT::i1:
13009 : case MVT::i8:
13010 : // Scale == 1;
13011 : break;
13012 0 : case MVT::i16:
13013 : // Scale == 2;
13014 : Scale = 2;
13015 0 : break;
13016 0 : case MVT::i32:
13017 : // Scale == 4;
13018 : Scale = 4;
13019 0 : break;
13020 : }
13021 :
13022 0 : if ((V & (Scale - 1)) != 0)
13023 0 : return false;
13024 0 : V /= Scale;
13025 0 : return V == (V & ((1LL << 5) - 1));
13026 : }
13027 :
13028 0 : static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
13029 : const ARMSubtarget *Subtarget) {
13030 : bool isNeg = false;
13031 0 : if (V < 0) {
13032 : isNeg = true;
13033 0 : V = - V;
13034 : }
13035 :
13036 0 : switch (VT.getSimpleVT().SimpleTy) {
13037 : default: return false;
13038 0 : case MVT::i1:
13039 : case MVT::i8:
13040 : case MVT::i16:
13041 : case MVT::i32:
13042 : // + imm12 or - imm8
13043 0 : if (isNeg)
13044 0 : return V == (V & ((1LL << 8) - 1));
13045 0 : return V == (V & ((1LL << 12) - 1));
13046 0 : case MVT::f32:
13047 : case MVT::f64:
13048 : // Same as ARM mode. FIXME: NEON?
13049 0 : if (!Subtarget->hasVFP2())
13050 0 : return false;
13051 0 : if ((V & 3) != 0)
13052 0 : return false;
13053 0 : V >>= 2;
13054 0 : return V == (V & ((1LL << 8) - 1));
13055 : }
13056 : }
13057 :
13058 : /// isLegalAddressImmediate - Return true if the integer value can be used
13059 : /// as the offset of the target addressing mode for load / store of the
13060 : /// given type.
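 : ///
 : /// For example, in ARM mode an i32 load/store accepts offsets in the range
 : /// +/-4095 (imm12), while an i16 load/store only accepts +/-255 (imm8).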
13061 0 : static bool isLegalAddressImmediate(int64_t V, EVT VT,
13062 : const ARMSubtarget *Subtarget) {
13063 0 : if (V == 0)
13064 0 : return true;
13065 :
13066 0 : if (!VT.isSimple())
13067 0 : return false;
13068 :
13069 0 : if (Subtarget->isThumb1Only())
13070 : return isLegalT1AddressImmediate(V, VT);
13071 : else if (Subtarget->isThumb2())
13072 0 : return isLegalT2AddressImmediate(V, VT, Subtarget);
13073 :
13074 : // ARM mode.
13075 0 : if (V < 0)
13076 0 : V = - V;
13077 0 : switch (VT.getSimpleVT().SimpleTy) {
13078 : default: return false;
13079 0 : case MVT::i1:
13080 : case MVT::i8:
13081 : case MVT::i32:
13082 : // +- imm12
13083 0 : return V == (V & ((1LL << 12) - 1));
13084 0 : case MVT::i16:
13085 : // +- imm8
13086 0 : return V == (V & ((1LL << 8) - 1));
13087 0 : case MVT::f32:
13088 : case MVT::f64:
13089 0 : if (!Subtarget->hasVFP2()) // FIXME: NEON?
13090 0 : return false;
13091 0 : if ((V & 3) != 0)
13092 0 : return false;
13093 0 : V >>= 2;
13094 0 : return V == (V & ((1LL << 8) - 1));
13095 : }
13096 : }
13097 :
13098 9390 : bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
13099 : EVT VT) const {
13100 9390 : int Scale = AM.Scale;
13101 9390 : if (Scale < 0)
13102 : return false;
13103 :
13104 8847 : switch (VT.getSimpleVT().SimpleTy) {
13105 : default: return false;
13106 8578 : case MVT::i1:
13107 : case MVT::i8:
13108 : case MVT::i16:
13109 : case MVT::i32:
13110 8578 : if (Scale == 1)
13111 : return true;
13112 : // r + r << imm
13113 3128 : Scale = Scale & ~1;
13114 3128 : return Scale == 2 || Scale == 4 || Scale == 8;
13115 191 : case MVT::i64:
13116 : // FIXME: What are we trying to model here? ldrd doesn't have an r + r
13117 : // version in Thumb mode.
13118 : // r + r
13119 191 : if (Scale == 1)
13120 : return true;
13121 : // r * 2 (this can be lowered to r + r).
13122 11 : if (!AM.HasBaseReg && Scale == 2)
13123 0 : return true;
13124 : return false;
13125 0 : case MVT::isVoid:
13126 : // Note, we allow "void" uses (basically, uses that aren't loads or
13127 : // stores), because ARM allows folding a scale into many arithmetic
13128 : // operations. This should be made more precise and revisited later.
13129 :
13130 : // Allow r << imm, but the imm has to be a multiple of two.
13131 0 : if (Scale & 1) return false;
13132 0 : return isPowerOf2_32(Scale);
13133 : }
13134 : }
13135 :
13136 2498 : bool ARMTargetLowering::isLegalT1ScaledAddressingMode(const AddrMode &AM,
13137 : EVT VT) const {
13138 2498 : const int Scale = AM.Scale;
13139 :
13140 : // Negative scales are not supported in Thumb1.
13141 2498 : if (Scale < 0)
13142 : return false;
13143 :
13144 : // Thumb1 addressing modes do not support register scaling excepting the
13145 : // following cases:
13146 : // 1. Scale == 1 means no scaling.
13147 : // 2. Scale == 2 this can be lowered to r + r if there is no base register.
13148 2353 : return (Scale == 1) || (!AM.HasBaseReg && Scale == 2);
13149 : }
13150 :
13151 : /// isLegalAddressingMode - Return true if the addressing mode represented
13152 : /// by AM is legal for this target, for a load/store of the specified type.
13153 45604 : bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
13154 : const AddrMode &AM, Type *Ty,
13155 : unsigned AS, Instruction *I) const {
13156 45604 : EVT VT = getValueType(DL, Ty, true);
13157 45604 : if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
13158 : return false;
13159 :
13160 : // Can never fold addr of global into load/store.
13161 45041 : if (AM.BaseGV)
13162 : return false;
13163 :
13164 43459 : switch (AM.Scale) {
13165 : case 0: // no scale reg, must be "r+i" or "r", or "i".
13166 : break;
13167 22612 : default:
13168 : // ARM doesn't support any R+R*scale+imm addr modes.
13169 22612 : if (AM.BaseOffs)
13170 : return false;
13171 :
13172 20001 : if (!VT.isSimple())
13173 : return false;
13174 :
13175 19989 : if (Subtarget->isThumb1Only())
13176 2498 : return isLegalT1ScaledAddressingMode(AM, VT);
13177 :
13178 : if (Subtarget->isThumb2())
13179 9390 : return isLegalT2ScaledAddressingMode(AM, VT);
13180 :
13181 8101 : int Scale = AM.Scale;
13182 8101 : switch (VT.getSimpleVT().SimpleTy) {
13183 : default: return false;
13184 7698 : case MVT::i1:
13185 : case MVT::i8:
13186 : case MVT::i32:
13187 7698 : if (Scale < 0) Scale = -Scale;
13188 7698 : if (Scale == 1)
13189 : return true;
13190 : // r + r << imm
13191 4272 : return isPowerOf2_32(Scale & ~1);
13192 22 : case MVT::i16:
13193 : case MVT::i64:
13194 : // r +/- r
13195 22 : if (Scale == 1 || (AM.HasBaseReg && Scale == -1))
13196 : return true;
13197 : // r * 2 (this can be lowered to r + r).
13198 16 : if (!AM.HasBaseReg && Scale == 2)
13199 0 : return true;
13200 : return false;
13201 :
13202 0 : case MVT::isVoid:
13203 : // Note, we allow "void" uses (basically, uses that aren't loads or
13204 : // stores), because ARM allows folding a scale into many arithmetic
13205 : // operations. This should be made more precise and revisited later.
13206 :
13207 : // Allow r << imm, but the imm has to be a multiple of two.
13208 0 : if (Scale & 1) return false;
13209 0 : return isPowerOf2_32(Scale);
13210 : }
13211 : }
13212 : return true;
13213 : }
13214 :
13215 : /// isLegalICmpImmediate - Return true if the specified immediate is legal
13216 : /// icmp immediate, that is the target has icmp instructions which can compare
13217 : /// a register against the immediate without having to materialize the
13218 : /// immediate into a register.
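 : ///
 : /// For example, "cmp r0, #255" is legal in every mode, while comparing
 : /// against -1 is only legal where CMN can be used (ARM and Thumb2, via
 : /// "cmn r0, #1"), not in Thumb1.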
13219 9038 : bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
13220 : // Thumb2 and ARM modes can use cmn for negative immediates.
13221 9038 : if (!Subtarget->isThumb())
13222 3606 : return ARM_AM::getSOImmVal((uint32_t)Imm) != -1 ||
13223 805 : ARM_AM::getSOImmVal(-(uint32_t)Imm) != -1;
13224 : if (Subtarget->isThumb2())
13225 4206 : return ARM_AM::getT2SOImmVal((uint32_t)Imm) != -1 ||
13226 519 : ARM_AM::getT2SOImmVal(-(uint32_t)Imm) != -1;
13227 : // Thumb1 doesn't have cmn, and only 8-bit immediates.
13228 1226 : return Imm >= 0 && Imm <= 255;
13229 : }
13230 :
13231 : /// isLegalAddImmediate - Return true if the specified immediate is a legal add
13232 : /// *or sub* immediate, that is the target has add or sub instructions which can
13233 : /// add a register with the immediate without having to materialize the
13234 : /// immediate into a register.
13235 1446 : bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
13236 : // Same encoding for add/sub, just flip the sign.
13237 : int64_t AbsImm = std::abs(Imm);
13238 1446 : if (!Subtarget->isThumb())
13239 549 : return ARM_AM::getSOImmVal(AbsImm) != -1;
13240 : if (Subtarget->isThumb2())
13241 562 : return ARM_AM::getT2SOImmVal(AbsImm) != -1;
13242 : // Thumb1 only has 8-bit unsigned immediate.
13243 335 : return AbsImm >= 0 && AbsImm <= 255;
13244 : }
13245 :
13246 3109 : static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
13247 : bool isSEXTLoad, SDValue &Base,
13248 : SDValue &Offset, bool &isInc,
13249 : SelectionDAG &DAG) {
13250 6218 : if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
13251 : return false;
13252 :
13253 294 : if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
13254 : // AddressingMode 3
13255 121 : Base = Ptr->getOperand(0);
13256 121 : if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
13257 121 : int RHSC = (int)RHS->getZExtValue();
13258 121 : if (RHSC < 0 && RHSC > -256) {
13259 : assert(Ptr->getOpcode() == ISD::ADD);
13260 3 : isInc = false;
13261 3 : Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
13262 3 : return true;
13263 : }
13264 : }
13265 118 : isInc = (Ptr->getOpcode() == ISD::ADD);
13266 118 : Offset = Ptr->getOperand(1);
13267 118 : return true;
13268 : } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
13269 : // AddressingMode 2
13270 2988 : if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
13271 2947 : int RHSC = (int)RHS->getZExtValue();
13272 2947 : if (RHSC < 0 && RHSC > -0x1000) {
13273 : assert(Ptr->getOpcode() == ISD::ADD);
13274 5 : isInc = false;
13275 5 : Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
13276 5 : Base = Ptr->getOperand(0);
13277 5 : return true;
13278 : }
13279 : }
13280 :
13281 2983 : if (Ptr->getOpcode() == ISD::ADD) {
13282 2971 : isInc = true;
13283 : ARM_AM::ShiftOpc ShOpcVal=
13284 2971 : ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
13285 0 : if (ShOpcVal != ARM_AM::no_shift) {
13286 0 : Base = Ptr->getOperand(1);
13287 0 : Offset = Ptr->getOperand(0);
13288 : } else {
13289 2971 : Base = Ptr->getOperand(0);
13290 2971 : Offset = Ptr->getOperand(1);
13291 : }
13292 2971 : return true;
13293 : }
13294 :
13295 12 : isInc = (Ptr->getOpcode() == ISD::ADD);
13296 12 : Base = Ptr->getOperand(0);
13297 12 : Offset = Ptr->getOperand(1);
13298 12 : return true;
13299 : }
13300 :
13301 : // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
13302 : return false;
13303 : }
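          :
          : // Illustrative example: an i32 load whose address is (add %p, -8) matches
          : // AddressingMode 2 above (offset range +/-4095), yielding Base = %p,
          : // Offset = 8 and isInc = false; an i16 load is instead limited to the
          : // +/-255 range of AddressingMode 3.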
13304 :
13305 1675 : static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
13306 : bool isSEXTLoad, SDValue &Base,
13307 : SDValue &Offset, bool &isInc,
13308 : SelectionDAG &DAG) {
13309 3350 : if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
13310 : return false;
13311 :
13312 1675 : Base = Ptr->getOperand(0);
13313 1675 : if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
13314 1636 : int RHSC = (int)RHS->getZExtValue();
13315 1636 : if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
13316 : assert(Ptr->getOpcode() == ISD::ADD);
13317 30 : isInc = false;
13318 30 : Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
13319 30 : return true;
13320 1606 : } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
13321 1604 : isInc = Ptr->getOpcode() == ISD::ADD;
13322 1619 : Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
13323 1604 : return true;
13324 : }
13325 : }
13326 :
13327 : return false;
13328 : }
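          :
          : // Illustrative example: for a Thumb2 access whose address is (add %p, 4),
          : // this returns Base = %p, Offset = 4, isInc = true, which the callers below
          : // turn into a pre-indexed ("ldr rt, [rn, #4]!") or post-indexed
          : // ("ldr rt, [rn], #4") form; only immediate offsets in the 8-bit range are
          : // accepted here.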
13329 :
13330 : /// getPreIndexedAddressParts - returns true by value, and sets the base
13331 : /// pointer, offset, and addressing mode by reference, if the node's address
13332 : /// can be legally represented as a pre-indexed load / store address.
13333 : bool
13334 1416 : ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
13335 : SDValue &Offset,
13336 : ISD::MemIndexedMode &AM,
13337 : SelectionDAG &DAG) const {
13338 1416 : if (Subtarget->isThumb1Only())
13339 : return false;
13340 :
13341 1416 : EVT VT;
13342 : SDValue Ptr;
13343 : bool isSEXTLoad = false;
13344 : if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
13345 334 : Ptr = LD->getBasePtr();
13346 334 : VT = LD->getMemoryVT();
13347 334 : isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
13348 : } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
13349 1082 : Ptr = ST->getBasePtr();
13350 1082 : VT = ST->getMemoryVT();
13351 : } else
13352 : return false;
13353 :
13354 : bool isInc;
13355 : bool isLegal = false;
13356 : if (Subtarget->isThumb2())
13357 660 : isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
13358 : Offset, isInc, DAG);
13359 : else
13360 756 : isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
13361 : Offset, isInc, DAG);
13362 1416 : if (!isLegal)
13363 : return false;
13364 :
13365 1397 : AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
13366 1397 : return true;
13367 : }
13368 :
13369 : /// getPostIndexedAddressParts - returns true by value, and sets the base
13370 : /// pointer, offset, and addressing mode by reference, if this node can be
13371 : /// combined with a load / store to form a post-indexed load / store.
13372 4051 : bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
13373 : SDValue &Base,
13374 : SDValue &Offset,
13375 : ISD::MemIndexedMode &AM,
13376 : SelectionDAG &DAG) const {
13377 4051 : EVT VT;
13378 : SDValue Ptr;
13379 : bool isSEXTLoad = false, isNonExt;
13380 : if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
13381 2310 : VT = LD->getMemoryVT();
13382 2310 : Ptr = LD->getBasePtr();
13383 2310 : isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
13384 2310 : isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
13385 : } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
13386 1741 : VT = ST->getMemoryVT();
13387 1741 : Ptr = ST->getBasePtr();
13388 1741 : isNonExt = !ST->isTruncatingStore();
13389 : } else
13390 : return false;
13391 :
13392 4051 : if (Subtarget->isThumb1Only()) {
13393 : // Thumb-1 can do a limited post-inc load or store as an updating LDM. It
13394 : // must be non-extending/truncating, i32, with an offset of 4.
13395 : assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!");
13396 683 : if (Op->getOpcode() != ISD::ADD || !isNonExt)
13397 : return false;
13398 683 : auto *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1));
13399 1258 : if (!RHS || RHS->getZExtValue() != 4)
13400 : return false;
13401 :
13402 173 : Offset = Op->getOperand(1);
13403 173 : Base = Op->getOperand(0);
13404 173 : AM = ISD::POST_INC;
13405 173 : return true;
13406 : }
13407 :
13408 : bool isInc;
13409 : bool isLegal = false;
13410 : if (Subtarget->isThumb2())
13411 1015 : isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
13412 : isInc, DAG);
13413 : else
13414 2353 : isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
13415 : isInc, DAG);
13416 3368 : if (!isLegal)
13417 : return false;
13418 :
13419 : if (Ptr != Base) {
13420 : // Swap base ptr and offset to catch more post-index load / store when
13421 : // it's legal. In Thumb2 mode, offset must be an immediate.
13422 24 : if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
13423 9 : !Subtarget->isThumb2())
13424 : std::swap(Base, Offset);
13425 :
13426 : // Post-indexed load / store update the base pointer.
13427 : if (Ptr != Base)
13428 : return false;
13429 : }
13430 :
13431 3333 : AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
13432 3333 : return true;
13433 : }
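          :
          : // Illustrative example: in Thumb1, a non-extending i32 load followed by
          : // (add %p, 4) is the only shape accepted above; it can later be selected
          : // as a single updating load such as "ldm rn!, {rt}". Any other offset,
          : // extension type, or value type is rejected.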
13434 :
13435 17620 : void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
13436 : KnownBits &Known,
13437 : const APInt &DemandedElts,
13438 : const SelectionDAG &DAG,
13439 : unsigned Depth) const {
13440 : unsigned BitWidth = Known.getBitWidth();
13441 : Known.resetAll();
13442 17620 : switch (Op.getOpcode()) {
13443 : default: break;
13444 516 : case ARMISD::ADDC:
13445 : case ARMISD::ADDE:
13446 : case ARMISD::SUBC:
13447 : case ARMISD::SUBE:
13448 : // Special cases when we convert a carry to a boolean.
13449 516 : if (Op.getResNo() == 0) {
13450 516 : SDValue LHS = Op.getOperand(0);
13451 516 : SDValue RHS = Op.getOperand(1);
13452 : // (ADDE 0, 0, C) will give us a single bit.
13453 898 : if (Op->getOpcode() == ARMISD::ADDE && isNullConstant(LHS) &&
13454 382 : isNullConstant(RHS)) {
13455 382 : Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
13456 382 : return;
13457 : }
13458 : }
13459 : break;
13460 2024 : case ARMISD::CMOV: {
13461 : // Bits are known zero/one if known on the LHS and RHS.
13462 4048 : DAG.computeKnownBits(Op.getOperand(0), Known, Depth+1);
13463 2024 : if (Known.isUnknown())
13464 : return;
13465 :
13466 1071 : KnownBits KnownRHS;
13467 1071 : DAG.computeKnownBits(Op.getOperand(1), KnownRHS, Depth+1);
13468 : Known.Zero &= KnownRHS.Zero;
13469 : Known.One &= KnownRHS.One;
13470 : return;
13471 : }
13472 5585 : case ISD::INTRINSIC_W_CHAIN: {
13473 5585 : ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
13474 5585 : Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
13475 5585 : switch (IntID) {
13476 : default: return;
13477 : case Intrinsic::arm_ldaex:
13478 : case Intrinsic::arm_ldrex: {
13479 1452 : EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
13480 : unsigned MemBits = VT.getScalarSizeInBits();
13481 2904 : Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
13482 : return;
13483 : }
13484 : }
13485 : }
13486 350 : case ARMISD::BFI: {
13487 : // Conservatively, we can recurse down the first operand
13488 : // and just mask out all affected bits.
13489 700 : DAG.computeKnownBits(Op.getOperand(0), Known, Depth + 1);
13490 :
13491 : // The operand to BFI is already a mask suitable for removing the bits it
13492 : // sets.
13493 : ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2));
13494 350 : const APInt &Mask = CI->getAPIntValue();
13495 : Known.Zero &= Mask;
13496 : Known.One &= Mask;
13497 : return;
13498 : }
13499 : }
13500 : }
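          :
          : // Illustrative example: for (ADDE 0, 0, C) the result is just the carry,
          : // i.e. 0 or 1, so every bit except bit 0 is reported as known zero; later
          : // combines can then treat the value like an i1 boolean.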
13501 :
13502 : bool
13503 18783 : ARMTargetLowering::targetShrinkDemandedConstant(SDValue Op,
13504 : const APInt &DemandedAPInt,
13505 : TargetLoweringOpt &TLO) const {
13506 : // Delay optimization, so we don't have to deal with illegal types, or block
13507 : // optimizations.
13508 18783 : if (!TLO.LegalOps)
13509 : return false;
13510 :
13511 : // Only optimize AND for now.
13512 8128 : if (Op.getOpcode() != ISD::AND)
13513 : return false;
13514 :
13515 11406 : EVT VT = Op.getValueType();
13516 :
13517 : // Ignore vectors.
13518 5703 : if (VT.isVector())
13519 : return false;
13520 :
13521 : assert(VT == MVT::i32 && "Unexpected integer type");
13522 :
13523 : // Make sure the RHS really is a constant.
13524 : ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
13525 : if (!C)
13526 : return false;
13527 :
13528 9328 : unsigned Mask = C->getZExtValue();
13529 :
13530 4664 : unsigned Demanded = DemandedAPInt.getZExtValue();
13531 4664 : unsigned ShrunkMask = Mask & Demanded;
13532 4664 : unsigned ExpandedMask = Mask | ~Demanded;
13533 :
13534 : // If the mask is all zeros, let the target-independent code replace the
13535 : // result with zero.
13536 4664 : if (ShrunkMask == 0)
13537 : return false;
13538 :
13539 : // If the mask is all ones, erase the AND. (Currently, the target-independent
13540 : // code won't do this, so we have to do it explicitly to avoid an infinite
13541 : // loop in obscure cases.)
13542 4664 : if (ExpandedMask == ~0U)
13543 2 : return TLO.CombineTo(Op, Op.getOperand(0));
13544 :
13545 : auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
13546 4662 : return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
13547 : };
13548 : auto UseMask = [Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
13549 : if (NewMask == Mask)
13550 : return true;
13551 : SDLoc DL(Op);
13552 : SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
13553 : SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
13554 : return TLO.CombineTo(Op, NewOp);
13555 4662 : };
13556 :
13557 : // Prefer uxtb mask.
13558 : if (IsLegalMask(0xFF))
13559 575 : return UseMask(0xFF);
13560 :
13561 : // Prefer uxth mask.
13562 : if (IsLegalMask(0xFFFF))
13563 1346 : return UseMask(0xFFFF);
13564 :
13565 : // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.
13566 : // FIXME: Prefer a contiguous sequence of bits for other optimizations.
13567 2741 : if (ShrunkMask < 256)
13568 1021 : return UseMask(ShrunkMask);
13569 :
13570 : // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
13571 : // FIXME: Prefer a contiguous sequence of bits for other optimizations.
13572 1720 : if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
13573 685 : return UseMask(ExpandedMask);
13574 :
13575 : // Potential improvements:
13576 : //
13577 : // We could try to recognize lsls+lsrs or lsrs+lsls pairs here.
13578 : // We could try to prefer Thumb1 immediates which can be lowered to a
13579 : // two-instruction sequence.
13580 : // We could try to recognize more legal ARM/Thumb2 immediates here.
13581 :
13582 : return false;
13583 : }
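          :
          : // Illustrative walk-through (constants chosen for the example): with
          : // Mask = 0x30FF and only the low 12 bits demanded, ShrunkMask is 0xFF and
          : // ExpandedMask is 0xFFFFF0FF, so IsLegalMask(0xFF) holds and the AND is
          : // rewritten to use 0xFF, which is then selectable as uxtb.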
13584 :
13585 :
13586 : //===----------------------------------------------------------------------===//
13587 : // ARM Inline Assembly Support
13588 : //===----------------------------------------------------------------------===//
13589 :
13590 625 : bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
13591 : // Looking for "rev" which is V6+.
13592 625 : if (!Subtarget->hasV6Ops())
13593 : return false;
13594 :
13595 : InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
13596 : std::string AsmStr = IA->getAsmString();
13597 : SmallVector<StringRef, 4> AsmPieces;
13598 450 : SplitString(AsmStr, AsmPieces, ";\n");
13599 :
13600 450 : switch (AsmPieces.size()) {
13601 : default: return false;
13602 : case 1:
13603 522 : AsmStr = AsmPieces[0];
13604 : AsmPieces.clear();
13605 261 : SplitString(AsmStr, AsmPieces, " \t,");
13606 :
13607 : // rev $0, $1
13608 261 : if (AsmPieces.size() == 3 &&
13609 262 : AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
13610 1 : IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
13611 1 : IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
13612 1 : if (Ty && Ty->getBitWidth() == 32)
13613 1 : return IntrinsicLowering::LowerToByteSwap(CI);
13614 : }
13615 : break;
13616 : }
13617 :
13618 : return false;
13619 : }
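          :
          : // Illustrative example: on a v6+ target, the single-statement inline asm
          : // "rev $0, $1" with constraints "=l,l" operating on an i32 is replaced by
          : // a call to llvm.bswap.i32 rather than being kept as opaque inline
          : // assembly.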
13620 :
13621 37 : const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
13622 : // At this point, we have to lower this constraint to something else, so we
13623 : // lower it to an "r" or "w". However, by doing this we will force the result
13624 :   // to be in a register, while the X constraint is much more permissive.
13625 : //
13626 : // Although we are correct (we are free to emit anything, without
13627 : // constraints), we might break use cases that would expect us to be more
13628 : // efficient and emit something else.
13629 37 : if (!Subtarget->hasVFP2())
13630 : return "r";
13631 25 : if (ConstraintVT.isFloatingPoint())
13632 : return "w";
13633 16 : if (ConstraintVT.isVector() && Subtarget->hasNEON() &&
13634 4 : (ConstraintVT.getSizeInBits() == 64 ||
13635 : ConstraintVT.getSizeInBits() == 128))
13636 4 : return "w";
13637 :
13638 : return "r";
13639 : }
13640 :
13641 : /// getConstraintType - Given a constraint letter, return the type of
13642 : /// constraint it is for this target.
13643 : ARMTargetLowering::ConstraintType
13644 200512 : ARMTargetLowering::getConstraintType(StringRef Constraint) const {
13645 200512 : if (Constraint.size() == 1) {
13646 2199 : switch (Constraint[0]) {
13647 : default: break;
13648 : case 'l': return C_RegisterClass;
13649 : case 'w': return C_RegisterClass;
13650 : case 'h': return C_RegisterClass;
13651 : case 'x': return C_RegisterClass;
13652 : case 't': return C_RegisterClass;
13653 4 : case 'j': return C_Other; // Constant for movw.
13654 : // An address with a single base register. Due to the way we
13655 : // currently handle addresses it is the same as an 'r' memory constraint.
13656 21 : case 'Q': return C_Memory;
13657 : }
13658 198313 : } else if (Constraint.size() == 2) {
13659 6 : switch (Constraint[0]) {
13660 : default: break;
13661 : // All 'U+' constraints are addresses.
13662 : case 'U': return C_Memory;
13663 : }
13664 : }
13665 200272 : return TargetLowering::getConstraintType(Constraint);
13666 : }
13667 :
13668 : /// Examine constraint type and operand type and determine a weight value.
13669 : /// This object must already have been set up with the operand type
13670 : /// and the current alternative constraint selected.
13671 : TargetLowering::ConstraintWeight
13672 225 : ARMTargetLowering::getSingleConstraintMatchWeight(
13673 : AsmOperandInfo &info, const char *constraint) const {
13674 : ConstraintWeight weight = CW_Invalid;
13675 225 : Value *CallOperandVal = info.CallOperandVal;
13676 : // If we don't have a value, we can't do a match,
13677 : // but allow it at the lowest weight.
13678 225 : if (!CallOperandVal)
13679 : return CW_Default;
13680 135 : Type *type = CallOperandVal->getType();
13681 : // Look at the constraint type.
13682 135 : switch (*constraint) {
13683 135 : default:
13684 135 : weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
13685 135 : break;
13686 : case 'l':
13687 0 : if (type->isIntegerTy()) {
13688 0 : if (Subtarget->isThumb())
13689 : weight = CW_SpecificReg;
13690 : else
13691 : weight = CW_Register;
13692 : }
13693 : break;
13694 : case 'w':
13695 : if (type->isFloatingPointTy())
13696 : weight = CW_Register;
13697 : break;
13698 : }
13699 : return weight;
13700 : }
13701 :
13702 : using RCPair = std::pair<unsigned, const TargetRegisterClass *>;
13703 :
13704 8808 : RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
13705 : const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
13706 8808 : if (Constraint.size() == 1) {
13707 : // GCC ARM Constraint Letters
13708 446 : switch (Constraint[0]) {
13709 5 : case 'l': // Low regs or general regs.
13710 5 : if (Subtarget->isThumb())
13711 5 : return RCPair(0U, &ARM::tGPRRegClass);
13712 0 : return RCPair(0U, &ARM::GPRRegClass);
13713 1 : case 'h': // High regs or no regs.
13714 1 : if (Subtarget->isThumb())
13715 1 : return RCPair(0U, &ARM::hGPRRegClass);
13716 : break;
13717 390 : case 'r':
13718 390 : if (Subtarget->isThumb1Only())
13719 41 : return RCPair(0U, &ARM::tGPRRegClass);
13720 349 : return RCPair(0U, &ARM::GPRRegClass);
13721 : case 'w':
13722 23 : if (VT == MVT::Other)
13723 : break;
13724 22 : if (VT == MVT::f32)
13725 7 : return RCPair(0U, &ARM::SPRRegClass);
13726 15 : if (VT.getSizeInBits() == 64)
13727 12 : return RCPair(0U, &ARM::DPRRegClass);
13728 3 : if (VT.getSizeInBits() == 128)
13729 3 : return RCPair(0U, &ARM::QPRRegClass);
13730 : break;
13731 : case 'x':
13732 2 : if (VT == MVT::Other)
13733 : break;
13734 2 : if (VT == MVT::f32)
13735 1 : return RCPair(0U, &ARM::SPR_8RegClass);
13736 1 : if (VT.getSizeInBits() == 64)
13737 1 : return RCPair(0U, &ARM::DPR_8RegClass);
13738 0 : if (VT.getSizeInBits() == 128)
13739 0 : return RCPair(0U, &ARM::QPR_8RegClass);
13740 : break;
13741 : case 't':
13742 25 : if (VT == MVT::Other)
13743 : break;
13744 25 : if (VT == MVT::f32 || VT == MVT::i32)
13745 3 : return RCPair(0U, &ARM::SPRRegClass);
13746 22 : if (VT.getSizeInBits() == 64)
13747 5 : return RCPair(0U, &ARM::DPR_VFP2RegClass);
13748 17 : if (VT.getSizeInBits() == 128)
13749 17 : return RCPair(0U, &ARM::QPR_VFP2RegClass);
13750 : break;
13751 : }
13752 : }
13753 8339 : if (StringRef("{cc}").equals_lower(Constraint))
13754 24 : return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
13755 :
13756 8339 : return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
13757 : }
13758 :
13759 : /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
13760 : /// vector. If it is invalid, don't add anything to Ops.
13761 55 : void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
13762 : std::string &Constraint,
13763 : std::vector<SDValue>&Ops,
13764 : SelectionDAG &DAG) const {
13765 55 : SDValue Result;
13766 :
13767 : // Currently only support length 1 constraints.
13768 55 : if (Constraint.length() != 1) return;
13769 :
13770 55 : char ConstraintLetter = Constraint[0];
13771 : switch (ConstraintLetter) {
13772 : default: break;
13773 : case 'j':
13774 : case 'I': case 'J': case 'K': case 'L':
13775 : case 'M': case 'N': case 'O':
13776 : ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
13777 : if (!C)
13778 : return;
13779 :
13780 22 : int64_t CVal64 = C->getSExtValue();
13781 22 : int CVal = (int) CVal64;
13782 : // None of these constraints allow values larger than 32 bits. Check
13783 : // that the value fits in an int.
13784 22 : if (CVal != CVal64)
13785 : return;
13786 :
13787 : switch (ConstraintLetter) {
13788 1 : case 'j':
13789 : // Constant suitable for movw, must be between 0 and
13790 : // 65535.
13791 1 : if (Subtarget->hasV6T2Ops())
13792 1 : if (CVal >= 0 && CVal <= 65535)
13793 : break;
13794 : return;
13795 4 : case 'I':
13796 4 : if (Subtarget->isThumb1Only()) {
13797 : // This must be a constant between 0 and 255, for ADD
13798 : // immediates.
13799 2 : if (CVal >= 0 && CVal <= 255)
13800 : break;
13801 : } else if (Subtarget->isThumb2()) {
13802 : // A constant that can be used as an immediate value in a
13803 : // data-processing instruction.
13804 1 : if (ARM_AM::getT2SOImmVal(CVal) != -1)
13805 : break;
13806 : } else {
13807 : // A constant that can be used as an immediate value in a
13808 : // data-processing instruction.
13809 1 : if (ARM_AM::getSOImmVal(CVal) != -1)
13810 : break;
13811 : }
13812 : return;
13813 :
13814 4 : case 'J':
13815 4 : if (Subtarget->isThumb1Only()) {
13816 : // This must be a constant between -255 and -1, for negated ADD
13817 : // immediates. This can be used in GCC with an "n" modifier that
13818 : // prints the negated value, for use with SUB instructions. It is
13819 : // not useful otherwise but is implemented for compatibility.
13820 2 : if (CVal >= -255 && CVal <= -1)
13821 : break;
13822 : } else {
13823 : // This must be a constant between -4095 and 4095. It is not clear
13824 : // what this constraint is intended for. Implemented for
13825 : // compatibility with GCC.
13826 2 : if (CVal >= -4095 && CVal <= 4095)
13827 : break;
13828 : }
13829 : return;
13830 :
13831 3 : case 'K':
13832 3 : if (Subtarget->isThumb1Only()) {
13833 : // A 32-bit value where only one byte has a nonzero value. Exclude
13834 : // zero to match GCC. This constraint is used by GCC internally for
13835 : // constants that can be loaded with a move/shift combination.
13836 : // It is not useful otherwise but is implemented for compatibility.
13837 2 : if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
13838 : break;
13839 : } else if (Subtarget->isThumb2()) {
13840 : // A constant whose bitwise inverse can be used as an immediate
13841 : // value in a data-processing instruction. This can be used in GCC
13842 : // with a "B" modifier that prints the inverted value, for use with
13843 : // BIC and MVN instructions. It is not useful otherwise but is
13844 : // implemented for compatibility.
13845 1 : if (ARM_AM::getT2SOImmVal(~CVal) != -1)
13846 : break;
13847 : } else {
13848 : // A constant whose bitwise inverse can be used as an immediate
13849 : // value in a data-processing instruction. This can be used in GCC
13850 : // with a "B" modifier that prints the inverted value, for use with
13851 : // BIC and MVN instructions. It is not useful otherwise but is
13852 : // implemented for compatibility.
13853 1 : if (ARM_AM::getSOImmVal(~CVal) != -1)
13854 : break;
13855 : }
13856 : return;
13857 :
13858 4 : case 'L':
13859 4 : if (Subtarget->isThumb1Only()) {
13860 : // This must be a constant between -7 and 7,
13861 : // for 3-operand ADD/SUB immediate instructions.
13862 2 : if (CVal >= -7 && CVal < 7)
13863 : break;
13864 : } else if (Subtarget->isThumb2()) {
13865 : // A constant whose negation can be used as an immediate value in a
13866 : // data-processing instruction. This can be used in GCC with an "n"
13867 : // modifier that prints the negated value, for use with SUB
13868 : // instructions. It is not useful otherwise but is implemented for
13869 : // compatibility.
13870 1 : if (ARM_AM::getT2SOImmVal(-CVal) != -1)
13871 : break;
13872 : } else {
13873 : // A constant whose negation can be used as an immediate value in a
13874 : // data-processing instruction. This can be used in GCC with an "n"
13875 : // modifier that prints the negated value, for use with SUB
13876 : // instructions. It is not useful otherwise but is implemented for
13877 : // compatibility.
13878 1 : if (ARM_AM::getSOImmVal(-CVal) != -1)
13879 : break;
13880 : }
13881 : return;
13882 :
13883 3 : case 'M':
13884 3 : if (Subtarget->isThumb1Only()) {
13885 : // This must be a multiple of 4 between 0 and 1020, for
13886 : // ADD sp + immediate.
13887 1 : if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
13888 : break;
13889 : } else {
13890 : // A power of two or a constant between 0 and 32. This is used in
13891 : // GCC for the shift amount on shifted register operands, but it is
13892 : // useful in general for any shift amounts.
13893 2 : if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
13894 : break;
13895 : }
13896 : return;
13897 :
13898 1 : case 'N':
13899 1 : if (Subtarget->isThumb()) { // FIXME thumb2
13900 : // This must be a constant between 0 and 31, for shift amounts.
13901 1 : if (CVal >= 0 && CVal <= 31)
13902 : break;
13903 : }
13904 : return;
13905 :
13906 2 : case 'O':
13907 2 : if (Subtarget->isThumb()) { // FIXME thumb2
13908 : // This must be a multiple of 4 between -508 and 508, for
13909 : // ADD/SUB sp = sp + immediate.
13910 2 : if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
13911 : break;
13912 : }
13913 : return;
13914 : }
13915 22 : Result = DAG.getTargetConstant(CVal, SDLoc(Op), Op.getValueType());
13916 22 : break;
13917 : }
13918 :
13919 54 : if (Result.getNode()) {
13920 22 : Ops.push_back(Result);
13921 22 : return;
13922 : }
13923 32 : return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
13924 : }
13925 :
13926 199 : static RTLIB::Libcall getDivRemLibcall(
13927 : const SDNode *N, MVT::SimpleValueType SVT) {
13928 : assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
13929 : N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
13930 : "Unhandled Opcode in getDivRemLibcall");
13931 398 : bool isSigned = N->getOpcode() == ISD::SDIVREM ||
13932 : N->getOpcode() == ISD::SREM;
13933 : RTLIB::Libcall LC;
13934 199 : switch (SVT) {
13935 0 : default: llvm_unreachable("Unexpected request for libcall!");
13936 0 : case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
13937 0 : case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
13938 181 : case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
13939 18 : case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
13940 : }
13941 199 : return LC;
13942 : }
13943 :
13944 199 : static TargetLowering::ArgListTy getDivRemArgList(
13945 : const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget) {
13946 : assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
13947 : N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
13948 : "Unhandled Opcode in getDivRemArgList");
13949 398 : bool isSigned = N->getOpcode() == ISD::SDIVREM ||
13950 : N->getOpcode() == ISD::SREM;
13951 : TargetLowering::ArgListTy Args;
13952 : TargetLowering::ArgListEntry Entry;
13953 597 : for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
13954 796 : EVT ArgVT = N->getOperand(i).getValueType();
13955 398 : Type *ArgTy = ArgVT.getTypeForEVT(*Context);
13956 398 : Entry.Node = N->getOperand(i);
13957 398 : Entry.Ty = ArgTy;
13958 398 : Entry.IsSExt = isSigned;
13959 398 : Entry.IsZExt = !isSigned;
13960 398 : Args.push_back(Entry);
13961 : }
13962 199 : if (Subtarget->isTargetWindows() && Args.size() >= 2)
13963 : std::swap(Args[0], Args[1]);
13964 199 : return Args;
13965 : }
13966 :
13967 205 : SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
13968 : assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
13969 : Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
13970 : Subtarget->isTargetWindows()) &&
13971 : "Register-based DivRem lowering only");
13972 205 : unsigned Opcode = Op->getOpcode();
13973 : assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
13974 : "Invalid opcode for Div/Rem lowering");
13975 205 : bool isSigned = (Opcode == ISD::SDIVREM);
13976 205 : EVT VT = Op->getValueType(0);
13977 205 : Type *Ty = VT.getTypeForEVT(*DAG.getContext());
13978 : SDLoc dl(Op);
13979 :
13980 : // If the target has hardware divide, use divide + multiply + subtract:
13981 : // div = a / b
13982 : // rem = a - b * div
13983 : // return {div, rem}
13984 : // This should be lowered into UDIV/SDIV + MLS later on.
13985 205 : bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
13986 146 : : Subtarget->hasDivideInARMMode();
13987 205 : if (hasDivide && Op->getValueType(0).isSimple() &&
13988 : Op->getSimpleValueType(0) == MVT::i32) {
13989 10 : unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
13990 10 : const SDValue Dividend = Op->getOperand(0);
13991 10 : const SDValue Divisor = Op->getOperand(1);
13992 10 : SDValue Div = DAG.getNode(DivOpcode, dl, VT, Dividend, Divisor);
13993 10 : SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Div, Divisor);
13994 10 : SDValue Rem = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
13995 :
13996 10 : SDValue Values[2] = {Div, Rem};
13997 10 : return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VT, VT), Values);
13998 : }
13999 :
14000 195 : RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(),
14001 : VT.getSimpleVT().SimpleTy);
14002 195 : SDValue InChain = DAG.getEntryNode();
14003 :
14004 : TargetLowering::ArgListTy Args = getDivRemArgList(Op.getNode(),
14005 : DAG.getContext(),
14006 195 : Subtarget);
14007 :
14008 : SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
14009 390 : getPointerTy(DAG.getDataLayout()));
14010 :
14011 195 : Type *RetTy = StructType::get(Ty, Ty);
14012 :
14013 390 : if (Subtarget->isTargetWindows())
14014 41 : InChain = WinDBZCheckDenominator(DAG, Op.getNode(), InChain);
14015 :
14016 195 : TargetLowering::CallLoweringInfo CLI(DAG);
14017 195 : CLI.setDebugLoc(dl).setChain(InChain)
14018 195 : .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
14019 195 : .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);
14020 :
14021 195 : std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
14022 195 : return CallInfo.first;
14023 : }
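          :
          : // Illustrative example (assuming an AEABI target without hardware divide):
          : // an i32 SDIVREM becomes a call to __aeabi_idivmod, which returns the
          : // quotient in r0 and the remainder in r1, so both results come back from
          : // one libcall.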
14024 :
14025 : // Lowers REM using divmod helpers;
14026 : // see RTABI section 4.2/4.3.
14027 4 : SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
14028 : // Build return types (div and rem)
14029 : std::vector<Type*> RetTyParams;
14030 : Type *RetTyElement;
14031 :
14032 8 : switch (N->getValueType(0).getSimpleVT().SimpleTy) {
14033 0 : default: llvm_unreachable("Unexpected request for libcall!");
14034 0 : case MVT::i8: RetTyElement = Type::getInt8Ty(*DAG.getContext()); break;
14035 0 : case MVT::i16: RetTyElement = Type::getInt16Ty(*DAG.getContext()); break;
14036 0 : case MVT::i32: RetTyElement = Type::getInt32Ty(*DAG.getContext()); break;
14037 4 : case MVT::i64: RetTyElement = Type::getInt64Ty(*DAG.getContext()); break;
14038 : }
14039 :
14040 4 : RetTyParams.push_back(RetTyElement);
14041 4 : RetTyParams.push_back(RetTyElement);
14042 : ArrayRef<Type*> ret = ArrayRef<Type*>(RetTyParams);
14043 4 : Type *RetTy = StructType::get(*DAG.getContext(), ret);
14044 :
14045 8 : RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT().
14046 : SimpleTy);
14047 4 : SDValue InChain = DAG.getEntryNode();
14048 : TargetLowering::ArgListTy Args = getDivRemArgList(N, DAG.getContext(),
14049 4 : Subtarget);
14050 4 : bool isSigned = N->getOpcode() == ISD::SREM;
14051 : SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
14052 8 : getPointerTy(DAG.getDataLayout()));
14053 :
14054 8 : if (Subtarget->isTargetWindows())
14055 0 : InChain = WinDBZCheckDenominator(DAG, N, InChain);
14056 :
14057 : // Lower call
14058 4 : CallLoweringInfo CLI(DAG);
14059 4 : CLI.setChain(InChain)
14060 4 : .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args))
14061 4 : .setSExtResult(isSigned).setZExtResult(!isSigned).setDebugLoc(SDLoc(N));
14062 4 : std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
14063 :
14064 : // Return second (rem) result operand (first contains div)
14065 4 : SDNode *ResNode = CallResult.first.getNode();
14066 : assert(ResNode->getNumOperands() == 2 && "divmod should return two operands");
14067 4 : return ResNode->getOperand(1);
14068 : }
14069 :
14070 : SDValue
14071 8 : ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
14072 : assert(Subtarget->isTargetWindows() && "unsupported target platform");
14073 : SDLoc DL(Op);
14074 :
14075 : // Get the inputs.
14076 8 : SDValue Chain = Op.getOperand(0);
14077 8 : SDValue Size = Op.getOperand(1);
14078 :
14079 8 : if (DAG.getMachineFunction().getFunction().hasFnAttribute(
14080 : "no-stack-arg-probe")) {
14081 2 : unsigned Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
14082 1 : SDValue SP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
14083 1 : Chain = SP.getValue(1);
14084 1 : SP = DAG.getNode(ISD::SUB, DL, MVT::i32, SP, Size);
14085 1 : if (Align)
14086 0 : SP = DAG.getNode(ISD::AND, DL, MVT::i32, SP.getValue(0),
14087 0 : DAG.getConstant(-(uint64_t)Align, DL, MVT::i32));
14088 1 : Chain = DAG.getCopyToReg(Chain, DL, ARM::SP, SP);
14089 1 : SDValue Ops[2] = { SP, Chain };
14090 1 : return DAG.getMergeValues(Ops, DL);
14091 : }
14092 :
14093 : SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
14094 7 : DAG.getConstant(2, DL, MVT::i32));
14095 :
14096 7 : SDValue Flag;
14097 7 : Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag);
14098 7 : Flag = Chain.getValue(1);
14099 :
14100 7 : SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
14101 7 : Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag);
14102 :
14103 7 : SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
14104 7 : Chain = NewSP.getValue(1);
14105 :
14106 7 : SDValue Ops[2] = { NewSP, Chain };
14107 7 : return DAG.getMergeValues(Ops, DL);
14108 : }
14109 :
14110 9 : SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
14111 : assert(Op.getValueType() == MVT::f64 && Subtarget->isFPOnlySP() &&
14112 : "Unexpected type for custom-lowering FP_EXTEND");
14113 :
14114 : RTLIB::Libcall LC;
14115 18 : LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
14116 :
14117 9 : SDValue SrcVal = Op.getOperand(0);
14118 9 : return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
14119 9 : SDLoc(Op)).first;
14120 : }
14121 :
14122 6 : SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
14123 : assert(Op.getOperand(0).getValueType() == MVT::f64 &&
14124 : Subtarget->isFPOnlySP() &&
14125 : "Unexpected type for custom-lowering FP_ROUND");
14126 :
14127 : RTLIB::Libcall LC;
14128 12 : LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
14129 :
14130 6 : SDValue SrcVal = Op.getOperand(0);
14131 6 : return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
14132 6 : SDLoc(Op)).first;
14133 : }
14134 :
14135 : bool
14136 1326 : ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
14137 : // The ARM target isn't yet aware of offsets.
14138 1326 : return false;
14139 : }
14140 :
14141 873 : bool ARM::isBitFieldInvertedMask(unsigned v) {
14142 873 : if (v == 0xffffffff)
14143 : return false;
14144 :
14145 :   // There can be 1's on either or both "outsides"; all the "inside"
14146 :   // bits must be 0's.
14147 873 : return isShiftedMask_32(~v);
14148 : }
14149 :
14150 : /// isFPImmLegal - Returns true if the target can instruction select the
14151 : /// specified FP immediate natively. If false, the legalizer will
14152 : /// materialize the FP immediate as a load from a constant pool.
14153 1928 : bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
14154 1928 : if (!Subtarget->hasVFP3())
14155 : return false;
14156 602 : if (VT == MVT::f16 && Subtarget->hasFullFP16())
14157 602 : return ARM_AM::getFP16Imm(Imm) != -1;
14158 : if (VT == MVT::f32)
14159 957 : return ARM_AM::getFP32Imm(Imm) != -1;
14160 331 : if (VT == MVT::f64 && !Subtarget->isFPOnlySP())
14161 294 : return ARM_AM::getFP64Imm(Imm) != -1;
14162 : return false;
14163 : }
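          :
          : // Illustrative example: with VFP3, constants such as 1.0 or -0.5 fit the
          : // 8-bit VMOV floating-point immediate encoding and are accepted, while a
          : // value like 0.1 is not encodable and will be loaded from the constant
          : // pool instead.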
14164 :
14165 : /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
14166 : /// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
14167 : /// specified in the intrinsic calls.
14168 3037 : bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
14169 : const CallInst &I,
14170 : MachineFunction &MF,
14171 : unsigned Intrinsic) const {
14172 3037 : switch (Intrinsic) {
14173 274 : case Intrinsic::arm_neon_vld1:
14174 : case Intrinsic::arm_neon_vld2:
14175 : case Intrinsic::arm_neon_vld3:
14176 : case Intrinsic::arm_neon_vld4:
14177 : case Intrinsic::arm_neon_vld2lane:
14178 : case Intrinsic::arm_neon_vld3lane:
14179 : case Intrinsic::arm_neon_vld4lane:
14180 : case Intrinsic::arm_neon_vld2dup:
14181 : case Intrinsic::arm_neon_vld3dup:
14182 : case Intrinsic::arm_neon_vld4dup: {
14183 274 : Info.opc = ISD::INTRINSIC_W_CHAIN;
14184 : // Conservatively set memVT to the entire set of vectors loaded.
14185 274 : auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
14186 274 : uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
14187 548 : Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
14188 274 : Info.ptrVal = I.getArgOperand(0);
14189 274 : Info.offset = 0;
14190 274 : Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
14191 274 : Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
14192 : // volatile loads with NEON intrinsics not supported
14193 274 : Info.flags = MachineMemOperand::MOLoad;
14194 274 : return true;
14195 : }
14196 24 : case Intrinsic::arm_neon_vld1x2:
14197 : case Intrinsic::arm_neon_vld1x3:
14198 : case Intrinsic::arm_neon_vld1x4: {
14199 24 : Info.opc = ISD::INTRINSIC_W_CHAIN;
14200 : // Conservatively set memVT to the entire set of vectors loaded.
14201 24 : auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
14202 24 : uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
14203 48 : Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
14204 24 : Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
14205 24 : Info.offset = 0;
14206 24 : Info.align = 0;
14207 : // volatile loads with NEON intrinsics not supported
14208 24 : Info.flags = MachineMemOperand::MOLoad;
14209 24 : return true;
14210 : }
14211 144 : case Intrinsic::arm_neon_vst1:
14212 : case Intrinsic::arm_neon_vst2:
14213 : case Intrinsic::arm_neon_vst3:
14214 : case Intrinsic::arm_neon_vst4:
14215 : case Intrinsic::arm_neon_vst2lane:
14216 : case Intrinsic::arm_neon_vst3lane:
14217 : case Intrinsic::arm_neon_vst4lane: {
14218 144 : Info.opc = ISD::INTRINSIC_VOID;
14219 : // Conservatively set memVT to the entire set of vectors stored.
14220 144 : auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
14221 : unsigned NumElts = 0;
14222 626 : for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
14223 482 : Type *ArgTy = I.getArgOperand(ArgI)->getType();
14224 482 : if (!ArgTy->isVectorTy())
14225 : break;
14226 338 : NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
14227 : }
14228 144 : Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
14229 : Info.ptrVal = I.getArgOperand(0);
14230 144 : Info.offset = 0;
14231 144 : Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
14232 144 : Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
14233 : // volatile stores with NEON intrinsics not supported
14234 144 : Info.flags = MachineMemOperand::MOStore;
14235 144 : return true;
14236 : }
14237 24 : case Intrinsic::arm_neon_vst1x2:
14238 : case Intrinsic::arm_neon_vst1x3:
14239 : case Intrinsic::arm_neon_vst1x4: {
14240 24 : Info.opc = ISD::INTRINSIC_VOID;
14241 : // Conservatively set memVT to the entire set of vectors stored.
14242 24 : auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
14243 : unsigned NumElts = 0;
14244 120 : for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
14245 72 : Type *ArgTy = I.getArgOperand(ArgI)->getType();
14246 72 : if (!ArgTy->isVectorTy())
14247 : break;
14248 72 : NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
14249 : }
14250 24 : Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
14251 : Info.ptrVal = I.getArgOperand(0);
14252 24 : Info.offset = 0;
14253 24 : Info.align = 0;
14254 : // volatile stores with NEON intrinsics not supported
14255 24 : Info.flags = MachineMemOperand::MOStore;
14256 24 : return true;
14257 : }
14258 244 : case Intrinsic::arm_ldaex:
14259 : case Intrinsic::arm_ldrex: {
14260 244 : auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
14261 244 : PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
14262 244 : Info.opc = ISD::INTRINSIC_W_CHAIN;
14263 244 : Info.memVT = MVT::getVT(PtrTy->getElementType());
14264 : Info.ptrVal = I.getArgOperand(0);
14265 244 : Info.offset = 0;
14266 244 : Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
14267 244 : Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
14268 244 : return true;
14269 : }
14270 235 : case Intrinsic::arm_stlex:
14271 : case Intrinsic::arm_strex: {
14272 235 : auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
14273 235 : PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
14274 235 : Info.opc = ISD::INTRINSIC_W_CHAIN;
14275 235 : Info.memVT = MVT::getVT(PtrTy->getElementType());
14276 : Info.ptrVal = I.getArgOperand(1);
14277 235 : Info.offset = 0;
14278 235 : Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
14279 235 : Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
14280 235 : return true;
14281 : }
14282 109 : case Intrinsic::arm_stlexd:
14283 : case Intrinsic::arm_strexd:
14284 109 : Info.opc = ISD::INTRINSIC_W_CHAIN;
14285 109 : Info.memVT = MVT::i64;
14286 109 : Info.ptrVal = I.getArgOperand(2);
14287 109 : Info.offset = 0;
14288 109 : Info.align = 8;
14289 109 : Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
14290 109 : return true;
14291 :
14292 127 : case Intrinsic::arm_ldaexd:
14293 : case Intrinsic::arm_ldrexd:
14294 127 : Info.opc = ISD::INTRINSIC_W_CHAIN;
14295 127 : Info.memVT = MVT::i64;
14296 127 : Info.ptrVal = I.getArgOperand(0);
14297 127 : Info.offset = 0;
14298 127 : Info.align = 8;
14299 127 : Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
14300 127 : return true;
14301 :
14302 : default:
14303 : break;
14304 : }
14305 :
14306 : return false;
14307 : }
14308 :
14309 : /// Returns true if it is beneficial to convert a load of a constant
14310 : /// to just the constant itself.
14311 19 : bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
14312 : Type *Ty) const {
14313 : assert(Ty->isIntegerTy());
14314 :
14315 19 : unsigned Bits = Ty->getPrimitiveSizeInBits();
14316 19 : if (Bits == 0 || Bits > 32)
14317 0 : return false;
14318 : return true;
14319 : }
14320 :
14321 40 : bool ARMTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
14322 : unsigned Index) const {
14323 : if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
14324 5 : return false;
14325 :
14326 70 : return (Index == 0 || Index == ResVT.getVectorNumElements());
14327 : }
14328 :
14329 317 : Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder,
14330 : ARM_MB::MemBOpt Domain) const {
14331 317 : Module *M = Builder.GetInsertBlock()->getParent()->getParent();
14332 :
14333 : // First, if the target has no DMB, see what fallback we can use.
14334 317 : if (!Subtarget->hasDataBarrier()) {
14335 :     // Some ARMv6 CPUs can support data barriers with an mcr instruction.
14336 : // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
14337 : // here.
14338 9 : if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
14339 9 : Function *MCR = Intrinsic::getDeclaration(M, Intrinsic::arm_mcr);
14340 9 : Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0),
14341 18 : Builder.getInt32(0), Builder.getInt32(7),
14342 9 : Builder.getInt32(10), Builder.getInt32(5)};
14343 9 : return Builder.CreateCall(MCR, args);
14344 : } else {
14345 : // Instead of using barriers, atomic accesses on these subtargets use
14346 : // libcalls.
14347 0 : llvm_unreachable("makeDMB on a target so old that it has no barriers");
14348 : }
14349 : } else {
14350 308 : Function *DMB = Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
14351 : // Only a full system barrier exists in the M-class architectures.
14352 308 : Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
14353 308 : Constant *CDomain = Builder.getInt32(Domain);
14354 308 : return Builder.CreateCall(DMB, CDomain);
14355 : }
14356 : }
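          :
          : // Illustrative example: on an A-class v7 target this produces "dmb ish"
          : // (or "dmb ishst" for the store-only domain), while M-class cores only
          : // implement the full-system barrier, hence the override to ARM_MB::SY
          : // above.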
14357 :
14358 : // Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
14359 186 : Instruction *ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
14360 : Instruction *Inst,
14361 : AtomicOrdering Ord) const {
14362 186 : switch (Ord) {
14363 : case AtomicOrdering::NotAtomic:
14364 : case AtomicOrdering::Unordered:
14365 : llvm_unreachable("Invalid fence: unordered/non-atomic");
14366 : case AtomicOrdering::Monotonic:
14367 : case AtomicOrdering::Acquire:
14368 : return nullptr; // Nothing to do
14369 156 : case AtomicOrdering::SequentiallyConsistent:
14370 156 : if (!Inst->hasAtomicStore())
14371 : return nullptr; // Nothing to do
14372 : LLVM_FALLTHROUGH;
14373 : case AtomicOrdering::Release:
14374 : case AtomicOrdering::AcquireRelease:
14375 134 : if (Subtarget->preferISHSTBarriers())
14376 8 : return makeDMB(Builder, ARM_MB::ISHST);
14377 : // FIXME: add a comment with a link to documentation justifying this.
14378 : else
14379 126 : return makeDMB(Builder, ARM_MB::ISH);
14380 : }
14381 0 : llvm_unreachable("Unknown fence ordering in emitLeadingFence");
14382 : }
14383 :
14384 213 : Instruction *ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
14385 : Instruction *Inst,
14386 : AtomicOrdering Ord) const {
14387 : switch (Ord) {
14388 : case AtomicOrdering::NotAtomic:
14389 : case AtomicOrdering::Unordered:
14390 : llvm_unreachable("Invalid fence: unordered/not-atomic");
14391 : case AtomicOrdering::Monotonic:
14392 : case AtomicOrdering::Release:
14393 : return nullptr; // Nothing to do
14394 183 : case AtomicOrdering::Acquire:
14395 : case AtomicOrdering::AcquireRelease:
14396 : case AtomicOrdering::SequentiallyConsistent:
14397 183 : return makeDMB(Builder, ARM_MB::ISH);
14398 : }
14399 0 : llvm_unreachable("Unknown fence ordering in emitTrailingFence");
14400 : }
14401 :
14402 : // Loads and stores less than 64 bits are already atomic; ones above that
14403 : // are doomed anyway, so defer to the default libcall and blame the OS when
14404 : // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
14405 : // anything for those.
14406 78 : bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
14407 78 : unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
14408 78 : return (Size == 64) && !Subtarget->isMClass();
14409 : }
14410 :
14411 : // Loads and stores less than 64 bits are already atomic; ones above that
14412 : // are doomed anyway, so defer to the default libcall and blame the OS when
14413 : // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
14414 : // anything for those.
14415 : // FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
14416 : // guarantee, see DDI0406C ARM architecture reference manual,
14417 : // sections A8.8.72-74 LDRD)
14418 : TargetLowering::AtomicExpansionKind
14419 76 : ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
14420 76 : unsigned Size = LI->getType()->getPrimitiveSizeInBits();
14421 76 : return ((Size == 64) && !Subtarget->isMClass()) ? AtomicExpansionKind::LLOnly
14422 76 : : AtomicExpansionKind::None;
14423 : }
14424 :
14425 : // For the real atomic operations, we have ldrex/strex up to 32 bits,
14426 : // and up to 64 bits on the non-M profiles
14427 : TargetLowering::AtomicExpansionKind
14428 315 : ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
14429 315 : unsigned Size = AI->getType()->getPrimitiveSizeInBits();
14430 315 : bool hasAtomicRMW = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
14431 627 : return (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW)
14432 315 : ? AtomicExpansionKind::LLSC
14433 315 : : AtomicExpansionKind::None;
14434 : }
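          :
          : // Illustrative example: an i32 atomicrmw add is expanded in IR to an
          : // ldrex/strex loop (LLSC) whenever the exclusives are available; an i64
          : // atomicrmw additionally needs a non-M profile so that ldrexd/strexd can
          : // be used, otherwise no LL/SC expansion is requested here.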
14435 :
14436 : TargetLowering::AtomicExpansionKind
14437 116 : ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
14438 : // At -O0, fast-regalloc cannot cope with the live vregs necessary to
14439 : // implement cmpxchg without spilling. If the address being exchanged is also
14440 : // on the stack and close enough to the spill slot, this can lead to a
14441 : // situation where the monitor always gets cleared and the atomic operation
14442 : // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
14443 : bool HasAtomicCmpXchg =
14444 116 : !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
14445 116 : if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg)
14446 74 : return AtomicExpansionKind::LLSC;
14447 : return AtomicExpansionKind::None;
14448 : }
14449 :
14450 568 : bool ARMTargetLowering::shouldInsertFencesForAtomic(
14451 : const Instruction *I) const {
14452 568 : return InsertFencesForAtomic;
14453 : }
14454 :
14455 : // This has so far only been implemented for MachO.
14456 126 : bool ARMTargetLowering::useLoadStackGuardNode() const {
14457 252 : return Subtarget->isTargetMachO();
14458 : }
14459 :
14460 219 : bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
14461 : unsigned &Cost) const {
14462 : // If we do not have NEON, vector types are not natively supported.
14463 219 : if (!Subtarget->hasNEON())
14464 : return false;
14465 :
14466 : // Floating point values and vector values map to the same register file.
14467 :   // Therefore, although we could do a store extract of a vector type, it is
14468 :   // better to leave it as a float store, as we have more freedom in the
14469 :   // addressing mode for those.
14470 : if (VectorTy->isFPOrFPVectorTy())
14471 : return false;
14472 :
14473 : // If the index is unknown at compile time, this is very expensive to lower
14474 : // and it is not possible to combine the store with the extract.
14475 105 : if (!isa<ConstantInt>(Idx))
14476 : return false;
14477 :
14478 : assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
14479 : unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth();
14480 : // We can do a store + vector extract on any vector that fits perfectly in a D
14481 : // or Q register.
14482 98 : if (BitWidth == 64 || BitWidth == 128) {
14483 91 : Cost = 0;
14484 91 : return true;
14485 : }
14486 : return false;
14487 : }
14488 :
14489 37 : bool ARMTargetLowering::isCheapToSpeculateCttz() const {
14490 37 : return Subtarget->hasV6T2Ops();
14491 : }
14492 :
14493 31 : bool ARMTargetLowering::isCheapToSpeculateCtlz() const {
14494 31 : return Subtarget->hasV6T2Ops();
14495 : }
14496 :
14497 360 : Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
14498 : AtomicOrdering Ord) const {
14499 360 : Module *M = Builder.GetInsertBlock()->getParent()->getParent();
14500 360 : Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
14501 : bool IsAcquire = isAcquireOrStronger(Ord);
14502 :
14503 : // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
14504 : // intrinsic must return {i32, i32} and we have to recombine them into a
14505 : // single i64 here.
14506 360 : if (ValTy->getPrimitiveSizeInBits() == 64) {
14507 : Intrinsic::ID Int =
14508 126 : IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
14509 126 : Function *Ldrex = Intrinsic::getDeclaration(M, Int);
14510 :
14511 126 : Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
14512 126 : Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
14513 :
14514 252 : Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
14515 252 : Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
14516 126 : if (!Subtarget->isLittle())
14517 : std::swap (Lo, Hi);
14518 126 : Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
14519 126 : Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
14520 126 : return Builder.CreateOr(
14521 126 : Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64");
14522 : }
14523 :
14524 234 : Type *Tys[] = { Addr->getType() };
14525 234 : Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
14526 234 : Function *Ldrex = Intrinsic::getDeclaration(M, Int, Tys);
14527 :
14528 234 : return Builder.CreateTruncOrBitCast(
14529 234 : Builder.CreateCall(Ldrex, Addr),
14530 : cast<PointerType>(Addr->getType())->getElementType());
14531 : }
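          :
          : // Illustrative IR shape for a 64-bit acquire load-linked (names are
          : // placeholders):
          : //   %lohi = call { i32, i32 } @llvm.arm.ldaexd(i8* %p)
          : //   ... the two halves are then zero-extended, shifted and or'd back
          : //   together into a single i64, as built above.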
14532 :
14533 61 : void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
14534 : IRBuilder<> &Builder) const {
14535 61 : if (!Subtarget->hasV7Ops())
14536 : return;
14537 59 : Module *M = Builder.GetInsertBlock()->getParent()->getParent();
14538 118 : Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::arm_clrex));
14539 : }
14540 :
14541 335 : Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
14542 : Value *Addr,
14543 : AtomicOrdering Ord) const {
14544 335 : Module *M = Builder.GetInsertBlock()->getParent()->getParent();
14545 : bool IsRelease = isReleaseOrStronger(Ord);
14546 :
14547 : // Since the intrinsics must have legal type, the i64 intrinsics take two
14548 : // parameters: "i32, i32". We must marshal Val into the appropriate form
14549 : // before the call.
14550 335 : if (Val->getType()->getPrimitiveSizeInBits() == 64) {
14551 : Intrinsic::ID Int =
14552 108 : IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
14553 108 : Function *Strex = Intrinsic::getDeclaration(M, Int);
14554 108 : Type *Int32Ty = Type::getInt32Ty(M->getContext());
14555 :
14556 108 : Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
14557 108 : Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
14558 108 : if (!Subtarget->isLittle())
14559 : std::swap(Lo, Hi);
14560 216 : Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
14561 108 : return Builder.CreateCall(Strex, {Lo, Hi, Addr});
14562 : }
14563 :
14564 227 : Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
14565 227 : Type *Tys[] = { Addr->getType() };
14566 227 : Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);
14567 :
14568 227 : return Builder.CreateCall(
14569 681 : Strex, {Builder.CreateZExtOrBitCast(
14570 : Val, Strex->getFunctionType()->getParamType(0)),
14571 : Addr});
14572 : }
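          :
          : // Illustrative IR shape for a 64-bit release store-conditional (names are
          : // placeholders):
          : //   %ok = call i32 @llvm.arm.stlexd(i32 %lo, i32 %hi, i8* %p)
          : // where %ok is 0 on success and 1 if the exclusive monitor was lost.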
14573 :
14574 :
14575 47 : bool ARMTargetLowering::alignLoopsWithOptSize() const {
14576 94 : return Subtarget->isMClass();
14577 : }
14578 :
14579 : /// A helper function for determining the number of interleaved accesses we
14580 : /// will generate when lowering accesses of the given type.
14581 : unsigned
14582 53 : ARMTargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
14583 : const DataLayout &DL) const {
14584 53 : return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
14585 : }
14586 :
14587 71 : bool ARMTargetLowering::isLegalInterleavedAccessType(
14588 : VectorType *VecTy, const DataLayout &DL) const {
14589 :
14590 71 : unsigned VecSize = DL.getTypeSizeInBits(VecTy);
14591 71 : unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
14592 :
14593 : // Ensure the vector doesn't have f16 elements. Even though we could do an
14594 : // i16 vldN, we can't hold the f16 vectors and will end up converting via
14595 : // f32.
14596 142 : if (VecTy->getElementType()->isHalfTy())
14597 : return false;
14598 :
14599 : // Ensure the number of vector elements is greater than 1.
14600 69 : if (VecTy->getNumElements() < 2)
14601 : return false;
14602 :
14603 : // Ensure the element type is legal.
14604 58 : if (ElSize != 8 && ElSize != 16 && ElSize != 32)
14605 : return false;
14606 :
14607 : // Ensure the total vector size is 64 or a multiple of 128. Types larger than
14608 : // 128 will be split into multiple interleaved accesses.
14609 57 : return VecSize == 64 || VecSize % 128 == 0;
14610 : }
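          :
          : // Illustrative example: <8 x i32> (256 bits) is accepted and later split
          : // into two 128-bit accesses, while <4 x half> and <1 x i64> are rejected
          : // by the checks above.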
14611 :
14612 : /// Lower an interleaved load into a vldN intrinsic.
14613 : ///
14614 : /// E.g. Lower an interleaved load (Factor = 2):
14615 : /// %wide.vec = load <8 x i32>, <8 x i32>* %ptr, align 4
14616 : /// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
14617 : /// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
14618 : ///
14619 : /// Into:
14620 : /// %vld2 = { <4 x i32>, <4 x i32> } call llvm.arm.neon.vld2(%ptr, 4)
14621 : /// %vec0 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 0
14622 : /// %vec1 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 1
14623 42 : bool ARMTargetLowering::lowerInterleavedLoad(
14624 : LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
14625 : ArrayRef<unsigned> Indices, unsigned Factor) const {
14626 : assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
14627 : "Invalid interleave factor");
14628 : assert(!Shuffles.empty() && "Empty shufflevector input");
14629 : assert(Shuffles.size() == Indices.size() &&
14630 : "Unmatched number of shufflevectors and indices");
14631 :
14632 42 : VectorType *VecTy = Shuffles[0]->getType();
14633 42 : Type *EltTy = VecTy->getVectorElementType();
14634 :
14635 42 : const DataLayout &DL = LI->getModule()->getDataLayout();
14636 :
14637 : // Skip if we do not have NEON, and skip illegal vector types. We can
14638 : // "legalize" wide vector types into multiple interleaved accesses as long as
14639 : // the vector size is a multiple of 128 bits.
14640 42 : if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VecTy, DL))
14641 22 : return false;
14642 :
14643 20 : unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
14644 :
14645 : // A pointer vector cannot be the return type of the ldN intrinsics, so
14646 : // load integer vectors first and then convert to pointer vectors.
14647 20 : if (EltTy->isPointerTy())
14648 : VecTy =
14649 4 : VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements());
14650 :
14651 20 : IRBuilder<> Builder(LI);
14652 :
14653 : // The base address of the load.
14654 : Value *BaseAddr = LI->getPointerOperand();
14655 :
14656 20 : if (NumLoads > 1) {
14657 : // If we're going to generate more than one load, reset the sub-vector type
14658 : // to something legal.
14659 10 : VecTy = VectorType::get(VecTy->getVectorElementType(),
14660 : VecTy->getVectorNumElements() / NumLoads);
14661 :
14662 : // We will compute the pointer operand of each load from the original base
14663 : // address using GEPs. Cast the base address to a pointer to the scalar
14664 : // element type.
14665 5 : BaseAddr = Builder.CreateBitCast(
14666 10 : BaseAddr, VecTy->getVectorElementType()->getPointerTo(
14667 : LI->getPointerAddressSpace()));
14668 : }
14669 :
14670 : assert(isTypeLegal(EVT::getEVT(VecTy)) && "Illegal vldN vector type!");
14671 :
14672 20 : Type *Int8Ptr = Builder.getInt8PtrTy(LI->getPointerAddressSpace());
14673 20 : Type *Tys[] = {VecTy, Int8Ptr};
14674 : static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
14675 : Intrinsic::arm_neon_vld3,
14676 : Intrinsic::arm_neon_vld4};
14677 : Function *VldnFunc =
14678 20 : Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
14679 :
14680 : // Holds sub-vectors extracted from the load intrinsic return values. The
14681 : // sub-vectors are associated with the shufflevector instructions they will
14682 : // replace.
14683 : DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
14684 :
14685 46 : for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
14686 : // If we're generating more than one load, compute the base address of
14687 : // subsequent loads as an offset from the previous.
14688 26 : if (LoadCount > 0)
14689 6 : BaseAddr = Builder.CreateConstGEP1_32(
14690 : BaseAddr, VecTy->getVectorNumElements() * Factor);
14691 :
14692 : SmallVector<Value *, 2> Ops;
14693 26 : Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
14694 26 : Ops.push_back(Builder.getInt32(LI->getAlignment()));
14695 :
14696 26 : CallInst *VldN = Builder.CreateCall(VldnFunc, Ops, "vldN");
14697 :
14698 : // Replace uses of each shufflevector with the corresponding vector loaded
14699 : // by ldN.
14700 88 : for (unsigned i = 0; i < Shuffles.size(); i++) {
14701 62 : ShuffleVectorInst *SV = Shuffles[i];
14702 124 : unsigned Index = Indices[i];
14703 :
14704 62 : Value *SubVec = Builder.CreateExtractValue(VldN, Index);
14705 :
14706 : // Convert the integer vector to pointer vector if the element is pointer.
14707 62 : if (EltTy->isPointerTy())
14708 12 : SubVec = Builder.CreateIntToPtr(
14709 36 : SubVec, VectorType::get(SV->getType()->getVectorElementType(),
14710 : VecTy->getVectorNumElements()));
14711 :
14712 62 : SubVecs[SV].push_back(SubVec);
14713 : }
14714 : }
14715 :
14716 : // Replace uses of the shufflevector instructions with the sub-vectors
14717 : // returned by the load intrinsic. If a shufflevector instruction is
14718 : // associated with more than one sub-vector, those sub-vectors will be
14719 : // concatenated into a single wide vector.
14720 67 : for (ShuffleVectorInst *SVI : Shuffles) {
14721 : auto &SubVec = SubVecs[SVI];
14722 : auto *WideVec =
14723 94 : SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
14724 47 : SVI->replaceAllUsesWith(WideVec);
14725 : }
14726 :
14727 : return true;
14728 : }
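
When the de-interleaved result type is wider than 128 bits, the code above splits the load into NumLoads legal vldN calls and steps the base address by (sub-vector length times Factor) scalar elements between loads. A worked sketch of that arithmetic for a hypothetical Factor = 2 load whose shuffle results are <8 x i32>:

    #include <cassert>

    int main() {
      unsigned Factor = 2;
      unsigned ResultElts = 8;                       // each shuffle yields <8 x i32>
      unsigned ResultBits = ResultElts * 32;         // 256 bits
      unsigned NumLoads = (ResultBits + 127) / 128;  // -> 2 interleaved accesses
      unsigned SubVecElts = ResultElts / NumLoads;   // legal sub-vector: <4 x i32>

      // Each vld2 reads SubVecElts * Factor consecutive scalars, so the second
      // load's base address is offset by that many i32 elements from the first.
      unsigned ElemsPerLoad = SubVecElts * Factor;
      assert(NumLoads == 2 && SubVecElts == 4 && ElemsPerLoad == 8);
      return 0;
    }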
14729 :
14730 : /// Lower an interleaved store into a vstN intrinsic.
14731 : ///
14732 : /// E.g. Lower an interleaved store (Factor = 3):
14733 : /// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
14734 : /// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
14735 : /// store <12 x i32> %i.vec, <12 x i32>* %ptr, align 4
14736 : ///
14737 : /// Into:
14738 : /// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
14739 : /// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
14740 : /// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
14741 : /// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
14742 : ///
14743 : /// Note that the new shufflevectors will be removed and we'll only generate one
14744 : /// vst3 instruction in CodeGen.
14745 : ///
14746 : /// Example for a more general valid mask (Factor 3). Lower:
14747 : /// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
14748 : /// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
14749 : /// store <12 x i32> %i.vec, <12 x i32>* %ptr
14750 : ///
14751 : /// Into:
14752 : /// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
14753 : /// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
14754 : /// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
14755 : /// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
14756 62 : bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
14757 : ShuffleVectorInst *SVI,
14758 : unsigned Factor) const {
14759 : assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
14760 : "Invalid interleave factor");
14761 :
14762 : VectorType *VecTy = SVI->getType();
14763 : assert(VecTy->getVectorNumElements() % Factor == 0 &&
14764 : "Invalid interleaved store");
14765 :
14766 62 : unsigned LaneLen = VecTy->getVectorNumElements() / Factor;
14767 62 : Type *EltTy = VecTy->getVectorElementType();
14768 62 : VectorType *SubVecTy = VectorType::get(EltTy, LaneLen);
14769 :
14770 62 : const DataLayout &DL = SI->getModule()->getDataLayout();
14771 :
14772 : // Skip if we do not have NEON, and skip illegal vector types. We can
14773 : // "legalize" wide vector types into multiple interleaved accesses as long as
14774 : // the vector size is a multiple of 128 bits.
14775 62 : if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(SubVecTy, DL))
14776 39 : return false;
14777 :
14778 23 : unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
14779 :
14780 : Value *Op0 = SVI->getOperand(0);
14781 : Value *Op1 = SVI->getOperand(1);
14782 23 : IRBuilder<> Builder(SI);
14783 :
14784 : // StN intrinsics don't support pointer vectors as arguments. Convert pointer
14785 : // vectors to integer vectors.
14786 23 : if (EltTy->isPointerTy()) {
14787 3 : Type *IntTy = DL.getIntPtrType(EltTy);
14788 :
14789 : // Convert to the corresponding integer vector.
14790 : Type *IntVecTy =
14791 6 : VectorType::get(IntTy, Op0->getType()->getVectorNumElements());
14792 3 : Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
14793 3 : Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
14794 :
14795 3 : SubVecTy = VectorType::get(IntTy, LaneLen);
14796 : }
14797 :
14798 : // The base address of the store.
14799 : Value *BaseAddr = SI->getPointerOperand();
14800 :
14801 23 : if (NumStores > 1) {
14802 : // If we're going to generate more than one store, reset the lane length
14803 : // and sub-vector type to something legal.
14804 3 : LaneLen /= NumStores;
14805 6 : SubVecTy = VectorType::get(SubVecTy->getVectorElementType(), LaneLen);
14806 :
14807 : // We will compute the pointer operand of each store from the original base
14808 : // address using GEPs. Cast the base address to a pointer to the scalar
14809 : // element type.
14810 3 : BaseAddr = Builder.CreateBitCast(
14811 6 : BaseAddr, SubVecTy->getVectorElementType()->getPointerTo(
14812 : SI->getPointerAddressSpace()));
14813 : }
14814 :
14815 : assert(isTypeLegal(EVT::getEVT(SubVecTy)) && "Illegal vstN vector type!");
14816 :
14817 : auto Mask = SVI->getShuffleMask();
14818 :
14819 23 : Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace());
14820 23 : Type *Tys[] = {Int8Ptr, SubVecTy};
14821 : static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
14822 : Intrinsic::arm_neon_vst3,
14823 : Intrinsic::arm_neon_vst4};
14824 :
14825 49 : for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
14826 : // If we're generating more than one store, compute the base address of
14827 : // subsequent stores as an offset from the previous.
14828 26 : if (StoreCount > 0)
14829 3 : BaseAddr = Builder.CreateConstGEP1_32(BaseAddr, LaneLen * Factor);
14830 :
14831 : SmallVector<Value *, 6> Ops;
14832 26 : Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
14833 :
14834 : Function *VstNFunc =
14835 26 : Intrinsic::getDeclaration(SI->getModule(), StoreInts[Factor - 2], Tys);
14836 :
14837 : // Split the shufflevector operands into sub vectors for the new vstN call.
14838 108 : for (unsigned i = 0; i < Factor; i++) {
14839 82 : unsigned IdxI = StoreCount * LaneLen * Factor + i;
14840 164 : if (Mask[IdxI] >= 0) {
14841 71 : Ops.push_back(Builder.CreateShuffleVector(
14842 71 : Op0, Op1, createSequentialMask(Builder, Mask[IdxI], LaneLen, 0)));
14843 : } else {
14844 : unsigned StartMask = 0;
14845 20 : for (unsigned j = 1; j < LaneLen; j++) {
14846 17 : unsigned IdxJ = StoreCount * LaneLen * Factor + j;
14847 34 : if (Mask[IdxJ * Factor + IdxI] >= 0) {
14848 8 : StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
14849 8 : break;
14850 : }
14851 : }
14852 : // Note: If all elements in a chunk are undefs, StartMask=0!
14853 : // Note: Filling undef gaps with random elements is ok, since
14854 : // those elements were being written anyway (with undefs).
14855 : // In the case of all undefs we're defaulting to using elems from 0
14856 : // Note: StartMask cannot be negative, it's checked in
14857 : // isReInterleaveMask
14858 11 : Ops.push_back(Builder.CreateShuffleVector(
14859 11 : Op0, Op1, createSequentialMask(Builder, StartMask, LaneLen, 0)));
14860 : }
14861 : }
14862 :
14863 26 : Ops.push_back(Builder.getInt32(SI->getAlignment()));
14864 26 : Builder.CreateCall(VstNFunc, Ops);
14865 : }
14866 : return true;
14867 : }
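
For the "more general valid mask" example in the comment above (Factor = 3, LaneLen = 4), each sub-vector's shuffle mask starts at Mask[i] and runs sequentially for LaneLen elements; isReInterleaveMask has already guaranteed that property for the defined lanes. A small standalone check of that invariant on the example mask:

    #include <cassert>

    int main() {
      // Re-interleave mask from the Factor = 3 example above.
      const int Mask[] = {4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19};
      const unsigned Factor = 3, LaneLen = 4;

      // Sub-vector i becomes shuffle(..., <Start, Start+1, ..., Start+LaneLen-1>)
      // with Start = Mask[i]; the lanes listed for field i are sequential.
      for (unsigned i = 0; i < Factor; ++i) {
        int Start = Mask[i];           // 4, 32, 16 -> matches the doc comment
        for (unsigned j = 0; j < LaneLen; ++j)
          assert(Mask[j * Factor + i] == Start + static_cast<int>(j));
      }
      return 0;
    }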
14868 :
14869 : enum HABaseType {
14870 : HA_UNKNOWN = 0,
14871 : HA_FLOAT,
14872 : HA_DOUBLE,
14873 : HA_VECT64,
14874 : HA_VECT128
14875 : };
14876 :
14877 5585 : static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
14878 : uint64_t &Members) {
14879 : if (auto *ST = dyn_cast<StructType>(Ty)) {
14880 78 : for (unsigned i = 0; i < ST->getNumElements(); ++i) {
14881 54 : uint64_t SubMembers = 0;
14882 108 : if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))
14883 13 : return false;
14884 41 : Members += SubMembers;
14885 : }
14886 : } else if (auto *AT = dyn_cast<ArrayType>(Ty)) {
14887 48 : uint64_t SubMembers = 0;
14888 48 : if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))
14889 13 : return false;
14890 35 : Members += SubMembers * AT->getNumElements();
14891 5500 : } else if (Ty->isFloatTy()) {
14892 1412 : if (Base != HA_UNKNOWN && Base != HA_FLOAT)
14893 : return false;
14894 1412 : Members = 1;
14895 1412 : Base = HA_FLOAT;
14896 4088 : } else if (Ty->isDoubleTy()) {
14897 1063 : if (Base != HA_UNKNOWN && Base != HA_DOUBLE)
14898 : return false;
14899 1063 : Members = 1;
14900 1063 : Base = HA_DOUBLE;
14901 : } else if (auto *VT = dyn_cast<VectorType>(Ty)) {
14902 630 : Members = 1;
14903 630 : switch (Base) {
14904 : case HA_FLOAT:
14905 : case HA_DOUBLE:
14906 : return false;
14907 : case HA_VECT64:
14908 0 : return VT->getBitWidth() == 64;
14909 : case HA_VECT128:
14910 0 : return VT->getBitWidth() == 128;
14911 : case HA_UNKNOWN:
14912 630 : switch (VT->getBitWidth()) {
14913 315 : case 64:
14914 315 : Base = HA_VECT64;
14915 315 : return true;
14916 301 : case 128:
14917 301 : Base = HA_VECT128;
14918 301 : return true;
14919 : default:
14920 : return false;
14921 : }
14922 : }
14923 : }
14924 :
14925 4929 : return (Members > 0 && Members <= 4);
14926 : }
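
For reference, a homogeneous aggregate here is at most four members of a single base type (float, double, or a 64-/128-bit vector), with nested structs and arrays flattened. Some illustrative C-level aggregates and how the classifier above would treat their LLVM struct/array equivalents (a hedged sketch of the rules, not output of this function):

    struct HA2f   { float x, y; };         // HA: base float,  2 members
    struct HA4d   { double m[4]; };        // HA: base double, 4 members
    struct Nested { HA2f a; float b; };    // HA: base float,  3 members (flattened)
    struct TooBig { float m[5]; };         // not an HA: 5 members > 4
    struct Mixed  { float a; double b; };  // not an HA: mixed base types

    int main() {
      // Nothing to execute; the structs above only illustrate the classification.
      return 0;
    }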
14927 :
14928 : /// Return the correct alignment for the current calling convention.
14929 : unsigned
14930 33281 : ARMTargetLowering::getABIAlignmentForCallingConv(Type *ArgTy,
14931 : DataLayout DL) const {
14932 33281 : if (!ArgTy->isVectorTy())
14933 31839 : return DL.getABITypeAlignment(ArgTy);
14934 :
14935 : // Avoid over-aligning vector parameters. It would require realigning the
14936 : // stack and waste space for no real benefit.
14937 1487 : return std::min(DL.getABITypeAlignment(ArgTy), DL.getStackAlignment());
14938 : }
14939 :
14940 : /// Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
14941 : /// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
14942 : /// passing according to AAPCS rules.
14943 34777 : bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
14944 : Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
14945 34777 : if (getEffectiveCallingConv(CallConv, isVarArg) !=
14946 : CallingConv::ARM_AAPCS_VFP)
14947 : return false;
14948 :
14949 5483 : HABaseType Base = HA_UNKNOWN;
14950 5483 : uint64_t Members = 0;
14951 5483 : bool IsHA = isHomogeneousAggregate(Ty, Base, Members);
14952 : LLVM_DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump());
14953 :
14954 5483 : bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
14955 5483 : return IsHA || IsIntArray;
14956 : }
14957 :
14958 448 : unsigned ARMTargetLowering::getExceptionPointerRegister(
14959 : const Constant *PersonalityFn) const {
14960 : // Platforms which do not use SjLj EH may return values in these registers
14961 : // via the personality function.
14962 448 : return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R0;
14963 : }
14964 :
14965 348 : unsigned ARMTargetLowering::getExceptionSelectorRegister(
14966 : const Constant *PersonalityFn) const {
14967 : // Platforms which do not use SjLj EH may return values in these registers
14968 : // via the personality function.
14969 348 : return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R1;
14970 : }
14971 :
14972 15 : void ARMTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
14973 : // Update IsSplitCSR in ARMFunctionInfo.
14974 15 : ARMFunctionInfo *AFI = Entry->getParent()->getInfo<ARMFunctionInfo>();
14975 : AFI->setIsSplitCSR(true);
14976 15 : }
14977 :
14978 15 : void ARMTargetLowering::insertCopiesSplitCSR(
14979 : MachineBasicBlock *Entry,
14980 : const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
14981 15 : const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
14982 15 : const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
14983 15 : if (!IStart)
14984 0 : return;
14985 :
14986 15 : const TargetInstrInfo *TII = Subtarget->getInstrInfo();
14987 15 : MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
14988 15 : MachineBasicBlock::iterator MBBI = Entry->begin();
14989 600 : for (const MCPhysReg *I = IStart; *I; ++I) {
14990 : const TargetRegisterClass *RC = nullptr;
14991 585 : if (ARM::GPRRegClass.contains(*I))
14992 : RC = &ARM::GPRRegClass;
14993 480 : else if (ARM::DPRRegClass.contains(*I))
14994 : RC = &ARM::DPRRegClass;
14995 : else
14996 0 : llvm_unreachable("Unexpected register class in CSRsViaCopy!");
14997 :
14998 585 : unsigned NewVR = MRI->createVirtualRegister(RC);
14999 : // Create copy from CSR to a virtual register.
15000 : // FIXME: this currently does not emit CFI pseudo-instructions, it works
15001 : // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
15002 : // nounwind. If we want to generalize this later, we may need to emit
15003 : // CFI pseudo-instructions.
15004 : assert(Entry->getParent()->getFunction().hasFnAttribute(
15005 : Attribute::NoUnwind) &&
15006 : "Function should be nounwind in insertCopiesSplitCSR!");
15007 585 : Entry->addLiveIn(*I);
15008 1755 : BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
15009 585 : .addReg(*I);
15010 :
15011 : // Insert the copy-back instructions right before the terminator.
15012 1170 : for (auto *Exit : Exits)
15013 585 : BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
15014 1170 : TII->get(TargetOpcode::COPY), *I)
15015 585 : .addReg(NewVR);
15016 : }
15017 : }
15018 :
15019 14772 : void ARMTargetLowering::finalizeLowering(MachineFunction &MF) const {
15020 14772 : MF.getFrameInfo().computeMaxCallFrameSize(MF);
15021 14772 : TargetLoweringBase::finalizeLowering(MF);
15022 14772 : }
|