Line data Source code
1 : //===- ARMFastISel.cpp - ARM FastISel implementation ----------------------===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : // This file defines the ARM-specific support for the FastISel class. Some
11 : // of the target-specific code is generated by tablegen in the file
12 : // ARMGenFastISel.inc, which is #included here.
13 : //
14 : //===----------------------------------------------------------------------===//
15 :
16 : #include "ARM.h"
17 : #include "ARMBaseInstrInfo.h"
18 : #include "ARMBaseRegisterInfo.h"
19 : #include "ARMCallingConv.h"
20 : #include "ARMConstantPoolValue.h"
21 : #include "ARMISelLowering.h"
22 : #include "ARMMachineFunctionInfo.h"
23 : #include "ARMSubtarget.h"
24 : #include "MCTargetDesc/ARMAddressingModes.h"
25 : #include "MCTargetDesc/ARMBaseInfo.h"
26 : #include "Utils/ARMBaseInfo.h"
27 : #include "llvm/ADT/APFloat.h"
28 : #include "llvm/ADT/APInt.h"
29 : #include "llvm/ADT/DenseMap.h"
30 : #include "llvm/ADT/SmallVector.h"
31 : #include "llvm/CodeGen/CallingConvLower.h"
32 : #include "llvm/CodeGen/FastISel.h"
33 : #include "llvm/CodeGen/FunctionLoweringInfo.h"
34 : #include "llvm/CodeGen/ISDOpcodes.h"
35 : #include "llvm/CodeGen/MachineBasicBlock.h"
36 : #include "llvm/CodeGen/MachineConstantPool.h"
37 : #include "llvm/CodeGen/MachineFrameInfo.h"
38 : #include "llvm/CodeGen/MachineFunction.h"
39 : #include "llvm/CodeGen/MachineInstr.h"
40 : #include "llvm/CodeGen/MachineInstrBuilder.h"
41 : #include "llvm/CodeGen/MachineMemOperand.h"
42 : #include "llvm/CodeGen/MachineOperand.h"
43 : #include "llvm/CodeGen/MachineRegisterInfo.h"
44 : #include "llvm/CodeGen/RuntimeLibcalls.h"
45 : #include "llvm/CodeGen/TargetInstrInfo.h"
46 : #include "llvm/CodeGen/TargetLowering.h"
47 : #include "llvm/CodeGen/TargetOpcodes.h"
48 : #include "llvm/CodeGen/TargetRegisterInfo.h"
49 : #include "llvm/CodeGen/ValueTypes.h"
50 : #include "llvm/IR/Argument.h"
51 : #include "llvm/IR/Attributes.h"
52 : #include "llvm/IR/CallSite.h"
53 : #include "llvm/IR/CallingConv.h"
54 : #include "llvm/IR/Constant.h"
55 : #include "llvm/IR/Constants.h"
56 : #include "llvm/IR/DataLayout.h"
57 : #include "llvm/IR/DerivedTypes.h"
58 : #include "llvm/IR/Function.h"
59 : #include "llvm/IR/GetElementPtrTypeIterator.h"
60 : #include "llvm/IR/GlobalValue.h"
61 : #include "llvm/IR/GlobalVariable.h"
62 : #include "llvm/IR/InstrTypes.h"
63 : #include "llvm/IR/Instruction.h"
64 : #include "llvm/IR/Instructions.h"
65 : #include "llvm/IR/IntrinsicInst.h"
66 : #include "llvm/IR/Intrinsics.h"
67 : #include "llvm/IR/Module.h"
68 : #include "llvm/IR/Operator.h"
69 : #include "llvm/IR/Type.h"
70 : #include "llvm/IR/User.h"
71 : #include "llvm/IR/Value.h"
72 : #include "llvm/MC/MCInstrDesc.h"
73 : #include "llvm/MC/MCRegisterInfo.h"
74 : #include "llvm/Support/Casting.h"
75 : #include "llvm/Support/Compiler.h"
76 : #include "llvm/Support/ErrorHandling.h"
77 : #include "llvm/Support/MachineValueType.h"
78 : #include "llvm/Support/MathExtras.h"
79 : #include "llvm/Target/TargetMachine.h"
80 : #include "llvm/Target/TargetOptions.h"
81 : #include <cassert>
82 : #include <cstdint>
83 : #include <utility>
84 :
85 : using namespace llvm;
86 :
87 : namespace {
88 :
89 : // All possible address modes, plus some.
90 : struct Address {
91 : enum {
92 : RegBase,
93 : FrameIndexBase
94 : } BaseType = RegBase;
95 :
96 : union {
97 : unsigned Reg;
98 : int FI;
99 : } Base;
100 :
101 : int Offset = 0;
102 :
103 : // Innocuous defaults for our address.
104 1538 : Address() {
105 962 : Base.Reg = 0;
106 : }
107 : };
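// Editorial sketch (not part of the original source): how the rest of this
// file typically uses Address, based on SelectLoad() further down --
// ARMComputeAddress() fills in the base (virtual register or frame index)
// and folds constant GEP offsets into Offset; ARMSimplifyAddress() later
// rewrites any offset the chosen load/store encoding cannot represent.
//
//   Address Addr;
//   if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;
//   // Addr.BaseType is RegBase or FrameIndexBase; Addr.Offset holds the
//   // folded-in constant displacement.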
108 :
109 : class ARMFastISel final : public FastISel {
110 : /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
111 : /// make the right decision when generating code for different targets.
112 : const ARMSubtarget *Subtarget;
113 : Module &M;
114 : const TargetMachine &TM;
115 : const TargetInstrInfo &TII;
116 : const TargetLowering &TLI;
117 : ARMFunctionInfo *AFI;
118 :
119 : // Convenience variables to avoid some queries.
120 : bool isThumb2;
121 : LLVMContext *Context;
122 :
123 : public:
124 901 : explicit ARMFastISel(FunctionLoweringInfo &funcInfo,
125 : const TargetLibraryInfo *libInfo)
126 901 : : FastISel(funcInfo, libInfo),
127 : Subtarget(
128 901 : &static_cast<const ARMSubtarget &>(funcInfo.MF->getSubtarget())),
129 901 : M(const_cast<Module &>(*funcInfo.Fn->getParent())),
130 1802 : TM(funcInfo.MF->getTarget()), TII(*Subtarget->getInstrInfo()),
131 901 : TLI(*Subtarget->getTargetLowering()) {
132 901 : AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
133 901 : isThumb2 = AFI->isThumbFunction();
134 901 : Context = &funcInfo.Fn->getContext();
135 901 : }
136 :
137 : private:
138 : // Code from FastISel.cpp.
139 :
140 : unsigned fastEmitInst_r(unsigned MachineInstOpcode,
141 : const TargetRegisterClass *RC,
142 : unsigned Op0, bool Op0IsKill);
143 : unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
144 : const TargetRegisterClass *RC,
145 : unsigned Op0, bool Op0IsKill,
146 : unsigned Op1, bool Op1IsKill);
147 : unsigned fastEmitInst_ri(unsigned MachineInstOpcode,
148 : const TargetRegisterClass *RC,
149 : unsigned Op0, bool Op0IsKill,
150 : uint64_t Imm);
151 : unsigned fastEmitInst_i(unsigned MachineInstOpcode,
152 : const TargetRegisterClass *RC,
153 : uint64_t Imm);
154 :
155 : // Backend specific FastISel code.
156 :
157 : bool fastSelectInstruction(const Instruction *I) override;
158 : unsigned fastMaterializeConstant(const Constant *C) override;
159 : unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
160 : bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
161 : const LoadInst *LI) override;
162 : bool fastLowerArguments() override;
163 :
164 : #include "ARMGenFastISel.inc"
165 :
166 : // Instruction selection routines.
167 :
168 : bool SelectLoad(const Instruction *I);
169 : bool SelectStore(const Instruction *I);
170 : bool SelectBranch(const Instruction *I);
171 : bool SelectIndirectBr(const Instruction *I);
172 : bool SelectCmp(const Instruction *I);
173 : bool SelectFPExt(const Instruction *I);
174 : bool SelectFPTrunc(const Instruction *I);
175 : bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
176 : bool SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode);
177 : bool SelectIToFP(const Instruction *I, bool isSigned);
178 : bool SelectFPToI(const Instruction *I, bool isSigned);
179 : bool SelectDiv(const Instruction *I, bool isSigned);
180 : bool SelectRem(const Instruction *I, bool isSigned);
181 : bool SelectCall(const Instruction *I, const char *IntrMemName);
182 : bool SelectIntrinsicCall(const IntrinsicInst &I);
183 : bool SelectSelect(const Instruction *I);
184 : bool SelectRet(const Instruction *I);
185 : bool SelectTrunc(const Instruction *I);
186 : bool SelectIntExt(const Instruction *I);
187 : bool SelectShift(const Instruction *I, ARM_AM::ShiftOpc ShiftTy);
188 :
189 : // Utility routines.
190 :
191 : bool isPositionIndependent() const;
192 : bool isTypeLegal(Type *Ty, MVT &VT);
193 : bool isLoadTypeLegal(Type *Ty, MVT &VT);
194 : bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
195 : bool isZExt, bool isEquality);
196 : bool ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
197 : unsigned Alignment = 0, bool isZExt = true,
198 : bool allocReg = true);
199 : bool ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
200 : unsigned Alignment = 0);
201 : bool ARMComputeAddress(const Value *Obj, Address &Addr);
202 : void ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3);
203 : bool ARMIsMemCpySmall(uint64_t Len);
204 : bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
205 : unsigned Alignment);
206 : unsigned ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
207 : unsigned ARMMaterializeFP(const ConstantFP *CFP, MVT VT);
208 : unsigned ARMMaterializeInt(const Constant *C, MVT VT);
209 : unsigned ARMMaterializeGV(const GlobalValue *GV, MVT VT);
210 : unsigned ARMMoveToFPReg(MVT VT, unsigned SrcReg);
211 : unsigned ARMMoveToIntReg(MVT VT, unsigned SrcReg);
212 : unsigned ARMSelectCallOp(bool UseReg);
213 : unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT);
214 :
215 : const TargetLowering *getTargetLowering() { return &TLI; }
216 :
217 : // Call handling routines.
218 :
219 : CCAssignFn *CCAssignFnForCall(CallingConv::ID CC,
220 : bool Return,
221 : bool isVarArg);
222 : bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
223 : SmallVectorImpl<unsigned> &ArgRegs,
224 : SmallVectorImpl<MVT> &ArgVTs,
225 : SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
226 : SmallVectorImpl<unsigned> &RegArgs,
227 : CallingConv::ID CC,
228 : unsigned &NumBytes,
229 : bool isVarArg);
230 : unsigned getLibcallReg(const Twine &Name);
231 : bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
232 : const Instruction *I, CallingConv::ID CC,
233 : unsigned &NumBytes, bool isVarArg);
234 : bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);
235 :
236 : // OptionalDef handling routines.
237 :
238 : bool isARMNEONPred(const MachineInstr *MI);
239 : bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
240 : const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
241 : void AddLoadStoreOperands(MVT VT, Address &Addr,
242 : const MachineInstrBuilder &MIB,
243 : MachineMemOperand::Flags Flags, bool useAM3);
244 : };
245 :
246 : } // end anonymous namespace
247 :
248 : #include "ARMGenCallingConv.inc"
249 :
250 : // DefinesOptionalPredicate - This is different from DefinesPredicate in that
251 : // we don't care about implicit defs here, just places we'll need to add a
252 : // default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
253 0 : bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
254 0 : if (!MI->hasOptionalDef())
255 0 : return false;
256 :
257 : // Look to see if our OptionalDef is defining CPSR or CCR.
258 0 : for (const MachineOperand &MO : MI->operands()) {
259 0 : if (!MO.isReg() || !MO.isDef()) continue;
260 0 : if (MO.getReg() == ARM::CPSR)
261 0 : *CPSR = true;
262 : }
263 : return true;
264 : }
265 :
266 0 : bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
267 0 : const MCInstrDesc &MCID = MI->getDesc();
268 :
269 :   // If this isn't a NEON instruction, or we're a Thumb2 function, we'll be handled via isPredicable.
270 0 : if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
271 0 : AFI->isThumb2Function())
272 0 : return MI->isPredicable();
273 :
274 0 : for (const MCOperandInfo &opInfo : MCID.operands())
275 0 : if (opInfo.isPredicate())
276 0 : return true;
277 :
278 : return false;
279 : }
280 :
281 : // If the machine instruction is predicable, go ahead and add the predicate
282 : // operands; if it needs default CC operands, add those too.
283 : // TODO: If we want to support thumb1 then we'll need to deal with optional
284 : // CPSR defs that need to be added before the remaining operands. See s_cc_out
285 : // for descriptions why.
286 : const MachineInstrBuilder &
287 5855 : ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
288 5855 : MachineInstr *MI = &*MIB;
289 :
290 : // Do we use a predicate? or...
291 : // Are we NEON in ARM mode and have a predicate operand? If so, I know
292 : // we're not predicable but add it anyways.
293 5855 : if (isARMNEONPred(MI))
294 5596 : MIB.add(predOps(ARMCC::AL));
295 :
296 : // Do we optionally set a predicate? Preds is size > 0 iff the predicate
297 : // defines CPSR. All other OptionalDefines in ARM are the CCR register.
298 5855 : bool CPSR = false;
299 5855 : if (DefinesOptionalPredicate(MI, &CPSR))
300 1686 : MIB.add(CPSR ? t1CondCodeOp() : condCodeOp());
301 5855 : return MIB;
302 : }
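// Editorial sketch (not part of the original source): the usual pattern in
// this file is to wrap a BuildMI() chain in AddOptionalDefs(), e.g.
//
//   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
//                           TII.get(Opc), ResultReg).addImm(Imm));
//
// For a predicable instruction this appends the "always execute" predicate
// (ARMCC::AL plus a null register operand), and for an instruction with an
// optional s-bit definition it appends a null condition-code operand so the
// flags-setting form is not selected.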
303 :
304 83 : unsigned ARMFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
305 : const TargetRegisterClass *RC,
306 : unsigned Op0, bool Op0IsKill) {
307 83 : unsigned ResultReg = createResultReg(RC);
308 83 : const MCInstrDesc &II = TII.get(MachineInstOpcode);
309 :
310 : // Make sure the input operand is sufficiently constrained to be legal
311 : // for this instruction.
312 83 : Op0 = constrainOperandRegClass(II, Op0, 1);
313 83 : if (II.getNumDefs() >= 1) {
314 83 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
315 83 : ResultReg).addReg(Op0, Op0IsKill * RegState::Kill));
316 : } else {
317 0 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
318 0 : .addReg(Op0, Op0IsKill * RegState::Kill));
319 0 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
320 0 : TII.get(TargetOpcode::COPY), ResultReg)
321 0 : .addReg(II.ImplicitDefs[0]));
322 : }
323 83 : return ResultReg;
324 : }
325 :
326 148 : unsigned ARMFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
327 : const TargetRegisterClass *RC,
328 : unsigned Op0, bool Op0IsKill,
329 : unsigned Op1, bool Op1IsKill) {
330 148 : unsigned ResultReg = createResultReg(RC);
331 148 : const MCInstrDesc &II = TII.get(MachineInstOpcode);
332 :
333 : // Make sure the input operands are sufficiently constrained to be legal
334 : // for this instruction.
335 148 : Op0 = constrainOperandRegClass(II, Op0, 1);
336 148 : Op1 = constrainOperandRegClass(II, Op1, 2);
337 :
338 148 : if (II.getNumDefs() >= 1) {
339 : AddOptionalDefs(
340 296 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
341 148 : .addReg(Op0, Op0IsKill * RegState::Kill)
342 148 : .addReg(Op1, Op1IsKill * RegState::Kill));
343 : } else {
344 0 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
345 0 : .addReg(Op0, Op0IsKill * RegState::Kill)
346 0 : .addReg(Op1, Op1IsKill * RegState::Kill));
347 0 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
348 0 : TII.get(TargetOpcode::COPY), ResultReg)
349 0 : .addReg(II.ImplicitDefs[0]));
350 : }
351 148 : return ResultReg;
352 : }
353 :
354 465 : unsigned ARMFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
355 : const TargetRegisterClass *RC,
356 : unsigned Op0, bool Op0IsKill,
357 : uint64_t Imm) {
358 465 : unsigned ResultReg = createResultReg(RC);
359 465 : const MCInstrDesc &II = TII.get(MachineInstOpcode);
360 :
361 : // Make sure the input operand is sufficiently constrained to be legal
362 : // for this instruction.
363 465 : Op0 = constrainOperandRegClass(II, Op0, 1);
364 465 : if (II.getNumDefs() >= 1) {
365 : AddOptionalDefs(
366 930 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
367 465 : .addReg(Op0, Op0IsKill * RegState::Kill)
368 465 : .addImm(Imm));
369 : } else {
370 0 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
371 0 : .addReg(Op0, Op0IsKill * RegState::Kill)
372 0 : .addImm(Imm));
373 0 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
374 0 : TII.get(TargetOpcode::COPY), ResultReg)
375 0 : .addReg(II.ImplicitDefs[0]));
376 : }
377 465 : return ResultReg;
378 : }
379 :
380 15 : unsigned ARMFastISel::fastEmitInst_i(unsigned MachineInstOpcode,
381 : const TargetRegisterClass *RC,
382 : uint64_t Imm) {
383 15 : unsigned ResultReg = createResultReg(RC);
384 15 : const MCInstrDesc &II = TII.get(MachineInstOpcode);
385 :
386 15 : if (II.getNumDefs() >= 1) {
387 15 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
388 15 : ResultReg).addImm(Imm));
389 : } else {
390 0 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
391 0 : .addImm(Imm));
392 0 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
393 0 : TII.get(TargetOpcode::COPY), ResultReg)
394 0 : .addReg(II.ImplicitDefs[0]));
395 : }
396 15 : return ResultReg;
397 : }
398 :
399 : // TODO: Don't worry about 64-bit now, but when this is fixed remove the
400 : // checks from the various callers.
401 37 : unsigned ARMFastISel::ARMMoveToFPReg(MVT VT, unsigned SrcReg) {
402 37 : if (VT == MVT::f64) return 0;
403 :
404 37 : unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
405 74 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
406 74 : TII.get(ARM::VMOVSR), MoveReg)
407 37 : .addReg(SrcReg));
408 37 : return MoveReg;
409 : }
410 :
411 12 : unsigned ARMFastISel::ARMMoveToIntReg(MVT VT, unsigned SrcReg) {
412 12 : if (VT == MVT::i64) return 0;
413 :
414 12 : unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
415 24 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
416 24 : TII.get(ARM::VMOVRS), MoveReg)
417 12 : .addReg(SrcReg));
418 12 : return MoveReg;
419 : }
420 :
421 : // For double width floating point we need to materialize two constants
422 : // (the high and the low) into integer registers then use a move to get
423 : // the combined constant into an FP reg.
424 309 : unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) {
425 : const APFloat Val = CFP->getValueAPF();
426 309 : bool is64bit = VT == MVT::f64;
427 :
428 : // This checks to see if we can use VFP3 instructions to materialize
429 : // a constant, otherwise we have to go through the constant pool.
430 618 : if (TLI.isFPImmLegal(Val, VT)) {
431 : int Imm;
432 : unsigned Opc;
433 47 : if (is64bit) {
434 1 : Imm = ARM_AM::getFP64Imm(Val);
435 : Opc = ARM::FCONSTD;
436 : } else {
437 46 : Imm = ARM_AM::getFP32Imm(Val);
438 : Opc = ARM::FCONSTS;
439 : }
440 47 : unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
441 94 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
442 94 : TII.get(Opc), DestReg).addImm(Imm));
443 47 : return DestReg;
444 : }
445 :
446 : // Require VFP2 for loading fp constants.
447 262 : if (!Subtarget->hasVFP2()) return false;
448 :
449 : // MachineConstantPool wants an explicit alignment.
450 262 : unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
451 262 : if (Align == 0) {
452 : // TODO: Figure out if this is correct.
453 0 : Align = DL.getTypeAllocSize(CFP->getType());
454 : }
455 262 : unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
456 262 : unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
457 262 : unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;
458 :
459 : // The extra reg is for addrmode5.
460 : AddOptionalDefs(
461 524 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
462 : .addConstantPoolIndex(Idx)
463 262 : .addReg(0));
464 262 : return DestReg;
465 : }
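// Editorial note (not part of the original source): the function above has
// two paths. A value such as 1.0f passes TLI.isFPImmLegal() on a VFP3 target
// and becomes a single FCONSTS (an 8-bit encoded vmov.f32 immediate), while a
// value such as 0.1f is not encodable and is instead placed in the constant
// pool and loaded with VLDRS (VLDRD for f64).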
466 :
467 514 : unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
468 514 : if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
469 : return 0;
470 :
471 : // If we can do this in a single instruction without a constant pool entry
472 : // do so now.
473 : const ConstantInt *CI = cast<ConstantInt>(C);
474 1025 : if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) {
475 304 : unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16;
476 304 : const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
477 : &ARM::GPRRegClass;
478 304 : unsigned ImmReg = createResultReg(RC);
479 608 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
480 608 : TII.get(Opc), ImmReg)
481 304 : .addImm(CI->getZExtValue()));
482 304 : return ImmReg;
483 : }
484 :
485 : // Use MVN to emit negative constants.
486 417 : if (VT == MVT::i32 && Subtarget->hasV6T2Ops() && CI->isNegative()) {
487 205 : unsigned Imm = (unsigned)~(CI->getSExtValue());
488 205 : bool UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
489 110 : (ARM_AM::getSOImmVal(Imm) != -1);
490 205 : if (UseImm) {
491 190 : unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi;
492 190 : const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
493 : &ARM::GPRRegClass;
494 190 : unsigned ImmReg = createResultReg(RC);
495 380 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
496 380 : TII.get(Opc), ImmReg)
497 190 : .addImm(Imm));
498 190 : return ImmReg;
499 : }
500 : }
501 :
502 : unsigned ResultReg = 0;
503 20 : if (Subtarget->useMovt(*FuncInfo.MF))
504 : ResultReg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
505 :
506 6 : if (ResultReg)
507 : return ResultReg;
508 :
509 : // Load from constant pool. For now 32-bit only.
510 16 : if (VT != MVT::i32)
511 : return 0;
512 :
513 : // MachineConstantPool wants an explicit alignment.
514 16 : unsigned Align = DL.getPrefTypeAlignment(C->getType());
515 16 : if (Align == 0) {
516 : // TODO: Figure out if this is correct.
517 0 : Align = DL.getTypeAllocSize(C->getType());
518 : }
519 16 : unsigned Idx = MCP.getConstantPoolIndex(C, Align);
520 16 : ResultReg = createResultReg(TLI.getRegClassFor(VT));
521 16 : if (isThumb2)
522 7 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
523 14 : TII.get(ARM::t2LDRpci), ResultReg)
524 7 : .addConstantPoolIndex(Idx));
525 : else {
526 : // The extra immediate is for addrmode2.
527 18 : ResultReg = constrainOperandRegClass(TII.get(ARM::LDRcp), ResultReg, 0);
528 9 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
529 18 : TII.get(ARM::LDRcp), ResultReg)
530 : .addConstantPoolIndex(Idx)
531 9 : .addImm(0));
532 : }
533 : return ResultReg;
534 : }
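// Worked example for the MVN path above (editorial, not part of the original
// source): materializing the i32 constant -7 computes Imm = ~(-7) = 6, which
// is a valid modified immediate, so a single instruction suffices:
//   mvn r0, #6        ; r0 = ~6 = 0xFFFFFFF9 = -7
// Only when neither MOVi16, MVN, nor movw/movt applies does the code fall
// back to a constant-pool load.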
535 :
536 0 : bool ARMFastISel::isPositionIndependent() const {
537 0 : return TLI.isPositionIndependent();
538 : }
539 :
540 259 : unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
541 : // For now 32-bit only.
542 259 : if (VT != MVT::i32 || GV->isThreadLocal()) return 0;
543 :
544 : // ROPI/RWPI not currently supported.
545 248 : if (Subtarget->isROPI() || Subtarget->isRWPI())
546 0 : return 0;
547 :
548 248 : bool IsIndirect = Subtarget->isGVIndirectSymbol(GV);
549 248 : const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
550 : : &ARM::GPRRegClass;
551 248 : unsigned DestReg = createResultReg(RC);
552 :
553 : // FastISel TLS support on non-MachO is broken, punt to SelectionDAG.
554 : const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
555 180 : bool IsThreadLocal = GVar && GVar->isThreadLocal();
556 496 : if (!Subtarget->isTargetMachO() && IsThreadLocal) return 0;
557 :
558 248 : bool IsPositionIndependent = isPositionIndependent();
559 : // Use movw+movt when possible, it avoids constant pool entries.
560 : // Non-darwin targets only support static movt relocations in FastISel.
561 248 : if (Subtarget->useMovt(*FuncInfo.MF) &&
562 246 : (Subtarget->isTargetMachO() || !IsPositionIndependent)) {
563 : unsigned Opc;
564 : unsigned char TF = 0;
565 241 : if (Subtarget->isTargetMachO())
566 : TF = ARMII::MO_NONLAZY;
567 :
568 241 : if (IsPositionIndependent)
569 60 : Opc = isThumb2 ? ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel;
570 : else
571 181 : Opc = isThumb2 ? ARM::t2MOVi32imm : ARM::MOVi32imm;
572 241 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
573 723 : TII.get(Opc), DestReg).addGlobalAddress(GV, 0, TF));
574 : } else {
575 : // MachineConstantPool wants an explicit alignment.
576 14 : unsigned Align = DL.getPrefTypeAlignment(GV->getType());
577 7 : if (Align == 0) {
578 : // TODO: Figure out if this is correct.
579 0 : Align = DL.getTypeAllocSize(GV->getType());
580 : }
581 :
582 14 : if (Subtarget->isTargetELF() && IsPositionIndependent)
583 6 : return ARMLowerPICELF(GV, Align, VT);
584 :
585 : // Grab index.
586 2 : unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
587 2 : unsigned Id = AFI->createPICLabelUId();
588 2 : ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
589 : ARMCP::CPValue,
590 : PCAdj);
591 2 : unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
592 :
593 : // Load value.
594 2 : MachineInstrBuilder MIB;
595 2 : if (isThumb2) {
596 1 : unsigned Opc = IsPositionIndependent ? ARM::t2LDRpci_pic : ARM::t2LDRpci;
597 3 : MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
598 2 : DestReg).addConstantPoolIndex(Idx);
599 1 : if (IsPositionIndependent)
600 1 : MIB.addImm(Id);
601 1 : AddOptionalDefs(MIB);
602 : } else {
603 : // The extra immediate is for addrmode2.
604 2 : DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0);
605 2 : MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
606 2 : TII.get(ARM::LDRcp), DestReg)
607 : .addConstantPoolIndex(Idx)
608 : .addImm(0);
609 1 : AddOptionalDefs(MIB);
610 :
611 1 : if (IsPositionIndependent) {
612 1 : unsigned Opc = IsIndirect ? ARM::PICLDR : ARM::PICADD;
613 1 : unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
614 :
615 2 : MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
616 2 : DbgLoc, TII.get(Opc), NewDestReg)
617 1 : .addReg(DestReg)
618 1 : .addImm(Id);
619 1 : AddOptionalDefs(MIB);
620 : return NewDestReg;
621 : }
622 : }
623 : }
624 :
625 242 : if (IsIndirect) {
626 84 : MachineInstrBuilder MIB;
627 84 : unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
628 84 : if (isThumb2)
629 49 : MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
630 98 : TII.get(ARM::t2LDRi12), NewDestReg)
631 49 : .addReg(DestReg)
632 : .addImm(0);
633 : else
634 35 : MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
635 70 : TII.get(ARM::LDRi12), NewDestReg)
636 35 : .addReg(DestReg)
637 : .addImm(0);
638 : DestReg = NewDestReg;
639 84 : AddOptionalDefs(MIB);
640 : }
641 :
642 : return DestReg;
643 : }
644 :
645 1172 : unsigned ARMFastISel::fastMaterializeConstant(const Constant *C) {
646 1172 : EVT CEVT = TLI.getValueType(DL, C->getType(), true);
647 :
648 : // Only handle simple types.
649 1172 : if (!CEVT.isSimple()) return 0;
650 1172 : MVT VT = CEVT.getSimpleVT();
651 :
652 : if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
653 309 : return ARMMaterializeFP(CFP, VT);
654 : else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
655 247 : return ARMMaterializeGV(GV, VT);
656 616 : else if (isa<ConstantInt>(C))
657 514 : return ARMMaterializeInt(C, VT);
658 :
659 : return 0;
660 : }
661 :
662 : // TODO: unsigned ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF);
663 :
664 205 : unsigned ARMFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
665 : // Don't handle dynamic allocas.
666 205 : if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
667 :
668 205 : MVT VT;
669 205 : if (!isLoadTypeLegal(AI->getType(), VT)) return 0;
670 :
671 : DenseMap<const AllocaInst*, int>::iterator SI =
672 205 : FuncInfo.StaticAllocaMap.find(AI);
673 :
674 : // This will get lowered later into the correct offsets and registers
675 : // via rewriteXFrameIndex.
676 410 : if (SI != FuncInfo.StaticAllocaMap.end()) {
677 205 : unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
678 205 : const TargetRegisterClass* RC = TLI.getRegClassFor(VT);
679 205 : unsigned ResultReg = createResultReg(RC);
680 410 : ResultReg = constrainOperandRegClass(TII.get(Opc), ResultReg, 0);
681 :
682 410 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
683 410 : TII.get(Opc), ResultReg)
684 205 : .addFrameIndex(SI->second)
685 205 : .addImm(0));
686 205 : return ResultReg;
687 : }
688 :
689 : return 0;
690 : }
691 :
692 0 : bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) {
693 0 : EVT evt = TLI.getValueType(DL, Ty, true);
694 :
695 : // Only handle simple types.
696 0 : if (evt == MVT::Other || !evt.isSimple()) return false;
697 0 : VT = evt.getSimpleVT();
698 :
699 : // Handle all legal types, i.e. a register that will directly hold this
700 : // value.
701 0 : return TLI.isTypeLegal(VT);
702 : }
703 :
704 1302 : bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
705 1302 : if (isTypeLegal(Ty, VT)) return true;
706 :
707 :   // If this is a type that can be sign or zero-extended to a basic operation
708 : // go ahead and accept it now.
709 175 : if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
710 167 : return true;
711 :
712 : return false;
713 : }
714 :
715 : // Computes the address to get to an object.
716 1535 : bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
717 : // Some boilerplate from the X86 FastISel.
718 : const User *U = nullptr;
719 : unsigned Opcode = Instruction::UserOp1;
720 : if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
721 : // Don't walk into other basic blocks unless the object is an alloca from
722 : // another block, otherwise it may not have a virtual register assigned.
723 1653 : if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
724 1100 : FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
725 : Opcode = I->getOpcode();
726 : U = I;
727 : }
728 : } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
729 : Opcode = C->getOpcode();
730 : U = C;
731 : }
732 :
733 1535 : if (PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
734 1532 : if (Ty->getAddressSpace() > 255)
735 : // Fast instruction selection doesn't support the special
736 : // address spaces.
737 : return false;
738 :
739 1535 : switch (Opcode) {
740 : default:
741 : break;
742 : case Instruction::BitCast:
743 : // Look through bitcasts.
744 22 : return ARMComputeAddress(U->getOperand(0), Addr);
745 3 : case Instruction::IntToPtr:
746 : // Look past no-op inttoptrs.
747 12 : if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
748 : TLI.getPointerTy(DL))
749 3 : return ARMComputeAddress(U->getOperand(0), Addr);
750 : break;
751 0 : case Instruction::PtrToInt:
752 : // Look past no-op ptrtoints.
753 0 : if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
754 0 : return ARMComputeAddress(U->getOperand(0), Addr);
755 : break;
756 532 : case Instruction::GetElementPtr: {
757 532 : Address SavedAddr = Addr;
758 532 : int TmpOffset = Addr.Offset;
759 :
760 : // Iterate through the GEP folding the constants into offsets where
761 : // we can.
762 532 : gep_type_iterator GTI = gep_type_begin(U);
763 1540 : for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
764 2548 : i != e; ++i, ++GTI) {
765 1018 : const Value *Op = *i;
766 299 : if (StructType *STy = GTI.getStructTypeOrNull()) {
767 299 : const StructLayout *SL = DL.getStructLayout(STy);
768 299 : unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
769 299 : TmpOffset += SL->getElementOffset(Idx);
770 : } else {
771 719 : uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
772 : while (true) {
773 : if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
774 : // Constant-offset addressing.
775 709 : TmpOffset += CI->getSExtValue() * S;
776 709 : break;
777 : }
778 10 : if (canFoldAddIntoGEP(U, Op)) {
779 : // A compatible add with a constant operand. Fold the constant.
780 : ConstantInt *CI =
781 0 : cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
782 0 : TmpOffset += CI->getSExtValue() * S;
783 : // Iterate on the other operand.
784 : Op = cast<AddOperator>(Op)->getOperand(0);
785 : continue;
786 : }
787 : // Unsupported
788 : goto unsupported_gep;
789 0 : }
790 : }
791 : }
792 :
793 : // Try to grab the base operand now.
794 522 : Addr.Offset = TmpOffset;
795 522 : if (ARMComputeAddress(U->getOperand(0), Addr)) return true;
796 :
797 : // We failed, restore everything and try the other options.
798 2 : Addr = SavedAddr;
799 :
800 12 : unsupported_gep:
801 12 : break;
802 : }
803 : case Instruction::Alloca: {
804 : const AllocaInst *AI = cast<AllocaInst>(Obj);
805 : DenseMap<const AllocaInst*, int>::iterator SI =
806 553 : FuncInfo.StaticAllocaMap.find(AI);
807 1106 : if (SI != FuncInfo.StaticAllocaMap.end()) {
808 553 : Addr.BaseType = Address::FrameIndexBase;
809 553 : Addr.Base.FI = SI->second;
810 553 : return true;
811 : }
812 0 : break;
813 : }
814 : }
815 :
816 : // Try to get this in a register if nothing else has worked.
817 437 : if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj);
818 437 : return Addr.Base.Reg != 0;
819 : }
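// Worked example for the GEP folding above (editorial, not part of the
// original source): for
//   %p = getelementptr { i32, [4 x i16] }, { i32, [4 x i16] }* %s,
//                      i32 0, i32 1, i32 3
// the struct field adds SL->getElementOffset(1) = 4 bytes and the array index
// adds 3 * sizeof(i16) = 6 bytes, so Addr keeps the register holding %s as
// its base with Addr.Offset = 10 and no address arithmetic is emitted.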
820 :
821 1708 : void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) {
822 : bool needsLowering = false;
823 1708 : switch (VT.SimpleTy) {
824 0 : default: llvm_unreachable("Unhandled load/store type!");
825 1136 : case MVT::i1:
826 : case MVT::i8:
827 : case MVT::i16:
828 : case MVT::i32:
829 1136 : if (!useAM3) {
830 : // Integer loads/stores handle 12-bit offsets.
831 1020 : needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
832 : // Handle negative offsets.
833 1020 : if (needsLowering && isThumb2)
834 19 : needsLowering = !(Subtarget->hasV6T2Ops() && Addr.Offset < 0 &&
835 : Addr.Offset > -256);
836 : } else {
837 : // ARM halfword load/stores and signed byte loads use +/-imm8 offsets.
838 116 : needsLowering = (Addr.Offset > 255 || Addr.Offset < -255);
839 : }
840 : break;
841 572 : case MVT::f32:
842 : case MVT::f64:
843 : // Floating point operands handle 8-bit offsets.
844 572 : needsLowering = ((Addr.Offset & 0xff) != Addr.Offset);
845 572 : break;
846 : }
847 :
848 : // If this is a stack pointer and the offset needs to be simplified then
849 : // put the alloca address into a register, set the base type back to
850 : // register and continue. This should almost never happen.
851 1696 : if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
852 0 : const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass
853 : : &ARM::GPRRegClass;
854 0 : unsigned ResultReg = createResultReg(RC);
855 0 : unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
856 0 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
857 0 : TII.get(Opc), ResultReg)
858 0 : .addFrameIndex(Addr.Base.FI)
859 0 : .addImm(0));
860 0 : Addr.Base.Reg = ResultReg;
861 0 : Addr.BaseType = Address::RegBase;
862 : }
863 :
864 : // Since the offset is too large for the load/store instruction
865 : // get the reg+offset into a register.
866 1708 : if (needsLowering) {
867 808 : Addr.Base.Reg = fastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg,
868 404 : /*Op0IsKill*/false, Addr.Offset, MVT::i32);
869 404 : Addr.Offset = 0;
870 : }
871 1708 : }
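// Worked example (editorial, not part of the original source): an i32 access
// at [rBase, #4100] fails the 12-bit check above (4100 & 0xfff is 4, not
// 4100), so fastEmit_ri_() emits an ADD of 4100 into a fresh base register
// and Addr.Offset is reset to 0; the load or store then uses [rNewBase, #0].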
872 :
873 1708 : void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,
874 : const MachineInstrBuilder &MIB,
875 : MachineMemOperand::Flags Flags,
876 : bool useAM3) {
877 :   // addrmode5 expects the offset already divided by 4; SelectionDAG addressing
878 :   // divides it the same way and multiplies it back later. Do the same here.
879 1708 : if (VT.SimpleTy == MVT::f32 || VT.SimpleTy == MVT::f64)
880 572 : Addr.Offset /= 4;
881 :
882 : // Frame base works a bit differently. Handle it separately.
883 1708 : if (Addr.BaseType == Address::FrameIndexBase) {
884 556 : int FI = Addr.Base.FI;
885 556 : int Offset = Addr.Offset;
886 556 : MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
887 556 : MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
888 556 : MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
889 : // Now add the rest of the operands.
890 : MIB.addFrameIndex(FI);
891 :
892 : // ARM halfword load/stores and signed byte loads need an additional
893 : // operand.
894 556 : if (useAM3) {
895 6 : int Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
896 6 : MIB.addReg(0);
897 6 : MIB.addImm(Imm);
898 : } else {
899 550 : MIB.addImm(Addr.Offset);
900 : }
901 : MIB.addMemOperand(MMO);
902 : } else {
903 : // Now add the rest of the operands.
904 1152 : MIB.addReg(Addr.Base.Reg);
905 :
906 : // ARM halfword load/stores and signed byte loads need an additional
907 : // operand.
908 1152 : if (useAM3) {
909 110 : int Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
910 110 : MIB.addReg(0);
911 110 : MIB.addImm(Imm);
912 : } else {
913 1042 : MIB.addImm(Addr.Offset);
914 : }
915 : }
916 1708 : AddOptionalDefs(MIB);
917 1708 : }
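// Editorial note (not part of the original source): the divide-by-4 above
// means an f64 access at byte offset 8 is emitted with Addr.Offset = 2; the
// VLDR/VSTR immediate is a word count that the encoder scales back by 4,
// which is the multiplication the comment at the top of this function refers
// to.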
918 :
919 483 : bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
920 : unsigned Alignment, bool isZExt, bool allocReg) {
921 : unsigned Opc;
922 : bool useAM3 = false;
923 : bool needVMOV = false;
924 : const TargetRegisterClass *RC;
925 483 : switch (VT.SimpleTy) {
926 : // This is mostly going to be Neon/vector support.
927 : default: return false;
928 96 : case MVT::i1:
929 : case MVT::i8:
930 96 : if (isThumb2) {
931 31 : if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
932 2 : Opc = isZExt ? ARM::t2LDRBi8 : ARM::t2LDRSBi8;
933 : else
934 29 : Opc = isZExt ? ARM::t2LDRBi12 : ARM::t2LDRSBi12;
935 : } else {
936 65 : if (isZExt) {
937 : Opc = ARM::LDRBi12;
938 : } else {
939 : Opc = ARM::LDRSB;
940 : useAM3 = true;
941 : }
942 : }
943 96 : RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
944 : break;
945 85 : case MVT::i16:
946 85 : if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
947 : return false;
948 :
949 81 : if (isThumb2) {
950 24 : if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
951 2 : Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8;
952 : else
953 22 : Opc = isZExt ? ARM::t2LDRHi12 : ARM::t2LDRSHi12;
954 : } else {
955 57 : Opc = isZExt ? ARM::LDRH : ARM::LDRSH;
956 : useAM3 = true;
957 : }
958 81 : RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
959 : break;
960 278 : case MVT::i32:
961 278 : if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
962 : return false;
963 :
964 274 : if (isThumb2) {
965 138 : if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
966 : Opc = ARM::t2LDRi8;
967 : else
968 : Opc = ARM::t2LDRi12;
969 : } else {
970 : Opc = ARM::LDRi12;
971 : }
972 274 : RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
973 : break;
974 18 : case MVT::f32:
975 18 : if (!Subtarget->hasVFP2()) return false;
976 : // Unaligned loads need special handling. Floats require word-alignment.
977 18 : if (Alignment && Alignment < 4) {
978 : needVMOV = true;
979 : VT = MVT::i32;
980 8 : Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
981 8 : RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
982 : } else {
983 : Opc = ARM::VLDRS;
984 10 : RC = TLI.getRegClassFor(VT);
985 : }
986 : break;
987 2 : case MVT::f64:
988 2 : if (!Subtarget->hasVFP2()) return false;
989 : // FIXME: Unaligned loads need special handling. Doublewords require
990 : // word-alignment.
991 2 : if (Alignment && Alignment < 4)
992 : return false;
993 :
994 : Opc = ARM::VLDRD;
995 2 : RC = TLI.getRegClassFor(VT);
996 2 : break;
997 : }
998 : // Simplify this down to something we can handle.
999 471 : ARMSimplifyAddress(Addr, VT, useAM3);
1000 :
1001 : // Create the base instruction, then add the operands.
1002 471 : if (allocReg)
1003 420 : ResultReg = createResultReg(RC);
1004 : assert(ResultReg > 255 && "Expected an allocated virtual register.");
1005 471 : MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1006 942 : TII.get(Opc), ResultReg);
1007 471 : AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3);
1008 :
1009 :   // If we had an unaligned load of a float we've converted it to a regular
1010 :   // load. Now we must move from the GPR to the FP register.
1011 471 : if (needVMOV) {
1012 16 : unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32));
1013 16 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1014 16 : TII.get(ARM::VMOVSR), MoveReg)
1015 8 : .addReg(ResultReg));
1016 8 : ResultReg = MoveReg;
1017 : }
1018 : return true;
1019 : }
1020 :
1021 316 : bool ARMFastISel::SelectLoad(const Instruction *I) {
1022 : // Atomic loads need special handling.
1023 316 : if (cast<LoadInst>(I)->isAtomic())
1024 : return false;
1025 :
1026 315 : const Value *SV = I->getOperand(0);
1027 315 : if (TLI.supportSwiftError()) {
1028 : // Swifterror values can come from either a function parameter with
1029 : // swifterror attribute or an alloca with swifterror attribute.
1030 : if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1031 28 : if (Arg->hasSwiftErrorAttr())
1032 : return false;
1033 : }
1034 :
1035 : if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1036 124 : if (Alloca->isSwiftError())
1037 : return false;
1038 : }
1039 : }
1040 :
1041 : // Verify we have a legal type before going any further.
1042 310 : MVT VT;
1043 310 : if (!isLoadTypeLegal(I->getType(), VT))
1044 : return false;
1045 :
1046 : // See if we can handle this address.
1047 : Address Addr;
1048 309 : if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;
1049 :
1050 : unsigned ResultReg;
1051 304 : if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
1052 : return false;
1053 292 : updateValueMap(I, ResultReg);
1054 292 : return true;
1055 : }
1056 :
1057 1250 : bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
1058 : unsigned Alignment) {
1059 : unsigned StrOpc;
1060 : bool useAM3 = false;
1061 1250 : switch (VT.SimpleTy) {
1062 : // This is mostly going to be Neon/vector support.
1063 : default: return false;
1064 11 : case MVT::i1: {
1065 17 : unsigned Res = createResultReg(isThumb2 ? &ARM::tGPRRegClass
1066 : : &ARM::GPRRegClass);
1067 11 : unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
1068 22 : SrcReg = constrainOperandRegClass(TII.get(Opc), SrcReg, 1);
1069 11 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1070 22 : TII.get(Opc), Res)
1071 22 : .addReg(SrcReg).addImm(1));
1072 : SrcReg = Res;
1073 : LLVM_FALLTHROUGH;
1074 : }
1075 102 : case MVT::i8:
1076 102 : if (isThumb2) {
1077 35 : if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
1078 : StrOpc = ARM::t2STRBi8;
1079 : else
1080 : StrOpc = ARM::t2STRBi12;
1081 : } else {
1082 : StrOpc = ARM::STRBi12;
1083 : }
1084 : break;
1085 78 : case MVT::i16:
1086 78 : if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
1087 : return false;
1088 :
1089 74 : if (isThumb2) {
1090 23 : if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
1091 : StrOpc = ARM::t2STRHi8;
1092 : else
1093 : StrOpc = ARM::t2STRHi12;
1094 : } else {
1095 : StrOpc = ARM::STRH;
1096 : useAM3 = true;
1097 : }
1098 : break;
1099 497 : case MVT::i32:
1100 497 : if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
1101 : return false;
1102 :
1103 493 : if (isThumb2) {
1104 298 : if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
1105 : StrOpc = ARM::t2STRi8;
1106 : else
1107 : StrOpc = ARM::t2STRi12;
1108 : } else {
1109 : StrOpc = ARM::STRi12;
1110 : }
1111 : break;
1112 119 : case MVT::f32:
1113 119 : if (!Subtarget->hasVFP2()) return false;
1114 : // Unaligned stores need special handling. Floats require word-alignment.
1115 119 : if (Alignment && Alignment < 4) {
1116 16 : unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
1117 16 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1118 16 : TII.get(ARM::VMOVRS), MoveReg)
1119 8 : .addReg(SrcReg));
1120 : SrcReg = MoveReg;
1121 : VT = MVT::i32;
1122 8 : StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12;
1123 : } else {
1124 : StrOpc = ARM::VSTRS;
1125 : }
1126 : break;
1127 449 : case MVT::f64:
1128 449 : if (!Subtarget->hasVFP2()) return false;
1129 : // FIXME: Unaligned stores need special handling. Doublewords require
1130 : // word-alignment.
1131 449 : if (Alignment && Alignment < 4)
1132 : return false;
1133 :
1134 : StrOpc = ARM::VSTRD;
1135 : break;
1136 : }
1137 : // Simplify this down to something we can handle.
1138 1237 : ARMSimplifyAddress(Addr, VT, useAM3);
1139 :
1140 : // Create the base instruction, then add the operands.
1141 2474 : SrcReg = constrainOperandRegClass(TII.get(StrOpc), SrcReg, 0);
1142 2474 : MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1143 2474 : TII.get(StrOpc))
1144 1237 : .addReg(SrcReg);
1145 1237 : AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore, useAM3);
1146 1237 : return true;
1147 : }
1148 :
1149 588 : bool ARMFastISel::SelectStore(const Instruction *I) {
1150 588 : Value *Op0 = I->getOperand(0);
1151 : unsigned SrcReg = 0;
1152 :
1153 : // Atomic stores need special handling.
1154 588 : if (cast<StoreInst>(I)->isAtomic())
1155 : return false;
1156 :
1157 : const Value *PtrV = I->getOperand(1);
1158 585 : if (TLI.supportSwiftError()) {
1159 : // Swifterror values can come from either a function parameter with
1160 : // swifterror attribute or an alloca with swifterror attribute.
1161 : if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
1162 63 : if (Arg->hasSwiftErrorAttr())
1163 : return false;
1164 : }
1165 :
1166 : if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
1167 253 : if (Alloca->isSwiftError())
1168 : return false;
1169 : }
1170 : }
1171 :
1172 : // Verify we have a legal type before going any further.
1173 583 : MVT VT;
1174 583 : if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
1175 : return false;
1176 :
1177 : // Get the value to be stored into a register.
1178 576 : SrcReg = getRegForValue(Op0);
1179 576 : if (SrcReg == 0) return false;
1180 :
1181 : // See if we can handle this address.
1182 : Address Addr;
1183 576 : if (!ARMComputeAddress(I->getOperand(1), Addr))
1184 : return false;
1185 :
1186 572 : if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment()))
1187 13 : return false;
1188 : return true;
1189 : }
1190 :
1191 : static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
1192 : switch (Pred) {
1193 : // Needs two compares...
1194 : case CmpInst::FCMP_ONE:
1195 : case CmpInst::FCMP_UEQ:
1196 : default:
1197 : // AL is our "false" for now. The other two need more compares.
1198 : return ARMCC::AL;
1199 : case CmpInst::ICMP_EQ:
1200 : case CmpInst::FCMP_OEQ:
1201 : return ARMCC::EQ;
1202 : case CmpInst::ICMP_SGT:
1203 : case CmpInst::FCMP_OGT:
1204 : return ARMCC::GT;
1205 : case CmpInst::ICMP_SGE:
1206 : case CmpInst::FCMP_OGE:
1207 : return ARMCC::GE;
1208 : case CmpInst::ICMP_UGT:
1209 : case CmpInst::FCMP_UGT:
1210 : return ARMCC::HI;
1211 : case CmpInst::FCMP_OLT:
1212 : return ARMCC::MI;
1213 : case CmpInst::ICMP_ULE:
1214 : case CmpInst::FCMP_OLE:
1215 : return ARMCC::LS;
1216 : case CmpInst::FCMP_ORD:
1217 : return ARMCC::VC;
1218 : case CmpInst::FCMP_UNO:
1219 : return ARMCC::VS;
1220 : case CmpInst::FCMP_UGE:
1221 : return ARMCC::PL;
1222 : case CmpInst::ICMP_SLT:
1223 : case CmpInst::FCMP_ULT:
1224 : return ARMCC::LT;
1225 : case CmpInst::ICMP_SLE:
1226 : case CmpInst::FCMP_ULE:
1227 : return ARMCC::LE;
1228 : case CmpInst::FCMP_UNE:
1229 : case CmpInst::ICMP_NE:
1230 : return ARMCC::NE;
1231 : case CmpInst::ICMP_UGE:
1232 : return ARMCC::HS;
1233 : case CmpInst::ICMP_ULT:
1234 : return ARMCC::LO;
1235 : }
1236 : }
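// Editorial note (not part of the original source): ARMCC::AL doubles as an
// "unhandled" sentinel here. FCMP_ONE and FCMP_UEQ would each need two
// compares, so they fall into the default case, and both SelectCmp() and
// SelectBranch() bail out when they see AL. The other mappings are direct,
// e.g. icmp ult -> ARMCC::LO and icmp uge -> ARMCC::HS, the unsigned
// counterparts of LT and GE.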
1237 :
1238 79 : bool ARMFastISel::SelectBranch(const Instruction *I) {
1239 : const BranchInst *BI = cast<BranchInst>(I);
1240 79 : MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
1241 79 : MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
1242 :
1243 : // Simple branch support.
1244 :
1245 : // If we can, avoid recomputing the compare - redoing it could lead to wonky
1246 : // behavior.
1247 : if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
1248 58 : if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
1249 : // Get the compare predicate.
1250 : // Try to take advantage of fallthrough opportunities.
1251 : CmpInst::Predicate Predicate = CI->getPredicate();
1252 58 : if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
1253 : std::swap(TBB, FBB);
1254 52 : Predicate = CmpInst::getInversePredicate(Predicate);
1255 : }
1256 :
1257 : ARMCC::CondCodes ARMPred = getComparePred(Predicate);
1258 :
1259 : // We may not handle every CC for now.
1260 58 : if (ARMPred == ARMCC::AL) return false;
1261 :
1262 : // Emit the compare.
1263 116 : if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
1264 58 : CI->isEquality()))
1265 : return false;
1266 :
1267 58 : unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
1268 116 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
1269 58 : .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
1270 58 : finishCondBranch(BI->getParent(), TBB, FBB);
1271 58 : return true;
1272 : }
1273 : } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
1274 3 : MVT SourceVT;
1275 6 : if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
1276 3 : (isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
1277 3 : unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
1278 3 : unsigned OpReg = getRegForValue(TI->getOperand(0));
1279 6 : OpReg = constrainOperandRegClass(TII.get(TstOpc), OpReg, 0);
1280 6 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1281 6 : TII.get(TstOpc))
1282 6 : .addReg(OpReg).addImm(1));
1283 :
1284 : unsigned CCMode = ARMCC::NE;
1285 3 : if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
1286 : std::swap(TBB, FBB);
1287 : CCMode = ARMCC::EQ;
1288 : }
1289 :
1290 3 : unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
1291 6 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
1292 3 : .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
1293 :
1294 3 : finishCondBranch(BI->getParent(), TBB, FBB);
1295 3 : return true;
1296 : }
1297 : } else if (const ConstantInt *CI =
1298 : dyn_cast<ConstantInt>(BI->getCondition())) {
1299 : uint64_t Imm = CI->getZExtValue();
1300 12 : MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
1301 12 : fastEmitBranch(Target, DbgLoc);
1302 12 : return true;
1303 : }
1304 :
1305 12 : unsigned CmpReg = getRegForValue(BI->getCondition());
1306 6 : if (CmpReg == 0) return false;
1307 :
1308 : // We've been divorced from our compare! Our block was split, and
1309 :   // now our compare lives in a predecessor block. We mustn't
1310 : // re-compare here, as the children of the compare aren't guaranteed
1311 : // live across the block boundary (we *could* check for this).
1312 : // Regardless, the compare has been done in the predecessor block,
1313 : // and it left a value for us in a virtual register. Ergo, we test
1314 : // the one-bit value left in the virtual register.
1315 6 : unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
1316 12 : CmpReg = constrainOperandRegClass(TII.get(TstOpc), CmpReg, 0);
1317 : AddOptionalDefs(
1318 12 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TstOpc))
1319 6 : .addReg(CmpReg)
1320 6 : .addImm(1));
1321 :
1322 : unsigned CCMode = ARMCC::NE;
1323 6 : if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
1324 : std::swap(TBB, FBB);
1325 : CCMode = ARMCC::EQ;
1326 : }
1327 :
1328 6 : unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
1329 12 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
1330 6 : .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
1331 6 : finishCondBranch(BI->getParent(), TBB, FBB);
1332 6 : return true;
1333 : }
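// Editorial example (not part of the original source): the TBB/FBB swap
// above exploits fallthrough. For "br i1 %c, label %then, label %else" where
// %then is the layout successor, the blocks are swapped and the predicate
// inverted, so the emitted Bcc/t2Bcc branches to %else on the inverted
// condition and %then is reached by simply falling through.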
1334 :
1335 4 : bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
1336 8 : unsigned AddrReg = getRegForValue(I->getOperand(0));
1337 4 : if (AddrReg == 0) return false;
1338 :
1339 4 : unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX;
1340 : assert(isThumb2 || Subtarget->hasV4TOps());
1341 :
1342 4 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1343 8 : TII.get(Opc)).addReg(AddrReg));
1344 :
1345 : const IndirectBrInst *IB = cast<IndirectBrInst>(I);
1346 12 : for (const BasicBlock *SuccBB : IB->successors())
1347 8 : FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[SuccBB]);
1348 :
1349 4 : return true;
1350 : }
1351 :
1352 84 : bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
1353 : bool isZExt, bool isEquality) {
1354 84 : Type *Ty = Src1Value->getType();
1355 84 : EVT SrcEVT = TLI.getValueType(DL, Ty, true);
1356 84 : if (!SrcEVT.isSimple()) return false;
1357 : MVT SrcVT = SrcEVT.getSimpleVT();
1358 :
1359 84 : if (Ty->isFloatTy() && !Subtarget->hasVFP2())
1360 : return false;
1361 :
1362 84 : if (Ty->isDoubleTy() && (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()))
1363 : return false;
1364 :
1365 : // Check to see if the 2nd operand is a constant that we can encode directly
1366 : // in the compare.
1367 : int Imm = 0;
1368 : bool UseImm = false;
1369 : bool isNegativeImm = false;
1370 : // FIXME: At -O0 we don't have anything that canonicalizes operand order.
1371 : // Thus, Src1Value may be a ConstantInt, but we're missing it.
1372 : if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
1373 40 : if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 ||
1374 : SrcVT == MVT::i1) {
1375 : const APInt &CIVal = ConstInt->getValue();
1376 80 : Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue();
1377 : // For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather
1378 :       // than a cmn, because there is no way to represent 2147483648 as a
1379 : // signed 32-bit int.
1380 40 : if (Imm < 0 && Imm != (int)0x80000000) {
1381 : isNegativeImm = true;
1382 9 : Imm = -Imm;
1383 : }
1384 40 : UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
1385 24 : (ARM_AM::getSOImmVal(Imm) != -1);
1386 : }
1387 : } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
1388 22 : if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
1389 22 : if (ConstFP->isZero() && !ConstFP->isNegative())
1390 : UseImm = true;
1391 : }
1392 :
1393 : unsigned CmpOpc;
1394 : bool isICmp = true;
1395 : bool needsExt = false;
1396 83 : switch (SrcVT.SimpleTy) {
1397 : default: return false;
1398 : // TODO: Verify compares.
1399 16 : case MVT::f32:
1400 : isICmp = false;
1401 : // Equality comparisons shouldn't raise Invalid on uordered inputs.
1402 16 : if (isEquality)
1403 14 : CmpOpc = UseImm ? ARM::VCMPZS : ARM::VCMPS;
1404 : else
1405 2 : CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES;
1406 : break;
1407 6 : case MVT::f64:
1408 : isICmp = false;
1409 : // Equality comparisons shouldn't raise Invalid on uordered inputs.
1410 6 : if (isEquality)
1411 6 : CmpOpc = UseImm ? ARM::VCMPZD : ARM::VCMPD;
1412 : else
1413 0 : CmpOpc = UseImm ? ARM::VCMPEZD : ARM::VCMPED;
1414 : break;
1415 30 : case MVT::i1:
1416 : case MVT::i8:
1417 : case MVT::i16:
1418 : needsExt = true;
1419 : LLVM_FALLTHROUGH;
1420 61 : case MVT::i32:
1421 61 : if (isThumb2) {
1422 22 : if (!UseImm)
1423 : CmpOpc = ARM::t2CMPrr;
1424 : else
1425 15 : CmpOpc = isNegativeImm ? ARM::t2CMNri : ARM::t2CMPri;
1426 : } else {
1427 39 : if (!UseImm)
1428 : CmpOpc = ARM::CMPrr;
1429 : else
1430 24 : CmpOpc = isNegativeImm ? ARM::CMNri : ARM::CMPri;
1431 : }
1432 : break;
1433 : }
1434 :
1435 83 : unsigned SrcReg1 = getRegForValue(Src1Value);
1436 83 : if (SrcReg1 == 0) return false;
1437 :
1438 : unsigned SrcReg2 = 0;
1439 83 : if (!UseImm) {
1440 30 : SrcReg2 = getRegForValue(Src2Value);
1441 30 : if (SrcReg2 == 0) return false;
1442 : }
1443 :
1444 :   // We have i1, i8, or i16; we need to either zero extend or sign extend.
1445 83 : if (needsExt) {
1446 60 : SrcReg1 = ARMEmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
1447 30 : if (SrcReg1 == 0) return false;
1448 30 : if (!UseImm) {
1449 15 : SrcReg2 = ARMEmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
1450 15 : if (SrcReg2 == 0) return false;
1451 : }
1452 : }
1453 :
1454 83 : const MCInstrDesc &II = TII.get(CmpOpc);
1455 83 : SrcReg1 = constrainOperandRegClass(II, SrcReg1, 0);
1456 83 : if (!UseImm) {
1457 30 : SrcReg2 = constrainOperandRegClass(II, SrcReg2, 1);
1458 60 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
1459 30 : .addReg(SrcReg1).addReg(SrcReg2));
1460 : } else {
1461 53 : MachineInstrBuilder MIB;
1462 53 : MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
1463 53 : .addReg(SrcReg1);
1464 :
1465 : // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0.
1466 53 : if (isICmp)
1467 39 : MIB.addImm(Imm);
1468 53 : AddOptionalDefs(MIB);
1469 : }
1470 :
1471 : // For floating point we need to move the result to a comparison register
1472 : // that we can then use for branches.
1473 83 : if (Ty->isFloatTy() || Ty->isDoubleTy())
1474 22 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1475 44 : TII.get(ARM::FMSTAT)));
1476 : return true;
1477 : }
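// Worked example for the negative-immediate path above (editorial, not part
// of the original source): comparing a register against -5 sets isNegativeImm
// and flips Imm to 5, selecting CMN:
//   cmn r0, #5        ; flags from r0 - (-5), i.e. r0 + 5
// which produces the same flags as the unencodable "cmp r0, #-5".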
1478 :
1479 26 : bool ARMFastISel::SelectCmp(const Instruction *I) {
1480 : const CmpInst *CI = cast<CmpInst>(I);
1481 :
1482 : // Get the compare predicate.
1483 : ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
1484 :
1485 : // We may not handle every CC for now.
1486 26 : if (ARMPred == ARMCC::AL) return false;
1487 :
1488 : // Emit the compare.
1489 52 : if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
1490 26 : CI->isEquality()))
1491 : return false;
1492 :
1493 : // Now set a register based on the comparison. Explicitly set the predicates
1494 : // here.
1495 25 : unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
1496 25 : const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
1497 : : &ARM::GPRRegClass;
1498 25 : unsigned DestReg = createResultReg(RC);
1499 25 : Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0);
1500 25 : unsigned ZeroReg = fastMaterializeConstant(Zero);
1501 : // ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR.
1502 50 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc), DestReg)
1503 25 : .addReg(ZeroReg).addImm(1)
1504 25 : .addImm(ARMPred).addReg(ARM::CPSR);
1505 :
1506 25 : updateValueMap(I, DestReg);
1507 25 : return true;
1508 : }
1509 :
1510 1 : bool ARMFastISel::SelectFPExt(const Instruction *I) {
1511 : // Make sure we have VFP and that we're extending float to double.
1512 1 : if (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()) return false;
1513 :
1514 0 : Value *V = I->getOperand(0);
1515 0 : if (!I->getType()->isDoubleTy() ||
1516 0 : !V->getType()->isFloatTy()) return false;
1517 :
1518 0 : unsigned Op = getRegForValue(V);
1519 0 : if (Op == 0) return false;
1520 :
1521 0 : unsigned Result = createResultReg(&ARM::DPRRegClass);
1522 0 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1523 0 : TII.get(ARM::VCVTDS), Result)
1524 0 : .addReg(Op));
1525 0 : updateValueMap(I, Result);
1526 0 : return true;
1527 : }
1528 :
1529 1 : bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
1530 : // Make sure we have VFP and that we're truncating double to float.
1531 1 : if (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()) return false;
1532 :
1533 0 : Value *V = I->getOperand(0);
1534 0 : if (!(I->getType()->isFloatTy() &&
1535 0 : V->getType()->isDoubleTy())) return false;
1536 :
1537 0 : unsigned Op = getRegForValue(V);
1538 0 : if (Op == 0) return false;
1539 :
1540 0 : unsigned Result = createResultReg(&ARM::SPRRegClass);
1541 0 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1542 0 : TII.get(ARM::VCVTSD), Result)
1543 0 : .addReg(Op));
1544 0 : updateValueMap(I, Result);
1545 0 : return true;
1546 : }
1547 :
1548 37 : bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
1549 : // Make sure we have VFP.
1550 37 : if (!Subtarget->hasVFP2()) return false;
1551 :
1552 37 : MVT DstVT;
1553 37 : Type *Ty = I->getType();
1554 37 : if (!isTypeLegal(Ty, DstVT))
1555 : return false;
1556 :
1557 37 : Value *Src = I->getOperand(0);
1558 37 : EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true);
1559 37 : if (!SrcEVT.isSimple())
1560 : return false;
1561 : MVT SrcVT = SrcEVT.getSimpleVT();
1562 37 : if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
1563 : return false;
1564 :
1565 37 : unsigned SrcReg = getRegForValue(Src);
1566 37 : if (SrcReg == 0) return false;
1567 :
1568 : // Handle sign-extension.
1569 37 : if (SrcVT == MVT::i16 || SrcVT == MVT::i8) {
1570 24 : SrcReg = ARMEmitIntExt(SrcVT, SrcReg, MVT::i32,
1571 24 : /*isZExt*/!isSigned);
1572 24 : if (SrcReg == 0) return false;
1573 : }
1574 :
1575 : // The conversion routine works fp-reg to fp-reg, and the operand above
1576 : // was an integer; move it to the fp registers if possible.
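     : // For example, "sitofp i16 %x to float" is expected to lower roughly to
     : // (a sketch):
     : //   sxth  r0, r0
     : //   vmov  s0, r0
     : //   vcvt.f32.s32 s0, s0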
1577 37 : unsigned FP = ARMMoveToFPReg(MVT::f32, SrcReg);
1578 37 : if (FP == 0) return false;
1579 :
1580 : unsigned Opc;
1581 37 : if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS;
1582 19 : else if (Ty->isDoubleTy() && !Subtarget->isFPOnlySP())
1583 18 : Opc = isSigned ? ARM::VSITOD : ARM::VUITOD;
1584 : else return false;
1585 :
1586 36 : unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
1587 72 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1588 72 : TII.get(Opc), ResultReg).addReg(FP));
1589 36 : updateValueMap(I, ResultReg);
1590 36 : return true;
1591 : }
1592 :
1593 13 : bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
1594 : // Make sure we have VFP.
1595 13 : if (!Subtarget->hasVFP2()) return false;
1596 :
1597 13 : MVT DstVT;
1598 13 : Type *RetTy = I->getType();
1599 13 : if (!isTypeLegal(RetTy, DstVT))
1600 : return false;
1601 :
1602 26 : unsigned Op = getRegForValue(I->getOperand(0));
1603 13 : if (Op == 0) return false;
1604 :
1605 : unsigned Opc;
1606 13 : Type *OpTy = I->getOperand(0)->getType();
1607 13 : if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS;
1608 7 : else if (OpTy->isDoubleTy() && !Subtarget->isFPOnlySP())
1609 6 : Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD;
1610 : else return false;
1611 :
1612 : // f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg.
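     : // For example, "fptosi double %x to i32" is expected to lower roughly to
     : // (a sketch):
     : //   vcvt.s32.f64 s0, d0
     : //   vmov  r0, s0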
1613 24 : unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
1614 24 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1615 24 : TII.get(Opc), ResultReg).addReg(Op));
1616 :
1617 : // This result needs to be in an integer register, but the conversion only
1618 : // takes place in fp-regs.
1619 12 : unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);
1620 12 : if (IntReg == 0) return false;
1621 :
1622 12 : updateValueMap(I, IntReg);
1623 12 : return true;
1624 : }
1625 :
1626 24 : bool ARMFastISel::SelectSelect(const Instruction *I) {
1627 24 : MVT VT;
1628 24 : if (!isTypeLegal(I->getType(), VT))
1629 : return false;
1630 :
1631 : // Things need to be register sized for register moves.
1632 24 : if (VT != MVT::i32) return false;
1633 :
1634 48 : unsigned CondReg = getRegForValue(I->getOperand(0));
1635 24 : if (CondReg == 0) return false;
1636 24 : unsigned Op1Reg = getRegForValue(I->getOperand(1));
1637 24 : if (Op1Reg == 0) return false;
1638 :
1639 : // Check to see if we can use an immediate in the conditional move.
1640 : int Imm = 0;
1641 : bool UseImm = false;
1642 : bool isNegativeImm = false;
1643 : if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(2))) {
1644 : assert(VT == MVT::i32 && "Expecting an i32.");
1645 20 : Imm = (int)ConstInt->getValue().getZExtValue();
1646 20 : if (Imm < 0) {
1647 : isNegativeImm = true;
1648 12 : Imm = ~Imm;
1649 : }
1650 20 : UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
1651 10 : (ARM_AM::getSOImmVal(Imm) != -1);
1652 : }
1653 :
1654 : unsigned Op2Reg = 0;
1655 24 : if (!UseImm) {
1656 4 : Op2Reg = getRegForValue(I->getOperand(2));
1657 4 : if (Op2Reg == 0) return false;
1658 : }
1659 :
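     : // The select is lowered as a test of the condition's low bit followed by a
     : // predicated move, e.g. (a sketch): tst r0, #1 followed by movne r2, r1.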
1660 24 : unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
1661 48 : CondReg = constrainOperandRegClass(TII.get(TstOpc), CondReg, 0);
1662 : AddOptionalDefs(
1663 48 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TstOpc))
1664 24 : .addReg(CondReg)
1665 24 : .addImm(1));
1666 :
1667 : unsigned MovCCOpc;
1668 : const TargetRegisterClass *RC;
1669 24 : if (!UseImm) {
1670 4 : RC = isThumb2 ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
1671 4 : MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr;
1672 : } else {
1673 20 : RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass;
1674 20 : if (!isNegativeImm)
1675 8 : MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
1676 : else
1677 12 : MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi;
1678 : }
1679 24 : unsigned ResultReg = createResultReg(RC);
1680 24 : if (!UseImm) {
1681 8 : Op2Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op2Reg, 1);
1682 8 : Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 2);
1683 4 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc),
1684 8 : ResultReg)
1685 4 : .addReg(Op2Reg)
1686 4 : .addReg(Op1Reg)
1687 : .addImm(ARMCC::NE)
1688 4 : .addReg(ARM::CPSR);
1689 : } else {
1690 40 : Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 1);
1691 20 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc),
1692 40 : ResultReg)
1693 20 : .addReg(Op1Reg)
1694 20 : .addImm(Imm)
1695 : .addImm(ARMCC::EQ)
1696 20 : .addReg(ARM::CPSR);
1697 : }
1698 24 : updateValueMap(I, ResultReg);
1699 24 : return true;
1700 : }
1701 :
1702 14 : bool ARMFastISel::SelectDiv(const Instruction *I, bool isSigned) {
1703 14 : MVT VT;
1704 14 : Type *Ty = I->getType();
1705 14 : if (!isTypeLegal(Ty, VT))
1706 : return false;
1707 :
1708 : // If we have integer div support we should have selected this automagically.
1709 : // In case we have a real miss, go ahead and return false and we'll pick
1710 : // it up later.
1711 14 : if (Subtarget->hasDivideInThumbMode())
1712 : return false;
1713 :
1714 : // Otherwise emit a libcall.
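     : // On AEABI targets an i32 udiv typically becomes a call to __aeabi_uidiv
     : // (elsewhere __udivsi3); the exact symbol comes from the RTLIB configuration.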
1715 : RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
1716 14 : if (VT == MVT::i8)
1717 0 : LC = isSigned ? RTLIB::SDIV_I8 : RTLIB::UDIV_I8;
1718 14 : else if (VT == MVT::i16)
1719 0 : LC = isSigned ? RTLIB::SDIV_I16 : RTLIB::UDIV_I16;
1720 14 : else if (VT == MVT::i32)
1721 14 : LC = isSigned ? RTLIB::SDIV_I32 : RTLIB::UDIV_I32;
1722 0 : else if (VT == MVT::i64)
1723 0 : LC = isSigned ? RTLIB::SDIV_I64 : RTLIB::UDIV_I64;
1724 0 : else if (VT == MVT::i128)
1725 0 : LC = isSigned ? RTLIB::SDIV_I128 : RTLIB::UDIV_I128;
1726 : assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
1727 :
1728 14 : return ARMEmitLibcall(I, LC);
1729 : }
1730 :
1731 36 : bool ARMFastISel::SelectRem(const Instruction *I, bool isSigned) {
1732 36 : MVT VT;
1733 36 : Type *Ty = I->getType();
1734 36 : if (!isTypeLegal(Ty, VT))
1735 : return false;
1736 :
1737 : // Many ABIs do not provide a libcall for standalone remainder, so we need to
1738 : // use divrem (see the RTABI 4.3.1). Since FastISel can't handle non-double
1739 : // multi-reg returns, we'll have to bail out.
1740 64 : if (!TLI.hasStandaloneRem(VT)) {
1741 : return false;
1742 : }
1743 :
1744 : RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
1745 11 : if (VT == MVT::i8)
1746 0 : LC = isSigned ? RTLIB::SREM_I8 : RTLIB::UREM_I8;
1747 11 : else if (VT == MVT::i16)
1748 0 : LC = isSigned ? RTLIB::SREM_I16 : RTLIB::UREM_I16;
1749 11 : else if (VT == MVT::i32)
1750 11 : LC = isSigned ? RTLIB::SREM_I32 : RTLIB::UREM_I32;
1751 0 : else if (VT == MVT::i64)
1752 0 : LC = isSigned ? RTLIB::SREM_I64 : RTLIB::UREM_I64;
1753 0 : else if (VT == MVT::i128)
1754 0 : LC = isSigned ? RTLIB::SREM_I128 : RTLIB::UREM_I128;
1755 : assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
1756 :
1757 11 : return ARMEmitLibcall(I, LC);
1758 : }
1759 :
1760 37 : bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
1761 37 : EVT DestVT = TLI.getValueType(DL, I->getType(), true);
1762 :
1763 : // We can get here in the case when we have a binary operation on a non-legal
1764 : // type and the target independent selector doesn't know how to handle it.
1765 : if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
1766 1 : return false;
1767 :
1768 : unsigned Opc;
1769 36 : switch (ISDOpcode) {
1770 : default: return false;
1771 21 : case ISD::ADD:
1772 21 : Opc = isThumb2 ? ARM::t2ADDrr : ARM::ADDrr;
1773 : break;
1774 6 : case ISD::OR:
1775 6 : Opc = isThumb2 ? ARM::t2ORRrr : ARM::ORRrr;
1776 : break;
1777 9 : case ISD::SUB:
1778 9 : Opc = isThumb2 ? ARM::t2SUBrr : ARM::SUBrr;
1779 : break;
1780 : }
1781 :
1782 72 : unsigned SrcReg1 = getRegForValue(I->getOperand(0));
1783 36 : if (SrcReg1 == 0) return false;
1784 :
1785 : // TODO: Often the 2nd operand is an immediate, which can be encoded directly
1786 : // in the instruction, rather than materializing the value in a register.
1787 36 : unsigned SrcReg2 = getRegForValue(I->getOperand(1));
1788 36 : if (SrcReg2 == 0) return false;
1789 :
1790 36 : unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass);
1791 72 : SrcReg1 = constrainOperandRegClass(TII.get(Opc), SrcReg1, 1);
1792 72 : SrcReg2 = constrainOperandRegClass(TII.get(Opc), SrcReg2, 2);
1793 72 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1794 72 : TII.get(Opc), ResultReg)
1795 36 : .addReg(SrcReg1).addReg(SrcReg2));
1796 36 : updateValueMap(I, ResultReg);
1797 36 : return true;
1798 : }
1799 :
1800 2 : bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
1801 2 : EVT FPVT = TLI.getValueType(DL, I->getType(), true);
1802 2 : if (!FPVT.isSimple()) return false;
1803 : MVT VT = FPVT.getSimpleVT();
1804 :
1805 : // FIXME: Support vector types where possible.
1806 2 : if (VT.isVector())
1807 : return false;
1808 :
1809 : // We can get here in the case when we want to use NEON for our fp
1810 : // operations, but can't figure out how to. Just use the vfp instructions
1811 : // if we have them.
1812 : // FIXME: It'd be nice to use NEON instructions.
1813 1 : Type *Ty = I->getType();
1814 1 : if (Ty->isFloatTy() && !Subtarget->hasVFP2())
1815 : return false;
1816 1 : if (Ty->isDoubleTy() && (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()))
1817 : return false;
1818 :
1819 : unsigned Opc;
1820 0 : bool is64bit = VT == MVT::f64 || VT == MVT::i64;
1821 0 : switch (ISDOpcode) {
1822 : default: return false;
1823 0 : case ISD::FADD:
1824 0 : Opc = is64bit ? ARM::VADDD : ARM::VADDS;
1825 : break;
1826 0 : case ISD::FSUB:
1827 0 : Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
1828 : break;
1829 0 : case ISD::FMUL:
1830 0 : Opc = is64bit ? ARM::VMULD : ARM::VMULS;
1831 : break;
1832 : }
1833 0 : unsigned Op1 = getRegForValue(I->getOperand(0));
1834 0 : if (Op1 == 0) return false;
1835 :
1836 0 : unsigned Op2 = getRegForValue(I->getOperand(1));
1837 0 : if (Op2 == 0) return false;
1838 :
1839 0 : unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
1840 0 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1841 0 : TII.get(Opc), ResultReg)
1842 0 : .addReg(Op1).addReg(Op2));
1843 0 : updateValueMap(I, ResultReg);
1844 0 : return true;
1845 : }
1846 :
1847 : // Call Handling Code
1848 :
1849 : // This is largely taken directly from CCAssignFnForNode
1850 : // TODO: We may not support all of this.
1851 1142 : CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
1852 : bool Return,
1853 : bool isVarArg) {
1854 1142 : switch (CC) {
1855 0 : default:
1856 0 : report_fatal_error("Unsupported calling convention");
1857 7 : case CallingConv::Fast:
1858 7 : if (Subtarget->hasVFP2() && !isVarArg) {
1859 7 : if (!Subtarget->isAAPCS_ABI())
1860 10 : return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
1861 : // For AAPCS ABI targets, just use VFP variant of the calling convention.
1862 4 : return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1863 : }
1864 : LLVM_FALLTHROUGH;
1865 : case CallingConv::C:
1866 : case CallingConv::CXX_FAST_TLS:
1867 : // Use target triple & subtarget features to do actual dispatch.
1868 1098 : if (Subtarget->isAAPCS_ABI()) {
1869 551 : if (Subtarget->hasVFP2() &&
1870 297 : TM.Options.FloatABIType == FloatABI::Hard && !isVarArg)
1871 11 : return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
1872 : else
1873 375 : return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
1874 : } else {
1875 1160 : return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
1876 : }
1877 30 : case CallingConv::ARM_AAPCS_VFP:
1878 : case CallingConv::Swift:
1879 30 : if (!isVarArg)
1880 38 : return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
1881 : // Fall through to the soft-float variant; variadic functions don't
1882 : // use the hard floating point ABI.
1883 : LLVM_FALLTHROUGH;
1884 : case CallingConv::ARM_AAPCS:
1885 6 : return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
1886 1 : case CallingConv::ARM_APCS:
1887 1 : return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
1888 0 : case CallingConv::GHC:
1889 0 : if (Return)
1890 0 : report_fatal_error("Can't return in GHC call convention");
1891 : else
1892 : return CC_ARM_APCS_GHC;
1893 : }
1894 : }
1895 :
1896 466 : bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
1897 : SmallVectorImpl<unsigned> &ArgRegs,
1898 : SmallVectorImpl<MVT> &ArgVTs,
1899 : SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
1900 : SmallVectorImpl<unsigned> &RegArgs,
1901 : CallingConv::ID CC,
1902 : unsigned &NumBytes,
1903 : bool isVarArg) {
1904 : SmallVector<CCValAssign, 16> ArgLocs;
1905 932 : CCState CCInfo(CC, isVarArg, *FuncInfo.MF, ArgLocs, *Context);
1906 466 : CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags,
1907 : CCAssignFnForCall(CC, false, isVarArg));
1908 :
1909 : // Check that we can handle all of the arguments. If we can't, then bail out
1910 : // now before we add code to the MBB.
1911 1754 : for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1912 1294 : CCValAssign &VA = ArgLocs[i];
1913 1294 : MVT ArgVT = ArgVTs[VA.getValNo()];
1914 :
1915 : // We don't handle NEON/vector parameters yet.
1916 2588 : if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
1917 6 : return false;
1918 :
1919 : // Now copy/store arg to correct locations.
1920 1289 : if (VA.isRegLoc() && !VA.needsCustom()) {
1921 727 : continue;
1922 562 : } else if (VA.needsCustom()) {
1923 : // TODO: We need custom lowering for vector (v2f64) args.
1924 1 : if (VA.getLocVT() != MVT::f64 ||
1925 : // TODO: Only handle register args for now.
1926 3 : !VA.isRegLoc() || !ArgLocs[++i].isRegLoc())
1927 : return false;
1928 : } else {
1929 561 : switch (ArgVT.SimpleTy) {
1930 : default:
1931 : return false;
1932 : case MVT::i1:
1933 : case MVT::i8:
1934 : case MVT::i16:
1935 : case MVT::i32:
1936 : break;
1937 4 : case MVT::f32:
1938 4 : if (!Subtarget->hasVFP2())
1939 : return false;
1940 : break;
1941 417 : case MVT::f64:
1942 417 : if (!Subtarget->hasVFP2())
1943 : return false;
1944 : break;
1945 : }
1946 : }
1947 : }
1948 :
1949 : // At this point, we are able to handle the call's arguments in fast isel.
1950 :
1951 : // Get a count of how many bytes are to be pushed on the stack.
1952 460 : NumBytes = CCInfo.getNextStackOffset();
1953 :
1954 : // Issue CALLSEQ_START
1955 460 : unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
1956 920 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1957 920 : TII.get(AdjStackDown))
1958 920 : .addImm(NumBytes).addImm(0));
1959 :
1960 : // Process the args.
1961 1739 : for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1962 1279 : CCValAssign &VA = ArgLocs[i];
1963 2558 : const Value *ArgVal = Args[VA.getValNo()];
1964 1279 : unsigned Arg = ArgRegs[VA.getValNo()];
1965 1279 : MVT ArgVT = ArgVTs[VA.getValNo()];
1966 :
1967 : assert((!ArgVT.isVector() && ArgVT.getSizeInBits() <= 64) &&
1968 : "We don't handle NEON/vector parameters yet.");
1969 :
1970 : // Handle arg promotion, etc.
1971 1279 : switch (VA.getLocInfo()) {
1972 : case CCValAssign::Full: break;
1973 18 : case CCValAssign::SExt: {
1974 : MVT DestVT = VA.getLocVT();
1975 18 : Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/false);
1976 : assert(Arg != 0 && "Failed to emit a sext");
1977 18 : ArgVT = DestVT;
1978 : break;
1979 : }
1980 115 : case CCValAssign::AExt:
1981 : // Intentional fall-through. Handle AExt and ZExt.
1982 : case CCValAssign::ZExt: {
1983 : MVT DestVT = VA.getLocVT();
1984 115 : Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true);
1985 : assert(Arg != 0 && "Failed to emit a zext");
1986 115 : ArgVT = DestVT;
1987 : break;
1988 : }
1989 81 : case CCValAssign::BCvt: {
1990 81 : unsigned BC = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg,
1991 : /*TODO: Kill=*/false);
1992 : assert(BC != 0 && "Failed to emit a bitcast!");
1993 : Arg = BC;
1994 81 : ArgVT = VA.getLocVT();
1995 81 : break;
1996 : }
1997 0 : default: llvm_unreachable("Unknown arg promotion!");
1998 : }
1999 :
2000 : // Now copy/store arg to correct locations.
2001 1279 : if (VA.isRegLoc() && !VA.needsCustom()) {
2002 1440 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2003 1440 : TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(Arg);
2004 720 : RegArgs.push_back(VA.getLocReg());
2005 559 : } else if (VA.needsCustom()) {
2006 : // TODO: We need custom lowering for vector (v2f64) args.
2007 : assert(VA.getLocVT() == MVT::f64 &&
2008 : "Custom lowering for v2f64 args not available");
2009 :
2010 : // FIXME: ArgLocs[++i] may extend beyond ArgLocs.size()
2011 0 : CCValAssign &NextVA = ArgLocs[++i];
2012 :
2013 : assert(VA.isRegLoc() && NextVA.isRegLoc() &&
2014 : "We only handle register args!");
2015 :
2016 0 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2017 0 : TII.get(ARM::VMOVRRD), VA.getLocReg())
2018 0 : .addReg(NextVA.getLocReg(), RegState::Define)
2019 0 : .addReg(Arg));
2020 0 : RegArgs.push_back(VA.getLocReg());
2021 0 : RegArgs.push_back(NextVA.getLocReg());
2022 : } else {
2023 : assert(VA.isMemLoc());
2024 : // Need to store on the stack.
2025 :
2026 : // Don't emit stores for undef values.
2027 559 : if (isa<UndefValue>(ArgVal))
2028 9 : continue;
2029 :
2030 : Address Addr;
2031 : Addr.BaseType = Address::RegBase;
2032 550 : Addr.Base.Reg = ARM::SP;
2033 550 : Addr.Offset = VA.getLocMemOffset();
2034 :
2035 550 : bool EmitRet = ARMEmitStore(ArgVT, Arg, Addr); (void)EmitRet;
2036 : assert(EmitRet && "Could not emit a store for argument!");
2037 : }
2038 : }
2039 :
2040 : return true;
2041 : }
2042 :
2043 460 : bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
2044 : const Instruction *I, CallingConv::ID CC,
2045 : unsigned &NumBytes, bool isVarArg) {
2046 : // Issue CALLSEQ_END
2047 460 : unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
2048 920 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2049 920 : TII.get(AdjStackUp))
2050 920 : .addImm(NumBytes).addImm(0));
2051 :
2052 : // Now the return value.
2053 460 : if (RetVT != MVT::isVoid) {
2054 : SmallVector<CCValAssign, 16> RVLocs;
2055 454 : CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context);
2056 227 : CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));
2057 :
2058 : // Copy all of the result registers out of their specified physreg.
2059 227 : if (RVLocs.size() == 2 && RetVT == MVT::f64) {
2060 : // For this move we copy into two registers and then move into the
2061 : // double fp reg we want.
2062 0 : MVT DestVT = RVLocs[0].getValVT();
2063 0 : const TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
2064 0 : unsigned ResultReg = createResultReg(DstRC);
2065 0 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2066 0 : TII.get(ARM::VMOVDRR), ResultReg)
2067 0 : .addReg(RVLocs[0].getLocReg())
2068 0 : .addReg(RVLocs[1].getLocReg()));
2069 :
2070 0 : UsedRegs.push_back(RVLocs[0].getLocReg());
2071 0 : UsedRegs.push_back(RVLocs[1].getLocReg());
2072 :
2073 : // Finally update the result.
2074 0 : updateValueMap(I, ResultReg);
2075 : } else {
2076 : assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!");
2077 227 : MVT CopyVT = RVLocs[0].getValVT();
2078 :
2079 : // Special handling for extended integers.
2080 227 : if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16)
2081 45 : CopyVT = MVT::i32;
2082 :
2083 227 : const TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
2084 :
2085 227 : unsigned ResultReg = createResultReg(DstRC);
2086 454 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2087 227 : TII.get(TargetOpcode::COPY),
2088 454 : ResultReg).addReg(RVLocs[0].getLocReg());
2089 454 : UsedRegs.push_back(RVLocs[0].getLocReg());
2090 :
2091 : // Finally update the result.
2092 227 : updateValueMap(I, ResultReg);
2093 : }
2094 : }
2095 :
2096 460 : return true;
2097 : }
2098 :
2099 890 : bool ARMFastISel::SelectRet(const Instruction *I) {
2100 : const ReturnInst *Ret = cast<ReturnInst>(I);
2101 890 : const Function &F = *I->getParent()->getParent();
2102 :
2103 890 : if (!FuncInfo.CanLowerReturn)
2104 : return false;
2105 :
2106 1776 : if (TLI.supportSwiftError() &&
2107 888 : F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
2108 12 : return false;
2109 :
2110 876 : if (TLI.supportSplitCSR(FuncInfo.MF))
2111 : return false;
2112 :
2113 : // Build a list of return value registers.
2114 : SmallVector<unsigned, 4> RetRegs;
2115 :
2116 : CallingConv::ID CC = F.getCallingConv();
2117 870 : if (Ret->getNumOperands() > 0) {
2118 : SmallVector<ISD::OutputArg, 4> Outs;
2119 892 : GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
2120 :
2121 : // Analyze operands of the call, assigning locations to each operand.
2122 : SmallVector<CCValAssign, 16> ValLocs;
2123 446 : CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
2124 446 : CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */,
2125 : F.isVarArg()));
2126 :
2127 : const Value *RV = Ret->getOperand(0);
2128 446 : unsigned Reg = getRegForValue(RV);
2129 446 : if (Reg == 0)
2130 34 : return false;
2131 :
2132 : // Only handle a single return value for now.
2133 420 : if (ValLocs.size() != 1)
2134 : return false;
2135 :
2136 : CCValAssign &VA = ValLocs[0];
2137 :
2138 : // Don't bother handling odd stuff for now.
2139 419 : if (VA.getLocInfo() != CCValAssign::Full)
2140 : return false;
2141 : // Only handle register returns for now.
2142 412 : if (!VA.isRegLoc())
2143 : return false;
2144 :
2145 412 : unsigned SrcReg = Reg + VA.getValNo();
2146 412 : EVT RVEVT = TLI.getValueType(DL, RV->getType());
2147 412 : if (!RVEVT.isSimple()) return false;
2148 : MVT RVVT = RVEVT.getSimpleVT();
2149 : MVT DestVT = VA.getValVT();
2150 : // Special handling for extended integers.
2151 412 : if (RVVT != DestVT) {
2152 99 : if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
2153 : return false;
2154 :
2155 : assert(DestVT == MVT::i32 && "ARM should always ext to i32");
2156 :
2157 : // Perform extension if flagged as either zext or sext. Otherwise, do
2158 : // nothing.
2159 99 : if (Outs[0].Flags.isZExt() || Outs[0].Flags.isSExt()) {
2160 60 : SrcReg = ARMEmitIntExt(RVVT, SrcReg, DestVT, Outs[0].Flags.isZExt());
2161 60 : if (SrcReg == 0) return false;
2162 : }
2163 : }
2164 :
2165 : // Make the copy.
2166 412 : unsigned DstReg = VA.getLocReg();
2167 412 : const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
2168 : // Avoid a cross-class copy. This is very unlikely.
2169 412 : if (!SrcRC->contains(DstReg))
2170 : return false;
2171 824 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2172 824 : TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);
2173 :
2174 : // Add register to return instruction.
2175 412 : RetRegs.push_back(VA.getLocReg());
2176 : }
2177 :
2178 836 : MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2179 1916 : TII.get(Subtarget->getReturnOpcode()));
2180 836 : AddOptionalDefs(MIB);
2181 1248 : for (unsigned R : RetRegs)
2182 412 : MIB.addReg(R, RegState::Implicit);
2183 : return true;
2184 : }
2185 :
2186 : unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
2187 435 : if (UseReg)
2188 82 : return isThumb2 ? ARM::tBLXr : ARM::BLX;
2189 : else
2190 378 : return isThumb2 ? ARM::tBL : ARM::BL;
2191 : }
2192 :
2193 12 : unsigned ARMFastISel::getLibcallReg(const Twine &Name) {
2194 : // Manually compute the global's type to avoid building it when unnecessary.
2195 12 : Type *GVTy = Type::getInt32PtrTy(*Context, /*AS=*/0);
2196 12 : EVT LCREVT = TLI.getValueType(DL, GVTy);
2197 12 : if (!LCREVT.isSimple()) return 0;
2198 :
2199 12 : GlobalValue *GV = new GlobalVariable(M, Type::getInt32Ty(*Context), false,
2200 : GlobalValue::ExternalLinkage, nullptr,
2201 12 : Name);
2202 : assert(GV->getType() == GVTy && "We miscomputed the type for the global!");
2203 12 : return ARMMaterializeGV(GV, LCREVT.getSimpleVT());
2204 : }
2205 :
2206 : // A quick function that will emit a call for a named libcall in F with the
2207 : // vector of passed arguments for the Instruction in I. We can assume that we
2208 : // can emit a call for any libcall we can produce. This is an abridged version
2209 : // of the full call infrastructure since we won't need to worry about things
2210 : // like computed function pointers or strange arguments at call sites.
2211 : // TODO: Try to unify this and the normal call bits for ARM, then try to unify
2212 : // with X86.
2213 25 : bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
2214 25 : CallingConv::ID CC = TLI.getLibcallCallingConv(Call);
2215 :
2216 : // Handle *simple* calls for now.
2217 25 : Type *RetTy = I->getType();
2218 25 : MVT RetVT;
2219 25 : if (RetTy->isVoidTy())
2220 0 : RetVT = MVT::isVoid;
2221 25 : else if (!isTypeLegal(RetTy, RetVT))
2222 : return false;
2223 :
2224 : // Can't handle non-double multi-reg retvals.
2225 25 : if (RetVT != MVT::isVoid && RetVT != MVT::i32) {
2226 : SmallVector<CCValAssign, 16> RVLocs;
2227 0 : CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
2228 0 : CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, false));
2229 0 : if (RVLocs.size() >= 2 && RetVT != MVT::f64)
2230 0 : return false;
2231 : }
2232 :
2233 : // Set up the argument vectors.
2234 : SmallVector<Value*, 8> Args;
2235 : SmallVector<unsigned, 8> ArgRegs;
2236 : SmallVector<MVT, 8> ArgVTs;
2237 : SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
2238 25 : Args.reserve(I->getNumOperands());
2239 25 : ArgRegs.reserve(I->getNumOperands());
2240 25 : ArgVTs.reserve(I->getNumOperands());
2241 25 : ArgFlags.reserve(I->getNumOperands());
2242 100 : for (Value *Op : I->operands()) {
2243 50 : unsigned Arg = getRegForValue(Op);
2244 50 : if (Arg == 0) return false;
2245 :
2246 50 : Type *ArgTy = Op->getType();
2247 50 : MVT ArgVT;
2248 50 : if (!isTypeLegal(ArgTy, ArgVT)) return false;
2249 :
2250 : ISD::ArgFlagsTy Flags;
2251 50 : unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
2252 : Flags.setOrigAlign(OriginalAlignment);
2253 :
2254 50 : Args.push_back(Op);
2255 50 : ArgRegs.push_back(Arg);
2256 50 : ArgVTs.push_back(ArgVT);
2257 50 : ArgFlags.push_back(Flags);
2258 : }
2259 :
2260 : // Handle the arguments now that we've gotten them.
2261 : SmallVector<unsigned, 4> RegArgs;
2262 : unsigned NumBytes;
2263 25 : if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
2264 : RegArgs, CC, NumBytes, false))
2265 : return false;
2266 :
2267 : unsigned CalleeReg = 0;
2268 25 : if (Subtarget->genLongCalls()) {
2269 6 : CalleeReg = getLibcallReg(TLI.getLibcallName(Call));
2270 3 : if (CalleeReg == 0) return false;
2271 : }
2272 :
2273 : // Issue the call.
2274 25 : unsigned CallOpc = ARMSelectCallOp(Subtarget->genLongCalls());
2275 25 : MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
2276 50 : DbgLoc, TII.get(CallOpc));
2277 : // BL / BLX don't take a predicate, but tBL / tBLX do.
2278 25 : if (isThumb2)
2279 3 : MIB.add(predOps(ARMCC::AL));
2280 25 : if (Subtarget->genLongCalls())
2281 3 : MIB.addReg(CalleeReg);
2282 : else
2283 22 : MIB.addExternalSymbol(TLI.getLibcallName(Call));
2284 :
2285 : // Add implicit physical register uses to the call.
2286 75 : for (unsigned R : RegArgs)
2287 50 : MIB.addReg(R, RegState::Implicit);
2288 :
2289 : // Add a register mask with the call-preserved registers.
2290 : // Proper defs for return values will be added by setPhysRegsDeadExcept().
2291 25 : MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
2292 :
2293 : // Finish off the call including any return values.
2294 : SmallVector<unsigned, 4> UsedRegs;
2295 25 : if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, false)) return false;
2296 :
2297 : // Set all unused physreg defs as dead.
2298 50 : static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
2299 :
2300 25 : return true;
2301 : }
2302 :
2303 616 : bool ARMFastISel::SelectCall(const Instruction *I,
2304 : const char *IntrMemName = nullptr) {
2305 : const CallInst *CI = cast<CallInst>(I);
2306 : const Value *Callee = CI->getCalledValue();
2307 :
2308 : // Can't handle inline asm.
2309 616 : if (isa<InlineAsm>(Callee)) return false;
2310 :
2311 : // Allow SelectionDAG isel to handle tail calls.
2312 606 : if (CI->isTailCall()) return false;
2313 :
2314 : // Check the calling convention.
2315 : ImmutableCallSite CS(CI);
2316 : CallingConv::ID CC = CS.getCallingConv();
2317 :
2318 : // TODO: Avoid some calling conventions?
2319 :
2320 : FunctionType *FTy = CS.getFunctionType();
2321 : bool isVarArg = FTy->isVarArg();
2322 :
2323 : // Handle *simple* calls for now.
2324 515 : Type *RetTy = I->getType();
2325 515 : MVT RetVT;
2326 515 : if (RetTy->isVoidTy())
2327 297 : RetVT = MVT::isVoid;
2328 270 : else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
2329 234 : RetVT != MVT::i8 && RetVT != MVT::i1)
2330 : return false;
2331 :
2332 : // Can't handle non-double multi-reg retvals.
2333 211 : if (RetVT != MVT::isVoid && RetVT != MVT::i1 && RetVT != MVT::i8 &&
2334 674 : RetVT != MVT::i16 && RetVT != MVT::i32) {
2335 : SmallVector<CCValAssign, 16> RVLocs;
2336 3 : CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context);
2337 3 : CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));
2338 3 : if (RVLocs.size() >= 2 && RetVT != MVT::f64)
2339 3 : return false;
2340 : }
2341 :
2342 : // Set up the argument vectors.
2343 : SmallVector<Value*, 8> Args;
2344 : SmallVector<unsigned, 8> ArgRegs;
2345 : SmallVector<MVT, 8> ArgVTs;
2346 : SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
2347 505 : unsigned arg_size = CS.arg_size();
2348 505 : Args.reserve(arg_size);
2349 : ArgRegs.reserve(arg_size);
2350 : ArgVTs.reserve(arg_size);
2351 : ArgFlags.reserve(arg_size);
2352 1762 : for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
2353 1762 : i != e; ++i) {
2354 : // If we're lowering a memory intrinsic instead of a regular call, skip the
2355 : // last argument, which shouldn't be passed to the underlying function.
2356 1339 : if (IntrMemName && e - i <= 1)
2357 : break;
2358 :
2359 : ISD::ArgFlagsTy Flags;
2360 1321 : unsigned ArgIdx = i - CS.arg_begin();
2361 1321 : if (CS.paramHasAttr(ArgIdx, Attribute::SExt))
2362 : Flags.setSExt();
2363 1321 : if (CS.paramHasAttr(ArgIdx, Attribute::ZExt))
2364 : Flags.setZExt();
2365 :
2366 : // FIXME: Only handle *easy* calls for now.
2367 2642 : if (CS.paramHasAttr(ArgIdx, Attribute::InReg) ||
2368 2598 : CS.paramHasAttr(ArgIdx, Attribute::StructRet) ||
2369 2548 : CS.paramHasAttr(ArgIdx, Attribute::SwiftSelf) ||
2370 2541 : CS.paramHasAttr(ArgIdx, Attribute::SwiftError) ||
2371 3861 : CS.paramHasAttr(ArgIdx, Attribute::Nest) ||
2372 1270 : CS.paramHasAttr(ArgIdx, Attribute::ByVal))
2373 64 : return false;
2374 :
2375 1270 : Type *ArgTy = (*i)->getType();
2376 1270 : MVT ArgVT;
2377 1270 : if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8 &&
2378 : ArgVT != MVT::i1)
2379 : return false;
2380 :
2381 1257 : unsigned Arg = getRegForValue(*i);
2382 1257 : if (Arg == 0)
2383 : return false;
2384 :
2385 1257 : unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
2386 : Flags.setOrigAlign(OriginalAlignment);
2387 :
2388 1257 : Args.push_back(*i);
2389 1257 : ArgRegs.push_back(Arg);
2390 1257 : ArgVTs.push_back(ArgVT);
2391 1257 : ArgFlags.push_back(Flags);
2392 : }
2393 :
2394 : // Handle the arguments now that we've gotten them.
2395 : SmallVector<unsigned, 4> RegArgs;
2396 : unsigned NumBytes;
2397 441 : if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
2398 : RegArgs, CC, NumBytes, isVarArg))
2399 : return false;
2400 :
2401 : bool UseReg = false;
2402 : const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
2403 421 : if (!GV || Subtarget->genLongCalls()) UseReg = true;
2404 :
2405 : unsigned CalleeReg = 0;
2406 : if (UseReg) {
2407 79 : if (IntrMemName)
2408 9 : CalleeReg = getLibcallReg(IntrMemName);
2409 : else
2410 70 : CalleeReg = getRegForValue(Callee);
2411 :
2412 79 : if (CalleeReg == 0) return false;
2413 : }
2414 :
2415 : // Issue the call.
2416 : unsigned CallOpc = ARMSelectCallOp(UseReg);
2417 435 : MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
2418 870 : DbgLoc, TII.get(CallOpc));
2419 :
2420 : // ARM calls don't take a predicate, but tBL / tBLX do.
2421 435 : if(isThumb2)
2422 435 : if (isThumb2)
2423 435 : if (UseReg)
2424 79 : MIB.addReg(CalleeReg);
2425 356 : else if (!IntrMemName)
2426 : MIB.addGlobalAddress(GV, 0, 0);
2427 : else
2428 : MIB.addExternalSymbol(IntrMemName, 0);
2429 :
2430 : // Add implicit physical register uses to the call.
2431 1105 : for (unsigned R : RegArgs)
2432 670 : MIB.addReg(R, RegState::Implicit);
2433 :
2434 : // Add a register mask with the call-preserved registers.
2435 : // Proper defs for return values will be added by setPhysRegsDeadExcept().
2436 435 : MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
2437 :
2438 : // Finish off the call including any return values.
2439 : SmallVector<unsigned, 4> UsedRegs;
2440 435 : if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, isVarArg))
2441 : return false;
2442 :
2443 : // Set all unused physreg defs as dead.
2444 870 : static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
2445 :
2446 435 : return true;
2447 : }
2448 :
2449 0 : bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) {
2450 0 : return Len <= 16;
2451 : }
2452 :
2453 26 : bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src,
2454 : uint64_t Len, unsigned Alignment) {
2455 : // Make sure we don't bloat code by inlining very large memcpy's.
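     : // The copy is expanded into the widest loads/stores the alignment allows;
     : // e.g. (a sketch) a 7-byte copy with 4-byte alignment becomes one i32, one
     : // i16, and one i8 load/store pair.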
2456 26 : if (!ARMIsMemCpySmall(Len))
2457 : return false;
2458 :
2459 154 : while (Len) {
2460 128 : MVT VT;
2461 128 : if (!Alignment || Alignment >= 4) {
2462 26 : if (Len >= 4)
2463 20 : VT = MVT::i32;
2464 6 : else if (Len >= 2)
2465 6 : VT = MVT::i16;
2466 : else {
2467 : assert(Len == 1 && "Expected a length of 1!");
2468 0 : VT = MVT::i8;
2469 : }
2470 : } else {
2471 : // Bound based on alignment.
2472 102 : if (Len >= 2 && Alignment == 2)
2473 36 : VT = MVT::i16;
2474 : else {
2475 66 : VT = MVT::i8;
2476 : }
2477 : }
2478 :
2479 : bool RV;
2480 : unsigned ResultReg;
2481 128 : RV = ARMEmitLoad(VT, ResultReg, Src);
2482 : assert(RV && "Should be able to handle this load.");
2483 128 : RV = ARMEmitStore(VT, ResultReg, Dest);
2484 : assert(RV && "Should be able to handle this store.");
2485 : (void)RV;
2486 :
2487 128 : unsigned Size = VT.getSizeInBits()/8;
2488 128 : Len -= Size;
2489 128 : Dest.Offset += Size;
2490 128 : Src.Offset += Size;
2491 : }
2492 :
2493 : return true;
2494 : }
2495 :
2496 69 : bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
2497 : // FIXME: Handle more intrinsics.
2498 69 : switch (I.getIntrinsicID()) {
2499 : default: return false;
2500 9 : case Intrinsic::frameaddress: {
2501 9 : MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
2502 : MFI.setFrameAddressIsTaken(true);
2503 :
2504 9 : unsigned LdrOpc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
2505 9 : const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass
2506 : : &ARM::GPRRegClass;
2507 :
2508 : const ARMBaseRegisterInfo *RegInfo =
2509 9 : static_cast<const ARMBaseRegisterInfo *>(Subtarget->getRegisterInfo());
2510 9 : unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
2511 : unsigned SrcReg = FramePtr;
2512 :
2513 : // Recursively load frame address
2514 : // ldr r0 [fp]
2515 : // ldr r0 [r0]
2516 : // ldr r0 [r0]
2517 : // ...
2518 : unsigned DestReg;
2519 18 : unsigned Depth = cast<ConstantInt>(I.getOperand(0))->getZExtValue();
2520 21 : while (Depth--) {
2521 12 : DestReg = createResultReg(RC);
2522 12 : AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2523 24 : TII.get(LdrOpc), DestReg)
2524 24 : .addReg(SrcReg).addImm(0));
2525 : SrcReg = DestReg;
2526 : }
2527 9 : updateValueMap(&I, SrcReg);
2528 9 : return true;
2529 : }
2530 : case Intrinsic::memcpy:
2531 : case Intrinsic::memmove: {
2532 : const MemTransferInst &MTI = cast<MemTransferInst>(I);
2533 : // Don't handle volatile.
2534 38 : if (MTI.isVolatile())
2535 : return false;
2536 :
2537 : // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
2538 : // we would emit dead code because we don't currently handle memmoves.
2539 : bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
2540 38 : if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
2541 : // Small memcpy's are common enough that we want to do them without a call
2542 : // if possible.
2543 : uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
2544 32 : if (ARMIsMemCpySmall(Len)) {
2545 : Address Dest, Src;
2546 52 : if (!ARMComputeAddress(MTI.getRawDest(), Dest) ||
2547 26 : !ARMComputeAddress(MTI.getRawSource(), Src))
2548 26 : return false;
2549 : unsigned Alignment = MinAlign(MTI.getDestAlignment(),
2550 26 : MTI.getSourceAlignment());
2551 26 : if (ARMTryEmitSmallMemCpy(Dest, Src, Len, Alignment))
2552 : return true;
2553 : }
2554 : }
2555 :
2556 12 : if (!MTI.getLength()->getType()->isIntegerTy(32))
2557 : return false;
2558 :
2559 24 : if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
2560 : return false;
2561 :
2562 12 : const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
2563 12 : return SelectCall(&I, IntrMemName);
2564 : }
2565 : case Intrinsic::memset: {
2566 : const MemSetInst &MSI = cast<MemSetInst>(I);
2567 : // Don't handle volatile.
2568 6 : if (MSI.isVolatile())
2569 : return false;
2570 :
2571 6 : if (!MSI.getLength()->getType()->isIntegerTy(32))
2572 : return false;
2573 :
2574 6 : if (MSI.getDestAddressSpace() > 255)
2575 : return false;
2576 :
2577 6 : return SelectCall(&I, "memset");
2578 : }
2579 2 : case Intrinsic::trap: {
2580 4 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(
2581 6 : Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP));
2582 2 : return true;
2583 : }
2584 : }
2585 : }
2586 :
2587 22 : bool ARMFastISel::SelectTrunc(const Instruction *I) {
2588 : // The high bits for a type smaller than the register size are assumed to be
2589 : // undefined.
2590 22 : Value *Op = I->getOperand(0);
2591 :
2592 : EVT SrcVT, DestVT;
2593 22 : SrcVT = TLI.getValueType(DL, Op->getType(), true);
2594 22 : DestVT = TLI.getValueType(DL, I->getType(), true);
2595 :
2596 : if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
2597 1 : return false;
2598 : if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
2599 0 : return false;
2600 :
2601 21 : unsigned SrcReg = getRegForValue(Op);
2602 21 : if (!SrcReg) return false;
2603 :
2604 : // Because the high bits are undefined, a truncate doesn't generate
2605 : // any code.
2606 21 : updateValueMap(I, SrcReg);
2607 21 : return true;
2608 : }
2609 :
2610 396 : unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
2611 : bool isZExt) {
2612 396 : if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
2613 : return 0;
2614 396 : if (SrcVT != MVT::i16 && SrcVT != MVT::i8 && SrcVT != MVT::i1)
2615 : return 0;
2616 :
2617 : // Table of which combinations can be emitted as a single instruction,
2618 : // and which will require two.
2619 : static const uint8_t isSingleInstrTbl[3][2][2][2] = {
2620 : // ARM Thumb
2621 : // !hasV6Ops hasV6Ops !hasV6Ops hasV6Ops
2622 : // ext: s z s z s z s z
2623 : /* 1 */ { { { 0, 1 }, { 0, 1 } }, { { 0, 0 }, { 0, 1 } } },
2624 : /* 8 */ { { { 0, 1 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } },
2625 : /* 16 */ { { { 0, 0 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } }
2626 : };
2627 :
2628 : // Target register constraints:
2629 : // - For ARM, the target can never be PC.
2630 : // - For 16-bit Thumb, it is restricted to the lower 8 registers.
2631 : // - For 32-bit Thumb, it is restricted to non-SP and non-PC.
2632 : static const TargetRegisterClass *RCTbl[2][2] = {
2633 : // Instructions: Two Single
2634 : /* ARM */ { &ARM::GPRnopcRegClass, &ARM::GPRnopcRegClass },
2635 : /* Thumb */ { &ARM::tGPRRegClass, &ARM::rGPRRegClass }
2636 : };
2637 :
2638 : // Table governing the instruction(s) to be emitted.
2639 : static const struct InstructionTable {
2640 : uint32_t Opc : 16;
2641 : uint32_t hasS : 1; // Some instructions have an S bit, always set it to 0.
2642 : uint32_t Shift : 7; // For shift operand addressing mode, used by MOVsi.
2643 : uint32_t Imm : 8; // All instructions have either a shift or a mask.
2644 : } IT[2][2][3][2] = {
2645 : { // Two instructions (first is left shift, second is in this table).
2646 : { // ARM Opc S Shift Imm
2647 : /* 1 bit sext */ { { ARM::MOVsi , 1, ARM_AM::asr , 31 },
2648 : /* 1 bit zext */ { ARM::MOVsi , 1, ARM_AM::lsr , 31 } },
2649 : /* 8 bit sext */ { { ARM::MOVsi , 1, ARM_AM::asr , 24 },
2650 : /* 8 bit zext */ { ARM::MOVsi , 1, ARM_AM::lsr , 24 } },
2651 : /* 16 bit sext */ { { ARM::MOVsi , 1, ARM_AM::asr , 16 },
2652 : /* 16 bit zext */ { ARM::MOVsi , 1, ARM_AM::lsr , 16 } }
2653 : },
2654 : { // Thumb Opc S Shift Imm
2655 : /* 1 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 31 },
2656 : /* 1 bit zext */ { ARM::tLSRri , 0, ARM_AM::no_shift, 31 } },
2657 : /* 8 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 24 },
2658 : /* 8 bit zext */ { ARM::tLSRri , 0, ARM_AM::no_shift, 24 } },
2659 : /* 16 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 16 },
2660 : /* 16 bit zext */ { ARM::tLSRri , 0, ARM_AM::no_shift, 16 } }
2661 : }
2662 : },
2663 : { // Single instruction.
2664 : { // ARM Opc S Shift Imm
2665 : /* 1 bit sext */ { { ARM::KILL , 0, ARM_AM::no_shift, 0 },
2666 : /* 1 bit zext */ { ARM::ANDri , 1, ARM_AM::no_shift, 1 } },
2667 : /* 8 bit sext */ { { ARM::SXTB , 0, ARM_AM::no_shift, 0 },
2668 : /* 8 bit zext */ { ARM::ANDri , 1, ARM_AM::no_shift, 255 } },
2669 : /* 16 bit sext */ { { ARM::SXTH , 0, ARM_AM::no_shift, 0 },
2670 : /* 16 bit zext */ { ARM::UXTH , 0, ARM_AM::no_shift, 0 } }
2671 : },
2672 : { // Thumb Opc S Shift Imm
2673 : /* 1 bit sext */ { { ARM::KILL , 0, ARM_AM::no_shift, 0 },
2674 : /* 1 bit zext */ { ARM::t2ANDri, 1, ARM_AM::no_shift, 1 } },
2675 : /* 8 bit sext */ { { ARM::t2SXTB , 0, ARM_AM::no_shift, 0 },
2676 : /* 8 bit zext */ { ARM::t2ANDri, 1, ARM_AM::no_shift, 255 } },
2677 : /* 16 bit sext */ { { ARM::t2SXTH , 0, ARM_AM::no_shift, 0 },
2678 : /* 16 bit zext */ { ARM::t2UXTH , 0, ARM_AM::no_shift, 0 } }
2679 : }
2680 : }
2681 : };
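     : // For example (a sketch): in ARM mode a zero extend from i8 is the single
     : // instruction "and r0, r0, #255", while a sign extend from i1 always takes
     : // two shifts, "mov r0, r0, lsl #31" followed by "mov r0, r0, asr #31".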
2682 :
2683 396 : unsigned SrcBits = SrcVT.getSizeInBits();
2684 : unsigned DestBits = DestVT.getSizeInBits();
2685 : (void) DestBits;
2686 : assert((SrcBits < DestBits) && "can only extend to larger types");
2687 : assert((DestBits == 32 || DestBits == 16 || DestBits == 8) &&
2688 : "other sizes unimplemented");
2689 : assert((SrcBits == 16 || SrcBits == 8 || SrcBits == 1) &&
2690 : "other sizes unimplemented");
2691 :
2692 396 : bool hasV6Ops = Subtarget->hasV6Ops();
2693 396 : unsigned Bitness = SrcBits / 8; // {1,8,16}=>{0,1,2}
2694 : assert((Bitness < 3) && "sanity-check table bounds");
2695 :
2696 396 : bool isSingleInstr = isSingleInstrTbl[Bitness][isThumb2][hasV6Ops][isZExt];
2697 396 : const TargetRegisterClass *RC = RCTbl[isThumb2][isSingleInstr];
2698 : const InstructionTable *ITP = &IT[isSingleInstr][isThumb2][Bitness][isZExt];
2699 396 : unsigned Opc = ITP->Opc;
2700 : assert(ARM::KILL != Opc && "Invalid table entry");
2701 396 : unsigned hasS = ITP->hasS;
2702 396 : ARM_AM::ShiftOpc Shift = (ARM_AM::ShiftOpc) ITP->Shift;
2703 : assert(((Shift == ARM_AM::no_shift) == (Opc != ARM::MOVsi)) &&
2704 : "only MOVsi has shift operand addressing mode");
2705 396 : unsigned Imm = ITP->Imm;
2706 :
2707 : // 16-bit Thumb instructions always set CPSR (unless they're in an IT block).
2708 396 : bool setsCPSR = &ARM::tGPRRegClass == RC;
2709 396 : unsigned LSLOpc = isThumb2 ? ARM::tLSLri : ARM::MOVsi;
2710 : unsigned ResultReg;
2711 : // MOVsi encodes shift and immediate in shift operand addressing mode.
2712 : // The following condition has the same value when emitting the two-
2713 : // instruction sequence: both instructions are shifts.
2714 : bool ImmIsSO = (Shift != ARM_AM::no_shift);
2715 :
2716 : // Either one or two instructions are emitted.
2717 : // They're always of the form:
2718 : // dst = in OP imm
2719 : // CPSR is set only by 16-bit Thumb instructions.
2720 : // Predicate, if any, is AL.
2721 : // S bit, if available, is always 0.
2722 : // When two are emitted, the first's result feeds the second instruction's
2723 : // input; that intermediate value is then dead.
2724 396 : unsigned NumInstrsEmitted = isSingleInstr ? 1 : 2;
2725 801 : for (unsigned Instr = 0; Instr != NumInstrsEmitted; ++Instr) {
2726 405 : ResultReg = createResultReg(RC);
2727 405 : bool isLsl = (0 == Instr) && !isSingleInstr;
2728 405 : unsigned Opcode = isLsl ? LSLOpc : Opc;
2729 405 : ARM_AM::ShiftOpc ShiftAM = isLsl ? ARM_AM::lsl : Shift;
2730 405 : unsigned ImmEnc = ImmIsSO ? ARM_AM::getSORegOpc(ShiftAM, Imm) : Imm;
2731 405 : bool isKill = 1 == Instr;
2732 : MachineInstrBuilder MIB = BuildMI(
2733 810 : *FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opcode), ResultReg);
2734 405 : if (setsCPSR)
2735 6 : MIB.addReg(ARM::CPSR, RegState::Define);
2736 810 : SrcReg = constrainOperandRegClass(TII.get(Opcode), SrcReg, 1 + setsCPSR);
2737 405 : MIB.addReg(SrcReg, isKill * RegState::Kill)
2738 405 : .addImm(ImmEnc)
2739 405 : .add(predOps(ARMCC::AL));
2740 405 : if (hasS)
2741 207 : MIB.add(condCodeOp());
2742 : // Second instruction consumes the first's result.
2743 : SrcReg = ResultReg;
2744 : }
2745 :
2746 : return ResultReg;
2747 : }
2748 :
2749 136 : bool ARMFastISel::SelectIntExt(const Instruction *I) {
2750 : // On ARM, in general, integer casts don't involve legal types; this code
2751 : // handles promotable integers.
2752 136 : Type *DestTy = I->getType();
2753 136 : Value *Src = I->getOperand(0);
2754 136 : Type *SrcTy = Src->getType();
2755 :
2756 : bool isZExt = isa<ZExtInst>(I);
2757 136 : unsigned SrcReg = getRegForValue(Src);
2758 136 : if (!SrcReg) return false;
2759 :
2760 : EVT SrcEVT, DestEVT;
2761 134 : SrcEVT = TLI.getValueType(DL, SrcTy, true);
2762 134 : DestEVT = TLI.getValueType(DL, DestTy, true);
2763 134 : if (!SrcEVT.isSimple()) return false;
2764 134 : if (!DestEVT.isSimple()) return false;
2765 :
2766 134 : MVT SrcVT = SrcEVT.getSimpleVT();
2767 134 : MVT DestVT = DestEVT.getSimpleVT();
2768 134 : unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
2769 134 : if (ResultReg == 0) return false;
2770 134 : updateValueMap(I, ResultReg);
2771 134 : return true;
2772 : }
2773 :
2774 12 : bool ARMFastISel::SelectShift(const Instruction *I,
2775 : ARM_AM::ShiftOpc ShiftTy) {
2776 : // Thumb2 mode is handled by the target-independent selector
2777 : // or by SelectionDAG ISel.
2778 12 : if (isThumb2)
2779 : return false;
2780 :
2781 : // Only handle i32 now.
2782 12 : EVT DestVT = TLI.getValueType(DL, I->getType(), true);
2783 12 : if (DestVT != MVT::i32)
2784 0 : return false;
2785 :
2786 : unsigned Opc = ARM::MOVsr;
2787 : unsigned ShiftImm;
2788 12 : Value *Src2Value = I->getOperand(1);
2789 : if (const ConstantInt *CI = dyn_cast<ConstantInt>(Src2Value)) {
2790 6 : ShiftImm = CI->getZExtValue();
2791 :
2792 : // Fall back to selection DAG isel if the shift amount
2793 : // is zero or greater than the width of the value type.
2794 6 : if (ShiftImm == 0 || ShiftImm >= 32)
2795 : return false;
2796 :
2797 : Opc = ARM::MOVsi;
2798 : }
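     : // At this point Opc is MOVsi for a constant shift amount (a sketch:
     : // "mov r0, r1, lsl #3") and MOVsr for a variable amount ("mov r0, r1, lsl r2").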
2799 :
2800 : Value *Src1Value = I->getOperand(0);
2801 12 : unsigned Reg1 = getRegForValue(Src1Value);
2802 12 : if (Reg1 == 0) return false;
2803 :
2804 : unsigned Reg2 = 0;
2805 12 : if (Opc == ARM::MOVsr) {
2806 6 : Reg2 = getRegForValue(Src2Value);
2807 6 : if (Reg2 == 0) return false;
2808 : }
2809 :
2810 12 : unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass);
2811 12 : if (ResultReg == 0) return false;
2812 :
2813 24 : MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2814 24 : TII.get(Opc), ResultReg)
2815 12 : .addReg(Reg1);
2816 :
2817 12 : if (Opc == ARM::MOVsi)
2818 6 : MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, ShiftImm));
2819 : else if (Opc == ARM::MOVsr) {
2820 6 : MIB.addReg(Reg2);
2821 6 : MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, 0));
2822 : }
2823 :
2824 12 : AddOptionalDefs(MIB);
2825 12 : updateValueMap(I, ResultReg);
2826 12 : return true;
2827 : }
2828 :
2829 : // TODO: SoftFP support.
2830 3017 : bool ARMFastISel::fastSelectInstruction(const Instruction *I) {
2831 3017 : switch (I->getOpcode()) {
2832 316 : case Instruction::Load:
2833 316 : return SelectLoad(I);
2834 588 : case Instruction::Store:
2835 588 : return SelectStore(I);
2836 79 : case Instruction::Br:
2837 79 : return SelectBranch(I);
2838 4 : case Instruction::IndirectBr:
2839 4 : return SelectIndirectBr(I);
2840 26 : case Instruction::ICmp:
2841 : case Instruction::FCmp:
2842 26 : return SelectCmp(I);
2843 1 : case Instruction::FPExt:
2844 1 : return SelectFPExt(I);
2845 1 : case Instruction::FPTrunc:
2846 1 : return SelectFPTrunc(I);
2847 19 : case Instruction::SIToFP:
2848 19 : return SelectIToFP(I, /*isSigned*/ true);
2849 18 : case Instruction::UIToFP:
2850 18 : return SelectIToFP(I, /*isSigned*/ false);
2851 6 : case Instruction::FPToSI:
2852 6 : return SelectFPToI(I, /*isSigned*/ true);
2853 7 : case Instruction::FPToUI:
2854 7 : return SelectFPToI(I, /*isSigned*/ false);
2855 22 : case Instruction::Add:
2856 22 : return SelectBinaryIntOp(I, ISD::ADD);
2857 6 : case Instruction::Or:
2858 6 : return SelectBinaryIntOp(I, ISD::OR);
2859 9 : case Instruction::Sub:
2860 9 : return SelectBinaryIntOp(I, ISD::SUB);
2861 2 : case Instruction::FAdd:
2862 2 : return SelectBinaryFPOp(I, ISD::FADD);
2863 0 : case Instruction::FSub:
2864 0 : return SelectBinaryFPOp(I, ISD::FSUB);
2865 0 : case Instruction::FMul:
2866 0 : return SelectBinaryFPOp(I, ISD::FMUL);
2867 4 : case Instruction::SDiv:
2868 4 : return SelectDiv(I, /*isSigned*/ true);
2869 10 : case Instruction::UDiv:
2870 10 : return SelectDiv(I, /*isSigned*/ false);
2871 31 : case Instruction::SRem:
2872 31 : return SelectRem(I, /*isSigned*/ true);
2873 5 : case Instruction::URem:
2874 5 : return SelectRem(I, /*isSigned*/ false);
2875 : case Instruction::Call:
2876 : if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
2877 69 : return SelectIntrinsicCall(*II);
2878 598 : return SelectCall(I);
2879 24 : case Instruction::Select:
2880 24 : return SelectSelect(I);
2881 890 : case Instruction::Ret:
2882 890 : return SelectRet(I);
2883 22 : case Instruction::Trunc:
2884 22 : return SelectTrunc(I);
2885 136 : case Instruction::ZExt:
2886 : case Instruction::SExt:
2887 136 : return SelectIntExt(I);
2888 4 : case Instruction::Shl:
2889 4 : return SelectShift(I, ARM_AM::lsl);
2890 4 : case Instruction::LShr:
2891 4 : return SelectShift(I, ARM_AM::lsr);
2892 4 : case Instruction::AShr:
2893 4 : return SelectShift(I, ARM_AM::asr);
2894 : default: break;
2895 : }
2896 : return false;
2897 : }
2898 :
2899 : // This table describes sign- and zero-extend instructions which can be
2900 : // folded into a preceding load. All of these extends have an immediate
2901 : // (sometimes a mask and sometimes a shift) that's applied after
2902 : // extension.
2903 : static const struct FoldableLoadExtendsStruct {
2904 : uint16_t Opc[2]; // ARM, Thumb.
2905 : uint8_t ExpectedImm;
2906 : uint8_t isZExt : 1;
2907 : uint8_t ExpectedVT : 7;
2908 : } FoldableLoadExtends[] = {
2909 : { { ARM::SXTH, ARM::t2SXTH }, 0, 0, MVT::i16 },
2910 : { { ARM::UXTH, ARM::t2UXTH }, 0, 1, MVT::i16 },
2911 : { { ARM::ANDri, ARM::t2ANDri }, 255, 1, MVT::i8 },
2912 : { { ARM::SXTB, ARM::t2SXTB }, 0, 0, MVT::i8 },
2913 : { { ARM::UXTB, ARM::t2UXTB }, 0, 1, MVT::i8 }
2914 : };
2915 :
2916 : /// The specified machine instr operand is a vreg, and that
2917 : /// vreg is being provided by the specified load instruction. If possible,
2918 : /// try to fold the load as an operand to the instruction, returning true if
2919 : /// successful.
2920 201 : bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
2921 : const LoadInst *LI) {
2922 : // Verify we have a legal type before going any further.
2923 201 : MVT VT;
2924 201 : if (!isLoadTypeLegal(LI->getType(), VT))
2925 : return false;
2926 :
2927 : // Combine load followed by zero- or sign-extend.
2928 : // ldrb r1, [r0] ldrb r1, [r0]
2929 : // uxtb r2, r1 =>
2930 : // mov r3, r2 mov r3, r1
2931 201 : if (MI->getNumOperands() < 3 || !MI->getOperand(2).isImm())
2932 : return false;
2933 107 : const uint64_t Imm = MI->getOperand(2).getImm();
2934 :
2935 : bool Found = false;
2936 : bool isZExt;
2937 642 : for (const FoldableLoadExtendsStruct &FLE : FoldableLoadExtends) {
2938 535 : if (FLE.Opc[isThumb2] == MI->getOpcode() &&
2939 535 : (uint64_t)FLE.ExpectedImm == Imm &&
2940 51 : MVT((MVT::SimpleValueType)FLE.ExpectedVT) == VT) {
2941 : Found = true;
2942 51 : isZExt = FLE.isZExt;
2943 : }
2944 : }
2945 107 : if (!Found) return false;
2946 :
2947 : // See if we can handle this address.
2948 : Address Addr;
2949 51 : if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;
2950 :
2951 51 : unsigned ResultReg = MI->getOperand(0).getReg();
2952 102 : if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
2953 : return false;
2954 51 : MI->eraseFromParent();
2955 51 : return true;
2956 : }
2957 :
2958 0 : unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
2959 : unsigned Align, MVT VT) {
2960 0 : bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
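     : // Materialize a global under ELF PIC: load a PC-relative offset from the
     : // constant pool, add the PC (tPICADD/PICADD, or PICLDR when indirecting
     : // through the GOT), and for Thumb GOT_PREL accesses emit one more load to
     : // fetch the actual address from the GOT entry.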
2961 :
2962 0 : LLVMContext *Context = &MF->getFunction().getContext();
2963 0 : unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2964 0 : unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
2965 0 : ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
2966 : GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj,
2967 : UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
2968 : /*AddCurrentAddress=*/UseGOT_PREL);
2969 :
2970 : unsigned ConstAlign =
2971 0 : MF->getDataLayout().getPrefTypeAlignment(Type::getInt32PtrTy(*Context));
2972 0 : unsigned Idx = MF->getConstantPool()->getConstantPoolIndex(CPV, ConstAlign);
2973 :
2974 0 : unsigned TempReg = MF->getRegInfo().createVirtualRegister(&ARM::rGPRRegClass);
2975 0 : unsigned Opc = isThumb2 ? ARM::t2LDRpci : ARM::LDRcp;
2976 : MachineInstrBuilder MIB =
2977 0 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), TempReg)
2978 0 : .addConstantPoolIndex(Idx);
2979 0 : if (Opc == ARM::LDRcp)
2980 : MIB.addImm(0);
2981 0 : MIB.add(predOps(ARMCC::AL));
2982 :
2983 : // Fix the address by adding pc.
2984 0 : unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
2985 0 : Opc = Subtarget->isThumb() ? ARM::tPICADD : UseGOT_PREL ? ARM::PICLDR
2986 : : ARM::PICADD;
2987 0 : DestReg = constrainOperandRegClass(TII.get(Opc), DestReg, 0);
2988 0 : MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
2989 0 : .addReg(TempReg)
2990 0 : .addImm(ARMPCLabelIndex);
2991 0 : if (!Subtarget->isThumb())
2992 0 : MIB.add(predOps(ARMCC::AL));
2993 :
2994 0 : if (UseGOT_PREL && Subtarget->isThumb()) {
2995 0 : unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
2996 0 : MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2997 0 : TII.get(ARM::t2LDRi12), NewDestReg)
2998 0 : .addReg(DestReg)
2999 : .addImm(0);
3000 : DestReg = NewDestReg;
3001 0 : AddOptionalDefs(MIB);
3002 : }
3003 0 : return DestReg;
3004 : }
3005 :
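/// Fast path for lowering incoming formal arguments: only non-vararg
/// functions under a supported calling convention whose (at most four)
/// arguments are plain i8/i16/i32 values passed in r0-r3 are handled here;
/// anything else returns false and falls back to the default lowering.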
3006 899 : bool ARMFastISel::fastLowerArguments() {
3007 899 : if (!FuncInfo.CanLowerReturn)
3008 : return false;
3009 :
3010 899 : const Function *F = FuncInfo.Fn;
3011 899 : if (F->isVarArg())
3012 : return false;
3013 :
3014 : CallingConv::ID CC = F->getCallingConv();
3015 898 : switch (CC) {
3016 : default:
3017 : return false;
3018 : case CallingConv::Fast:
3019 : case CallingConv::C:
3020 : case CallingConv::ARM_AAPCS_VFP:
3021 : case CallingConv::ARM_AAPCS:
3022 : case CallingConv::ARM_APCS:
3023 : case CallingConv::Swift:
3024 : break;
3025 : }
3026 :
3027 : // Only handle simple cases, i.e. up to 4 i8/i16/i32 scalar arguments,
3028 : // which are passed in r0 - r3.
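// For example, 'int f(int a, int b)' can be handled here, whereas a fifth
// argument, an i64/float/struct parameter, or an sret/byval/inreg argument
// forces a bail-out to the default argument lowering.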
3029 1499 : for (const Argument &Arg : F->args()) {
3030 822 : if (Arg.getArgNo() >= 4)
3031 211 : return false;
3032 :
3033 1630 : if (Arg.hasAttribute(Attribute::InReg) ||
3034 1625 : Arg.hasAttribute(Attribute::StructRet) ||
3035 1610 : Arg.hasAttribute(Attribute::SwiftSelf) ||
3036 2408 : Arg.hasAttribute(Attribute::SwiftError) ||
3037 793 : Arg.hasAttribute(Attribute::ByVal))
3038 22 : return false;
3039 :
3040 793 : Type *ArgTy = Arg.getType();
3041 793 : if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
3042 : return false;
3043 :
3044 783 : EVT ArgVT = TLI.getValueType(DL, ArgTy);
3045 783 : if (!ArgVT.isSimple()) return false;
3046 783 : switch (ArgVT.getSimpleVT().SimpleTy) {
3047 : case MVT::i8:
3048 : case MVT::i16:
3049 : case MVT::i32:
3050 : break;
3051 : default:
3052 : return false;
3053 : }
3054 : }
3055 :
3056 : static const MCPhysReg GPRArgRegs[] = {
3057 : ARM::R0, ARM::R1, ARM::R2, ARM::R3
3058 : };
3059 :
3060 : const TargetRegisterClass *RC = &ARM::rGPRRegClass;
3061 1216 : for (const Argument &Arg : F->args()) {
3062 539 : unsigned ArgNo = Arg.getArgNo();
3063 539 : unsigned SrcReg = GPRArgRegs[ArgNo];
3064 539 : unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3065 : // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3066 : // Without this, EmitLiveInCopies may eliminate the livein if its only
3067 : // use is a bitcast (which isn't turned into an instruction).
3068 539 : unsigned ResultReg = createResultReg(RC);
3069 1078 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3070 539 : TII.get(TargetOpcode::COPY),
3071 1078 : ResultReg).addReg(DstReg, getKillRegState(true));
3072 539 : updateValueMap(&Arg, ResultReg);
3073 : }
3074 :
3075 : return true;
3076 : }
3077 :
3078 : namespace llvm {
3079 :
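/// Factory hook used by the common code generator: return an ARMFastISel
/// instance when the subtarget has opted in to FastISel, and null otherwise
/// so that instruction selection falls back to SelectionDAG.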
3080 1221 : FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
3081 : const TargetLibraryInfo *libInfo) {
3082 1221 : if (funcInfo.MF->getSubtarget<ARMSubtarget>().useFastISel())
3083 901 : return new ARMFastISel(funcInfo, libInfo);
3084 :
3085 : return nullptr;
3086 : }
3087 :
3088 : } // end namespace llvm