Line data Source code
1 : //===- AArch64FastISel.cpp - AArch64 FastISel implementation --------------===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : // This file defines the AArch64-specific support for the FastISel class. Some
11 : // of the target-specific code is generated by tablegen in the file
12 : // AArch64GenFastISel.inc, which is #included here.
13 : //
14 : //===----------------------------------------------------------------------===//
15 :
16 : #include "AArch64.h"
17 : #include "AArch64CallingConvention.h"
18 : #include "AArch64RegisterInfo.h"
19 : #include "AArch64Subtarget.h"
20 : #include "MCTargetDesc/AArch64AddressingModes.h"
21 : #include "Utils/AArch64BaseInfo.h"
22 : #include "llvm/ADT/APFloat.h"
23 : #include "llvm/ADT/APInt.h"
24 : #include "llvm/ADT/DenseMap.h"
25 : #include "llvm/ADT/SmallVector.h"
26 : #include "llvm/Analysis/BranchProbabilityInfo.h"
27 : #include "llvm/CodeGen/CallingConvLower.h"
28 : #include "llvm/CodeGen/FastISel.h"
29 : #include "llvm/CodeGen/FunctionLoweringInfo.h"
30 : #include "llvm/CodeGen/ISDOpcodes.h"
31 : #include "llvm/CodeGen/MachineBasicBlock.h"
32 : #include "llvm/CodeGen/MachineConstantPool.h"
33 : #include "llvm/CodeGen/MachineFrameInfo.h"
34 : #include "llvm/CodeGen/MachineInstr.h"
35 : #include "llvm/CodeGen/MachineInstrBuilder.h"
36 : #include "llvm/CodeGen/MachineMemOperand.h"
37 : #include "llvm/CodeGen/MachineRegisterInfo.h"
38 : #include "llvm/CodeGen/RuntimeLibcalls.h"
39 : #include "llvm/CodeGen/ValueTypes.h"
40 : #include "llvm/IR/Argument.h"
41 : #include "llvm/IR/Attributes.h"
42 : #include "llvm/IR/BasicBlock.h"
43 : #include "llvm/IR/CallingConv.h"
44 : #include "llvm/IR/Constant.h"
45 : #include "llvm/IR/Constants.h"
46 : #include "llvm/IR/DataLayout.h"
47 : #include "llvm/IR/DerivedTypes.h"
48 : #include "llvm/IR/Function.h"
49 : #include "llvm/IR/GetElementPtrTypeIterator.h"
50 : #include "llvm/IR/GlobalValue.h"
51 : #include "llvm/IR/InstrTypes.h"
52 : #include "llvm/IR/Instruction.h"
53 : #include "llvm/IR/Instructions.h"
54 : #include "llvm/IR/IntrinsicInst.h"
55 : #include "llvm/IR/Intrinsics.h"
56 : #include "llvm/IR/Operator.h"
57 : #include "llvm/IR/Type.h"
58 : #include "llvm/IR/User.h"
59 : #include "llvm/IR/Value.h"
60 : #include "llvm/MC/MCInstrDesc.h"
61 : #include "llvm/MC/MCRegisterInfo.h"
62 : #include "llvm/MC/MCSymbol.h"
63 : #include "llvm/Support/AtomicOrdering.h"
64 : #include "llvm/Support/Casting.h"
65 : #include "llvm/Support/CodeGen.h"
66 : #include "llvm/Support/Compiler.h"
67 : #include "llvm/Support/ErrorHandling.h"
68 : #include "llvm/Support/MachineValueType.h"
69 : #include "llvm/Support/MathExtras.h"
70 : #include <algorithm>
71 : #include <cassert>
72 : #include <cstdint>
73 : #include <iterator>
74 : #include <utility>
75 :
76 : using namespace llvm;
77 :
78 : namespace {
79 :
80 : class AArch64FastISel final : public FastISel {
81 : class Address {
82 : public:
83 : using BaseKind = enum {
84 : RegBase,
85 : FrameIndexBase
86 : };
87 :
88 : private:
89 : BaseKind Kind = RegBase;
90 : AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
91 : union {
92 : unsigned Reg;
93 : int FI;
94 : } Base;
95 : unsigned OffsetReg = 0;
96 : unsigned Shift = 0;
97 : int64_t Offset = 0;
98 : const GlobalValue *GV = nullptr;
99 :
100 : public:
101 13 : Address() { Base.Reg = 0; }
102 :
103 172 : void setKind(BaseKind K) { Kind = K; }
104 : BaseKind getKind() const { return Kind; }
105 37 : void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
106 0 : AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
107 0 : bool isRegBase() const { return Kind == RegBase; }
108 0 : bool isFIBase() const { return Kind == FrameIndexBase; }
109 :
110 0 : void setReg(unsigned Reg) {
111 : assert(isRegBase() && "Invalid base register access!");
112 3 : Base.Reg = Reg;
113 0 : }
114 :
115 0 : unsigned getReg() const {
116 : assert(isRegBase() && "Invalid base register access!");
117 0 : return Base.Reg;
118 : }
119 :
120 0 : void setOffsetReg(unsigned Reg) {
121 525 : OffsetReg = Reg;
122 0 : }
123 :
124 0 : unsigned getOffsetReg() const {
125 0 : return OffsetReg;
126 : }
127 :
128 0 : void setFI(unsigned FI) {
129 : assert(isFIBase() && "Invalid base frame index access!");
130 169 : Base.FI = FI;
131 0 : }
132 :
133 0 : unsigned getFI() const {
134 : assert(isFIBase() && "Invalid base frame index access!");
135 174 : return Base.FI;
136 : }
137 :
138 8 : void setOffset(int64_t O) { Offset = O; }
139 0 : int64_t getOffset() { return Offset; }
140 43 : void setShift(unsigned S) { Shift = S; }
141 0 : unsigned getShift() { return Shift; }
142 :
143 84 : void setGlobalValue(const GlobalValue *G) { GV = G; }
144 0 : const GlobalValue *getGlobalValue() { return GV; }
145 : };
146 :
147 : /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
148 : /// make the right decision when generating code for different targets.
149 : const AArch64Subtarget *Subtarget;
150 : LLVMContext *Context;
151 :
152 : bool fastLowerArguments() override;
153 : bool fastLowerCall(CallLoweringInfo &CLI) override;
154 : bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
155 :
156 : private:
157 : // Selection routines.
158 : bool selectAddSub(const Instruction *I);
159 : bool selectLogicalOp(const Instruction *I);
160 : bool selectLoad(const Instruction *I);
161 : bool selectStore(const Instruction *I);
162 : bool selectBranch(const Instruction *I);
163 : bool selectIndirectBr(const Instruction *I);
164 : bool selectCmp(const Instruction *I);
165 : bool selectSelect(const Instruction *I);
166 : bool selectFPExt(const Instruction *I);
167 : bool selectFPTrunc(const Instruction *I);
168 : bool selectFPToInt(const Instruction *I, bool Signed);
169 : bool selectIntToFP(const Instruction *I, bool Signed);
170 : bool selectRem(const Instruction *I, unsigned ISDOpcode);
171 : bool selectRet(const Instruction *I);
172 : bool selectTrunc(const Instruction *I);
173 : bool selectIntExt(const Instruction *I);
174 : bool selectMul(const Instruction *I);
175 : bool selectShift(const Instruction *I);
176 : bool selectBitCast(const Instruction *I);
177 : bool selectFRem(const Instruction *I);
178 : bool selectSDiv(const Instruction *I);
179 : bool selectGetElementPtr(const Instruction *I);
180 : bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
181 :
182 : // Utility helper routines.
183 : bool isTypeLegal(Type *Ty, MVT &VT);
184 : bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
185 : bool isValueAvailable(const Value *V) const;
186 : bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
187 : bool computeCallAddress(const Value *V, Address &Addr);
188 : bool simplifyAddress(Address &Addr, MVT VT);
189 : void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
190 : MachineMemOperand::Flags Flags,
191 : unsigned ScaleFactor, MachineMemOperand *MMO);
192 : bool isMemCpySmall(uint64_t Len, unsigned Alignment);
193 : bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
194 : unsigned Alignment);
195 : bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
196 : const Value *Cond);
197 : bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
198 : bool optimizeSelect(const SelectInst *SI);
199 : std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
200 :
201 : // Emit helper routines.
202 : unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
203 : const Value *RHS, bool SetFlags = false,
204 : bool WantResult = true, bool IsZExt = false);
205 : unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
206 : bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
207 : bool SetFlags = false, bool WantResult = true);
208 : unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
209 : bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
210 : bool WantResult = true);
211 : unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
212 : bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
213 : AArch64_AM::ShiftExtendType ShiftType,
214 : uint64_t ShiftImm, bool SetFlags = false,
215 : bool WantResult = true);
216 : unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
217 : bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
218 : AArch64_AM::ShiftExtendType ExtType,
219 : uint64_t ShiftImm, bool SetFlags = false,
220 : bool WantResult = true);
221 :
222 : // Emit functions.
223 : bool emitCompareAndBranch(const BranchInst *BI);
224 : bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
225 : bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
226 : bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
227 : bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
228 : unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
229 : MachineMemOperand *MMO = nullptr);
230 : bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
231 : MachineMemOperand *MMO = nullptr);
232 : bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
233 : MachineMemOperand *MMO = nullptr);
234 : unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
235 : unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
236 : unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
237 : bool SetFlags = false, bool WantResult = true,
238 : bool IsZExt = false);
239 : unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
240 : unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
241 : bool SetFlags = false, bool WantResult = true,
242 : bool IsZExt = false);
243 : unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
244 : unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
245 : unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
246 : unsigned RHSReg, bool RHSIsKill,
247 : AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
248 : bool WantResult = true);
249 : unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
250 : const Value *RHS);
251 : unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
252 : bool LHSIsKill, uint64_t Imm);
253 : unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
254 : bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
255 : uint64_t ShiftImm);
256 : unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
257 : unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
258 : unsigned Op1, bool Op1IsKill);
259 : unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
260 : unsigned Op1, bool Op1IsKill);
261 : unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
262 : unsigned Op1, bool Op1IsKill);
263 : unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
264 : unsigned Op1Reg, bool Op1IsKill);
265 : unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
266 : uint64_t Imm, bool IsZExt = true);
267 : unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
268 : unsigned Op1Reg, bool Op1IsKill);
269 : unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
270 : uint64_t Imm, bool IsZExt = true);
271 : unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
272 : unsigned Op1Reg, bool Op1IsKill);
273 : unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
274 : uint64_t Imm, bool IsZExt = false);
275 :
276 : unsigned materializeInt(const ConstantInt *CI, MVT VT);
277 : unsigned materializeFP(const ConstantFP *CFP, MVT VT);
278 : unsigned materializeGV(const GlobalValue *GV);
279 :
280 : // Call handling routines.
281 : private:
282 : CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
283 : bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
284 : unsigned &NumBytes);
285 : bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
286 :
287 : public:
288 : // Backend specific FastISel code.
289 : unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
290 : unsigned fastMaterializeConstant(const Constant *C) override;
291 : unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
292 :
293 1222 : explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
294 : const TargetLibraryInfo *LibInfo)
295 1222 : : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
296 1222 : Subtarget =
297 1222 : &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
298 1222 : Context = &FuncInfo.Fn->getContext();
299 1222 : }
300 :
301 : bool fastSelectInstruction(const Instruction *I) override;
302 :
303 : #include "AArch64GenFastISel.inc"
304 : };
305 :
306 : } // end anonymous namespace
307 :
308 : #include "AArch64GenCallingConv.inc"
309 :
310 : /// Check if the sign-/zero-extend will be a noop.
311 82 : static bool isIntExtFree(const Instruction *I) {
312 : assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
313 : "Unexpected integer extend instruction.");
314 : assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
315 : "Unexpected value type.");
316 : bool IsZExt = isa<ZExtInst>(I);
317 :
318 82 : if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
319 0 : if (LI->hasOneUse())
320 : return true;
321 :
322 : if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
323 80 : if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
324 4 : return true;
325 :
326 : return false;
327 : }
328 :
329 : /// Determine the implicit scale factor that is applied by a memory
330 : /// operation for a given value type.
331 : static unsigned getImplicitScaleFactor(MVT VT) {
332 : switch (VT.SimpleTy) {
333 : default:
334 : return 0; // invalid
335 : case MVT::i1: // fall-through
336 : case MVT::i8:
337 : return 1;
338 : case MVT::i16:
339 : return 2;
340 : case MVT::i32: // fall-through
341 : case MVT::f32:
342 : return 4;
343 : case MVT::i64: // fall-through
344 : case MVT::f64:
345 : return 8;
346 : }
347 : }
348 :
349 0 : CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
350 183 : if (CC == CallingConv::WebKit_JS)
351 0 : return CC_AArch64_WebKit_JS;
352 176 : if (CC == CallingConv::GHC)
353 0 : return CC_AArch64_GHC;
354 176 : return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
355 : }
356 :
357 16 : unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
358 : assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
359 : "Alloca should always return a pointer.");
360 :
361 : // Don't handle dynamic allocas.
362 16 : if (!FuncInfo.StaticAllocaMap.count(AI))
363 0 : return 0;
364 :
365 : DenseMap<const AllocaInst *, int>::iterator SI =
366 16 : FuncInfo.StaticAllocaMap.find(AI);
367 :
368 32 : if (SI != FuncInfo.StaticAllocaMap.end()) {
369 16 : unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
370 32 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
371 32 : ResultReg)
372 16 : .addFrameIndex(SI->second)
373 : .addImm(0)
374 : .addImm(0);
375 16 : return ResultReg;
376 : }
377 :
378 : return 0;
379 : }
380 :
381 350 : unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
382 350 : if (VT > MVT::i64)
383 : return 0;
384 :
385 350 : if (!CI->isZero())
386 231 : return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
387 :
388 : // Create a copy from the zero register to materialize a "0" value.
389 119 : const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
390 : : &AArch64::GPR32RegClass;
391 119 : unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
392 119 : unsigned ResultReg = createResultReg(RC);
393 357 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
394 238 : ResultReg).addReg(ZeroReg, getKillRegState(true));
395 119 : return ResultReg;
396 : }
397 :
398 19 : unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
399 : // Positive zero (+0.0) has to be materialized with a fmov from the zero
400 : // register, because the immediate version of fmov cannot encode zero.
401 19 : if (CFP->isNullValue())
402 2 : return fastMaterializeFloatZero(CFP);
403 :
404 17 : if (VT != MVT::f32 && VT != MVT::f64)
405 : return 0;
406 :
407 : const APFloat Val = CFP->getValueAPF();
408 : bool Is64Bit = (VT == MVT::f64);
409 : // This checks to see if we can use FMOV instructions to materialize
410 : // a constant, otherwise we have to materialize via the constant pool.
411 34 : if (TLI.isFPImmLegal(Val, VT)) {
412 : int Imm =
413 10 : Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
414 : assert((Imm != -1) && "Cannot encode floating-point constant.");
415 10 : unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
416 10 : return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
417 : }
418 :
419 : // For the MachO large code model materialize the FP constant in code.
420 14 : if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
421 4 : unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
422 4 : const TargetRegisterClass *RC = Is64Bit ?
423 : &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
424 :
425 4 : unsigned TmpReg = createResultReg(RC);
426 8 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
427 8 : .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
428 :
429 4 : unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
430 8 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
431 8 : TII.get(TargetOpcode::COPY), ResultReg)
432 4 : .addReg(TmpReg, getKillRegState(true));
433 :
434 4 : return ResultReg;
435 : }
436 :
437 : // Materialize via constant pool. MachineConstantPool wants an explicit
438 : // alignment.
439 3 : unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
440 3 : if (Align == 0)
441 0 : Align = DL.getTypeAllocSize(CFP->getType());
442 :
443 3 : unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
444 3 : unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
445 9 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
446 6 : ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
447 :
448 3 : unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
449 3 : unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
450 6 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
451 3 : .addReg(ADRPReg)
452 : .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
453 3 : return ResultReg;
454 : }
455 :
456 128 : unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
457 : // We can't handle thread-local variables quickly yet.
458 128 : if (GV->isThreadLocal())
459 : return 0;
460 :
461 : // MachO still uses GOT for large code-model accesses, but ELF requires
462 : // movz/movk sequences, which FastISel doesn't handle yet.
463 187 : if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
464 : return 0;
465 :
466 91 : unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
467 :
468 182 : EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
469 91 : if (!DestEVT.isSimple())
470 : return 0;
471 :
472 91 : unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
473 : unsigned ResultReg;
474 :
475 91 : if (OpFlags & AArch64II::MO_GOT) {
476 : // ADRP + LDRX
477 135 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
478 90 : ADRPReg)
479 45 : .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
480 :
481 45 : ResultReg = createResultReg(&AArch64::GPR64RegClass);
482 45 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
483 90 : ResultReg)
484 45 : .addReg(ADRPReg)
485 : .addGlobalAddress(GV, 0,
486 45 : AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags);
487 : } else {
488 : // ADRP + ADDX
489 138 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
490 92 : ADRPReg)
491 46 : .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
492 :
493 46 : ResultReg = createResultReg(&AArch64::GPR64spRegClass);
494 46 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
495 92 : ResultReg)
496 46 : .addReg(ADRPReg)
497 : .addGlobalAddress(GV, 0,
498 46 : AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
499 : .addImm(0);
500 : }
501 : return ResultReg;
502 : }
503 :
504 537 : unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
505 537 : EVT CEVT = TLI.getValueType(DL, C->getType(), true);
506 :
507 : // Only handle simple types.
508 537 : if (!CEVT.isSimple())
509 : return 0;
510 537 : MVT VT = CEVT.getSimpleVT();
511 :
512 : if (const auto *CI = dyn_cast<ConstantInt>(C))
513 344 : return materializeInt(CI, VT);
514 : else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
515 19 : return materializeFP(CFP, VT);
516 : else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
517 121 : return materializeGV(GV);
518 :
519 : return 0;
520 : }
521 :
522 2 : unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
523 : assert(CFP->isNullValue() &&
524 : "Floating-point constant is not a positive zero.");
525 2 : MVT VT;
526 2 : if (!isTypeLegal(CFP->getType(), VT))
527 : return 0;
528 :
529 2 : if (VT != MVT::f32 && VT != MVT::f64)
530 : return 0;
531 :
532 : bool Is64Bit = (VT == MVT::f64);
533 2 : unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
534 2 : unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
535 2 : return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
536 : }
537 :
538 : /// Check if the multiply is by a power-of-2 constant.
539 578 : static bool isMulPowOf2(const Value *I) {
540 : if (const auto *MI = dyn_cast<MulOperator>(I)) {
541 23 : if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
542 0 : if (C->getValue().isPowerOf2())
543 : return true;
544 : if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
545 21 : if (C->getValue().isPowerOf2())
546 19 : return true;
547 : }
548 : return false;
549 : }
550 :
551 : // Computes the address to get to an object.
552 1201 : bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
553 : {
554 : const User *U = nullptr;
555 : unsigned Opcode = Instruction::UserOp1;
556 : if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
557 : // Don't walk into other basic blocks unless the object is an alloca from
558 : // another block, otherwise it may not have a virtual register assigned.
559 762 : if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
560 593 : FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
561 : Opcode = I->getOpcode();
562 : U = I;
563 : }
564 : } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
565 : Opcode = C->getOpcode();
566 : U = C;
567 : }
568 :
569 1201 : if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
570 839 : if (Ty->getAddressSpace() > 255)
571 : // Fast instruction selection doesn't support the special
572 : // address spaces.
573 : return false;
574 :
575 1191 : switch (Opcode) {
576 : default:
577 : break;
578 : case Instruction::BitCast:
579 : // Look through bitcasts.
580 17 : return computeAddress(U->getOperand(0), Addr, Ty);
581 :
582 136 : case Instruction::IntToPtr:
583 : // Look past no-op inttoptrs.
584 544 : if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
585 : TLI.getPointerTy(DL))
586 136 : return computeAddress(U->getOperand(0), Addr, Ty);
587 : break;
588 :
589 6 : case Instruction::PtrToInt:
590 : // Look past no-op ptrtoints.
591 18 : if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
592 6 : return computeAddress(U->getOperand(0), Addr, Ty);
593 : break;
594 :
595 46 : case Instruction::GetElementPtr: {
596 46 : Address SavedAddr = Addr;
597 46 : uint64_t TmpOffset = Addr.getOffset();
598 :
599 : // Iterate through the GEP folding the constants into offsets where
600 : // we can.
601 126 : for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
602 206 : GTI != E; ++GTI) {
603 : const Value *Op = GTI.getOperand();
604 17 : if (StructType *STy = GTI.getStructTypeOrNull()) {
605 17 : const StructLayout *SL = DL.getStructLayout(STy);
606 17 : unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
607 17 : TmpOffset += SL->getElementOffset(Idx);
608 : } else {
609 64 : uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
610 : while (true) {
611 : if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
612 : // Constant-offset addressing.
613 63 : TmpOffset += CI->getSExtValue() * S;
614 63 : break;
615 : }
616 1 : if (canFoldAddIntoGEP(U, Op)) {
617 : // A compatible add with a constant operand. Fold the constant.
618 : ConstantInt *CI =
619 0 : cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
620 0 : TmpOffset += CI->getSExtValue() * S;
621 : // Iterate on the other operand.
622 : Op = cast<AddOperator>(Op)->getOperand(0);
623 : continue;
624 : }
625 : // Unsupported
626 1 : goto unsupported_gep;
627 0 : }
628 : }
629 : }
630 :
631 : // Try to grab the base operand now.
632 45 : Addr.setOffset(TmpOffset);
633 45 : if (computeAddress(U->getOperand(0), Addr, Ty))
634 40 : return true;
635 :
636 : // We failed, restore everything and try the other options.
637 5 : Addr = SavedAddr;
638 :
639 6 : unsupported_gep:
640 6 : break;
641 : }
642 : case Instruction::Alloca: {
643 : const AllocaInst *AI = cast<AllocaInst>(Obj);
644 : DenseMap<const AllocaInst *, int>::iterator SI =
645 169 : FuncInfo.StaticAllocaMap.find(AI);
646 338 : if (SI != FuncInfo.StaticAllocaMap.end()) {
647 : Addr.setKind(Address::FrameIndexBase);
648 169 : Addr.setFI(SI->second);
649 169 : return true;
650 : }
651 0 : break;
652 : }
653 : case Instruction::Add: {
654 : // Adds of constants are common and easy enough.
655 : const Value *LHS = U->getOperand(0);
656 : const Value *RHS = U->getOperand(1);
657 :
658 117 : if (isa<ConstantInt>(LHS))
659 : std::swap(LHS, RHS);
660 :
661 : if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
662 29 : Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
663 117 : return computeAddress(LHS, Addr, Ty);
664 : }
665 :
666 88 : Address Backup = Addr;
667 88 : if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
668 : return true;
669 0 : Addr = Backup;
670 :
671 0 : break;
672 : }
673 : case Instruction::Sub: {
674 : // Subs of constants are common and easy enough.
675 : const Value *LHS = U->getOperand(0);
676 : const Value *RHS = U->getOperand(1);
677 :
678 : if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
679 21 : Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
680 21 : return computeAddress(LHS, Addr, Ty);
681 : }
682 : break;
683 : }
684 26 : case Instruction::Shl: {
685 26 : if (Addr.getOffsetReg())
686 : break;
687 :
688 : const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
689 : if (!CI)
690 : break;
691 :
692 23 : unsigned Val = CI->getZExtValue();
693 23 : if (Val < 1 || Val > 3)
694 : break;
695 :
696 : uint64_t NumBytes = 0;
697 23 : if (Ty && Ty->isSized()) {
698 23 : uint64_t NumBits = DL.getTypeSizeInBits(Ty);
699 23 : NumBytes = NumBits / 8;
700 : if (!isPowerOf2_64(NumBits))
701 : NumBytes = 0;
702 : }
703 :
704 23 : if (NumBytes != (1ULL << Val))
705 : break;
706 :
707 : Addr.setShift(Val);
708 : Addr.setExtendType(AArch64_AM::LSL);
709 :
710 : const Value *Src = U->getOperand(0);
711 : if (const auto *I = dyn_cast<Instruction>(Src)) {
712 17 : if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
713 : // Fold the zext or sext when it won't become a noop.
714 : if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
715 9 : if (!isIntExtFree(ZE) &&
716 4 : ZE->getOperand(0)->getType()->isIntegerTy(32)) {
717 : Addr.setExtendType(AArch64_AM::UXTW);
718 : Src = ZE->getOperand(0);
719 : }
720 : } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
721 15 : if (!isIntExtFree(SE) &&
722 7 : SE->getOperand(0)->getType()->isIntegerTy(32)) {
723 : Addr.setExtendType(AArch64_AM::SXTW);
724 : Src = SE->getOperand(0);
725 : }
726 : }
727 : }
728 : }
729 :
730 : if (const auto *AI = dyn_cast<BinaryOperator>(Src))
731 5 : if (AI->getOpcode() == Instruction::And) {
732 : const Value *LHS = AI->getOperand(0);
733 : const Value *RHS = AI->getOperand(1);
734 :
735 : if (const auto *C = dyn_cast<ConstantInt>(LHS))
736 0 : if (C->getValue() == 0xffffffff)
737 : std::swap(LHS, RHS);
738 :
739 : if (const auto *C = dyn_cast<ConstantInt>(RHS))
740 3 : if (C->getValue() == 0xffffffff) {
741 : Addr.setExtendType(AArch64_AM::UXTW);
742 3 : unsigned Reg = getRegForValue(LHS);
743 3 : if (!Reg)
744 : return false;
745 3 : bool RegIsKill = hasTrivialKill(LHS);
746 6 : Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
747 : AArch64::sub_32);
748 : Addr.setOffsetReg(Reg);
749 3 : return true;
750 : }
751 : }
752 :
753 20 : unsigned Reg = getRegForValue(Src);
754 20 : if (!Reg)
755 : return false;
756 : Addr.setOffsetReg(Reg);
757 20 : return true;
758 : }
759 13 : case Instruction::Mul: {
760 13 : if (Addr.getOffsetReg())
761 : break;
762 :
763 13 : if (!isMulPowOf2(U))
764 : break;
765 :
766 : const Value *LHS = U->getOperand(0);
767 : const Value *RHS = U->getOperand(1);
768 :
769 : // Canonicalize power-of-2 value to the RHS.
770 : if (const auto *C = dyn_cast<ConstantInt>(LHS))
771 0 : if (C->getValue().isPowerOf2())
772 : std::swap(LHS, RHS);
773 :
774 : assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
775 : const auto *C = cast<ConstantInt>(RHS);
776 : unsigned Val = C->getValue().logBase2();
777 13 : if (Val < 1 || Val > 3)
778 : break;
779 :
780 : uint64_t NumBytes = 0;
781 13 : if (Ty && Ty->isSized()) {
782 13 : uint64_t NumBits = DL.getTypeSizeInBits(Ty);
783 13 : NumBytes = NumBits / 8;
784 : if (!isPowerOf2_64(NumBits))
785 : NumBytes = 0;
786 : }
787 :
788 13 : if (NumBytes != (1ULL << Val))
789 : break;
790 :
791 : Addr.setShift(Val);
792 : Addr.setExtendType(AArch64_AM::LSL);
793 :
794 : const Value *Src = LHS;
795 : if (const auto *I = dyn_cast<Instruction>(Src)) {
796 9 : if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
797 : // Fold the zext or sext when it won't become a noop.
798 : if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
799 7 : if (!isIntExtFree(ZE) &&
800 3 : ZE->getOperand(0)->getType()->isIntegerTy(32)) {
801 : Addr.setExtendType(AArch64_AM::UXTW);
802 : Src = ZE->getOperand(0);
803 : }
804 : } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
805 7 : if (!isIntExtFree(SE) &&
806 3 : SE->getOperand(0)->getType()->isIntegerTy(32)) {
807 : Addr.setExtendType(AArch64_AM::SXTW);
808 : Src = SE->getOperand(0);
809 : }
810 : }
811 : }
812 : }
813 :
814 13 : unsigned Reg = getRegForValue(Src);
815 13 : if (!Reg)
816 : return false;
817 : Addr.setOffsetReg(Reg);
818 13 : return true;
819 : }
820 3 : case Instruction::And: {
821 3 : if (Addr.getOffsetReg())
822 : break;
823 :
824 3 : if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
825 : break;
826 :
827 : const Value *LHS = U->getOperand(0);
828 : const Value *RHS = U->getOperand(1);
829 :
830 : if (const auto *C = dyn_cast<ConstantInt>(LHS))
831 0 : if (C->getValue() == 0xffffffff)
832 : std::swap(LHS, RHS);
833 :
834 : if (const auto *C = dyn_cast<ConstantInt>(RHS))
835 1 : if (C->getValue() == 0xffffffff) {
836 : Addr.setShift(0);
837 : Addr.setExtendType(AArch64_AM::LSL);
838 : Addr.setExtendType(AArch64_AM::UXTW);
839 :
840 1 : unsigned Reg = getRegForValue(LHS);
841 1 : if (!Reg)
842 : return false;
843 1 : bool RegIsKill = hasTrivialKill(LHS);
844 2 : Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
845 : AArch64::sub_32);
846 : Addr.setOffsetReg(Reg);
847 1 : return true;
848 : }
849 : break;
850 : }
851 20 : case Instruction::SExt:
852 : case Instruction::ZExt: {
853 20 : if (!Addr.getReg() || Addr.getOffsetReg())
854 : break;
855 :
856 : const Value *Src = nullptr;
857 : // Fold the zext or sext when it won't become a noop.
858 : if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
859 0 : if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
860 : Addr.setExtendType(AArch64_AM::UXTW);
861 : Src = ZE->getOperand(0);
862 : }
863 : } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
864 40 : if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
865 : Addr.setExtendType(AArch64_AM::SXTW);
866 : Src = SE->getOperand(0);
867 : }
868 : }
869 :
870 20 : if (!Src)
871 : break;
872 :
873 : Addr.setShift(0);
874 20 : unsigned Reg = getRegForValue(Src);
875 20 : if (!Reg)
876 : return false;
877 : Addr.setOffsetReg(Reg);
878 20 : return true;
879 : }
880 : } // end switch
881 :
882 628 : if (Addr.isRegBase() && !Addr.getReg()) {
883 594 : unsigned Reg = getRegForValue(Obj);
884 594 : if (!Reg)
885 : return false;
886 : Addr.setReg(Reg);
887 569 : return true;
888 : }
889 :
890 34 : if (!Addr.getOffsetReg()) {
891 34 : unsigned Reg = getRegForValue(Obj);
892 34 : if (!Reg)
893 : return false;
894 : Addr.setOffsetReg(Reg);
895 34 : return true;
896 : }
897 :
898 : return false;
899 : }
900 :
901 103 : bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
902 : const User *U = nullptr;
903 : unsigned Opcode = Instruction::UserOp1;
904 : bool InMBB = true;
905 :
906 : if (const auto *I = dyn_cast<Instruction>(V)) {
907 : Opcode = I->getOpcode();
908 : U = I;
909 14 : InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
910 : } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
911 : Opcode = C->getOpcode();
912 : U = C;
913 : }
914 :
915 117 : switch (Opcode) {
916 : default: break;
917 2 : case Instruction::BitCast:
918 : // Look past bitcasts if its operand is in the same BB.
919 2 : if (InMBB)
920 2 : return computeCallAddress(U->getOperand(0), Addr);
921 : break;
922 12 : case Instruction::IntToPtr:
923 : // Look past no-op inttoptrs if its operand is in the same BB.
924 12 : if (InMBB &&
925 48 : TLI.getValueType(DL, U->getOperand(0)->getType()) ==
926 : TLI.getPointerTy(DL))
927 12 : return computeCallAddress(U->getOperand(0), Addr);
928 : break;
929 0 : case Instruction::PtrToInt:
930 : // Look past no-op ptrtoints if its operand is in the same BB.
931 0 : if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
932 0 : return computeCallAddress(U->getOperand(0), Addr);
933 : break;
934 : }
935 :
936 : if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
937 : Addr.setGlobalValue(GV);
938 84 : return true;
939 : }
940 :
941 : // If all else fails, try to materialize the value in a register.
942 19 : if (!Addr.getGlobalValue()) {
943 19 : Addr.setReg(getRegForValue(V));
944 19 : return Addr.getReg() != 0;
945 : }
946 :
947 : return false;
948 : }
949 :
950 0 : bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
951 0 : EVT evt = TLI.getValueType(DL, Ty, true);
952 :
953 : // Only handle simple types.
954 0 : if (evt == MVT::Other || !evt.isSimple())
955 0 : return false;
956 0 : VT = evt.getSimpleVT();
957 :
958 : // This is a legal type, but it's not something we handle in fast-isel.
959 0 : if (VT == MVT::f128)
960 0 : return false;
961 :
962 : // Handle all other legal types, i.e. a register that will directly hold this
963 : // value.
964 0 : return TLI.isTypeLegal(VT);
965 : }
966 :
967 : /// Determine if the value type is supported by FastISel.
968 : ///
969 : /// FastISel for AArch64 can handle more value types than are legal. This adds
970 : /// simple value types such as i1, i8, and i16.
971 1931 : bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
972 1931 : if (Ty->isVectorTy() && !IsVectorAllowed)
973 : return false;
974 :
975 1925 : if (isTypeLegal(Ty, VT))
976 : return true;
977 :
978 : // If this is a type that can be sign or zero-extended to a basic operation
979 : // go ahead and accept it now.
980 496 : if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
981 473 : return true;
982 :
983 : return false;
984 : }
985 :
986 0 : bool AArch64FastISel::isValueAvailable(const Value *V) const {
987 0 : if (!isa<Instruction>(V))
988 0 : return true;
989 :
990 : const auto *I = cast<Instruction>(V);
991 0 : return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
992 : }
993 :
994 827 : bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
995 : unsigned ScaleFactor = getImplicitScaleFactor(VT);
996 603 : if (!ScaleFactor)
997 : return false;
998 :
999 : bool ImmediateOffsetNeedsLowering = false;
1000 : bool RegisterOffsetNeedsLowering = false;
1001 603 : int64_t Offset = Addr.getOffset();
1002 603 : if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1003 : ImmediateOffsetNeedsLowering = true;
1004 599 : else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1005 105 : !isUInt<12>(Offset / ScaleFactor))
1006 : ImmediateOffsetNeedsLowering = true;
1007 :
1008 : // Cannot encode an offset register and an immediate offset in the same
1009 : // instruction. Fold the immediate offset into the load/store instruction and
1010 : // emit an additional add to take care of the offset register.
1011 593 : if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1012 : RegisterOffsetNeedsLowering = true;
1013 :
1014 : // Cannot encode zero register as base.
1015 603 : if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1016 : RegisterOffsetNeedsLowering = true;
1017 :
1018 : // If this is a stack pointer and the offset needs to be simplified then put
1019 : // the alloca address into a register, set the base type back to register and
1020 : // continue. This should almost never happen.
1021 603 : if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1022 : {
1023 3 : unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1024 6 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
1025 6 : ResultReg)
1026 3 : .addFrameIndex(Addr.getFI())
1027 : .addImm(0)
1028 : .addImm(0);
1029 : Addr.setKind(Address::RegBase);
1030 : Addr.setReg(ResultReg);
1031 : }
1032 :
1033 603 : if (RegisterOffsetNeedsLowering) {
1034 : unsigned ResultReg = 0;
1035 7 : if (Addr.getReg()) {
1036 4 : if (Addr.getExtendType() == AArch64_AM::SXTW ||
1037 : Addr.getExtendType() == AArch64_AM::UXTW )
1038 1 : ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1039 : /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1040 : /*TODO:IsKill=*/false, Addr.getExtendType(),
1041 1 : Addr.getShift());
1042 : else
1043 3 : ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1044 : /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1045 : /*TODO:IsKill=*/false, AArch64_AM::LSL,
1046 3 : Addr.getShift());
1047 : } else {
1048 3 : if (Addr.getExtendType() == AArch64_AM::UXTW)
1049 0 : ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1050 0 : /*Op0IsKill=*/false, Addr.getShift(),
1051 : /*IsZExt=*/true);
1052 3 : else if (Addr.getExtendType() == AArch64_AM::SXTW)
1053 1 : ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1054 1 : /*Op0IsKill=*/false, Addr.getShift(),
1055 : /*IsZExt=*/false);
1056 : else
1057 2 : ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1058 2 : /*Op0IsKill=*/false, Addr.getShift());
1059 : }
1060 7 : if (!ResultReg)
1061 : return false;
1062 :
1063 : Addr.setReg(ResultReg);
1064 : Addr.setOffsetReg(0);
1065 : Addr.setShift(0);
1066 : Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1067 : }
1068 :
1069 : // Since the offset is too large for the load/store instruction get the
1070 : // reg+offset into a register.
1071 603 : if (ImmediateOffsetNeedsLowering) {
1072 : unsigned ResultReg;
1073 10 : if (Addr.getReg())
1074 : // Try to fold the immediate into the add instruction.
1075 10 : ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
1076 : else
1077 0 : ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1078 :
1079 10 : if (!ResultReg)
1080 : return false;
1081 : Addr.setReg(ResultReg);
1082 : Addr.setOffset(0);
1083 : }
1084 : return true;
1085 : }
1086 :
1087 601 : void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1088 : const MachineInstrBuilder &MIB,
1089 : MachineMemOperand::Flags Flags,
1090 : unsigned ScaleFactor,
1091 : MachineMemOperand *MMO) {
1092 601 : int64_t Offset = Addr.getOffset() / ScaleFactor;
1093 : // Frame base works a bit differently. Handle it separately.
1094 601 : if (Addr.isFIBase()) {
1095 174 : int FI = Addr.getFI();
1096 : // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1097 : // and alignment should be based on the VT.
1098 174 : MMO = FuncInfo.MF->getMachineMemOperand(
1099 174 : MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1100 174 : MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
1101 : // Now add the rest of the operands.
1102 : MIB.addFrameIndex(FI).addImm(Offset);
1103 : } else {
1104 : assert(Addr.isRegBase() && "Unexpected address kind.");
1105 427 : const MCInstrDesc &II = MIB->getDesc();
1106 427 : unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1107 427 : Addr.setReg(
1108 427 : constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1109 427 : Addr.setOffsetReg(
1110 854 : constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1111 427 : if (Addr.getOffsetReg()) {
1112 : assert(Addr.getOffset() == 0 && "Unexpected offset");
1113 84 : bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1114 : Addr.getExtendType() == AArch64_AM::SXTX;
1115 84 : MIB.addReg(Addr.getReg());
1116 84 : MIB.addReg(Addr.getOffsetReg());
1117 84 : MIB.addImm(IsSigned);
1118 84 : MIB.addImm(Addr.getShift() != 0);
1119 : } else
1120 343 : MIB.addReg(Addr.getReg()).addImm(Offset);
1121 : }
1122 :
1123 601 : if (MMO)
1124 : MIB.addMemOperand(MMO);
1125 601 : }
1126 :
1127 308 : unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1128 : const Value *RHS, bool SetFlags,
1129 : bool WantResult, bool IsZExt) {
1130 : AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1131 : bool NeedExtend = false;
1132 308 : switch (RetVT.SimpleTy) {
1133 : default:
1134 : return 0;
1135 2 : case MVT::i1:
1136 : NeedExtend = true;
1137 2 : break;
1138 3 : case MVT::i8:
1139 : NeedExtend = true;
1140 3 : ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1141 : break;
1142 5 : case MVT::i16:
1143 : NeedExtend = true;
1144 5 : ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1145 : break;
1146 : case MVT::i32: // fall-through
1147 : case MVT::i64:
1148 : break;
1149 : }
1150 308 : MVT SrcVT = RetVT;
1151 308 : RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1152 :
1153 : // Canonicalize immediates to the RHS first.
1154 308 : if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1155 : std::swap(LHS, RHS);
1156 :
1157 : // Canonicalize mul by power of 2 to the RHS.
1158 540 : if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1159 197 : if (isMulPowOf2(LHS))
1160 : std::swap(LHS, RHS);
1161 :
1162 : // Canonicalize shift immediate to the RHS.
1163 540 : if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1164 : if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1165 79 : if (isa<ConstantInt>(SI->getOperand(1)))
1166 4 : if (SI->getOpcode() == Instruction::Shl ||
1167 7 : SI->getOpcode() == Instruction::LShr ||
1168 : SI->getOpcode() == Instruction::AShr )
1169 : std::swap(LHS, RHS);
1170 :
1171 308 : unsigned LHSReg = getRegForValue(LHS);
1172 308 : if (!LHSReg)
1173 : return 0;
1174 308 : bool LHSIsKill = hasTrivialKill(LHS);
1175 :
1176 308 : if (NeedExtend)
1177 10 : LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1178 :
1179 : unsigned ResultReg = 0;
1180 : if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1181 113 : uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1182 57 : if (C->isNegative())
1183 8 : ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
1184 : SetFlags, WantResult);
1185 : else
1186 49 : ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
1187 : WantResult);
1188 : } else if (const auto *C = dyn_cast<Constant>(RHS))
1189 2 : if (C->isNullValue())
1190 2 : ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
1191 : WantResult);
1192 :
1193 59 : if (ResultReg)
1194 : return ResultReg;
1195 :
1196 : // Only extend the RHS within the instruction if there is a valid extend type.
1197 264 : if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1198 6 : isValueAvailable(RHS)) {
1199 : if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1200 : if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1201 0 : if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1202 0 : unsigned RHSReg = getRegForValue(SI->getOperand(0));
1203 0 : if (!RHSReg)
1204 : return 0;
1205 0 : bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1206 0 : return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1207 : RHSIsKill, ExtendType, C->getZExtValue(),
1208 0 : SetFlags, WantResult);
1209 : }
1210 6 : unsigned RHSReg = getRegForValue(RHS);
1211 6 : if (!RHSReg)
1212 : return 0;
1213 6 : bool RHSIsKill = hasTrivialKill(RHS);
1214 6 : return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1215 6 : ExtendType, 0, SetFlags, WantResult);
1216 : }
1217 :
1218 : // Check if the mul can be folded into the instruction.
1219 246 : if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1220 208 : if (isMulPowOf2(RHS)) {
1221 0 : const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1222 : const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1223 :
1224 : if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1225 0 : if (C->getValue().isPowerOf2())
1226 : std::swap(MulLHS, MulRHS);
1227 :
1228 : assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1229 0 : uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1230 0 : unsigned RHSReg = getRegForValue(MulLHS);
1231 0 : if (!RHSReg)
1232 : return 0;
1233 0 : bool RHSIsKill = hasTrivialKill(MulLHS);
1234 0 : ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1235 : RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
1236 : WantResult);
1237 0 : if (ResultReg)
1238 : return ResultReg;
1239 : }
1240 : }
1241 :
1242 : // Check if the shift can be folded into the instruction.
1243 246 : if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1244 : if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1245 : if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1246 : AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1247 : switch (SI->getOpcode()) {
1248 : default: break;
1249 : case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1250 : case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1251 : case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1252 : }
1253 : uint64_t ShiftVal = C->getZExtValue();
1254 12 : if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1255 12 : unsigned RHSReg = getRegForValue(SI->getOperand(0));
1256 12 : if (!RHSReg)
1257 : return 0;
1258 12 : bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1259 12 : ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1260 : RHSIsKill, ShiftType, ShiftVal, SetFlags,
1261 : WantResult);
1262 12 : if (ResultReg)
1263 : return ResultReg;
1264 : }
1265 : }
1266 : }
1267 : }
1268 :
1269 236 : unsigned RHSReg = getRegForValue(RHS);
1270 236 : if (!RHSReg)
1271 : return 0;
1272 236 : bool RHSIsKill = hasTrivialKill(RHS);
1273 :
1274 236 : if (NeedExtend)
1275 1 : RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1276 :
1277 236 : return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1278 236 : SetFlags, WantResult);
1279 : }
1280 :
1281 245 : unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1282 : bool LHSIsKill, unsigned RHSReg,
1283 : bool RHSIsKill, bool SetFlags,
1284 : bool WantResult) {
1285 : assert(LHSReg && RHSReg && "Invalid register number.");
1286 :
1287 245 : if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1288 243 : RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1289 : return 0;
1290 :
1291 243 : if (RetVT != MVT::i32 && RetVT != MVT::i64)
1292 : return 0;
1293 :
1294 : static const unsigned OpcTable[2][2][2] = {
1295 : { { AArch64::SUBWrr, AArch64::SUBXrr },
1296 : { AArch64::ADDWrr, AArch64::ADDXrr } },
1297 : { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1298 : { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1299 : };
1300 : bool Is64Bit = RetVT == MVT::i64;
1301 243 : unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1302 : const TargetRegisterClass *RC =
1303 243 : Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1304 : unsigned ResultReg;
1305 243 : if (WantResult)
1306 200 : ResultReg = createResultReg(RC);
1307 : else
1308 43 : ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1309 :
1310 243 : const MCInstrDesc &II = TII.get(Opc);
1311 486 : LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1312 486 : RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1313 243 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1314 243 : .addReg(LHSReg, getKillRegState(LHSIsKill))
1315 243 : .addReg(RHSReg, getKillRegState(RHSIsKill));
1316 243 : return ResultReg;
1317 : }
1318 :
1319 104 : unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1320 : bool LHSIsKill, uint64_t Imm,
1321 : bool SetFlags, bool WantResult) {
1322 : assert(LHSReg && "Invalid register number.");
1323 :
1324 104 : if (RetVT != MVT::i32 && RetVT != MVT::i64)
1325 : return 0;
1326 :
1327 : unsigned ShiftImm;
1328 104 : if (isUInt<12>(Imm))
1329 : ShiftImm = 0;
1330 12 : else if ((Imm & 0xfff000) == Imm) {
1331 : ShiftImm = 12;
1332 4 : Imm >>= 12;
1333 : } else
1334 : return 0;
1335 :
1336 : static const unsigned OpcTable[2][2][2] = {
1337 : { { AArch64::SUBWri, AArch64::SUBXri },
1338 : { AArch64::ADDWri, AArch64::ADDXri } },
1339 : { { AArch64::SUBSWri, AArch64::SUBSXri },
1340 : { AArch64::ADDSWri, AArch64::ADDSXri } }
1341 : };
1342 : bool Is64Bit = RetVT == MVT::i64;
1343 96 : unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1344 : const TargetRegisterClass *RC;
1345 96 : if (SetFlags)
1346 32 : RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1347 : else
1348 64 : RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1349 : unsigned ResultReg;
1350 96 : if (WantResult)
1351 69 : ResultReg = createResultReg(RC);
1352 : else
1353 27 : ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1354 :
1355 96 : const MCInstrDesc &II = TII.get(Opc);
1356 192 : LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1357 96 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1358 96 : .addReg(LHSReg, getKillRegState(LHSIsKill))
1359 96 : .addImm(Imm)
1360 96 : .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1361 96 : return ResultReg;
1362 : }
1363 :
1364 28 : unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1365 : bool LHSIsKill, unsigned RHSReg,
1366 : bool RHSIsKill,
1367 : AArch64_AM::ShiftExtendType ShiftType,
1368 : uint64_t ShiftImm, bool SetFlags,
1369 : bool WantResult) {
1370 : assert(LHSReg && RHSReg && "Invalid register number.");
1371 : assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1372 : RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1373 :
1374 28 : if (RetVT != MVT::i32 && RetVT != MVT::i64)
1375 : return 0;
1376 :
1377 : // Don't deal with undefined shifts.
1378 28 : if (ShiftImm >= RetVT.getSizeInBits())
1379 : return 0;
1380 :
1381 : static const unsigned OpcTable[2][2][2] = {
1382 : { { AArch64::SUBWrs, AArch64::SUBXrs },
1383 : { AArch64::ADDWrs, AArch64::ADDXrs } },
1384 : { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1385 : { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1386 : };
1387 : bool Is64Bit = RetVT == MVT::i64;
1388 26 : unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1389 : const TargetRegisterClass *RC =
1390 26 : Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1391 : unsigned ResultReg;
1392 26 : if (WantResult)
1393 17 : ResultReg = createResultReg(RC);
1394 : else
1395 9 : ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1396 :
1397 26 : const MCInstrDesc &II = TII.get(Opc);
1398 52 : LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1399 52 : RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1400 26 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1401 26 : .addReg(LHSReg, getKillRegState(LHSIsKill))
1402 26 : .addReg(RHSReg, getKillRegState(RHSIsKill))
1403 26 : .addImm(getShifterImm(ShiftType, ShiftImm));
1404 26 : return ResultReg;
1405 : }
1406 :
1407 7 : unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1408 : bool LHSIsKill, unsigned RHSReg,
1409 : bool RHSIsKill,
1410 : AArch64_AM::ShiftExtendType ExtType,
1411 : uint64_t ShiftImm, bool SetFlags,
1412 : bool WantResult) {
1413 : assert(LHSReg && RHSReg && "Invalid register number.");
1414 : assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1415 : RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1416 :
1417 7 : if (RetVT != MVT::i32 && RetVT != MVT::i64)
1418 : return 0;
1419 :
1420 7 : if (ShiftImm >= 4)
1421 : return 0;
1422 :
1423 : static const unsigned OpcTable[2][2][2] = {
1424 : { { AArch64::SUBWrx, AArch64::SUBXrx },
1425 : { AArch64::ADDWrx, AArch64::ADDXrx } },
1426 : { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1427 : { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1428 : };
1429 : bool Is64Bit = RetVT == MVT::i64;
1430 7 : unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1431 : const TargetRegisterClass *RC = nullptr;
1432 7 : if (SetFlags)
1433 5 : RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1434 : else
1435 2 : RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1436 : unsigned ResultReg;
1437 7 : if (WantResult)
1438 2 : ResultReg = createResultReg(RC);
1439 : else
1440 5 : ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1441 :
1442 7 : const MCInstrDesc &II = TII.get(Opc);
1443 14 : LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1444 14 : RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1445 7 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1446 7 : .addReg(LHSReg, getKillRegState(LHSIsKill))
1447 7 : .addReg(RHSReg, getKillRegState(RHSIsKill))
1448 7 : .addImm(getArithExtendImm(ExtType, ShiftImm));
1449 7 : return ResultReg;
1450 : }
1451 :
1452 107 : bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1453 107 : Type *Ty = LHS->getType();
1454 107 : EVT EVT = TLI.getValueType(DL, Ty, true);
1455 107 : if (!EVT.isSimple())
1456 : return false;
1457 : MVT VT = EVT.getSimpleVT();
1458 :
1459 107 : switch (VT.SimpleTy) {
1460 : default:
1461 : return false;
1462 60 : case MVT::i1:
1463 : case MVT::i8:
1464 : case MVT::i16:
1465 : case MVT::i32:
1466 : case MVT::i64:
1467 120 : return emitICmp(VT, LHS, RHS, IsZExt);
1468 47 : case MVT::f32:
1469 : case MVT::f64:
1470 47 : return emitFCmp(VT, LHS, RHS);
1471 : }
1472 : }
1473 :
1474 : bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1475 : bool IsZExt) {
1476 60 : return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1477 60 : IsZExt) != 0;
1478 : }
1479 :
1480 : bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1481 : uint64_t Imm) {
1482 11 : return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
1483 : /*SetFlags=*/true, /*WantResult=*/false) != 0;
1484 : }
1485 :
1486 47 : bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1487 47 : if (RetVT != MVT::f32 && RetVT != MVT::f64)
1488 : return false;
1489 :
1490 : // Check to see if the 2nd operand is a constant that we can encode directly
1491 : // in the compare.
1492 : bool UseImm = false;
1493 : if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1494 4 : if (CFP->isZero() && !CFP->isNegative())
1495 : UseImm = true;
1496 :
1497 47 : unsigned LHSReg = getRegForValue(LHS);
1498 47 : if (!LHSReg)
1499 : return false;
1500 47 : bool LHSIsKill = hasTrivialKill(LHS);
1501 :
1502 47 : if (UseImm) {
1503 2 : unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1504 4 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1505 2 : .addReg(LHSReg, getKillRegState(LHSIsKill));
1506 2 : return true;
1507 : }
1508 :
1509 45 : unsigned RHSReg = getRegForValue(RHS);
1510 45 : if (!RHSReg)
1511 : return false;
1512 45 : bool RHSIsKill = hasTrivialKill(RHS);
1513 :
1514 45 : unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1515 90 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1516 45 : .addReg(LHSReg, getKillRegState(LHSIsKill))
1517 45 : .addReg(RHSReg, getKillRegState(RHSIsKill));
1518 45 : return true;
1519 : }
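     :   // For example, IR such as
     :   //   %c = fcmp olt float %x, 0.0
     :   // can use the compare-against-zero form (FCMPSri) selected above, while a
     :   // compare against any other constant first materializes the constant and
     :   // then falls through to the register-register form (FCMPSrr / FCMPDrr).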
1520 :
1521 : unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1522 : bool SetFlags, bool WantResult, bool IsZExt) {
1523 210 : return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1524 : IsZExt);
1525 : }
1526 :
1527 : /// This method is a wrapper to simplify add emission.
1528 : ///
1529 : /// First try to emit an add with an immediate operand using emitAddSub_ri. If
1530 : /// that fails, then try to materialize the immediate into a register and use
1531 : /// emitAddSub_rr instead.
1532 34 : unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
1533 : int64_t Imm) {
1534 : unsigned ResultReg;
1535 34 : if (Imm < 0)
1536 2 : ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
1537 : else
1538 32 : ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
1539 :
1540 34 : if (ResultReg)
1541 : return ResultReg;
1542 :
1543 5 : unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1544 5 : if (!CReg)
1545 : return 0;
1546 :
1547 5 : ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
1548 5 : return ResultReg;
1549 : }
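     :   // For instance, with a 64-bit add an immediate such as 16 encodes directly
     :   // and is handled by emitAddSub_ri, whereas a value like 0x123456 is not a
     :   // legal add/sub immediate, so it is first materialized via fastEmit_i and
     :   // combined with emitAddSub_rr as above.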
1550 :
1551 : unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1552 : bool SetFlags, bool WantResult, bool IsZExt) {
1553 63 : return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1554 : IsZExt);
1555 : }
1556 :
1557 : unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1558 : bool LHSIsKill, unsigned RHSReg,
1559 : bool RHSIsKill, bool WantResult) {
1560 4 : return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1561 : RHSIsKill, /*SetFlags=*/true, WantResult);
1562 : }
1563 :
1564 : unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1565 : bool LHSIsKill, unsigned RHSReg,
1566 : bool RHSIsKill,
1567 : AArch64_AM::ShiftExtendType ShiftType,
1568 : uint64_t ShiftImm, bool WantResult) {
1569 9 : return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1570 : RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
1571 : WantResult);
1572 : }
1573 :
1574 89 : unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1575 : const Value *LHS, const Value *RHS) {
1576 : // Canonicalize immediates to the RHS first.
1577 89 : if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1578 : std::swap(LHS, RHS);
1579 :
1580 : // Canonicalize mul by power-of-2 to the RHS.
1581 89 : if (LHS->hasOneUse() && isValueAvailable(LHS))
1582 89 : if (isMulPowOf2(LHS))
1583 : std::swap(LHS, RHS);
1584 :
1585 : // Canonicalize shift immediate to the RHS.
1586 89 : if (LHS->hasOneUse() && isValueAvailable(LHS))
1587 : if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1588 0 : if (isa<ConstantInt>(SI->getOperand(1)))
1589 : std::swap(LHS, RHS);
1590 :
1591 89 : unsigned LHSReg = getRegForValue(LHS);
1592 89 : if (!LHSReg)
1593 : return 0;
1594 89 : bool LHSIsKill = hasTrivialKill(LHS);
1595 :
1596 : unsigned ResultReg = 0;
1597 : if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1598 : uint64_t Imm = C->getZExtValue();
1599 18 : ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
1600 : }
1601 18 : if (ResultReg)
1602 : return ResultReg;
1603 :
1604 : // Check if the mul can be folded into the instruction.
1605 71 : if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1606 71 : if (isMulPowOf2(RHS)) {
1607 6 : const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1608 : const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1609 :
1610 : if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1611 0 : if (C->getValue().isPowerOf2())
1612 : std::swap(MulLHS, MulRHS);
1613 :
1614 : assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1615 6 : uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1616 :
1617 6 : unsigned RHSReg = getRegForValue(MulLHS);
1618 6 : if (!RHSReg)
1619 : return 0;
1620 6 : bool RHSIsKill = hasTrivialKill(MulLHS);
1621 6 : ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1622 : RHSIsKill, ShiftVal);
1623 6 : if (ResultReg)
1624 : return ResultReg;
1625 : }
1626 : }
1627 :
1628 : // Check if the shift can be folded into the instruction.
1629 65 : if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1630 : if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1631 24 : if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1632 : uint64_t ShiftVal = C->getZExtValue();
1633 24 : unsigned RHSReg = getRegForValue(SI->getOperand(0));
1634 24 : if (!RHSReg)
1635 : return 0;
1636 24 : bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1637 24 : ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1638 : RHSIsKill, ShiftVal);
1639 24 : if (ResultReg)
1640 : return ResultReg;
1641 : }
1642 : }
1643 :
1644 53 : unsigned RHSReg = getRegForValue(RHS);
1645 53 : if (!RHSReg)
1646 : return 0;
1647 53 : bool RHSIsKill = hasTrivialKill(RHS);
1648 :
1649 61 : MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1650 53 : ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1651 53 : if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1652 12 : uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1653 12 : ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1654 : }
1655 : return ResultReg;
1656 : }
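     :   // Example of the folds above (names illustrative):
     :   //   %m = mul i32 %b, 4
     :   //   %r = and i32 %a, %m
     :   // is selected as a single shifted logical instruction, roughly
     :   //   and w0, w1, w2, lsl #2
     :   // via emitLogicalOp_rs, instead of emitting a separate multiply. The same
     :   // path handles an explicit shl by a constant on the RHS.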
1657 :
1658 203 : unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1659 : unsigned LHSReg, bool LHSIsKill,
1660 : uint64_t Imm) {
1661 : static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1662 : "ISD nodes are not consecutive!");
1663 : static const unsigned OpcTable[3][2] = {
1664 : { AArch64::ANDWri, AArch64::ANDXri },
1665 : { AArch64::ORRWri, AArch64::ORRXri },
1666 : { AArch64::EORWri, AArch64::EORXri }
1667 : };
1668 : const TargetRegisterClass *RC;
1669 : unsigned Opc;
1670 : unsigned RegSize;
1671 203 : switch (RetVT.SimpleTy) {
1672 : default:
1673 : return 0;
1674 197 : case MVT::i1:
1675 : case MVT::i8:
1676 : case MVT::i16:
1677 : case MVT::i32: {
1678 197 : unsigned Idx = ISDOpc - ISD::AND;
1679 197 : Opc = OpcTable[Idx][0];
1680 : RC = &AArch64::GPR32spRegClass;
1681 : RegSize = 32;
1682 197 : break;
1683 : }
1684 6 : case MVT::i64:
1685 6 : Opc = OpcTable[ISDOpc - ISD::AND][1];
1686 : RC = &AArch64::GPR64spRegClass;
1687 : RegSize = 64;
1688 6 : break;
1689 : }
1690 :
1691 203 : if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1692 : return 0;
1693 :
1694 : unsigned ResultReg =
1695 203 : fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1696 : AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1697 203 : if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1698 4 : uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1699 4 : ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1700 : }
1701 : return ResultReg;
1702 : }
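     :   // Only bitmask-encodable constants survive the immediate form; for example
     :   //   %r = and i32 %x, 255
     :   // becomes ANDWri with the logical-immediate encoding of 0xff, while a
     :   // constant such as 0x12345 fails isLogicalImmediate and the caller falls
     :   // back to materializing it and using a register-register form.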
1703 :
1704 30 : unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1705 : unsigned LHSReg, bool LHSIsKill,
1706 : unsigned RHSReg, bool RHSIsKill,
1707 : uint64_t ShiftImm) {
1708 : static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1709 : "ISD nodes are not consecutive!");
1710 : static const unsigned OpcTable[3][2] = {
1711 : { AArch64::ANDWrs, AArch64::ANDXrs },
1712 : { AArch64::ORRWrs, AArch64::ORRXrs },
1713 : { AArch64::EORWrs, AArch64::EORXrs }
1714 : };
1715 :
1716 : // Don't deal with undefined shifts.
1717 30 : if (ShiftImm >= RetVT.getSizeInBits())
1718 : return 0;
1719 :
1720 : const TargetRegisterClass *RC;
1721 : unsigned Opc;
1722 18 : switch (RetVT.SimpleTy) {
1723 : default:
1724 : return 0;
1725 12 : case MVT::i1:
1726 : case MVT::i8:
1727 : case MVT::i16:
1728 : case MVT::i32:
1729 12 : Opc = OpcTable[ISDOpc - ISD::AND][0];
1730 : RC = &AArch64::GPR32RegClass;
1731 12 : break;
1732 6 : case MVT::i64:
1733 6 : Opc = OpcTable[ISDOpc - ISD::AND][1];
1734 : RC = &AArch64::GPR64RegClass;
1735 6 : break;
1736 : }
1737 : unsigned ResultReg =
1738 18 : fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1739 18 : AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1740 18 : if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1741 6 : uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1742 6 : ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1743 : }
1744 : return ResultReg;
1745 : }
1746 :
1747 4 : unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1748 : uint64_t Imm) {
1749 170 : return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
1750 : }
1751 :
1752 384 : unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1753 : bool WantZExt, MachineMemOperand *MMO) {
1754 768 : if (!TLI.allowsMisalignedMemoryAccesses(VT))
1755 : return 0;
1756 :
1757 : // Simplify this down to something we can handle.
1758 382 : if (!simplifyAddress(Addr, VT))
1759 : return 0;
1760 :
1761 : unsigned ScaleFactor = getImplicitScaleFactor(VT);
1762 310 : if (!ScaleFactor)
1763 0 : llvm_unreachable("Unexpected value type.");
1764 :
1765 : // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1766 : // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1767 : bool UseScaled = true;
1768 310 : if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1769 : UseScaled = false;
1770 : ScaleFactor = 1;
1771 : }
1772 :
1773 : static const unsigned GPOpcTable[2][8][4] = {
1774 : // Sign-extend.
1775 : { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1776 : AArch64::LDURXi },
1777 : { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1778 : AArch64::LDURXi },
1779 : { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
1780 : AArch64::LDRXui },
1781 : { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1782 : AArch64::LDRXui },
1783 : { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1784 : AArch64::LDRXroX },
1785 : { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1786 : AArch64::LDRXroX },
1787 : { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1788 : AArch64::LDRXroW },
1789 : { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1790 : AArch64::LDRXroW }
1791 : },
1792 : // Zero-extend.
1793 : { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1794 : AArch64::LDURXi },
1795 : { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1796 : AArch64::LDURXi },
1797 : { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1798 : AArch64::LDRXui },
1799 : { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1800 : AArch64::LDRXui },
1801 : { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1802 : AArch64::LDRXroX },
1803 : { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1804 : AArch64::LDRXroX },
1805 : { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1806 : AArch64::LDRXroW },
1807 : { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1808 : AArch64::LDRXroW }
1809 : }
1810 : };
1811 :
1812 : static const unsigned FPOpcTable[4][2] = {
1813 : { AArch64::LDURSi, AArch64::LDURDi },
1814 : { AArch64::LDRSui, AArch64::LDRDui },
1815 : { AArch64::LDRSroX, AArch64::LDRDroX },
1816 : { AArch64::LDRSroW, AArch64::LDRDroW }
1817 : };
1818 :
1819 : unsigned Opc;
1820 : const TargetRegisterClass *RC;
1821 310 : bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1822 181 : Addr.getOffsetReg();
1823 229 : unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1824 310 : if (Addr.getExtendType() == AArch64_AM::UXTW ||
1825 : Addr.getExtendType() == AArch64_AM::SXTW)
1826 39 : Idx++;
1827 :
1828 : bool IsRet64Bit = RetVT == MVT::i64;
1829 310 : switch (VT.SimpleTy) {
1830 0 : default:
1831 0 : llvm_unreachable("Unexpected value type.");
1832 53 : case MVT::i1: // Intentional fall-through.
1833 : case MVT::i8:
1834 53 : Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1835 53 : RC = (IsRet64Bit && !WantZExt) ?
1836 : &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1837 : break;
1838 42 : case MVT::i16:
1839 42 : Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1840 42 : RC = (IsRet64Bit && !WantZExt) ?
1841 : &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1842 : break;
1843 108 : case MVT::i32:
1844 108 : Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1845 108 : RC = (IsRet64Bit && !WantZExt) ?
1846 : &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1847 : break;
1848 94 : case MVT::i64:
1849 94 : Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1850 : RC = &AArch64::GPR64RegClass;
1851 94 : break;
1852 3 : case MVT::f32:
1853 3 : Opc = FPOpcTable[Idx][0];
1854 : RC = &AArch64::FPR32RegClass;
1855 3 : break;
1856 10 : case MVT::f64:
1857 10 : Opc = FPOpcTable[Idx][1];
1858 : RC = &AArch64::FPR64RegClass;
1859 10 : break;
1860 : }
1861 :
1862 : // Create the base instruction, then add the operands.
1863 310 : unsigned ResultReg = createResultReg(RC);
1864 310 : MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1865 620 : TII.get(Opc), ResultReg);
1866 310 : addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1867 :
1868 : // Loading an i1 requires special handling.
1869 310 : if (VT == MVT::i1) {
1870 3 : unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
1871 : assert(ANDReg && "Unexpected AND instruction emission failure.");
1872 : ResultReg = ANDReg;
1873 : }
1874 :
1875 :   // For zero-extending loads to 64 bits, we emit a 32-bit load and then
1876 :   // convert the 32-bit register to a 64-bit register.
1877 310 : if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1878 20 : unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
1879 20 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1880 40 : TII.get(AArch64::SUBREG_TO_REG), Reg64)
1881 : .addImm(0)
1882 20 : .addReg(ResultReg, getKillRegState(true))
1883 : .addImm(AArch64::sub_32);
1884 : ResultReg = Reg64;
1885 : }
1886 : return ResultReg;
1887 : }
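     :   // For example, the pair
     :   //   %v = load i32, i32* %p
     :   //   %e = zext i32 %v to i64
     :   // is emitted here as a 32-bit LDRWui whose result is then wrapped in a
     :   // SUBREG_TO_REG into a 64-bit register, modelling the implicit
     :   // zero-extension performed by 32-bit loads.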
1888 :
1889 284 : bool AArch64FastISel::selectAddSub(const Instruction *I) {
1890 284 : MVT VT;
1891 284 : if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1892 : return false;
1893 :
1894 568 : if (VT.isVector())
1895 142 : return selectOperator(I, I->getOpcode());
1896 :
1897 : unsigned ResultReg;
1898 213 : switch (I->getOpcode()) {
1899 0 : default:
1900 0 : llvm_unreachable("Unexpected instruction.");
1901 210 : case Instruction::Add:
1902 210 : ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1903 210 : break;
1904 3 : case Instruction::Sub:
1905 3 : ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1906 3 : break;
1907 : }
1908 213 : if (!ResultReg)
1909 : return false;
1910 :
1911 213 : updateValueMap(I, ResultReg);
1912 213 : return true;
1913 : }
1914 :
1915 89 : bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1916 89 : MVT VT;
1917 89 : if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1918 : return false;
1919 :
1920 178 : if (VT.isVector())
1921 0 : return selectOperator(I, I->getOpcode());
1922 :
1923 : unsigned ResultReg;
1924 89 : switch (I->getOpcode()) {
1925 0 : default:
1926 0 : llvm_unreachable("Unexpected instruction.");
1927 50 : case Instruction::And:
1928 100 : ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1929 50 : break;
1930 20 : case Instruction::Or:
1931 40 : ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1932 20 : break;
1933 19 : case Instruction::Xor:
1934 38 : ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1935 19 : break;
1936 : }
1937 89 : if (!ResultReg)
1938 : return false;
1939 :
1940 89 : updateValueMap(I, ResultReg);
1941 89 : return true;
1942 : }
1943 :
1944 375 : bool AArch64FastISel::selectLoad(const Instruction *I) {
1945 375 : MVT VT;
1946 : // Verify we have a legal type before going any further. Currently, we handle
1947 : // simple types that will directly fit in a register (i32/f32/i64/f64) or
1948 : // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1949 745 : if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1950 370 : cast<LoadInst>(I)->isAtomic())
1951 5 : return false;
1952 :
1953 370 : const Value *SV = I->getOperand(0);
1954 370 : if (TLI.supportSwiftError()) {
1955 : // Swifterror values can come from either a function parameter with
1956 : // swifterror attribute or an alloca with swifterror attribute.
1957 : if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1958 103 : if (Arg->hasSwiftErrorAttr())
1959 : return false;
1960 : }
1961 :
1962 : if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1963 65 : if (Alloca->isSwiftError())
1964 : return false;
1965 : }
1966 : }
1967 :
1968 : // See if we can handle this address.
1969 : Address Addr;
1970 730 : if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1971 : return false;
1972 :
1973 : // Fold the following sign-/zero-extend into the load instruction.
1974 : bool WantZExt = true;
1975 343 : MVT RetVT = VT;
1976 : const Value *IntExtVal = nullptr;
1977 343 : if (I->hasOneUse()) {
1978 249 : if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1979 37 : if (isTypeSupported(ZE->getType(), RetVT))
1980 : IntExtVal = ZE;
1981 : else
1982 0 : RetVT = VT;
1983 : } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1984 38 : if (isTypeSupported(SE->getType(), RetVT))
1985 : IntExtVal = SE;
1986 : else
1987 0 : RetVT = VT;
1988 : WantZExt = false;
1989 : }
1990 : }
1991 :
1992 : unsigned ResultReg =
1993 343 : emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1994 343 : if (!ResultReg)
1995 : return false;
1996 :
1997 : // There are a few different cases we have to handle, because the load or the
1998 :   // sign-/zero-extend might not be selected by FastISel if we fall back to
1999 : // SelectionDAG. There is also an ordering issue when both instructions are in
2000 : // different basic blocks.
2001 : // 1.) The load instruction is selected by FastISel, but the integer extend
2002 :   //     is not. This usually happens when the integer extend is in a different
2003 : // basic block and SelectionDAG took over for that basic block.
2004 : // 2.) The load instruction is selected before the integer extend. This only
2005 : // happens when the integer extend is in a different basic block.
2006 : // 3.) The load instruction is selected by SelectionDAG and the integer extend
2007 : // by FastISel. This happens if there are instructions between the load
2008 : // and the integer extend that couldn't be selected by FastISel.
2009 269 : if (IntExtVal) {
2010 : // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2011 : // could select it. Emit a copy to subreg if necessary. FastISel will remove
2012 : // it when it selects the integer extend.
2013 75 : unsigned Reg = lookUpRegForValue(IntExtVal);
2014 75 : auto *MI = MRI.getUniqueVRegDef(Reg);
2015 75 : if (!MI) {
2016 1 : if (RetVT == MVT::i64 && VT <= MVT::i32) {
2017 1 : if (WantZExt) {
2018 : // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2019 1 : std::prev(FuncInfo.InsertPt)->eraseFromParent();
2020 1 : ResultReg = std::prev(FuncInfo.InsertPt)->getOperand(0).getReg();
2021 : } else
2022 0 : ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2023 : /*IsKill=*/true,
2024 : AArch64::sub_32);
2025 : }
2026 1 : updateValueMap(I, ResultReg);
2027 1 : return true;
2028 : }
2029 :
2030 : // The integer extend has already been emitted - delete all the instructions
2031 : // that have been emitted by the integer extend lowering code and use the
2032 : // result from the load instruction directly.
2033 186 : while (MI) {
2034 : Reg = 0;
2035 150 : for (auto &Opnd : MI->uses()) {
2036 150 : if (Opnd.isReg()) {
2037 112 : Reg = Opnd.getReg();
2038 112 : break;
2039 : }
2040 : }
2041 112 : MI->eraseFromParent();
2042 : MI = nullptr;
2043 112 : if (Reg)
2044 112 : MI = MRI.getUniqueVRegDef(Reg);
2045 : }
2046 74 : updateValueMap(IntExtVal, ResultReg);
2047 74 : return true;
2048 : }
2049 :
2050 194 : updateValueMap(I, ResultReg);
2051 194 : return true;
2052 : }
2053 :
2054 16 : bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2055 : unsigned AddrReg,
2056 : MachineMemOperand *MMO) {
2057 : unsigned Opc;
2058 16 : switch (VT.SimpleTy) {
2059 : default: return false;
2060 : case MVT::i8: Opc = AArch64::STLRB; break;
2061 : case MVT::i16: Opc = AArch64::STLRH; break;
2062 : case MVT::i32: Opc = AArch64::STLRW; break;
2063 : case MVT::i64: Opc = AArch64::STLRX; break;
2064 : }
2065 :
2066 16 : const MCInstrDesc &II = TII.get(Opc);
2067 16 : SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2068 16 : AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2069 16 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2070 16 : .addReg(SrcReg)
2071 16 : .addReg(AddrReg)
2072 : .addMemOperand(MMO);
2073 16 : return true;
2074 : }
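     :   // A minimal illustration: a release store such as
     :   //   store atomic i32 %v, i32* %p release, align 4
     :   // is emitted as STLRW, with the pointer passed as a plain base register
     :   // because the STLR family takes no offset or index.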
2075 :
2076 445 : bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2077 : MachineMemOperand *MMO) {
2078 890 : if (!TLI.allowsMisalignedMemoryAccesses(VT))
2079 : return false;
2080 :
2081 : // Simplify this down to something we can handle.
2082 445 : if (!simplifyAddress(Addr, VT))
2083 : return false;
2084 :
2085 : unsigned ScaleFactor = getImplicitScaleFactor(VT);
2086 291 : if (!ScaleFactor)
2087 0 : llvm_unreachable("Unexpected value type.");
2088 :
2089 : // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2090 : // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2091 : bool UseScaled = true;
2092 291 : if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2093 : UseScaled = false;
2094 : ScaleFactor = 1;
2095 : }
2096 :
2097 : static const unsigned OpcTable[4][6] = {
2098 : { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2099 : AArch64::STURSi, AArch64::STURDi },
2100 : { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2101 : AArch64::STRSui, AArch64::STRDui },
2102 : { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2103 : AArch64::STRSroX, AArch64::STRDroX },
2104 : { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2105 : AArch64::STRSroW, AArch64::STRDroW }
2106 : };
2107 :
2108 : unsigned Opc;
2109 : bool VTIsi1 = false;
2110 291 : bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2111 133 : Addr.getOffsetReg();
2112 288 : unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2113 291 : if (Addr.getExtendType() == AArch64_AM::UXTW ||
2114 : Addr.getExtendType() == AArch64_AM::SXTW)
2115 0 : Idx++;
2116 :
2117 291 : switch (VT.SimpleTy) {
2118 0 : default: llvm_unreachable("Unexpected value type.");
2119 8 : case MVT::i1: VTIsi1 = true; LLVM_FALLTHROUGH;
2120 50 : case MVT::i8: Opc = OpcTable[Idx][0]; break;
2121 21 : case MVT::i16: Opc = OpcTable[Idx][1]; break;
2122 100 : case MVT::i32: Opc = OpcTable[Idx][2]; break;
2123 101 : case MVT::i64: Opc = OpcTable[Idx][3]; break;
2124 2 : case MVT::f32: Opc = OpcTable[Idx][4]; break;
2125 17 : case MVT::f64: Opc = OpcTable[Idx][5]; break;
2126 : }
2127 :
2128 : // Storing an i1 requires special handling.
2129 291 : if (VTIsi1 && SrcReg != AArch64::WZR) {
2130 7 : unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
2131 : assert(ANDReg && "Unexpected AND instruction emission failure.");
2132 : SrcReg = ANDReg;
2133 : }
2134 : // Create the base instruction, then add the operands.
2135 291 : const MCInstrDesc &II = TII.get(Opc);
2136 582 : SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2137 : MachineInstrBuilder MIB =
2138 291 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
2139 291 : addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2140 :
2141 291 : return true;
2142 : }
2143 :
2144 421 : bool AArch64FastISel::selectStore(const Instruction *I) {
2145 421 : MVT VT;
2146 421 : const Value *Op0 = I->getOperand(0);
2147 : // Verify we have a legal type before going any further. Currently, we handle
2148 : // simple types that will directly fit in a register (i32/f32/i64/f64) or
2149 : // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2150 421 : if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2151 : return false;
2152 :
2153 : const Value *PtrV = I->getOperand(1);
2154 404 : if (TLI.supportSwiftError()) {
2155 : // Swifterror values can come from either a function parameter with
2156 : // swifterror attribute or an alloca with swifterror attribute.
2157 : if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2158 228 : if (Arg->hasSwiftErrorAttr())
2159 : return false;
2160 : }
2161 :
2162 : if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2163 98 : if (Alloca->isSwiftError())
2164 : return false;
2165 : }
2166 : }
2167 :
2168 : // Get the value to be stored into a register. Use the zero register directly
2169 : // when possible to avoid an unnecessary copy and a wasted register.
2170 : unsigned SrcReg = 0;
2171 : if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2172 55 : if (CI->isZero())
2173 36 : SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2174 : } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2175 2 : if (CF->isZero() && !CF->isNegative()) {
2176 2 : VT = MVT::getIntegerVT(VT.getSizeInBits());
2177 2 : SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2178 : }
2179 : }
2180 :
2181 : if (!SrcReg)
2182 364 : SrcReg = getRegForValue(Op0);
2183 :
2184 402 : if (!SrcReg)
2185 : return false;
2186 :
2187 : auto *SI = cast<StoreInst>(I);
2188 :
2189 : // Try to emit a STLR for seq_cst/release.
2190 396 : if (SI->isAtomic()) {
2191 : AtomicOrdering Ord = SI->getOrdering();
2192 : // The non-atomic instructions are sufficient for relaxed stores.
2193 24 : if (isReleaseOrStronger(Ord)) {
2194 : // The STLR addressing mode only supports a base reg; pass that directly.
2195 16 : unsigned AddrReg = getRegForValue(PtrV);
2196 16 : return emitStoreRelease(VT, SrcReg, AddrReg,
2197 16 : createMachineMemOperandFor(I));
2198 : }
2199 : }
2200 :
2201 : // See if we can handle this address.
2202 : Address Addr;
2203 380 : if (!computeAddress(PtrV, Addr, Op0->getType()))
2204 : return false;
2205 :
2206 372 : if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2207 152 : return false;
2208 : return true;
2209 : }
2210 :
2211 : static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2212 : switch (Pred) {
2213 : case CmpInst::FCMP_ONE:
2214 : case CmpInst::FCMP_UEQ:
2215 : default:
2216 : // AL is our "false" for now. The other two need more compares.
2217 : return AArch64CC::AL;
2218 : case CmpInst::ICMP_EQ:
2219 : case CmpInst::FCMP_OEQ:
2220 : return AArch64CC::EQ;
2221 : case CmpInst::ICMP_SGT:
2222 : case CmpInst::FCMP_OGT:
2223 : return AArch64CC::GT;
2224 : case CmpInst::ICMP_SGE:
2225 : case CmpInst::FCMP_OGE:
2226 : return AArch64CC::GE;
2227 : case CmpInst::ICMP_UGT:
2228 : case CmpInst::FCMP_UGT:
2229 : return AArch64CC::HI;
2230 : case CmpInst::FCMP_OLT:
2231 : return AArch64CC::MI;
2232 : case CmpInst::ICMP_ULE:
2233 : case CmpInst::FCMP_OLE:
2234 : return AArch64CC::LS;
2235 : case CmpInst::FCMP_ORD:
2236 : return AArch64CC::VC;
2237 : case CmpInst::FCMP_UNO:
2238 : return AArch64CC::VS;
2239 : case CmpInst::FCMP_UGE:
2240 : return AArch64CC::PL;
2241 : case CmpInst::ICMP_SLT:
2242 : case CmpInst::FCMP_ULT:
2243 : return AArch64CC::LT;
2244 : case CmpInst::ICMP_SLE:
2245 : case CmpInst::FCMP_ULE:
2246 : return AArch64CC::LE;
2247 : case CmpInst::FCMP_UNE:
2248 : case CmpInst::ICMP_NE:
2249 : return AArch64CC::NE;
2250 : case CmpInst::ICMP_UGE:
2251 : return AArch64CC::HS;
2252 : case CmpInst::ICMP_ULT:
2253 : return AArch64CC::LO;
2254 : }
2255 : }
2256 :
2257 : /// Try to emit a combined compare-and-branch instruction.
2258 88 : bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2259 : assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2260 : const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2261 88 : CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2262 :
2263 : const Value *LHS = CI->getOperand(0);
2264 : const Value *RHS = CI->getOperand(1);
2265 :
2266 88 : MVT VT;
2267 88 : if (!isTypeSupported(LHS->getType(), VT))
2268 : return false;
2269 :
2270 88 : unsigned BW = VT.getSizeInBits();
2271 88 : if (BW > 64)
2272 : return false;
2273 :
2274 88 : MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2275 88 : MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2276 :
2277 : // Try to take advantage of fallthrough opportunities.
2278 88 : if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2279 : std::swap(TBB, FBB);
2280 49 : Predicate = CmpInst::getInversePredicate(Predicate);
2281 : }
2282 :
2283 : int TestBit = -1;
2284 : bool IsCmpNE;
2285 : switch (Predicate) {
2286 : default:
2287 : return false;
2288 44 : case CmpInst::ICMP_EQ:
2289 : case CmpInst::ICMP_NE:
2290 44 : if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2291 : std::swap(LHS, RHS);
2292 :
2293 44 : if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2294 3 : return false;
2295 :
2296 : if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2297 13 : if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2298 : const Value *AndLHS = AI->getOperand(0);
2299 : const Value *AndRHS = AI->getOperand(1);
2300 :
2301 : if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2302 0 : if (C->getValue().isPowerOf2())
2303 : std::swap(AndLHS, AndRHS);
2304 :
2305 : if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2306 12 : if (C->getValue().isPowerOf2()) {
2307 12 : TestBit = C->getValue().logBase2();
2308 : LHS = AndLHS;
2309 : }
2310 : }
2311 :
2312 41 : if (VT == MVT::i1)
2313 : TestBit = 0;
2314 :
2315 41 : IsCmpNE = Predicate == CmpInst::ICMP_NE;
2316 41 : break;
2317 13 : case CmpInst::ICMP_SLT:
2318 : case CmpInst::ICMP_SGE:
2319 13 : if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2320 7 : return false;
2321 :
2322 6 : TestBit = BW - 1;
2323 6 : IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2324 6 : break;
2325 10 : case CmpInst::ICMP_SGT:
2326 : case CmpInst::ICMP_SLE:
2327 10 : if (!isa<ConstantInt>(RHS))
2328 : return false;
2329 :
2330 8 : if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2331 : return false;
2332 :
2333 8 : TestBit = BW - 1;
2334 8 : IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2335 8 : break;
2336 : } // end switch
2337 :
2338 : static const unsigned OpcTable[2][2][2] = {
2339 : { {AArch64::CBZW, AArch64::CBZX },
2340 : {AArch64::CBNZW, AArch64::CBNZX} },
2341 : { {AArch64::TBZW, AArch64::TBZX },
2342 : {AArch64::TBNZW, AArch64::TBNZX} }
2343 : };
2344 :
2345 55 : bool IsBitTest = TestBit != -1;
2346 55 : bool Is64Bit = BW == 64;
2347 55 : if (TestBit < 32 && TestBit >= 0)
2348 : Is64Bit = false;
2349 :
2350 55 : unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2351 55 : const MCInstrDesc &II = TII.get(Opc);
2352 :
2353 55 : unsigned SrcReg = getRegForValue(LHS);
2354 55 : if (!SrcReg)
2355 : return false;
2356 55 : bool SrcIsKill = hasTrivialKill(LHS);
2357 :
2358 55 : if (BW == 64 && !Is64Bit)
2359 4 : SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
2360 : AArch64::sub_32);
2361 :
2362 55 : if ((BW < 32) && !IsBitTest)
2363 8 : SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true);
2364 :
2365 : // Emit the combined compare and branch instruction.
2366 110 : SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2367 : MachineInstrBuilder MIB =
2368 110 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2369 55 : .addReg(SrcReg, getKillRegState(SrcIsKill));
2370 55 : if (IsBitTest)
2371 27 : MIB.addImm(TestBit);
2372 : MIB.addMBB(TBB);
2373 :
2374 55 : finishCondBranch(BI->getParent(), TBB, FBB);
2375 55 : return true;
2376 : }
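     :   // Typical selections made here (depending on block layout the inverted
     :   // opcode may be chosen instead):
     :   //   icmp eq i64 %x, 0  + br              ->  CBZX  x0, <target>
     :   //   and i32 %x, 8; icmp ne i32 %a, 0 + br ->  TBNZW w0, #3, <target>
     :   //   icmp slt i32 %x, 0 + br              ->  TBNZW w0, #31, <target>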
2377 :
2378 272 : bool AArch64FastISel::selectBranch(const Instruction *I) {
2379 : const BranchInst *BI = cast<BranchInst>(I);
2380 272 : if (BI->isUnconditional()) {
2381 150 : MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2382 300 : fastEmitBranch(MSucc, BI->getDebugLoc());
2383 150 : return true;
2384 : }
2385 :
2386 122 : MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2387 122 : MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2388 :
2389 : if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2390 89 : if (CI->hasOneUse() && isValueAvailable(CI)) {
2391 : // Try to optimize or fold the cmp.
2392 88 : CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2393 88 : switch (Predicate) {
2394 : default:
2395 : break;
2396 0 : case CmpInst::FCMP_FALSE:
2397 0 : fastEmitBranch(FBB, DbgLoc);
2398 0 : return true;
2399 0 : case CmpInst::FCMP_TRUE:
2400 0 : fastEmitBranch(TBB, DbgLoc);
2401 0 : return true;
2402 : }
2403 :
2404 : // Try to emit a combined compare-and-branch first.
2405 88 : if (emitCompareAndBranch(BI))
2406 : return true;
2407 :
2408 : // Try to take advantage of fallthrough opportunities.
2409 33 : if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2410 : std::swap(TBB, FBB);
2411 6 : Predicate = CmpInst::getInversePredicate(Predicate);
2412 : }
2413 :
2414 : // Emit the cmp.
2415 66 : if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2416 : return false;
2417 :
2418 : // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2419 : // instruction.
2420 : AArch64CC::CondCode CC = getCompareCC(Predicate);
2421 : AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2422 33 : switch (Predicate) {
2423 : default:
2424 : break;
2425 : case CmpInst::FCMP_UEQ:
2426 : ExtraCC = AArch64CC::EQ;
2427 : CC = AArch64CC::VS;
2428 : break;
2429 : case CmpInst::FCMP_ONE:
2430 : ExtraCC = AArch64CC::MI;
2431 : CC = AArch64CC::GT;
2432 : break;
2433 : }
2434 : assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2435 :
2436 : // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2437 : if (ExtraCC != AArch64CC::AL) {
2438 4 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2439 2 : .addImm(ExtraCC)
2440 : .addMBB(TBB);
2441 : }
2442 :
2443 : // Emit the branch.
2444 66 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2445 33 : .addImm(CC)
2446 : .addMBB(TBB);
2447 :
2448 33 : finishCondBranch(BI->getParent(), TBB, FBB);
2449 33 : return true;
2450 : }
2451 : } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2452 : uint64_t Imm = CI->getZExtValue();
2453 2 : MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2454 4 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2455 : .addMBB(Target);
2456 :
2457 : // Obtain the branch probability and add the target to the successor list.
2458 2 : if (FuncInfo.BPI) {
2459 : auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2460 0 : BI->getParent(), Target->getBasicBlock());
2461 0 : FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2462 : } else
2463 2 : FuncInfo.MBB->addSuccessorWithoutProb(Target);
2464 2 : return true;
2465 : } else {
2466 31 : AArch64CC::CondCode CC = AArch64CC::NE;
2467 31 : if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2468 :       // Fake-request the condition; otherwise the intrinsic might be completely
2469 : // optimized away.
2470 28 : unsigned CondReg = getRegForValue(BI->getCondition());
2471 14 : if (!CondReg)
2472 14 : return false;
2473 :
2474 : // Emit the branch.
2475 28 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2476 14 : .addImm(CC)
2477 : .addMBB(TBB);
2478 :
2479 14 : finishCondBranch(BI->getParent(), TBB, FBB);
2480 14 : return true;
2481 : }
2482 : }
2483 :
2484 36 : unsigned CondReg = getRegForValue(BI->getCondition());
2485 18 : if (CondReg == 0)
2486 : return false;
2487 18 : bool CondRegIsKill = hasTrivialKill(BI->getCondition());
2488 :
2489 : // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2490 : unsigned Opcode = AArch64::TBNZW;
2491 18 : if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2492 : std::swap(TBB, FBB);
2493 : Opcode = AArch64::TBZW;
2494 : }
2495 :
2496 18 : const MCInstrDesc &II = TII.get(Opcode);
2497 : unsigned ConstrainedCondReg
2498 36 : = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2499 18 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2500 18 : .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill))
2501 : .addImm(0)
2502 : .addMBB(TBB);
2503 :
2504 18 : finishCondBranch(BI->getParent(), TBB, FBB);
2505 18 : return true;
2506 : }
2507 :
2508 0 : bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2509 : const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2510 0 : unsigned AddrReg = getRegForValue(BI->getOperand(0));
2511 0 : if (AddrReg == 0)
2512 : return false;
2513 :
2514 : // Emit the indirect branch.
2515 0 : const MCInstrDesc &II = TII.get(AArch64::BR);
2516 0 : AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2517 0 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2518 :
2519 : // Make sure the CFG is up-to-date.
2520 0 : for (auto *Succ : BI->successors())
2521 0 : FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2522 :
2523 0 : return true;
2524 : }
2525 :
2526 57 : bool AArch64FastISel::selectCmp(const Instruction *I) {
2527 : const CmpInst *CI = cast<CmpInst>(I);
2528 :
2529 : // Vectors of i1 are weird: bail out.
2530 114 : if (CI->getType()->isVectorTy())
2531 : return false;
2532 :
2533 : // Try to optimize or fold the cmp.
2534 51 : CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2535 : unsigned ResultReg = 0;
2536 51 : switch (Predicate) {
2537 : default:
2538 : break;
2539 1 : case CmpInst::FCMP_FALSE:
2540 1 : ResultReg = createResultReg(&AArch64::GPR32RegClass);
2541 2 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2542 2 : TII.get(TargetOpcode::COPY), ResultReg)
2543 1 : .addReg(AArch64::WZR, getKillRegState(true));
2544 1 : break;
2545 : case CmpInst::FCMP_TRUE:
2546 1 : ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2547 1 : break;
2548 : }
2549 :
2550 2 : if (ResultReg) {
2551 2 : updateValueMap(I, ResultReg);
2552 2 : return true;
2553 : }
2554 :
2555 : // Emit the cmp.
2556 98 : if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2557 : return false;
2558 :
2559 49 : ResultReg = createResultReg(&AArch64::GPR32RegClass);
2560 :
2561 : // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2562 : // condition codes are inverted, because they are used by CSINC.
2563 : static unsigned CondCodeTable[2][2] = {
2564 : { AArch64CC::NE, AArch64CC::VC },
2565 : { AArch64CC::PL, AArch64CC::LE }
2566 : };
2567 : unsigned *CondCodes = nullptr;
2568 49 : switch (Predicate) {
2569 : default:
2570 : break;
2571 : case CmpInst::FCMP_UEQ:
2572 : CondCodes = &CondCodeTable[0][0];
2573 : break;
2574 1 : case CmpInst::FCMP_ONE:
2575 : CondCodes = &CondCodeTable[1][0];
2576 1 : break;
2577 : }
2578 :
2579 : if (CondCodes) {
2580 2 : unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2581 6 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2582 4 : TmpReg1)
2583 2 : .addReg(AArch64::WZR, getKillRegState(true))
2584 2 : .addReg(AArch64::WZR, getKillRegState(true))
2585 2 : .addImm(CondCodes[0]);
2586 6 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2587 4 : ResultReg)
2588 2 : .addReg(TmpReg1, getKillRegState(true))
2589 2 : .addReg(AArch64::WZR, getKillRegState(true))
2590 2 : .addImm(CondCodes[1]);
2591 :
2592 2 : updateValueMap(I, ResultReg);
2593 2 : return true;
2594 : }
2595 :
2596 : // Now set a register based on the comparison.
2597 : AArch64CC::CondCode CC = getCompareCC(Predicate);
2598 : assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2599 : AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2600 141 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2601 94 : ResultReg)
2602 47 : .addReg(AArch64::WZR, getKillRegState(true))
2603 47 : .addReg(AArch64::WZR, getKillRegState(true))
2604 47 : .addImm(invertedCC);
2605 :
2606 47 : updateValueMap(I, ResultReg);
2607 47 : return true;
2608 : }
2609 :
2610 : /// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2611 : /// value.
2612 53 : bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2613 53 : if (!SI->getType()->isIntegerTy(1))
2614 : return false;
2615 :
2616 : const Value *Src1Val, *Src2Val;
2617 : unsigned Opc = 0;
2618 : bool NeedExtraOp = false;
2619 : if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2620 3 : if (CI->isOne()) {
2621 : Src1Val = SI->getCondition();
2622 : Src2Val = SI->getFalseValue();
2623 : Opc = AArch64::ORRWrr;
2624 : } else {
2625 : assert(CI->isZero());
2626 : Src1Val = SI->getFalseValue();
2627 : Src2Val = SI->getCondition();
2628 : Opc = AArch64::BICWrr;
2629 : }
2630 : } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2631 2 : if (CI->isOne()) {
2632 : Src1Val = SI->getCondition();
2633 : Src2Val = SI->getTrueValue();
2634 : Opc = AArch64::ORRWrr;
2635 : NeedExtraOp = true;
2636 : } else {
2637 : assert(CI->isZero());
2638 : Src1Val = SI->getCondition();
2639 : Src2Val = SI->getTrueValue();
2640 : Opc = AArch64::ANDWrr;
2641 : }
2642 : }
2643 :
2644 6 : if (!Opc)
2645 : return false;
2646 :
2647 5 : unsigned Src1Reg = getRegForValue(Src1Val);
2648 5 : if (!Src1Reg)
2649 : return false;
2650 5 : bool Src1IsKill = hasTrivialKill(Src1Val);
2651 :
2652 5 : unsigned Src2Reg = getRegForValue(Src2Val);
2653 5 : if (!Src2Reg)
2654 : return false;
2655 5 : bool Src2IsKill = hasTrivialKill(Src2Val);
2656 :
2657 5 : if (NeedExtraOp) {
2658 2 : Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
2659 : Src1IsKill = true;
2660 : }
2661 5 : unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2662 : Src1IsKill, Src2Reg, Src2IsKill);
2663 5 : updateValueMap(SI, ResultReg);
2664 5 : return true;
2665 : }
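     :   // For example,
     :   //   %r = select i1 %c, i1 true, i1 %b    ->  ORRWrr %c, %b
     :   //   %r = select i1 %c, i1 %b, i1 false   ->  ANDWrr %c, %b
     :   // which avoids materializing the condition into a compare-and-CSEL sequence.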
2666 :
2667 53 : bool AArch64FastISel::selectSelect(const Instruction *I) {
2668 : assert(isa<SelectInst>(I) && "Expected a select instruction.");
2669 53 : MVT VT;
2670 53 : if (!isTypeSupported(I->getType(), VT))
2671 : return false;
2672 :
2673 : unsigned Opc;
2674 : const TargetRegisterClass *RC;
2675 53 : switch (VT.SimpleTy) {
2676 : default:
2677 : return false;
2678 : case MVT::i1:
2679 : case MVT::i8:
2680 : case MVT::i16:
2681 : case MVT::i32:
2682 : Opc = AArch64::CSELWr;
2683 : RC = &AArch64::GPR32RegClass;
2684 : break;
2685 8 : case MVT::i64:
2686 : Opc = AArch64::CSELXr;
2687 : RC = &AArch64::GPR64RegClass;
2688 8 : break;
2689 26 : case MVT::f32:
2690 : Opc = AArch64::FCSELSrrr;
2691 : RC = &AArch64::FPR32RegClass;
2692 26 : break;
2693 1 : case MVT::f64:
2694 : Opc = AArch64::FCSELDrrr;
2695 : RC = &AArch64::FPR64RegClass;
2696 1 : break;
2697 : }
2698 :
2699 : const SelectInst *SI = cast<SelectInst>(I);
2700 : const Value *Cond = SI->getCondition();
2701 53 : AArch64CC::CondCode CC = AArch64CC::NE;
2702 : AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2703 :
2704 53 : if (optimizeSelect(SI))
2705 : return true;
2706 :
2707 :   // Try to pick up the flags, so we don't have to emit another compare.
2708 48 : if (foldXALUIntrinsic(CC, I, Cond)) {
2709 :     // Fake-request the condition to force emission of the XALU intrinsic.
2710 12 : unsigned CondReg = getRegForValue(Cond);
2711 12 : if (!CondReg)
2712 : return false;
2713 55 : } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2714 27 : isValueAvailable(Cond)) {
2715 : const auto *Cmp = cast<CmpInst>(Cond);
2716 : // Try to optimize or fold the cmp.
2717 27 : CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2718 : const Value *FoldSelect = nullptr;
2719 27 : switch (Predicate) {
2720 : default:
2721 : break;
2722 : case CmpInst::FCMP_FALSE:
2723 : FoldSelect = SI->getFalseValue();
2724 1 : break;
2725 : case CmpInst::FCMP_TRUE:
2726 : FoldSelect = SI->getTrueValue();
2727 1 : break;
2728 : }
2729 :
2730 2 : if (FoldSelect) {
2731 2 : unsigned SrcReg = getRegForValue(FoldSelect);
2732 2 : if (!SrcReg)
2733 : return false;
2734 2 : unsigned UseReg = lookUpRegForValue(SI);
2735 2 : if (UseReg)
2736 2 : MRI.clearKillFlags(UseReg);
2737 :
2738 2 : updateValueMap(I, SrcReg);
2739 2 : return true;
2740 : }
2741 :
2742 : // Emit the cmp.
2743 50 : if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2744 : return false;
2745 :
2746 : // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2747 25 : CC = getCompareCC(Predicate);
2748 25 : switch (Predicate) {
2749 : default:
2750 : break;
2751 1 : case CmpInst::FCMP_UEQ:
2752 : ExtraCC = AArch64CC::EQ;
2753 1 : CC = AArch64CC::VS;
2754 1 : break;
2755 1 : case CmpInst::FCMP_ONE:
2756 : ExtraCC = AArch64CC::MI;
2757 1 : CC = AArch64CC::GT;
2758 1 : break;
2759 : }
2760 : assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2761 : } else {
2762 9 : unsigned CondReg = getRegForValue(Cond);
2763 9 : if (!CondReg)
2764 : return false;
2765 9 : bool CondIsKill = hasTrivialKill(Cond);
2766 :
2767 9 : const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2768 9 : CondReg = constrainOperandRegClass(II, CondReg, 1);
2769 :
2770 : // Emit a TST instruction (ANDS wzr, reg, #imm).
2771 9 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
2772 9 : AArch64::WZR)
2773 9 : .addReg(CondReg, getKillRegState(CondIsKill))
2774 9 : .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2775 : }
2776 :
2777 92 : unsigned Src1Reg = getRegForValue(SI->getTrueValue());
2778 46 : bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
2779 :
2780 46 : unsigned Src2Reg = getRegForValue(SI->getFalseValue());
2781 46 : bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
2782 :
2783 46 : if (!Src1Reg || !Src2Reg)
2784 : return false;
2785 :
2786 46 : if (ExtraCC != AArch64CC::AL) {
2787 2 : Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2788 : Src2IsKill, ExtraCC);
2789 : Src2IsKill = true;
2790 : }
2791 46 : unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2792 : Src2IsKill, CC);
2793 46 : updateValueMap(I, ResultReg);
2794 46 : return true;
2795 : }
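     :   // Illustrative end result: for
     :   //   %c = icmp sgt i32 %a, %b
     :   //   %r = select i1 %c, i32 %x, i32 %y
     :   // the compare above sets the flags and the select becomes a single
     :   //   csel w0, w_x, w_y, gt
     :   // without first materializing %c as an i1 value.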
2796 :
2797 5 : bool AArch64FastISel::selectFPExt(const Instruction *I) {
2798 5 : Value *V = I->getOperand(0);
2799 10 : if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2800 : return false;
2801 :
2802 3 : unsigned Op = getRegForValue(V);
2803 3 : if (Op == 0)
2804 : return false;
2805 :
2806 3 : unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
2807 9 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2808 6 : ResultReg).addReg(Op);
2809 3 : updateValueMap(I, ResultReg);
2810 3 : return true;
2811 : }
2812 :
2813 2 : bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2814 2 : Value *V = I->getOperand(0);
2815 4 : if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2816 : return false;
2817 :
2818 1 : unsigned Op = getRegForValue(V);
2819 1 : if (Op == 0)
2820 : return false;
2821 :
2822 1 : unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
2823 3 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2824 2 : ResultReg).addReg(Op);
2825 1 : updateValueMap(I, ResultReg);
2826 1 : return true;
2827 : }
2828 :
2829 : // FPToUI and FPToSI
2830 7 : bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2831 7 : MVT DestVT;
2832 7 : if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2833 : return false;
2834 :
2835 12 : unsigned SrcReg = getRegForValue(I->getOperand(0));
2836 6 : if (SrcReg == 0)
2837 : return false;
2838 :
2839 12 : EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2840 : if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2841 4 : return false;
2842 :
2843 : unsigned Opc;
2844 : if (SrcVT == MVT::f64) {
2845 1 : if (Signed)
2846 0 : Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2847 : else
2848 1 : Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2849 : } else {
2850 1 : if (Signed)
2851 0 : Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2852 : else
2853 1 : Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2854 : }
2855 2 : unsigned ResultReg = createResultReg(
2856 2 : DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2857 4 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2858 2 : .addReg(SrcReg);
2859 2 : updateValueMap(I, ResultReg);
2860 2 : return true;
2861 : }
2862 :
2863 22 : bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2864 22 : MVT DestVT;
2865 22 : if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2866 : return false;
2867 : // Let regular ISEL handle FP16
2868 20 : if (DestVT == MVT::f16)
2869 : return false;
2870 :
2871 : assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2872 : "Unexpected value type.");
2873 :
2874 20 : unsigned SrcReg = getRegForValue(I->getOperand(0));
2875 10 : if (!SrcReg)
2876 : return false;
2877 10 : bool SrcIsKill = hasTrivialKill(I->getOperand(0));
2878 :
2879 20 : EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2880 :
2881 : // Handle sign-extension.
2882 : if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2883 : SrcReg =
2884 12 : emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2885 6 : if (!SrcReg)
2886 : return false;
2887 : SrcIsKill = true;
2888 : }
2889 :
2890 : unsigned Opc;
2891 : if (SrcVT == MVT::i64) {
2892 2 : if (Signed)
2893 0 : Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2894 : else
2895 2 : Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2896 : } else {
2897 8 : if (Signed)
2898 3 : Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2899 : else
2900 5 : Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2901 : }
2902 :
2903 10 : unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
2904 : SrcIsKill);
2905 10 : updateValueMap(I, ResultReg);
2906 10 : return true;
2907 : }
2908 :
2909 1222 : bool AArch64FastISel::fastLowerArguments() {
2910 1222 : if (!FuncInfo.CanLowerReturn)
2911 : return false;
2912 :
2913 1222 : const Function *F = FuncInfo.Fn;
2914 1222 : if (F->isVarArg())
2915 : return false;
2916 :
2917 : CallingConv::ID CC = F->getCallingConv();
2918 1221 : if (CC != CallingConv::C && CC != CallingConv::Swift)
2919 : return false;
2920 :
2921 2426 : if (Subtarget->hasCustomCallingConv())
2922 : return false;
2923 :
2924 :   // Only handle simple cases with up to 8 GPR and 8 FPR arguments each.
2925 : unsigned GPRCnt = 0;
2926 : unsigned FPRCnt = 0;
2927 3119 : for (auto const &Arg : F->args()) {
2928 4028 : if (Arg.hasAttribute(Attribute::ByVal) ||
2929 4028 : Arg.hasAttribute(Attribute::InReg) ||
2930 4026 : Arg.hasAttribute(Attribute::StructRet) ||
2931 4017 : Arg.hasAttribute(Attribute::SwiftSelf) ||
2932 6017 : Arg.hasAttribute(Attribute::SwiftError) ||
2933 1998 : Arg.hasAttribute(Attribute::Nest))
2934 107 : return false;
2935 :
2936 1998 : Type *ArgTy = Arg.getType();
2937 1998 : if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2938 : return false;
2939 :
2940 1994 : EVT ArgVT = TLI.getValueType(DL, ArgTy);
2941 1994 : if (!ArgVT.isSimple())
2942 : return false;
2943 :
2944 : MVT VT = ArgVT.getSimpleVT().SimpleTy;
2945 1994 : if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2946 : return false;
2947 :
2948 1994 : if (VT.isVector() &&
2949 117 : (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2950 : return false;
2951 :
2952 1923 : if (VT >= MVT::i1 && VT <= MVT::i64)
2953 1608 : ++GPRCnt;
2954 315 : else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2955 37 : VT.is128BitVector())
2956 305 : ++FPRCnt;
2957 : else
2958 : return false;
2959 :
2960 1913 : if (GPRCnt > 8 || FPRCnt > 8)
2961 : return false;
2962 : }
2963 :
2964 : static const MCPhysReg Registers[6][8] = {
2965 : { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2966 : AArch64::W5, AArch64::W6, AArch64::W7 },
2967 : { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2968 : AArch64::X5, AArch64::X6, AArch64::X7 },
2969 : { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2970 : AArch64::H5, AArch64::H6, AArch64::H7 },
2971 : { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2972 : AArch64::S5, AArch64::S6, AArch64::S7 },
2973 : { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2974 : AArch64::D5, AArch64::D6, AArch64::D7 },
2975 : { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2976 : AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2977 : };
2978 :
2979 : unsigned GPRIdx = 0;
2980 : unsigned FPRIdx = 0;
2981 2943 : for (auto const &Arg : F->args()) {
2982 1838 : MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2983 : unsigned SrcReg;
2984 : const TargetRegisterClass *RC;
2985 1838 : if (VT >= MVT::i1 && VT <= MVT::i32) {
2986 646 : SrcReg = Registers[0][GPRIdx++];
2987 : RC = &AArch64::GPR32RegClass;
2988 : VT = MVT::i32;
2989 1192 : } else if (VT == MVT::i64) {
2990 896 : SrcReg = Registers[1][GPRIdx++];
2991 : RC = &AArch64::GPR64RegClass;
2992 296 : } else if (VT == MVT::f16) {
2993 2 : SrcReg = Registers[2][FPRIdx++];
2994 : RC = &AArch64::FPR16RegClass;
2995 294 : } else if (VT == MVT::f32) {
2996 188 : SrcReg = Registers[3][FPRIdx++];
2997 : RC = &AArch64::FPR32RegClass;
2998 106 : } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2999 79 : SrcReg = Registers[4][FPRIdx++];
3000 : RC = &AArch64::FPR64RegClass;
3001 27 : } else if (VT.is128BitVector()) {
3002 27 : SrcReg = Registers[5][FPRIdx++];
3003 : RC = &AArch64::FPR128RegClass;
3004 : } else
3005 0 : llvm_unreachable("Unexpected value type.");
3006 :
3007 1838 : unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3008 : // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3009 : // Without this, EmitLiveInCopies may eliminate the livein if its only
3010 : // use is a bitcast (which isn't turned into an instruction).
3011 1838 : unsigned ResultReg = createResultReg(RC);
3012 3676 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3013 3676 : TII.get(TargetOpcode::COPY), ResultReg)
3014 1838 : .addReg(DstReg, getKillRegState(true));
3015 1838 : updateValueMap(&Arg, ResultReg);
3016 : }
3017 : return true;
3018 : }
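     : // As an example, a signature like  void @f(i32, i64, float, <4 x i32>)
     : // assigns W0, X1, S0 and Q1 from the tables above (GPR and FPR indices
     : // advance independently), and each live-in is copied into a fresh
     : // virtual register.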
3019 :
3020 119 : bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3021 : SmallVectorImpl<MVT> &OutVTs,
3022 : unsigned &NumBytes) {
3023 119 : CallingConv::ID CC = CLI.CallConv;
3024 : SmallVector<CCValAssign, 16> ArgLocs;
3025 238 : CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3026 119 : CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
3027 :
3028 : // Get a count of how many bytes are to be pushed on the stack.
3029 119 : NumBytes = CCInfo.getNextStackOffset();
3030 :
3031 : // Issue CALLSEQ_START
3032 119 : unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3033 238 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3034 119 : .addImm(NumBytes).addImm(0);
3035 :
3036 : // Process the args.
3037 1430 : for (CCValAssign &VA : ArgLocs) {
3038 2630 : const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3039 1315 : MVT ArgVT = OutVTs[VA.getValNo()];
3040 :
3041 1315 : unsigned ArgReg = getRegForValue(ArgVal);
3042 1315 : if (!ArgReg)
3043 4 : return false;
3044 :
3045 : // Handle arg promotion: SExt, ZExt, AExt.
3046 1313 : switch (VA.getLocInfo()) {
3047 : case CCValAssign::Full:
3048 : break;
3049 15 : case CCValAssign::SExt: {
3050 15 : MVT DestVT = VA.getLocVT();
3051 15 : MVT SrcVT = ArgVT;
3052 15 : ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3053 15 : if (!ArgReg)
3054 0 : return false;
3055 15 : break;
3056 : }
3057 84 : case CCValAssign::AExt:
3058 : // Intentional fall-through.
3059 : case CCValAssign::ZExt: {
3060 84 : MVT DestVT = VA.getLocVT();
3061 84 : MVT SrcVT = ArgVT;
3062 84 : ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3063 84 : if (!ArgReg)
3064 0 : return false;
3065 84 : break;
3066 : }
3067 0 : default:
3068 0 : llvm_unreachable("Unknown arg promotion!");
3069 : }
3070 :
3071 : // Now copy/store arg to correct locations.
3072 1313 : if (VA.isRegLoc() && !VA.needsCustom()) {
3073 498 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3074 498 : TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3075 249 : CLI.OutRegs.push_back(VA.getLocReg());
3076 1064 : } else if (VA.needsCustom()) {
3077 : // FIXME: Handle custom args.
3078 : return false;
3079 : } else {
3080 : assert(VA.isMemLoc() && "Assuming store on stack.");
3081 :
3082 : // Don't emit stores for undef values.
3083 1064 : if (isa<UndefValue>(ArgVal))
3084 1032 : continue;
3085 :
3086 : // Need to store on the stack.
3087 32 : unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3088 :
3089 : unsigned BEAlign = 0;
3090 32 : if (ArgSize < 8 && !Subtarget->isLittleEndian())
3091 2 : BEAlign = 8 - ArgSize;
3092 :
3093 : Address Addr;
3094 : Addr.setKind(Address::RegBase);
3095 : Addr.setReg(AArch64::SP);
3096 32 : Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3097 :
3098 32 : unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
3099 32 : MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3100 32 : MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3101 : MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3102 :
3103 32 : if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3104 2 : return false;
3105 : }
3106 : }
3107 : return true;
3108 : }
3109 :
3110 115 : bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
3111 : unsigned NumBytes) {
3112 115 : CallingConv::ID CC = CLI.CallConv;
3113 :
3114 : // Issue CALLSEQ_END
3115 115 : unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3116 230 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3117 115 : .addImm(NumBytes).addImm(0);
3118 :
3119 : // Now the return value.
3120 115 : if (RetVT != MVT::isVoid) {
3121 : SmallVector<CCValAssign, 16> RVLocs;
3122 64 : CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3123 64 : CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3124 :
3125 : // Only handle a single return value.
3126 64 : if (RVLocs.size() != 1)
3127 10 : return false;
3128 :
3129 : // Copy all of the result registers out of their specified physreg.
3130 : MVT CopyVT = RVLocs[0].getValVT();
3131 :
3132 : // TODO: Handle big-endian results
3133 64 : if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3134 : return false;
3135 :
3136 54 : unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3137 108 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3138 108 : TII.get(TargetOpcode::COPY), ResultReg)
3139 54 : .addReg(RVLocs[0].getLocReg());
3140 108 : CLI.InRegs.push_back(RVLocs[0].getLocReg());
3141 :
3142 54 : CLI.ResultReg = ResultReg;
3143 54 : CLI.NumResultRegs = 1;
3144 : }
3145 :
3146 : return true;
3147 : }
3148 :
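// Illustrative sketch (callee name and register assignments assumed, not taken
// from any particular test): a simple call such as
//   %r = call i32 @callee(i32 %a, i64 %b)
// is lowered by this function and its helpers roughly as
//   <call-frame setup pseudo> NumBytes, 0     (processCallArgs)
//   $w0 = COPY %a ; $x1 = COPY %b             (register arguments)
//   BL @callee, implicit $w0, implicit $x1, <regmask>
//   <call-frame destroy pseudo> NumBytes, 0   (finishCall)
//   %r = COPY $w0                             (single supported return value)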
3149 228 : bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3150 228 : CallingConv::ID CC = CLI.CallConv;
3151 228 : bool IsTailCall = CLI.IsTailCall;
3152 228 : bool IsVarArg = CLI.IsVarArg;
3153 228 : const Value *Callee = CLI.Callee;
3154 228 : MCSymbol *Symbol = CLI.Symbol;
3155 :
3156 228 : if (!Callee && !Symbol)
3157 : return false;
3158 :
3159 : // Allow SelectionDAG isel to handle tail calls.
3160 228 : if (IsTailCall)
3161 : return false;
3162 :
3163 204 : CodeModel::Model CM = TM.getCodeModel();
3164 : // Only support the small-addressing and large code models.
3165 204 : if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3166 : return false;
3167 :
3168 : // FIXME: Add large code model support for ELF.
3169 204 : if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3170 : return false;
3171 :
3172 : // Let SDISel handle vararg functions.
3173 204 : if (IsVarArg)
3174 : return false;
3175 :
3176 : // FIXME: Only handle *simple* calls for now.
3177 204 : MVT RetVT;
3178 408 : if (CLI.RetTy->isVoidTy())
3179 55 : RetVT = MVT::isVoid;
3180 149 : else if (!isTypeLegal(CLI.RetTy, RetVT))
3181 : return false;
3182 :
3183 1581 : for (auto Flag : CLI.OutFlags)
3184 1393 : if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3185 2786 : Flag.isSwiftSelf() || Flag.isSwiftError())
3186 5 : return false;
3187 :
3188 : // Set up the argument vectors.
3189 : SmallVector<MVT, 16> OutVTs;
3190 188 : OutVTs.reserve(CLI.OutVals.size());
3191 :
3192 1506 : for (auto *Val : CLI.OutVals) {
3193 1387 : MVT VT;
3194 1387 : if (!isTypeLegal(Val->getType(), VT) &&
3195 108 : !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3196 69 : return false;
3197 :
3198 : // We don't handle vector parameters yet.
3199 2756 : if (VT.isVector() || VT.getSizeInBits() > 64)
3200 : return false;
3201 :
3202 1318 : OutVTs.push_back(VT);
3203 : }
3204 :
3205 : Address Addr;
3206 119 : if (Callee && !computeCallAddress(Callee, Addr))
3207 : return false;
3208 :
3209 : // Handle the arguments now that we've gotten them.
3210 : unsigned NumBytes;
3211 119 : if (!processCallArgs(CLI, OutVTs, NumBytes))
3212 : return false;
3213 :
3214 115 : const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3215 115 : if (RegInfo->isAnyArgRegReserved(*MF))
3216 2 : RegInfo->emitReservedArgRegCallError(*MF);
3217 :
3218 : // Issue the call.
3219 115 : MachineInstrBuilder MIB;
3220 115 : if (Subtarget->useSmallAddressing()) {
3221 180 : const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
3222 99 : MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
3223 99 : if (Symbol)
3224 : MIB.addSym(Symbol, 0);
3225 83 : else if (Addr.getGlobalValue())
3226 : MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3227 18 : else if (Addr.getReg()) {
3228 18 : unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3229 18 : MIB.addReg(Reg);
3230 : } else
3231 : return false;
3232 : } else {
3233 : unsigned CallReg = 0;
3234 16 : if (Symbol) {
3235 8 : unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3236 24 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
3237 16 : ADRPReg)
3238 : .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3239 :
3240 8 : CallReg = createResultReg(&AArch64::GPR64RegClass);
3241 8 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3242 16 : TII.get(AArch64::LDRXui), CallReg)
3243 8 : .addReg(ADRPReg)
3244 : .addSym(Symbol,
3245 : AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3246 8 : } else if (Addr.getGlobalValue())
3247 7 : CallReg = materializeGV(Addr.getGlobalValue());
3248 1 : else if (Addr.getReg())
3249 : CallReg = Addr.getReg();
3250 :
3251 16 : if (!CallReg)
3252 0 : return false;
3253 :
3254 16 : const MCInstrDesc &II = TII.get(AArch64::BLR);
3255 16 : CallReg = constrainOperandRegClass(II, CallReg, 0);
3256 16 : MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
3257 : }
3258 :
3259 : // Add implicit physical register uses to the call.
3260 347 : for (auto Reg : CLI.OutRegs)
3261 232 : MIB.addReg(Reg, RegState::Implicit);
3262 :
3263 : // Add a register mask with the call-preserved registers.
3264 : // Proper defs for return values will be added by setPhysRegsDeadExcept().
3265 115 : MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3266 :
3267 115 : CLI.Call = MIB;
3268 :
3269 : // Finish off the call including any return values.
3270 115 : return finishCall(CLI, RetVT, NumBytes);
3271 : }
3272 :
3273 0 : bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
3274 0 : if (Alignment)
3275 29 : return Len / Alignment <= 4;
3276 : else
3277 3 : return Len < 32;
3278 : }
3279 :
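// Illustrative sketch (operands assumed): a 12-byte memcpy with 4-byte
// alignment passes isMemCpySmall (12 / 4 <= 4) and is expanded inline into
// three i32 load/store pairs, bumping the unscaled offset by 4 each time:
//   ldr w8, [x1]     ; str w8, [x0]
//   ldr w8, [x1, #4] ; str w8, [x0, #4]
//   ldr w8, [x1, #8] ; str w8, [x0, #8]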
3280 13 : bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3281 : uint64_t Len, unsigned Alignment) {
3282 : // Make sure we don't bloat code by inlining very large memcpy's.
3283 13 : if (!isMemCpySmall(Len, Alignment))
3284 : return false;
3285 :
3286 : int64_t UnscaledOffset = 0;
3287 : Address OrigDest = Dest;
3288 : Address OrigSrc = Src;
3289 :
3290 54 : while (Len) {
3291 41 : MVT VT;
3292 41 : if (!Alignment || Alignment >= 8) {
3293 30 : if (Len >= 8)
3294 27 : VT = MVT::i64;
3295 3 : else if (Len >= 4)
3296 0 : VT = MVT::i32;
3297 3 : else if (Len >= 2)
3298 0 : VT = MVT::i16;
3299 : else {
3300 3 : VT = MVT::i8;
3301 : }
3302 : } else {
3303 : // Bound based on alignment.
3304 11 : if (Len >= 4 && Alignment == 4)
3305 2 : VT = MVT::i32;
3306 9 : else if (Len >= 2 && Alignment == 2)
3307 3 : VT = MVT::i16;
3308 : else {
3309 6 : VT = MVT::i8;
3310 : }
3311 : }
3312 :
3313 41 : unsigned ResultReg = emitLoad(VT, VT, Src);
3314 41 : if (!ResultReg)
3315 0 : return false;
3316 :
3317 41 : if (!emitStore(VT, ResultReg, Dest))
3318 : return false;
3319 :
3320 41 : int64_t Size = VT.getSizeInBits() / 8;
3321 41 : Len -= Size;
3322 41 : UnscaledOffset += Size;
3323 :
3324 : // We need to recompute the unscaled offset for each iteration.
3325 41 : Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3326 41 : Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3327 : }
3328 :
3329 : return true;
3330 : }
3331 :
3332 : /// Check if it is possible to fold the condition from the XALU intrinsic
3333 : /// into the user. The condition code will only be updated on success.
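/// For example (illustrative IR), with
///   %s   = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
///   %val = extractvalue { i32, i1 } %s, 0
///   %ovf = extractvalue { i32, i1 } %s, 1
///   br i1 %ovf, label %overflow, label %cont
/// the branch can reuse the flags set by the ADDS that produced %val and
/// branch on the VS condition instead of materializing %ovf first.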
3334 79 : bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3335 : const Instruction *I,
3336 : const Value *Cond) {
3337 : if (!isa<ExtractValueInst>(Cond))
3338 : return false;
3339 :
3340 : const auto *EV = cast<ExtractValueInst>(Cond);
3341 : if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3342 : return false;
3343 :
3344 : const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3345 26 : MVT RetVT;
3346 : const Function *Callee = II->getCalledFunction();
3347 : Type *RetTy =
3348 26 : cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3349 26 : if (!isTypeLegal(RetTy, RetVT))
3350 : return false;
3351 :
3352 26 : if (RetVT != MVT::i32 && RetVT != MVT::i64)
3353 : return false;
3354 :
3355 26 : const Value *LHS = II->getArgOperand(0);
3356 : const Value *RHS = II->getArgOperand(1);
3357 :
3358 : // Canonicalize immediate to the RHS.
3359 26 : if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3360 : isCommutativeIntrinsic(II))
3361 : std::swap(LHS, RHS);
3362 :
3363 : // Simplify multiplies.
3364 : Intrinsic::ID IID = II->getIntrinsicID();
3365 26 : switch (IID) {
3366 : default:
3367 : break;
3368 5 : case Intrinsic::smul_with_overflow:
3369 : if (const auto *C = dyn_cast<ConstantInt>(RHS))
3370 1 : if (C->getValue() == 2)
3371 : IID = Intrinsic::sadd_with_overflow;
3372 : break;
3373 5 : case Intrinsic::umul_with_overflow:
3374 : if (const auto *C = dyn_cast<ConstantInt>(RHS))
3375 1 : if (C->getValue() == 2)
3376 : IID = Intrinsic::uadd_with_overflow;
3377 : break;
3378 : }
3379 :
3380 : AArch64CC::CondCode TmpCC;
3381 24 : switch (IID) {
3382 : default:
3383 : return false;
3384 : case Intrinsic::sadd_with_overflow:
3385 : case Intrinsic::ssub_with_overflow:
3386 : TmpCC = AArch64CC::VS;
3387 : break;
3388 : case Intrinsic::uadd_with_overflow:
3389 : TmpCC = AArch64CC::HS;
3390 : break;
3391 4 : case Intrinsic::usub_with_overflow:
3392 : TmpCC = AArch64CC::LO;
3393 4 : break;
3394 8 : case Intrinsic::smul_with_overflow:
3395 : case Intrinsic::umul_with_overflow:
3396 : TmpCC = AArch64CC::NE;
3397 8 : break;
3398 : }
3399 :
3400 : // Check if both instructions are in the same basic block.
3401 26 : if (!isValueAvailable(II))
3402 : return false;
3403 :
3404 : // Make sure nothing is in the way
3405 : BasicBlock::const_iterator Start(I);

3406 : BasicBlock::const_iterator End(II);
3407 66 : for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3408 : // We only expect extractvalue instructions between the intrinsic and the
3409 : // instruction to be selected.
3410 40 : if (!isa<ExtractValueInst>(Itr))
3411 : return false;
3412 :
3413 : // Check that the extractvalue operand comes from the intrinsic.
3414 : const auto *EVI = cast<ExtractValueInst>(Itr);
3415 40 : if (EVI->getAggregateOperand() != II)
3416 : return false;
3417 : }
3418 :
3419 26 : CC = TmpCC;
3420 26 : return true;
3421 : }
3422 :
3423 93 : bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3424 : // FIXME: Handle more intrinsics.
3425 93 : switch (II->getIntrinsicID()) {
3426 : default: return false;
3427 2 : case Intrinsic::frameaddress: {
3428 2 : MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3429 : MFI.setFrameAddressIsTaken(true);
3430 :
3431 2 : const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3432 2 : unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3433 4 : unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3434 4 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3435 4 : TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3436 : // Recursively load frame address
3437 : // ldr x0, [fp]
3438 : // ldr x0, [x0]
3439 : // ldr x0, [x0]
3440 : // ...
3441 : unsigned DestReg;
3442 4 : unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3443 4 : while (Depth--) {
3444 2 : DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3445 : SrcReg, /*IsKill=*/true, 0);
3446 : assert(DestReg && "Unexpected LDR instruction emission failure.");
3447 : SrcReg = DestReg;
3448 : }
3449 :
3450 2 : updateValueMap(II, SrcReg);
3451 2 : return true;
3452 : }
3453 20 : case Intrinsic::memcpy:
3454 : case Intrinsic::memmove: {
3455 : const auto *MTI = cast<MemTransferInst>(II);
3456 : // Don't handle volatile.
3457 20 : if (MTI->isVolatile())
3458 : return false;
3459 :
3460 :     // Disable inlining for memmove before calls to computeAddress. Otherwise,
3461 : // we would emit dead code because we don't currently handle memmoves.
3462 : bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3463 20 : if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3464 : // Small memcpy's are common enough that we want to do them without a call
3465 : // if possible.
3466 : uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3467 : unsigned Alignment = MinAlign(MTI->getDestAlignment(),
3468 19 : MTI->getSourceAlignment());
3469 19 : if (isMemCpySmall(Len, Alignment)) {
3470 : Address Dest, Src;
3471 26 : if (!computeAddress(MTI->getRawDest(), Dest) ||
3472 13 : !computeAddress(MTI->getRawSource(), Src))
3473 13 : return false;
3474 13 : if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3475 : return true;
3476 : }
3477 : }
3478 :
3479 7 : if (!MTI->getLength()->getType()->isIntegerTy(64))
3480 : return false;
3481 :
3482 14 : if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3483 : // Fast instruction selection doesn't support the special
3484 : // address spaces.
3485 : return false;
3486 :
3487 7 : const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3488 7 : return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1);
3489 : }
3490 1 : case Intrinsic::memset: {
3491 : const MemSetInst *MSI = cast<MemSetInst>(II);
3492 : // Don't handle volatile.
3493 1 : if (MSI->isVolatile())
3494 : return false;
3495 :
3496 1 : if (!MSI->getLength()->getType()->isIntegerTy(64))
3497 : return false;
3498 :
3499 1 : if (MSI->getDestAddressSpace() > 255)
3500 : // Fast instruction selection doesn't support the special
3501 : // address spaces.
3502 : return false;
3503 :
3504 1 : return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
3505 : }
3506 12 : case Intrinsic::sin:
3507 : case Intrinsic::cos:
3508 : case Intrinsic::pow: {
3509 12 : MVT RetVT;
3510 12 : if (!isTypeLegal(II->getType(), RetVT))
3511 : return false;
3512 :
3513 12 : if (RetVT != MVT::f32 && RetVT != MVT::f64)
3514 : return false;
3515 :
3516 : static const RTLIB::Libcall LibCallTable[3][2] = {
3517 : { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3518 : { RTLIB::COS_F32, RTLIB::COS_F64 },
3519 : { RTLIB::POW_F32, RTLIB::POW_F64 }
3520 : };
3521 : RTLIB::Libcall LC;
3522 : bool Is64Bit = RetVT == MVT::f64;
3523 : switch (II->getIntrinsicID()) {
3524 0 : default:
3525 0 : llvm_unreachable("Unexpected intrinsic.");
3526 4 : case Intrinsic::sin:
3527 4 : LC = LibCallTable[0][Is64Bit];
3528 4 : break;
3529 4 : case Intrinsic::cos:
3530 4 : LC = LibCallTable[1][Is64Bit];
3531 4 : break;
3532 4 : case Intrinsic::pow:
3533 4 : LC = LibCallTable[2][Is64Bit];
3534 4 : break;
3535 : }
3536 :
3537 : ArgListTy Args;
3538 12 : Args.reserve(II->getNumArgOperands());
3539 :
3540 : // Populate the argument list.
3541 28 : for (auto &Arg : II->arg_operands()) {
3542 : ArgListEntry Entry;
3543 16 : Entry.Val = Arg;
3544 16 : Entry.Ty = Arg->getType();
3545 16 : Args.push_back(Entry);
3546 : }
3547 :
3548 24 : CallLoweringInfo CLI;
3549 12 : MCContext &Ctx = MF->getContext();
3550 : CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3551 36 : TLI.getLibcallName(LC), std::move(Args));
3552 12 : if (!lowerCallTo(CLI))
3553 : return false;
3554 12 : updateValueMap(II, CLI.ResultReg);
3555 12 : return true;
3556 : }
3557 2 : case Intrinsic::fabs: {
3558 2 : MVT VT;
3559 2 : if (!isTypeLegal(II->getType(), VT))
3560 : return false;
3561 :
3562 : unsigned Opc;
3563 2 : switch (VT.SimpleTy) {
3564 : default:
3565 : return false;
3566 : case MVT::f32:
3567 : Opc = AArch64::FABSSr;
3568 : break;
3569 1 : case MVT::f64:
3570 : Opc = AArch64::FABSDr;
3571 1 : break;
3572 : }
3573 2 : unsigned SrcReg = getRegForValue(II->getOperand(0));
3574 2 : if (!SrcReg)
3575 : return false;
3576 2 : bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
3577 2 : unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3578 4 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3579 2 : .addReg(SrcReg, getKillRegState(SrcRegIsKill));
3580 2 : updateValueMap(II, ResultReg);
3581 2 : return true;
3582 : }
3583 1 : case Intrinsic::trap:
3584 2 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3585 : .addImm(1);
3586 1 : return true;
3587 :
3588 : case Intrinsic::sqrt: {
3589 : Type *RetTy = II->getCalledFunction()->getReturnType();
3590 :
3591 2 : MVT VT;
3592 2 : if (!isTypeLegal(RetTy, VT))
3593 : return false;
3594 :
3595 2 : unsigned Op0Reg = getRegForValue(II->getOperand(0));
3596 2 : if (!Op0Reg)
3597 : return false;
3598 2 : bool Op0IsKill = hasTrivialKill(II->getOperand(0));
3599 :
3600 2 : unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
3601 2 : if (!ResultReg)
3602 : return false;
3603 :
3604 2 : updateValueMap(II, ResultReg);
3605 2 : return true;
3606 : }
3607 : case Intrinsic::sadd_with_overflow:
3608 : case Intrinsic::uadd_with_overflow:
3609 : case Intrinsic::ssub_with_overflow:
3610 : case Intrinsic::usub_with_overflow:
3611 : case Intrinsic::smul_with_overflow:
3612 : case Intrinsic::umul_with_overflow: {
3613 : // This implements the basic lowering of the xalu with overflow intrinsics.
3614 : const Function *Callee = II->getCalledFunction();
3615 : auto *Ty = cast<StructType>(Callee->getReturnType());
3616 48 : Type *RetTy = Ty->getTypeAtIndex(0U);
3617 :
3618 48 : MVT VT;
3619 48 : if (!isTypeLegal(RetTy, VT))
3620 : return false;
3621 :
3622 48 : if (VT != MVT::i32 && VT != MVT::i64)
3623 : return false;
3624 :
3625 48 : const Value *LHS = II->getArgOperand(0);
3626 : const Value *RHS = II->getArgOperand(1);
3627 : // Canonicalize immediate to the RHS.
3628 48 : if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3629 : isCommutativeIntrinsic(II))
3630 : std::swap(LHS, RHS);
3631 :
3632 : // Simplify multiplies.
3633 : Intrinsic::ID IID = II->getIntrinsicID();
3634 : switch (IID) {
3635 : default:
3636 : break;
3637 8 : case Intrinsic::smul_with_overflow:
3638 : if (const auto *C = dyn_cast<ConstantInt>(RHS))
3639 2 : if (C->getValue() == 2) {
3640 : IID = Intrinsic::sadd_with_overflow;
3641 : RHS = LHS;
3642 : }
3643 : break;
3644 9 : case Intrinsic::umul_with_overflow:
3645 : if (const auto *C = dyn_cast<ConstantInt>(RHS))
3646 3 : if (C->getValue() == 2) {
3647 : IID = Intrinsic::uadd_with_overflow;
3648 : RHS = LHS;
3649 : }
3650 : break;
3651 : }
3652 :
3653 : unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3654 : AArch64CC::CondCode CC = AArch64CC::Invalid;
3655 : switch (IID) {
3656 0 : default: llvm_unreachable("Unexpected intrinsic!");
3657 14 : case Intrinsic::sadd_with_overflow:
3658 14 : ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3659 : CC = AArch64CC::VS;
3660 14 : break;
3661 8 : case Intrinsic::uadd_with_overflow:
3662 8 : ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3663 : CC = AArch64CC::HS;
3664 8 : break;
3665 7 : case Intrinsic::ssub_with_overflow:
3666 7 : ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3667 : CC = AArch64CC::VS;
3668 7 : break;
3669 6 : case Intrinsic::usub_with_overflow:
3670 6 : ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3671 : CC = AArch64CC::LO;
3672 6 : break;
3673 6 : case Intrinsic::smul_with_overflow: {
3674 : CC = AArch64CC::NE;
3675 6 : unsigned LHSReg = getRegForValue(LHS);
3676 6 : if (!LHSReg)
3677 : return false;
3678 6 : bool LHSIsKill = hasTrivialKill(LHS);
3679 :
3680 6 : unsigned RHSReg = getRegForValue(RHS);
3681 6 : if (!RHSReg)
3682 : return false;
3683 6 : bool RHSIsKill = hasTrivialKill(RHS);
3684 :
3685 6 : if (VT == MVT::i32) {
3686 3 : MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3687 3 : unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
3688 : /*IsKill=*/false, 32);
3689 3 : MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3690 : AArch64::sub_32);
3691 3 : ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
3692 : AArch64::sub_32);
3693 3 : emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3694 : AArch64_AM::ASR, 31, /*WantResult=*/false);
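      // The i32 sequence above is roughly (register numbers illustrative):
      //   smull x8, w0, w1
      //   lsr   x9, x8, #32
      //   cmp   w9, w8, asr #31
      // i.e. overflow iff the high half of the product differs from the
      // sign-extension of the low half; the flag is materialized below with a
      // CSINC.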
3695 : } else {
3696 : assert(VT == MVT::i64 && "Unexpected value type.");
3697 : // LHSReg and RHSReg cannot be killed by this Mul, since they are
3698 : // reused in the next instruction.
3699 3 : MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3700 : /*IsKill=*/false);
3701 3 : unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
3702 : RHSReg, RHSIsKill);
3703 3 : emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3704 : AArch64_AM::ASR, 63, /*WantResult=*/false);
3705 : }
3706 : break;
3707 : }
3708 7 : case Intrinsic::umul_with_overflow: {
3709 : CC = AArch64CC::NE;
3710 7 : unsigned LHSReg = getRegForValue(LHS);
3711 7 : if (!LHSReg)
3712 : return false;
3713 7 : bool LHSIsKill = hasTrivialKill(LHS);
3714 :
3715 7 : unsigned RHSReg = getRegForValue(RHS);
3716 7 : if (!RHSReg)
3717 : return false;
3718 7 : bool RHSIsKill = hasTrivialKill(RHS);
3719 :
3720 7 : if (VT == MVT::i32) {
3721 3 : MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3722 3 : emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
3723 : /*IsKill=*/false, AArch64_AM::LSR, 32,
3724 : /*WantResult=*/false);
3725 3 : MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3726 : AArch64::sub_32);
3727 : } else {
3728 : assert(VT == MVT::i64 && "Unexpected value type.");
3729 : // LHSReg and RHSReg cannot be killed by this Mul, since they are
3730 : // reused in the next instruction.
3731 4 : MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3732 : /*IsKill=*/false);
3733 4 : unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
3734 : RHSReg, RHSIsKill);
3735 4 : emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
3736 : /*IsKill=*/false, /*WantResult=*/false);
3737 : }
3738 : break;
3739 : }
3740 : }
3741 :
3742 48 : if (MulReg) {
3743 13 : ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3744 13 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3745 26 : TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3746 : }
3747 :
3748 48 : if (!ResultReg1)
3749 : return false;
3750 :
3751 96 : ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3752 : AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
3753 : /*IsKill=*/true, getInvertedCondCode(CC));
3754 : (void)ResultReg2;
3755 : assert((ResultReg1 + 1) == ResultReg2 &&
3756 : "Nonconsecutive result registers.");
3757 48 : updateValueMap(II, ResultReg1, 2);
3758 48 : return true;
3759 : }
3760 : }
3761 : return false;
3762 : }
3763 :
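// Illustrative sketch (function name assumed): for
//   define zeroext i8 @f(...) { ...; ret i8 %v }
// the i8 value is widened via emitIntExt before the copy into the return
// register, so the emitted tail is roughly
//   uxtb w0, wN
//   ret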
3764 1302 : bool AArch64FastISel::selectRet(const Instruction *I) {
3765 : const ReturnInst *Ret = cast<ReturnInst>(I);
3766 1302 : const Function &F = *I->getParent()->getParent();
3767 :
3768 1302 : if (!FuncInfo.CanLowerReturn)
3769 : return false;
3770 :
3771 1302 : if (F.isVarArg())
3772 : return false;
3773 :
3774 2602 : if (TLI.supportSwiftError() &&
3775 1301 : F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3776 11 : return false;
3777 :
3778 1290 : if (TLI.supportSplitCSR(FuncInfo.MF))
3779 : return false;
3780 :
3781 : // Build a list of return value registers.
3782 : SmallVector<unsigned, 4> RetRegs;
3783 :
3784 1287 : if (Ret->getNumOperands() > 0) {
3785 : CallingConv::ID CC = F.getCallingConv();
3786 : SmallVector<ISD::OutputArg, 4> Outs;
3787 1900 : GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3788 :
3789 : // Analyze operands of the call, assigning locations to each operand.
3790 : SmallVector<CCValAssign, 16> ValLocs;
3791 950 : CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3792 950 : CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3793 : : RetCC_AArch64_AAPCS;
3794 950 : CCInfo.AnalyzeReturn(Outs, RetCC);
3795 :
3796 : // Only handle a single return value for now.
3797 950 : if (ValLocs.size() != 1)
3798 101 : return false;
3799 :
3800 : CCValAssign &VA = ValLocs[0];
3801 : const Value *RV = Ret->getOperand(0);
3802 :
3803 : // Don't bother handling odd stuff for now.
3804 943 : if ((VA.getLocInfo() != CCValAssign::Full) &&
3805 : (VA.getLocInfo() != CCValAssign::BCvt))
3806 : return false;
3807 :
3808 : // Only handle register returns for now.
3809 943 : if (!VA.isRegLoc())
3810 : return false;
3811 :
3812 943 : unsigned Reg = getRegForValue(RV);
3813 943 : if (Reg == 0)
3814 : return false;
3815 :
3816 939 : unsigned SrcReg = Reg + VA.getValNo();
3817 939 : unsigned DestReg = VA.getLocReg();
3818 : // Avoid a cross-class copy. This is very unlikely.
3819 1878 : if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3820 : return false;
3821 :
3822 939 : EVT RVEVT = TLI.getValueType(DL, RV->getType());
3823 939 : if (!RVEVT.isSimple())
3824 : return false;
3825 :
3826 : // Vectors (of > 1 lane) in big endian need tricky handling.
3827 1034 : if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
3828 89 : !Subtarget->isLittleEndian())
3829 : return false;
3830 :
3831 : MVT RVVT = RVEVT.getSimpleVT();
3832 879 : if (RVVT == MVT::f128)
3833 : return false;
3834 :
3835 : MVT DestVT = VA.getValVT();
3836 : // Special handling for extended integers.
3837 871 : if (RVVT != DestVT) {
3838 173 : if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3839 : return false;
3840 :
3841 173 : if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3842 : return false;
3843 :
3844 151 : bool IsZExt = Outs[0].Flags.isZExt();
3845 151 : SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3846 151 : if (SrcReg == 0)
3847 : return false;
3848 : }
3849 :
3850 : // Make the copy.
3851 1698 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3852 1698 : TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3853 :
3854 : // Add register to return instruction.
3855 849 : RetRegs.push_back(VA.getLocReg());
3856 : }
3857 :
3858 1186 : MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3859 2372 : TII.get(AArch64::RET_ReallyLR));
3860 2035 : for (unsigned RetReg : RetRegs)
3861 849 : MIB.addReg(RetReg, RegState::Implicit);
3862 : return true;
3863 : }
3864 :
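// Illustrative result (registers assumed): "trunc i64 %x to i8" takes the
// sub_32 subregister of the source and masks it with "and w8, w8, #0xff",
// while truncates from a 32-bit source only need a COPY.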
3865 14 : bool AArch64FastISel::selectTrunc(const Instruction *I) {
3866 14 : Type *DestTy = I->getType();
3867 14 : Value *Op = I->getOperand(0);
3868 14 : Type *SrcTy = Op->getType();
3869 :
3870 14 : EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3871 14 : EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3872 14 : if (!SrcEVT.isSimple())
3873 : return false;
3874 14 : if (!DestEVT.isSimple())
3875 : return false;
3876 :
3877 : MVT SrcVT = SrcEVT.getSimpleVT();
3878 : MVT DestVT = DestEVT.getSimpleVT();
3879 :
3880 14 : if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3881 : SrcVT != MVT::i8)
3882 : return false;
3883 12 : if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3884 : DestVT != MVT::i1)
3885 : return false;
3886 :
3887 12 : unsigned SrcReg = getRegForValue(Op);
3888 12 : if (!SrcReg)
3889 : return false;
3890 12 : bool SrcIsKill = hasTrivialKill(Op);
3891 :
3892 : // If we're truncating from i64 to a smaller non-legal type then generate an
3893 : // AND. Otherwise, we know the high bits are undefined and a truncate only
3894 :   // generates a COPY. We cannot also mark the source register as the result
3895 : // register, because this can incorrectly transfer the kill flag onto the
3896 : // source register.
3897 : unsigned ResultReg;
3898 12 : if (SrcVT == MVT::i64) {
3899 : uint64_t Mask = 0;
3900 : switch (DestVT.SimpleTy) {
3901 : default:
3902 : // Trunc i64 to i32 is handled by the target-independent fast-isel.
3903 : return false;
3904 : case MVT::i1:
3905 : Mask = 0x1;
3906 : break;
3907 : case MVT::i8:
3908 : Mask = 0xff;
3909 : break;
3910 : case MVT::i16:
3911 : Mask = 0xffff;
3912 : break;
3913 : }
3914 : // Issue an extract_subreg to get the lower 32-bits.
3915 12 : unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
3916 : AArch64::sub_32);
3917 : // Create the AND instruction which performs the actual truncation.
3918 6 : ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
3919 : assert(ResultReg && "Unexpected AND instruction emission failure.");
3920 : } else {
3921 6 : ResultReg = createResultReg(&AArch64::GPR32RegClass);
3922 6 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3923 12 : TII.get(TargetOpcode::COPY), ResultReg)
3924 6 : .addReg(SrcReg, getKillRegState(SrcIsKill));
3925 : }
3926 :
3927 12 : updateValueMap(I, ResultReg);
3928 12 : return true;
3929 : }
3930 :
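// Illustrative results (registers assumed): zero-extending an i1 emits an AND
// with 1, e.g. "and w8, w8, #0x1"; sign-extending it into a 32-bit register
// uses SBFM with immr = imms = 0, i.e. "sbfx w8, w8, #0, #1".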
3931 141 : unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3932 : assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3933 : DestVT == MVT::i64) &&
3934 : "Unexpected value type.");
3935 : // Handle i8 and i16 as i32.
3936 141 : if (DestVT == MVT::i8 || DestVT == MVT::i16)
3937 : DestVT = MVT::i32;
3938 :
3939 141 : if (IsZExt) {
3940 132 : unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
3941 : assert(ResultReg && "Unexpected AND instruction emission failure.");
3942 132 : if (DestVT == MVT::i64) {
3943 : // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
3944 : // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
3945 0 : unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3946 0 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3947 0 : TII.get(AArch64::SUBREG_TO_REG), Reg64)
3948 : .addImm(0)
3949 0 : .addReg(ResultReg)
3950 : .addImm(AArch64::sub_32);
3951 : ResultReg = Reg64;
3952 : }
3953 132 : return ResultReg;
3954 : } else {
3955 9 : if (DestVT == MVT::i64) {
3956 : // FIXME: We're SExt i1 to i64.
3957 : return 0;
3958 : }
3959 9 : return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3960 9 : /*TODO:IsKill=*/false, 0, 0);
3961 : }
3962 : }
3963 :
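// Illustrative note: plain multiplies are emitted as MADD with a zero addend
// (e.g. "madd w0, w1, w2, wzr"), and the widening helpers below use
// SMADDL/UMADDL with xzr in the same way.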
3964 18 : unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3965 : unsigned Op1, bool Op1IsKill) {
3966 : unsigned Opc, ZReg;
3967 18 : switch (RetVT.SimpleTy) {
3968 : default: return 0;
3969 : case MVT::i8:
3970 : case MVT::i16:
3971 : case MVT::i32:
3972 : RetVT = MVT::i32;
3973 5 : Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
3974 : case MVT::i64:
3975 : Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
3976 : }
3977 :
3978 : const TargetRegisterClass *RC =
3979 18 : (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3980 18 : return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
3981          18 :                           ZReg, /*IsKill=*/true);
3982 : }
3983 :
3984 : unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3985 : unsigned Op1, bool Op1IsKill) {
3986 : if (RetVT != MVT::i64)
3987 : return 0;
3988 :
3989 3 : return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
3990 : Op0, Op0IsKill, Op1, Op1IsKill,
3991 : AArch64::XZR, /*IsKill=*/true);
3992 : }
3993 :
3994 : unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3995 : unsigned Op1, bool Op1IsKill) {
3996 : if (RetVT != MVT::i64)
3997 : return 0;
3998 :
3999 3 : return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4000 : Op0, Op0IsKill, Op1, Op1IsKill,
4001 : AArch64::XZR, /*IsKill=*/true);
4002 : }
4003 :
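// Illustrative note for the variable-shift helpers below: i8/i16 shifts are
// performed in a 32-bit register, so the operands are first masked (or, for
// ASR, sign-extended) and the result is re-truncated with an AND. For an i8
// shl (registers assumed) this gives roughly:
//   and  w9, w9, #0xff      ; shift amount
//   lslv w8, w8, w9
//   and  w8, w8, #0xff      ; re-truncate the result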
4004 4 : unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4005 : unsigned Op1Reg, bool Op1IsKill) {
4006 : unsigned Opc = 0;
4007 : bool NeedTrunc = false;
4008 : uint64_t Mask = 0;
4009 : switch (RetVT.SimpleTy) {
4010 : default: return 0;
4011 : case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4012 : case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4013 : case MVT::i32: Opc = AArch64::LSLVWr; break;
4014 : case MVT::i64: Opc = AArch64::LSLVXr; break;
4015 : }
4016 :
4017 : const TargetRegisterClass *RC =
4018 4 : (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4019 4 : if (NeedTrunc) {
4020 2 : Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4021 : Op1IsKill = true;
4022 : }
4023 4 : unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4024 : Op1IsKill);
4025 4 : if (NeedTrunc)
4026 2 : ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4027 : return ResultReg;
4028 : }
4029 :
4030 53 : unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4031 : bool Op0IsKill, uint64_t Shift,
4032 : bool IsZExt) {
4033 : assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4034 : "Unexpected source/return type pair.");
4035 : assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4036 : SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4037 : "Unexpected source value type.");
4038 : assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4039 : RetVT == MVT::i64) && "Unexpected return value type.");
4040 :
4041 53 : bool Is64Bit = (RetVT == MVT::i64);
4042 53 : unsigned RegSize = Is64Bit ? 64 : 32;
4043 53 : unsigned DstBits = RetVT.getSizeInBits();
4044 53 : unsigned SrcBits = SrcVT.getSizeInBits();
4045 : const TargetRegisterClass *RC =
4046 53 : Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4047 :
4048 : // Just emit a copy for "zero" shifts.
4049 53 : if (Shift == 0) {
4050 2 : if (RetVT == SrcVT) {
4051 1 : unsigned ResultReg = createResultReg(RC);
4052 2 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4053 2 : TII.get(TargetOpcode::COPY), ResultReg)
4054 1 : .addReg(Op0, getKillRegState(Op0IsKill));
4055 1 : return ResultReg;
4056 : } else
4057 1 : return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4058 : }
4059 :
4060 : // Don't deal with undefined shifts.
4061 51 : if (Shift >= DstBits)
4062 : return 0;
4063 :
4064 : // For immediate shifts we can fold the zero-/sign-extension into the shift.
4065 : // {S|U}BFM Wd, Wn, #r, #s
4066 : // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4067 :
4068 : // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4069 : // %2 = shl i16 %1, 4
4070 : // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4071 : // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4072 : // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4073 : // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4074 :
4075 : // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4076 : // %2 = shl i16 %1, 8
4077 : // Wd<32+7-24,32-24> = Wn<7:0>
4078 : // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4079 : // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4080 : // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4081 :
4082 : // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4083 : // %2 = shl i16 %1, 12
4084 : // Wd<32+3-20,32-20> = Wn<3:0>
4085 : // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4086 : // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4087 : // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4088 :
4089 37 : unsigned ImmR = RegSize - Shift;
4090 : // Limit the width to the length of the source type.
4091 37 : unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4092 : static const unsigned OpcTable[2][2] = {
4093 : {AArch64::SBFMWri, AArch64::SBFMXri},
4094 : {AArch64::UBFMWri, AArch64::UBFMXri}
4095 : };
4096 37 : unsigned Opc = OpcTable[IsZExt][Is64Bit];
4097 37 : if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4098 20 : unsigned TmpReg = MRI.createVirtualRegister(RC);
4099 10 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4100 20 : TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4101 : .addImm(0)
4102 10 : .addReg(Op0, getKillRegState(Op0IsKill))
4103 : .addImm(AArch64::sub_32);
4104 : Op0 = TmpReg;
4105 : Op0IsKill = true;
4106 : }
4107 37 : return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4108 : }
4109 :
4110 4 : unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4111 : unsigned Op1Reg, bool Op1IsKill) {
4112 : unsigned Opc = 0;
4113 : bool NeedTrunc = false;
4114 : uint64_t Mask = 0;
4115 : switch (RetVT.SimpleTy) {
4116 : default: return 0;
4117 : case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4118 : case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4119 : case MVT::i32: Opc = AArch64::LSRVWr; break;
4120 : case MVT::i64: Opc = AArch64::LSRVXr; break;
4121 : }
4122 :
4123 : const TargetRegisterClass *RC =
4124 4 : (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4125 4 : if (NeedTrunc) {
4126 2 : Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
4127 2 : Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4128 : Op0IsKill = Op1IsKill = true;
4129 : }
4130 4 : unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4131 : Op1IsKill);
4132 4 : if (NeedTrunc)
4133 2 : ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4134 : return ResultReg;
4135 : }
4136 :
4137 27 : unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4138 : bool Op0IsKill, uint64_t Shift,
4139 : bool IsZExt) {
4140 : assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4141 : "Unexpected source/return type pair.");
4142 : assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4143 : SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4144 : "Unexpected source value type.");
4145 : assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4146 : RetVT == MVT::i64) && "Unexpected return value type.");
4147 :
4148 27 : bool Is64Bit = (RetVT == MVT::i64);
4149 27 : unsigned RegSize = Is64Bit ? 64 : 32;
4150 27 : unsigned DstBits = RetVT.getSizeInBits();
4151 27 : unsigned SrcBits = SrcVT.getSizeInBits();
4152 : const TargetRegisterClass *RC =
4153 27 : Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4154 :
4155 : // Just emit a copy for "zero" shifts.
4156 27 : if (Shift == 0) {
4157 2 : if (RetVT == SrcVT) {
4158 1 : unsigned ResultReg = createResultReg(RC);
4159 2 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4160 2 : TII.get(TargetOpcode::COPY), ResultReg)
4161 1 : .addReg(Op0, getKillRegState(Op0IsKill));
4162 1 : return ResultReg;
4163 : } else
4164 1 : return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4165 : }
4166 :
4167 : // Don't deal with undefined shifts.
4168 25 : if (Shift >= DstBits)
4169 : return 0;
4170 :
4171 : // For immediate shifts we can fold the zero-/sign-extension into the shift.
4172 : // {S|U}BFM Wd, Wn, #r, #s
4173 : // Wd<s-r:0> = Wn<s:r> when r <= s
4174 :
4175 : // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4176 : // %2 = lshr i16 %1, 4
4177 : // Wd<7-4:0> = Wn<7:4>
4178 : // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4179 : // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4180 : // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4181 :
4182 : // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4183 : // %2 = lshr i16 %1, 8
4184 : // Wd<7-7,0> = Wn<7:7>
4185 : // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4186 : // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4187 : // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4188 :
4189 : // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4190 : // %2 = lshr i16 %1, 12
4191 : // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4192 : // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4193 : // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4194 : // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4195 :
4196 25 : if (Shift >= SrcBits && IsZExt)
4197 6 : return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4198 :
4199 : // It is not possible to fold a sign-extend into the LShr instruction. In this
4200 : // case emit a sign-extend.
4201 22 : if (!IsZExt) {
4202 4 : Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4203 4 : if (!Op0)
4204 : return 0;
4205 : Op0IsKill = true;
4206 4 : SrcVT = RetVT;
4207 4 : SrcBits = SrcVT.getSizeInBits();
4208 : IsZExt = true;
4209 : }
4210 :
4211 22 : unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4212 : unsigned ImmS = SrcBits - 1;
4213 : static const unsigned OpcTable[2][2] = {
4214 : {AArch64::SBFMWri, AArch64::SBFMXri},
4215 : {AArch64::UBFMWri, AArch64::UBFMXri}
4216 : };
4217 22 : unsigned Opc = OpcTable[IsZExt][Is64Bit];
4218 22 : if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4219 0 : unsigned TmpReg = MRI.createVirtualRegister(RC);
4220 0 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4221 0 : TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4222 : .addImm(0)
4223 0 : .addReg(Op0, getKillRegState(Op0IsKill))
4224 : .addImm(AArch64::sub_32);
4225 : Op0 = TmpReg;
4226 : Op0IsKill = true;
4227 : }
4228 22 : return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4229 : }
4230 :
4231 4 : unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4232 : unsigned Op1Reg, bool Op1IsKill) {
4233 : unsigned Opc = 0;
4234 : bool NeedTrunc = false;
4235 : uint64_t Mask = 0;
4236 : switch (RetVT.SimpleTy) {
4237 : default: return 0;
4238 : case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4239 : case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4240 : case MVT::i32: Opc = AArch64::ASRVWr; break;
4241 : case MVT::i64: Opc = AArch64::ASRVXr; break;
4242 : }
4243 :
4244 : const TargetRegisterClass *RC =
4245 4 : (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4246 4 : if (NeedTrunc) {
4247 2 : Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
4248 2 : Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4249 : Op0IsKill = Op1IsKill = true;
4250 : }
4251 4 : unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4252 : Op1IsKill);
4253 4 : if (NeedTrunc)
4254 2 : ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4255 : return ResultReg;
4256 : }
4257 :
4258 28 : unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4259 : bool Op0IsKill, uint64_t Shift,
4260 : bool IsZExt) {
4261 : assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4262 : "Unexpected source/return type pair.");
4263 : assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4264 : SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4265 : "Unexpected source value type.");
4266 : assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4267 : RetVT == MVT::i64) && "Unexpected return value type.");
4268 :
4269 28 : bool Is64Bit = (RetVT == MVT::i64);
4270 28 : unsigned RegSize = Is64Bit ? 64 : 32;
4271 28 : unsigned DstBits = RetVT.getSizeInBits();
4272 28 : unsigned SrcBits = SrcVT.getSizeInBits();
4273 : const TargetRegisterClass *RC =
4274 28 : Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4275 :
4276 : // Just emit a copy for "zero" shifts.
4277 28 : if (Shift == 0) {
4278 2 : if (RetVT == SrcVT) {
4279 1 : unsigned ResultReg = createResultReg(RC);
4280 2 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4281 2 : TII.get(TargetOpcode::COPY), ResultReg)
4282 1 : .addReg(Op0, getKillRegState(Op0IsKill));
4283 1 : return ResultReg;
4284 : } else
4285 1 : return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4286 : }
4287 :
4288 : // Don't deal with undefined shifts.
4289 26 : if (Shift >= DstBits)
4290 : return 0;
4291 :
4292 : // For immediate shifts we can fold the zero-/sign-extension into the shift.
4293 : // {S|U}BFM Wd, Wn, #r, #s
4294 : // Wd<s-r:0> = Wn<s:r> when r <= s
4295 :
4296 : // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4297 : // %2 = ashr i16 %1, 4
4298 : // Wd<7-4:0> = Wn<7:4>
4299 : // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4300 : // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4301 : // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4302 :
4303 : // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4304 : // %2 = ashr i16 %1, 8
4305 : // Wd<7-7,0> = Wn<7:7>
4306 : // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4307 : // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4308 : // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4309 :
4310 : // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4311 : // %2 = ashr i16 %1, 12
4312 : // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4313 : // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4314 : // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4315 : // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4316 :
4317 26 : if (Shift >= SrcBits && IsZExt)
4318 6 : return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4319 :
4320 23 : unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4321 : unsigned ImmS = SrcBits - 1;
4322 : static const unsigned OpcTable[2][2] = {
4323 : {AArch64::SBFMWri, AArch64::SBFMXri},
4324 : {AArch64::UBFMWri, AArch64::UBFMXri}
4325 : };
4326 23 : unsigned Opc = OpcTable[IsZExt][Is64Bit];
4327 23 : if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4328 2 : unsigned TmpReg = MRI.createVirtualRegister(RC);
4329 1 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4330 2 : TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4331 : .addImm(0)
4332 1 : .addReg(Op0, getKillRegState(Op0IsKill))
4333 : .addImm(AArch64::sub_32);
4334 : Op0 = TmpReg;
4335 : Op0IsKill = true;
4336 : }
4337 23 : return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4338 : }
4339 :
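// Illustrative results (registers assumed): a zext of i16 to i32 becomes
// "ubfx w8, w8, #0, #16" (UBFMWri with immr = 0, imms = 15); a zext of i16 to
// i64 first wraps the 32-bit source in a SUBREG_TO_REG and then emits
// UBFMXri 0, 15.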
4340 411 : unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4341 : bool IsZExt) {
4342 : assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4343 :
4344 : // FastISel does not have plumbing to deal with extensions where the SrcVT or
4345 : // DestVT are odd things, so test to make sure that they are both types we can
4346 : // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4347 : // bail out to SelectionDAG.
4348 410 : if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4349 821 : (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4350 270 : ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4351 28 : (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4352 : return 0;
4353 :
4354 : unsigned Opc;
4355 : unsigned Imm = 0;
4356 :
4357 411 : switch (SrcVT.SimpleTy) {
4358 : default:
4359 : return 0;
4360 141 : case MVT::i1:
4361 141 : return emiti1Ext(SrcReg, DestVT, IsZExt);
4362 : case MVT::i8:
4363 144 : if (DestVT == MVT::i64)
4364 15 : Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4365 : else
4366 129 : Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4367 : Imm = 7;
4368 : break;
4369 : case MVT::i16:
4370 98 : if (DestVT == MVT::i64)
4371 15 : Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4372 : else
4373 83 : Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4374 : Imm = 15;
4375 : break;
4376 28 : case MVT::i32:
4377 : assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4378 28 : Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4379 : Imm = 31;
4380 : break;
4381 : }
4382 :
4383 : // Handle i8 and i16 as i32.
4384 270 : if (DestVT == MVT::i8 || DestVT == MVT::i16)
4385 : DestVT = MVT::i32;
4386 267 : else if (DestVT == MVT::i64) {
4387 116 : unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4388 58 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4389 116 : TII.get(AArch64::SUBREG_TO_REG), Src64)
4390 : .addImm(0)
4391 58 : .addReg(SrcReg)
4392 : .addImm(AArch64::sub_32);
4393 : SrcReg = Src64;
4394 : }
4395 :
4396 : const TargetRegisterClass *RC =
4397 270 : (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4398 270 : return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
4399 : }
4400 :
4401 : static bool isZExtLoad(const MachineInstr *LI) {
4402 2 : switch (LI->getOpcode()) {
4403 : default:
4404 : return false;
4405 : case AArch64::LDURBBi:
4406 : case AArch64::LDURHHi:
4407 : case AArch64::LDURWi:
4408 : case AArch64::LDRBBui:
4409 : case AArch64::LDRHHui:
4410 : case AArch64::LDRWui:
4411 : case AArch64::LDRBBroX:
4412 : case AArch64::LDRHHroX:
4413 : case AArch64::LDRWroX:
4414 : case AArch64::LDRBBroW:
4415 : case AArch64::LDRHHroW:
4416 : case AArch64::LDRWroW:
4417 : return true;
4418 : }
4419 : }
4420 :
4421 : static bool isSExtLoad(const MachineInstr *LI) {
4422 0 : switch (LI->getOpcode()) {
4423 : default:
4424 : return false;
4425 : case AArch64::LDURSBWi:
4426 : case AArch64::LDURSHWi:
4427 : case AArch64::LDURSBXi:
4428 : case AArch64::LDURSHXi:
4429 : case AArch64::LDURSWi:
4430 : case AArch64::LDRSBWui:
4431 : case AArch64::LDRSHWui:
4432 : case AArch64::LDRSBXui:
4433 : case AArch64::LDRSHXui:
4434 : case AArch64::LDRSWui:
4435 : case AArch64::LDRSBWroX:
4436 : case AArch64::LDRSHWroX:
4437 : case AArch64::LDRSBXroX:
4438 : case AArch64::LDRSHXroX:
4439 : case AArch64::LDRSWroX:
4440 : case AArch64::LDRSBWroW:
4441 : case AArch64::LDRSHWroW:
4442 : case AArch64::LDRSBXroW:
4443 : case AArch64::LDRSHXroW:
4444 : case AArch64::LDRSWroW:
4445 : return true;
4446 : }
4447 : }
4448 :
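// Illustrative case (value names assumed): for
//   %v = load i32, i32* %p
//   %e = zext i32 %v to i64
// the LDRWui emitted for the load already zeroes the upper 32 bits, so the
// zext below only needs a SUBREG_TO_REG into a 64-bit register instead of a
// separate extend instruction.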
4449 193 : bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4450 : MVT SrcVT) {
4451 193 : const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4452 85 : if (!LI || !LI->hasOneUse())
4453 : return false;
4454 :
4455 : // Check if the load instruction has already been selected.
4456 85 : unsigned Reg = lookUpRegForValue(LI);
4457 85 : if (!Reg)
4458 : return false;
4459 :
4460 1 : MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4461 1 : if (!MI)
4462 : return false;
4463 :
4464 : // Check if the correct load instruction has been emitted - SelectionDAG might
4465 : // have emitted a zero-extending load, but we need a sign-extending load.
4466 : bool IsZExt = isa<ZExtInst>(I);
4467 : const auto *LoadMI = MI;
4468 2 : if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4469 0 : LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4470 0 : unsigned LoadReg = MI->getOperand(1).getReg();
4471 0 : LoadMI = MRI.getUniqueVRegDef(LoadReg);
4472 : assert(LoadMI && "Expected valid instruction");
4473 : }
4474 1 : if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4475 : return false;
4476 :
4477 : // Nothing to be done.
4478 1 : if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4479 0 : updateValueMap(I, Reg);
4480 0 : return true;
4481 : }
4482 :
4483 1 : if (IsZExt) {
4484 1 : unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
4485 1 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4486 2 : TII.get(AArch64::SUBREG_TO_REG), Reg64)
4487 : .addImm(0)
4488 1 : .addReg(Reg, getKillRegState(true))
4489 : .addImm(AArch64::sub_32);
4490 : Reg = Reg64;
4491 : } else {
4492 : assert((MI->getOpcode() == TargetOpcode::COPY &&
4493 : MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4494 : "Expected copy instruction");
4495 0 : Reg = MI->getOperand(1).getReg();
4496 0 : MI->eraseFromParent();
4497 : }
4498 1 : updateValueMap(I, Reg);
4499 1 : return true;
4500 : }
4501 :
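// Illustrative case (argument name assumed): if the source of the extend is a
// function argument that already carries a zeroext/signext attribute, e.g.
//   define i64 @f(i32 zeroext %a) { %e = zext i32 %a to i64; ... }
// no extend instruction is emitted below; for the i64 case only a
// SUBREG_TO_REG is inserted to move the value into a 64-bit register.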
4502 200 : bool AArch64FastISel::selectIntExt(const Instruction *I) {
4503 : assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4504 : "Unexpected integer extend instruction.");
4505 200 : MVT RetVT;
4506 200 : MVT SrcVT;
4507 200 : if (!isTypeSupported(I->getType(), RetVT))
4508 : return false;
4509 :
4510 386 : if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4511 : return false;
4512 :
4513 : // Try to optimize already sign-/zero-extended values from load instructions.
4514 193 : if (optimizeIntExtLoad(I, RetVT, SrcVT))
4515 : return true;
4516 :
4517 384 : unsigned SrcReg = getRegForValue(I->getOperand(0));
4518 192 : if (!SrcReg)
4519 : return false;
4520 192 : bool SrcIsKill = hasTrivialKill(I->getOperand(0));
4521 :
4522 : // Try to optimize already sign-/zero-extended values from function arguments.
4523 : bool IsZExt = isa<ZExtInst>(I);
4524 : if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4525 78 : if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4526 66 : if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4527 13 : unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
4528 13 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4529 26 : TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4530 : .addImm(0)
4531 13 : .addReg(SrcReg, getKillRegState(SrcIsKill))
4532 : .addImm(AArch64::sub_32);
4533 : SrcReg = ResultReg;
4534 : }
4535 : // Conservatively clear all kill flags from all uses, because we are
4536 : // replacing a sign-/zero-extend instruction at IR level with a nop at MI
4537 : // level. The result of the instruction at IR level might have been
4538 :     // trivially dead, which is now no longer true.
4539 66 : unsigned UseReg = lookUpRegForValue(I);
4540 66 : if (UseReg)
4541 66 : MRI.clearKillFlags(UseReg);
4542 :
4543 66 : updateValueMap(I, SrcReg);
4544 66 : return true;
4545 : }
4546 : }
4547 :
4548 126 : unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4549 126 : if (!ResultReg)
4550 : return false;
4551 :
4552 126 : updateValueMap(I, ResultReg);
4553 126 : return true;
4554 : }
4555 :
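// Illustrative sketch (registers assumed): an i32 srem is computed as
// numerator - (numerator / denominator) * denominator, i.e. roughly
//   sdiv w8, w0, w1
//   msub w0, w8, w1, w0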
4556 8 : bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4557 8 : EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4558 8 : if (!DestEVT.isSimple())
4559 : return false;
4560 :
4561 : MVT DestVT = DestEVT.getSimpleVT();
4562 8 : if (DestVT != MVT::i64 && DestVT != MVT::i32)
4563 : return false;
4564 :
4565 : unsigned DivOpc;
4566 : bool Is64bit = (DestVT == MVT::i64);
4567 8 : switch (ISDOpcode) {
4568 : default:
4569 : return false;
4570 4 : case ISD::SREM:
4571 4 : DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4572 : break;
4573 4 : case ISD::UREM:
4574 4 : DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4575 : break;
4576 : }
4577 8 : unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4578 16 : unsigned Src0Reg = getRegForValue(I->getOperand(0));
4579 8 : if (!Src0Reg)
4580 : return false;
4581 8 : bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4582 :
4583 8 : unsigned Src1Reg = getRegForValue(I->getOperand(1));
4584 8 : if (!Src1Reg)
4585 : return false;
4586 8 : bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4587 :
4588 : const TargetRegisterClass *RC =
4589 8 : (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4590 8 : unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
4591 : Src1Reg, /*IsKill=*/false);
4592 : assert(QuotReg && "Unexpected DIV instruction emission failure.");
4593 : // The remainder is computed as numerator - (quotient * denominator) using the
4594 : // MSUB instruction.
4595 8 : unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
4596 : Src1Reg, Src1IsKill, Src0Reg,
4597 : Src0IsKill);
4598 8 : updateValueMap(I, ResultReg);
4599 8 : return true;
4600 : }
4601 :
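// Illustrative note: a multiply by a power of two, e.g. "mul i32 %x, 8", is
// simplified below into a left shift via emitLSL_ri ("lsl w8, w8, #3");
// everything else falls through to emitMul_rr.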
4602 10 : bool AArch64FastISel::selectMul(const Instruction *I) {
4603 10 : MVT VT;
4604 10 : if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4605 : return false;
4606 :
4607 20 : if (VT.isVector())
4608 0 : return selectBinaryOp(I, ISD::MUL);
4609 :
4610 10 : const Value *Src0 = I->getOperand(0);
4611 : const Value *Src1 = I->getOperand(1);
4612 : if (const auto *C = dyn_cast<ConstantInt>(Src0))
4613 0 : if (C->getValue().isPowerOf2())
4614 : std::swap(Src0, Src1);
4615 :
4616 : // Try to simplify to a shift instruction.
4617 : if (const auto *C = dyn_cast<ConstantInt>(Src1))
4618 4 : if (C->getValue().isPowerOf2()) {
4619 2 : uint64_t ShiftVal = C->getValue().logBase2();
4620 2 : MVT SrcVT = VT;
4621 : bool IsZExt = true;
4622 : if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4623 0 : if (!isIntExtFree(ZExt)) {
4624 0 : MVT VT;
4625 0 : if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4626 0 : SrcVT = VT;
4627 : IsZExt = true;
4628 : Src0 = ZExt->getOperand(0);
4629 : }
4630 : }
4631 : } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4632 0 : if (!isIntExtFree(SExt)) {
4633 0 : MVT VT;
4634 0 : if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4635 0 : SrcVT = VT;
4636 : IsZExt = false;
4637 : Src0 = SExt->getOperand(0);
4638 : }
4639 : }
4640 : }
4641 :
4642 2 : unsigned Src0Reg = getRegForValue(Src0);
4643 2 : if (!Src0Reg)
4644 2 : return false;
4645 2 : bool Src0IsKill = hasTrivialKill(Src0);
4646 :
4647 : unsigned ResultReg =
4648 2 : emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
4649 :
4650 2 : if (ResultReg) {
4651 2 : updateValueMap(I, ResultReg);
4652 2 : return true;
4653 : }
4654 : }
4655 :
4656 16 : unsigned Src0Reg = getRegForValue(I->getOperand(0));
4657 8 : if (!Src0Reg)
4658 : return false;
4659 8 : bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4660 :
4661 8 : unsigned Src1Reg = getRegForValue(I->getOperand(1));
4662 8 : if (!Src1Reg)
4663 : return false;
4664 8 : bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4665 :
4666 8 : unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
4667 :
4668 8 : if (!ResultReg)
4669 : return false;
4670 :
4671 8 : updateValueMap(I, ResultReg);
4672 8 : return true;
4673 : }
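// A rough sketch of the power-of-two case above (values are illustrative):
//   %r = mul i32 %x, 8
// becomes a single left shift, lsl w_r, w_x, #3, because 8 == 1 << 3. When
// the multiplied value comes from a non-free zext/sext, that extend is folded
// into the widening shift instead of being emitted separately; all other
// multiplies fall through to a plain multiply via emitMul_rr.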
4674 :
4675 102 : bool AArch64FastISel::selectShift(const Instruction *I) {
4676 102 : MVT RetVT;
4677 102 : if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4678 : return false;
4679 :
4680 204 : if (RetVT.isVector())
4681 0 : return selectOperator(I, I->getOpcode());
4682 :
4683 102 : if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4684 : unsigned ResultReg = 0;
4685 : uint64_t ShiftVal = C->getZExtValue();
4686 90 : MVT SrcVT = RetVT;
4687 90 : bool IsZExt = I->getOpcode() != Instruction::AShr;
4688 : const Value *Op0 = I->getOperand(0);
4689 : if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4690 22 : if (!isIntExtFree(ZExt)) {
4691 22 : MVT TmpVT;
4692 44 : if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4693 22 : SrcVT = TmpVT;
4694 : IsZExt = true;
4695 : Op0 = ZExt->getOperand(0);
4696 : }
4697 : }
4698 : } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4699 19 : if (!isIntExtFree(SExt)) {
4700 19 : MVT TmpVT;
4701 38 : if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4702 19 : SrcVT = TmpVT;
4703 : IsZExt = false;
4704 : Op0 = SExt->getOperand(0);
4705 : }
4706 : }
4707 : }
4708 :
4709 90 : unsigned Op0Reg = getRegForValue(Op0);
4710 90 : if (!Op0Reg)
4711 : return false;
4712 90 : bool Op0IsKill = hasTrivialKill(Op0);
4713 :
4714 90 : switch (I->getOpcode()) {
4715 0 : default: llvm_unreachable("Unexpected instruction.");
4716 48 : case Instruction::Shl:
4717 48 : ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4718 48 : break;
4719 18 : case Instruction::AShr:
4720 18 : ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4721 18 : break;
4722 24 : case Instruction::LShr:
4723 24 : ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4724 24 : break;
4725 : }
4726 90 : if (!ResultReg)
4727 : return false;
4728 :
4729 76 : updateValueMap(I, ResultReg);
4730 76 : return true;
4731 : }
4732 :
4733 24 : unsigned Op0Reg = getRegForValue(I->getOperand(0));
4734 12 : if (!Op0Reg)
4735 : return false;
4736 12 : bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4737 :
4738 12 : unsigned Op1Reg = getRegForValue(I->getOperand(1));
4739 12 : if (!Op1Reg)
4740 : return false;
4741 12 : bool Op1IsKill = hasTrivialKill(I->getOperand(1));
4742 :
4743 : unsigned ResultReg = 0;
4744 12 : switch (I->getOpcode()) {
4745 0 : default: llvm_unreachable("Unexpected instruction.");
4746 4 : case Instruction::Shl:
4747 4 : ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4748 4 : break;
4749 4 : case Instruction::AShr:
4750 4 : ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4751 4 : break;
4752 4 : case Instruction::LShr:
4753 4 : ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4754 4 : break;
4755 : }
4756 :
4757 12 : if (!ResultReg)
4758 : return false;
4759 :
4760 12 : updateValueMap(I, ResultReg);
4761 12 : return true;
4762 : }
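// In short (an illustrative summary of the two paths above): a constant shift
// amount uses the immediate-form emitters, which can also absorb a non-free
// zext/sext of the shifted value into a single widening shift, while a
// variable shift amount falls back to the register-form emitters
// (emitLSL_rr and friends).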
4763 :
4764 23 : bool AArch64FastISel::selectBitCast(const Instruction *I) {
4765 23 : MVT RetVT, SrcVT;
4766 :
4767 46 : if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4768 : return false;
4769 23 : if (!isTypeLegal(I->getType(), RetVT))
4770 : return false;
4771 :
4772 : unsigned Opc;
4773 23 : if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4774 : Opc = AArch64::FMOVWSr;
4775 22 : else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4776 : Opc = AArch64::FMOVXDr;
4777 19 : else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4778 : Opc = AArch64::FMOVSWr;
4779 18 : else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4780 : Opc = AArch64::FMOVDXr;
4781 : else
4782 : return false;
4783 :
4784 : const TargetRegisterClass *RC = nullptr;
4785 8 : switch (RetVT.SimpleTy) {
4786 0 : default: llvm_unreachable("Unexpected value type.");
4787 : case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4788 3 : case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4789 1 : case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4790 3 : case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4791 : }
4792 16 : unsigned Op0Reg = getRegForValue(I->getOperand(0));
4793 8 : if (!Op0Reg)
4794 : return false;
4795 8 : bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4796 8 : unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
4797 :
4798 8 : if (!ResultReg)
4799 : return false;
4800 :
4801 8 : updateValueMap(I, ResultReg);
4802 8 : return true;
4803 : }
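// Illustrative note: a bitcast between equally sized integer and floating
// point types is just a cross-register-file move, e.g.
//   %r = bitcast float %x to i32
// becomes a single FMOV from an FPR32 into a GPR32 register, with no memory
// round trip; any other type combination is rejected here.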
4804 :
4805 4 : bool AArch64FastISel::selectFRem(const Instruction *I) {
4806 4 : MVT RetVT;
4807 4 : if (!isTypeLegal(I->getType(), RetVT))
4808 : return false;
4809 :
4810 : RTLIB::Libcall LC;
4811 4 : switch (RetVT.SimpleTy) {
4812 : default:
4813 : return false;
4814 : case MVT::f32:
4815 : LC = RTLIB::REM_F32;
4816 : break;
4817 2 : case MVT::f64:
4818 : LC = RTLIB::REM_F64;
4819 2 : break;
4820 : }
4821 :
4822 : ArgListTy Args;
4823 4 : Args.reserve(I->getNumOperands());
4824 :
4825 : // Populate the argument list.
4826 16 : for (auto &Arg : I->operands()) {
4827 : ArgListEntry Entry;
4828 8 : Entry.Val = Arg;
4829 8 : Entry.Ty = Arg->getType();
4830 8 : Args.push_back(Entry);
4831 : }
4832 :
4833 8 : CallLoweringInfo CLI;
4834 4 : MCContext &Ctx = MF->getContext();
4835 : CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4836 12 : TLI.getLibcallName(LC), std::move(Args));
4837 4 : if (!lowerCallTo(CLI))
4838 : return false;
4839 4 : updateValueMap(I, CLI.ResultReg);
4840 4 : return true;
4841 : }
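// Illustrative note: AArch64 has no frem instruction, so the operation above
// is lowered as an ordinary runtime-library call; RTLIB::REM_F32 / REM_F64
// normally resolve to fmodf / fmod. For example
//   %r = frem double %a, %b
// ends up as a call to fmod(a, b) built through lowerCallTo.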
4842 :
4843 14 : bool AArch64FastISel::selectSDiv(const Instruction *I) {
4844 14 : MVT VT;
4845 14 : if (!isTypeLegal(I->getType(), VT))
4846 : return false;
4847 :
4848 28 : if (!isa<ConstantInt>(I->getOperand(1)))
4849 0 : return selectBinaryOp(I, ISD::SDIV);
4850 :
4851 : const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4852 28 : if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4853 26 : !(C.isPowerOf2() || (-C).isPowerOf2()))
4854 0 : return selectBinaryOp(I, ISD::SDIV);
4855 :
4856 14 : unsigned Lg2 = C.countTrailingZeros();
4857 28 : unsigned Src0Reg = getRegForValue(I->getOperand(0));
4858 14 : if (!Src0Reg)
4859 : return false;
4860 14 : bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4861 :
4862 14 : if (cast<BinaryOperator>(I)->isExact()) {
4863 3 : unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
4864 3 : if (!ResultReg)
4865 : return false;
4866 3 : updateValueMap(I, ResultReg);
4867 3 : return true;
4868 : }
4869 :
4870 11 : int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4871 11 : unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
4872 11 : if (!AddReg)
4873 : return false;
4874 :
4875 :   // Add (Src0 < 0) ? Pow2 - 1 : 0 to the dividend, via a compare and select.
4876 11 : if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
4877 : return false;
4878 :
4879 : unsigned SelectOpc;
4880 : const TargetRegisterClass *RC;
4881 11 : if (VT == MVT::i64) {
4882 : SelectOpc = AArch64::CSELXr;
4883 : RC = &AArch64::GPR64RegClass;
4884 : } else {
4885 : SelectOpc = AArch64::CSELWr;
4886 : RC = &AArch64::GPR32RegClass;
4887 : }
4888 : unsigned SelectReg =
4889 11 : fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
4890 : Src0IsKill, AArch64CC::LT);
4891 11 : if (!SelectReg)
4892 : return false;
4893 :
4894 : // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4895 : // negate the result.
4896 11 : unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4897 : unsigned ResultReg;
4898 11 : if (C.isNegative())
4899 4 : ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
4900 : SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
4901 : else
4902 7 : ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
4903 :
4904 11 : if (!ResultReg)
4905 : return false;
4906 :
4907 11 : updateValueMap(I, ResultReg);
4908 11 : return true;
4909 : }
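// A rough sketch of the power-of-two signed division above, for sdiv by 8
// (Lg2 == 3; register names are placeholders):
//   add  wTmp, wX, #7        ; bias by 2^3 - 1
//   cmp  wX, #0
//   csel wSel, wTmp, wX, lt  ; use the biased value only when x < 0
//   asr  wRes, wSel, #3      ; truncating divide
// e.g. x = -9: -9 + 7 = -2, and -2 >> 3 (arithmetic) = -1, matching -9 / 8.
// The 'exact' flag skips the bias entirely, and a negative divisor adds a
// final negation, emitted as a subtract of the shifted value from the zero
// register.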
4910 :
4911 : /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4912 : /// have to duplicate it for AArch64, because otherwise we would fail during the
4913 : /// sign-extend emission.
4914 3 : std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4915 3 : unsigned IdxN = getRegForValue(Idx);
4916 3 : if (IdxN == 0)
4917 : // Unhandled operand. Halt "fast" selection and bail.
4918 0 : return std::pair<unsigned, bool>(0, false);
4919 :
4920 3 : bool IdxNIsKill = hasTrivialKill(Idx);
4921 :
4922 : // If the index is smaller or larger than intptr_t, truncate or extend it.
4923 3 : MVT PtrVT = TLI.getPointerTy(DL);
4924 3 : EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4925 3 : if (IdxVT.bitsLT(PtrVT)) {
4926 1 : IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false);
4927 : IdxNIsKill = true;
4928 2 : } else if (IdxVT.bitsGT(PtrVT))
4929 0 : llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4930 3 : return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
4931 : }
4932 :
4933 : /// This is mostly a copy of the existing FastISel GEP code, but we have to
4934 : /// duplicate it for AArch64, because otherwise we would bail out even for
4935 : /// simple cases. This is because the standard fastEmit functions don't cover
4936 : /// MUL at all and ADD is lowered very inefficiently.
4937 16 : bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4938 32 : unsigned N = getRegForValue(I->getOperand(0));
4939 16 : if (!N)
4940 : return false;
4941 16 : bool NIsKill = hasTrivialKill(I->getOperand(0));
4942 :
4943 : // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4944 : // into a single N = N + TotalOffset.
4945 : uint64_t TotalOffs = 0;
4946 16 : MVT VT = TLI.getPointerTy(DL);
4947 34 : for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4948 52 : GTI != E; ++GTI) {
4949 : const Value *Idx = GTI.getOperand();
4950 2 : if (auto *StTy = GTI.getStructTypeOrNull()) {
4951 2 : unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4952 : // N = N + Offset
4953 2 : if (Field)
4954 2 : TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4955 : } else {
4956 16 : Type *Ty = GTI.getIndexedType();
4957 :
4958 : // If this is a constant subscript, handle it quickly.
4959 : if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4960 13 : if (CI->isZero())
4961 13 : continue;
4962 : // N = N + Offset
4963 11 : TotalOffs +=
4964 11 : DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
4965 11 : continue;
4966 : }
4967 3 : if (TotalOffs) {
4968 0 : N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
4969 0 : if (!N)
4970 0 : return false;
4971 : NIsKill = true;
4972 : TotalOffs = 0;
4973 : }
4974 :
4975 : // N = N + Idx * ElementSize;
4976 3 : uint64_t ElementSize = DL.getTypeAllocSize(Ty);
4977 3 : std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
4978 3 : unsigned IdxN = Pair.first;
4979 3 : bool IdxNIsKill = Pair.second;
4980 3 : if (!IdxN)
4981 : return false;
4982 :
4983 3 : if (ElementSize != 1) {
4984 3 : unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
4985 3 : if (!C)
4986 : return false;
4987 3 : IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
4988 3 : if (!IdxN)
4989 : return false;
4990 : IdxNIsKill = true;
4991 : }
4992 3 : N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
4993 3 : if (!N)
4994 : return false;
4995 : }
4996 : }
4997 16 : if (TotalOffs) {
4998 13 : N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
4999 13 : if (!N)
5000 : return false;
5001 : }
5002 16 : updateValueMap(I, N);
5003 16 : return true;
5004 : }
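// A rough sketch of the GEP lowering above (types and values are
// illustrative): constant indices and struct fields are coalesced into a
// running byte offset that is applied with a single add, so
//   %p = getelementptr i32, i32* %base, i64 5
// becomes add xP, xBase, #20. A non-constant index instead emits a multiply
// by the element size followed by an add, e.g. for an i32 element:
//   mov xC, #4
//   mul xIdx, xIdx, xC
//   add xP, xBase, xIdx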
5005 :
5006 3 : bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
5007 : assert(TM.getOptLevel() == CodeGenOpt::None &&
5008 : "cmpxchg survived AtomicExpand at optlevel > -O0");
5009 :
5010 3 : auto *RetPairTy = cast<StructType>(I->getType());
5011 3 : Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5012 : assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5013 : "cmpxchg has a non-i1 status result");
5014 :
5015 3 : MVT VT;
5016 3 : if (!isTypeLegal(RetTy, VT))
5017 : return false;
5018 :
5019 : const TargetRegisterClass *ResRC;
5020 : unsigned Opc, CmpOpc;
5021 : // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5022 : // extractvalue selection doesn't support that.
5023 3 : if (VT == MVT::i32) {
5024 : Opc = AArch64::CMP_SWAP_32;
5025 : CmpOpc = AArch64::SUBSWrs;
5026 : ResRC = &AArch64::GPR32RegClass;
5027 1 : } else if (VT == MVT::i64) {
5028 : Opc = AArch64::CMP_SWAP_64;
5029 : CmpOpc = AArch64::SUBSXrs;
5030 : ResRC = &AArch64::GPR64RegClass;
5031 : } else {
5032 : return false;
5033 : }
5034 :
5035 3 : const MCInstrDesc &II = TII.get(Opc);
5036 :
5037 3 : const unsigned AddrReg = constrainOperandRegClass(
5038 : II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5039 3 : const unsigned DesiredReg = constrainOperandRegClass(
5040 3 : II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5041 3 : const unsigned NewReg = constrainOperandRegClass(
5042 3 : II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5043 :
5044 3 : const unsigned ResultReg1 = createResultReg(ResRC);
5045 3 : const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5046 3 : const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5047 :
5048 : // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5049 3 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
5050 : .addDef(ResultReg1)
5051 : .addDef(ScratchReg)
5052 : .addUse(AddrReg)
5053 : .addUse(DesiredReg)
5054 : .addUse(NewReg);
5055 :
5056 6 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
5057 3 : .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5058 : .addUse(ResultReg1)
5059 : .addUse(DesiredReg)
5060 : .addImm(0);
5061 :
5062 6 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
5063 : .addDef(ResultReg2)
5064 : .addUse(AArch64::WZR)
5065 : .addUse(AArch64::WZR)
5066 : .addImm(AArch64CC::NE);
5067 :
5068 : assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5069 3 : updateValueMap(I, ResultReg1, 2);
5070 3 : return true;
5071 : }
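// Illustrative note on the result pair above: the CMP_SWAP_32/64 pseudo
// yields the value actually loaded from memory, and the following SUBS +
// CSINC (CSINC ..., WZR, WZR, NE is CSET EQ) materializes the i1 success
// flag, i.e. 1 exactly when the loaded value matched the expected value. The
// two consecutive result registers then feed the generic extractvalue
// handling.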
5072 :
5073 3902 : bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5074 3902 : switch (I->getOpcode()) {
5075 : default:
5076 : break;
5077 284 : case Instruction::Add:
5078 : case Instruction::Sub:
5079 284 : return selectAddSub(I);
5080 10 : case Instruction::Mul:
5081 10 : return selectMul(I);
5082 14 : case Instruction::SDiv:
5083 14 : return selectSDiv(I);
5084 4 : case Instruction::SRem:
5085 4 : if (!selectBinaryOp(I, ISD::SREM))
5086 4 : return selectRem(I, ISD::SREM);
5087 : return true;
5088 4 : case Instruction::URem:
5089 4 : if (!selectBinaryOp(I, ISD::UREM))
5090 4 : return selectRem(I, ISD::UREM);
5091 : return true;
5092 102 : case Instruction::Shl:
5093 : case Instruction::LShr:
5094 : case Instruction::AShr:
5095 102 : return selectShift(I);
5096 89 : case Instruction::And:
5097 : case Instruction::Or:
5098 : case Instruction::Xor:
5099 89 : return selectLogicalOp(I);
5100 272 : case Instruction::Br:
5101 272 : return selectBranch(I);
5102 0 : case Instruction::IndirectBr:
5103 0 : return selectIndirectBr(I);
5104 52 : case Instruction::BitCast:
5105 52 : if (!FastISel::selectBitCast(I))
5106 23 : return selectBitCast(I);
5107 : return true;
5108 4 : case Instruction::FPToSI:
5109 4 : if (!selectCast(I, ISD::FP_TO_SINT))
5110 1 : return selectFPToInt(I, /*Signed=*/true);
5111 : return true;
5112 6 : case Instruction::FPToUI:
5113 6 : return selectFPToInt(I, /*Signed=*/false);
5114 200 : case Instruction::ZExt:
5115 : case Instruction::SExt:
5116 200 : return selectIntExt(I);
5117 26 : case Instruction::Trunc:
5118 26 : if (!selectCast(I, ISD::TRUNCATE))
5119 14 : return selectTrunc(I);
5120 : return true;
5121 5 : case Instruction::FPExt:
5122 5 : return selectFPExt(I);
5123 2 : case Instruction::FPTrunc:
5124 2 : return selectFPTrunc(I);
5125 17 : case Instruction::SIToFP:
5126 17 : if (!selectCast(I, ISD::SINT_TO_FP))
5127 9 : return selectIntToFP(I, /*Signed=*/true);
5128 : return true;
5129 13 : case Instruction::UIToFP:
5130 13 : return selectIntToFP(I, /*Signed=*/false);
5131 375 : case Instruction::Load:
5132 375 : return selectLoad(I);
5133 421 : case Instruction::Store:
5134 421 : return selectStore(I);
5135 57 : case Instruction::FCmp:
5136 : case Instruction::ICmp:
5137 57 : return selectCmp(I);
5138 53 : case Instruction::Select:
5139 53 : return selectSelect(I);
5140 1302 : case Instruction::Ret:
5141 1302 : return selectRet(I);
5142 4 : case Instruction::FRem:
5143 4 : return selectFRem(I);
5144 16 : case Instruction::GetElementPtr:
5145 16 : return selectGetElementPtr(I);
5146 : case Instruction::AtomicCmpXchg:
5147 3 : return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5148 : }
5149 :
5150 : // Silence warnings.
5151 : (void)&CC_AArch64_DarwinPCS_VarArg;
5152 : (void)&CC_AArch64_Win64_VarArg;
5153 :
5154 :   // Fall back to target-independent instruction selection.
5155 567 : return selectOperator(I, I->getOpcode());
5156 : }
5157 :
5158 : namespace llvm {
5159 :
5160 1222 : FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5161 : const TargetLibraryInfo *LibInfo) {
5162 1222 : return new AArch64FastISel(FuncInfo, LibInfo);
5163 : }
5164 :
5165 : } // end namespace llvm