LLVM 23.0.0git
AArch64FastISel.cpp
Go to the documentation of this file.
//===- AArch64FastISel.cpp - AArch64 FastISel implementation --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the AArch64-specific support for the FastISel class. Some
10// of the target-specific code is generated by tablegen in the file
11// AArch64GenFastISel.inc, which is #included here.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AArch64.h"
18#include "AArch64RegisterInfo.h"
20#include "AArch64Subtarget.h"
23#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/APInt.h"
25#include "llvm/ADT/DenseMap.h"
41#include "llvm/IR/Argument.h"
42#include "llvm/IR/Attributes.h"
43#include "llvm/IR/BasicBlock.h"
44#include "llvm/IR/CallingConv.h"
45#include "llvm/IR/Constant.h"
46#include "llvm/IR/Constants.h"
47#include "llvm/IR/DataLayout.h"
49#include "llvm/IR/Function.h"
51#include "llvm/IR/GlobalValue.h"
52#include "llvm/IR/InstrTypes.h"
53#include "llvm/IR/Instruction.h"
56#include "llvm/IR/Intrinsics.h"
57#include "llvm/IR/IntrinsicsAArch64.h"
58#include "llvm/IR/Module.h"
59#include "llvm/IR/Operator.h"
60#include "llvm/IR/Type.h"
61#include "llvm/IR/User.h"
62#include "llvm/IR/Value.h"
63#include "llvm/MC/MCInstrDesc.h"
64#include "llvm/MC/MCSymbol.h"
71#include <algorithm>
72#include <cassert>
73#include <cstdint>
74#include <iterator>
75#include <utility>
76
77using namespace llvm;
78
79namespace {
80
81class AArch64FastISel final : public FastISel {
82 class Address {
83 public:
84 enum BaseKind { RegBase, FrameIndexBase };
85
86 private:
87 BaseKind Kind = RegBase;
89 union {
90 unsigned Reg;
91 int FI;
92 } Base;
93 Register OffsetReg;
94 unsigned Shift = 0;
95 int64_t Offset = 0;
96 const GlobalValue *GV = nullptr;
97
98 public:
99 Address() { Base.Reg = 0; }
100
101 void setKind(BaseKind K) { Kind = K; }
102 BaseKind getKind() const { return Kind; }
103 void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
104 AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
105 bool isRegBase() const { return Kind == RegBase; }
106 bool isFIBase() const { return Kind == FrameIndexBase; }
107
108 void setReg(Register Reg) {
109 assert(isRegBase() && "Invalid base register access!");
110 Base.Reg = Reg.id();
111 }
112
113 Register getReg() const {
114 assert(isRegBase() && "Invalid base register access!");
115 return Base.Reg;
116 }
117
118 void setOffsetReg(Register Reg) { OffsetReg = Reg; }
119
120 Register getOffsetReg() const { return OffsetReg; }
121
122 void setFI(unsigned FI) {
123 assert(isFIBase() && "Invalid base frame index access!");
124 Base.FI = FI;
125 }
126
127 unsigned getFI() const {
128 assert(isFIBase() && "Invalid base frame index access!");
129 return Base.FI;
130 }
131
132 void setOffset(int64_t O) { Offset = O; }
133 int64_t getOffset() { return Offset; }
134 void setShift(unsigned S) { Shift = S; }
135 unsigned getShift() { return Shift; }
136
137 void setGlobalValue(const GlobalValue *G) { GV = G; }
138 const GlobalValue *getGlobalValue() { return GV; }
139 };
140
141 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
142 /// make the right decision when generating code for different targets.
143 const AArch64Subtarget *Subtarget;
144 LLVMContext *Context;
145
146 bool fastLowerArguments() override;
147 bool fastLowerCall(CallLoweringInfo &CLI) override;
148 bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
149
150private:
151 // Selection routines.
152 bool selectAddSub(const Instruction *I);
153 bool selectLogicalOp(const Instruction *I);
154 bool selectLoad(const Instruction *I);
155 bool selectStore(const Instruction *I);
156 bool selectBranch(const Instruction *I);
157 bool selectIndirectBr(const Instruction *I);
158 bool selectCmp(const Instruction *I);
159 bool selectSelect(const Instruction *I);
160 bool selectFPExt(const Instruction *I);
161 bool selectFPTrunc(const Instruction *I);
162 bool selectFPToInt(const Instruction *I, bool Signed);
163 bool selectIntToFP(const Instruction *I, bool Signed);
164 bool selectRem(const Instruction *I, unsigned ISDOpcode);
165 bool selectRet(const Instruction *I);
166 bool selectTrunc(const Instruction *I);
167 bool selectIntExt(const Instruction *I);
168 bool selectMul(const Instruction *I);
169 bool selectShift(const Instruction *I);
170 bool selectBitCast(const Instruction *I);
171 bool selectFRem(const Instruction *I);
172 bool selectSDiv(const Instruction *I);
173 bool selectGetElementPtr(const Instruction *I);
174 bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
175
176 // Utility helper routines.
177 bool isTypeLegal(Type *Ty, MVT &VT);
178 bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
179 bool isValueAvailable(const Value *V) const;
180 bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
181 bool computeCallAddress(const Value *V, Address &Addr);
182 bool simplifyAddress(Address &Addr, MVT VT);
183 void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
185 unsigned ScaleFactor, MachineMemOperand *MMO);
186 bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment);
187 bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
188 MaybeAlign Alignment);
189 bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
190 const Value *Cond);
191 bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
192 bool optimizeSelect(const SelectInst *SI);
193 Register getRegForGEPIndex(const Value *Idx);
194
195 // Emit helper routines.
196 Register emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
197 const Value *RHS, bool SetFlags = false,
198 bool WantResult = true, bool IsZExt = false);
199 Register emitAddSub_rr(bool UseAdd, MVT RetVT, Register LHSReg,
200 Register RHSReg, bool SetFlags = false,
201 bool WantResult = true);
202 Register emitAddSub_ri(bool UseAdd, MVT RetVT, Register LHSReg, uint64_t Imm,
203 bool SetFlags = false, bool WantResult = true);
204 Register emitAddSub_rs(bool UseAdd, MVT RetVT, Register LHSReg,
205 Register RHSReg, AArch64_AM::ShiftExtendType ShiftType,
206 uint64_t ShiftImm, bool SetFlags = false,
207 bool WantResult = true);
208 Register emitAddSub_rx(bool UseAdd, MVT RetVT, Register LHSReg,
210 uint64_t ShiftImm, bool SetFlags = false,
211 bool WantResult = true);
212
213 // Emit functions.
214 bool emitCompareAndBranch(const CondBrInst *BI);
215 bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
216 bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
217 bool emitICmp_ri(MVT RetVT, Register LHSReg, uint64_t Imm);
218 bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
219 Register emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
220 MachineMemOperand *MMO = nullptr);
221 bool emitStore(MVT VT, Register SrcReg, Address Addr,
222 MachineMemOperand *MMO = nullptr);
223 bool emitStoreRelease(MVT VT, Register SrcReg, Register AddrReg,
224 MachineMemOperand *MMO = nullptr);
225 Register emitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT, bool isZExt);
226 Register emiti1Ext(Register SrcReg, MVT DestVT, bool isZExt);
227 Register emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
228 bool SetFlags = false, bool WantResult = true,
229 bool IsZExt = false);
230 Register emitAdd_ri_(MVT VT, Register Op0, int64_t Imm);
231 Register emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
232 bool SetFlags = false, bool WantResult = true,
233 bool IsZExt = false);
234 Register emitSubs_rr(MVT RetVT, Register LHSReg, Register RHSReg,
235 bool WantResult = true);
236 Register emitSubs_rs(MVT RetVT, Register LHSReg, Register RHSReg,
237 AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
238 bool WantResult = true);
239 Register emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
240 const Value *RHS);
241 Register emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, Register LHSReg,
242 uint64_t Imm);
243 Register emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, Register LHSReg,
244 Register RHSReg, uint64_t ShiftImm);
245 Register emitAnd_ri(MVT RetVT, Register LHSReg, uint64_t Imm);
246 Register emitMul_rr(MVT RetVT, Register Op0, Register Op1);
247 Register emitSMULL_rr(MVT RetVT, Register Op0, Register Op1);
248 Register emitUMULL_rr(MVT RetVT, Register Op0, Register Op1);
249 Register emitLSL_rr(MVT RetVT, Register Op0Reg, Register Op1Reg);
250 Register emitLSL_ri(MVT RetVT, MVT SrcVT, Register Op0Reg, uint64_t Imm,
251 bool IsZExt = true);
252 Register emitLSR_rr(MVT RetVT, Register Op0Reg, Register Op1Reg);
253 Register emitLSR_ri(MVT RetVT, MVT SrcVT, Register Op0Reg, uint64_t Imm,
254 bool IsZExt = true);
255 Register emitASR_rr(MVT RetVT, Register Op0Reg, Register Op1Reg);
256 Register emitASR_ri(MVT RetVT, MVT SrcVT, Register Op0Reg, uint64_t Imm,
257 bool IsZExt = false);
258
259 Register materializeInt(const ConstantInt *CI, MVT VT);
260 Register materializeFP(const ConstantFP *CFP, MVT VT);
261 Register materializeGV(const GlobalValue *GV);
262
263 // Call handling routines.
264private:
265 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
266 bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
267 SmallVectorImpl<Type *> &OrigTys, unsigned &NumBytes);
268 bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes);
269
270public:
271 // Backend specific FastISel code.
272 Register fastMaterializeAlloca(const AllocaInst *AI) override;
273 Register fastMaterializeConstant(const Constant *C) override;
274 Register fastMaterializeFloatZero(const ConstantFP *CF) override;
275
276 explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
277 const TargetLibraryInfo *LibInfo,
278 const LibcallLoweringInfo *libcallLowering)
279 : FastISel(FuncInfo, LibInfo, libcallLowering,
280 /*SkipTargetIndependentISel=*/true) {
281 Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
282 Context = &FuncInfo.Fn->getContext();
283 }
284
285 bool fastSelectInstruction(const Instruction *I) override;
286
287#include "AArch64GenFastISel.inc"
288};
289
290} // end anonymous namespace
291
292/// Check if the sign-/zero-extend will be a noop.
293static bool isIntExtFree(const Instruction *I) {
295 "Unexpected integer extend instruction.");
296 assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
297 "Unexpected value type.");
298 bool IsZExt = isa<ZExtInst>(I);
299
300 if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
301 if (LI->hasOneUse())
302 return true;
303
304 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
305 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
306 return true;
307
308 return false;
309}
310
311/// Determine the implicit scale factor that is applied by a memory
312/// operation for a given value type.
313static unsigned getImplicitScaleFactor(MVT VT) {
314 switch (VT.SimpleTy) {
315 default:
316 return 0; // invalid
317 case MVT::i1: // fall-through
318 case MVT::i8:
319 return 1;
320 case MVT::i16:
321 return 2;
322 case MVT::i32: // fall-through
323 case MVT::f32:
324 return 4;
325 case MVT::i64: // fall-through
326 case MVT::f64:
327 return 8;
328 }
329}
330
331CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
332 if (CC == CallingConv::GHC)
333 return CC_AArch64_GHC;
334 if (CC == CallingConv::CFGuard_Check)
336 if (Subtarget->isTargetDarwin())
338 if (Subtarget->isTargetWindows())
339 return CC_AArch64_Win64PCS;
340 return CC_AArch64_AAPCS;
341}
342
343Register AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
344 assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
345 "Alloca should always return a pointer.");
346
347 // Don't handle dynamic allocas.
348 auto SI = FuncInfo.StaticAllocaMap.find(AI);
349 if (SI == FuncInfo.StaticAllocaMap.end())
350 return Register();
351
352 if (SI != FuncInfo.StaticAllocaMap.end()) {
353 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
354 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
355 ResultReg)
356 .addFrameIndex(SI->second)
357 .addImm(0)
358 .addImm(0);
359 return ResultReg;
360 }
361
362 return Register();
363}
364
365Register AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
366 if (VT > MVT::i64)
367 return Register();
368
369 if (!CI->isZero())
370 return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
371
372 // Create a copy from the zero register to materialize a "0" value.
373 const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
374 : &AArch64::GPR32RegClass;
375 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
376 Register ResultReg = createResultReg(RC);
377 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
378 ResultReg).addReg(ZeroReg, getKillRegState(true));
379 return ResultReg;
380}
381
382Register AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
383 // Positive zero (+0.0) has to be materialized with a fmov from the zero
384 // register, because the immediate version of fmov cannot encode zero.
385 if (CFP->isNullValue())
386 return fastMaterializeFloatZero(CFP);
387
388 if (VT != MVT::f32 && VT != MVT::f64)
389 return Register();
390
391 const APFloat Val = CFP->getValueAPF();
392 bool Is64Bit = (VT == MVT::f64);
393 // This checks to see if we can use FMOV instructions to materialize
394 // a constant, otherwise we have to materialize via the constant pool.
395 int Imm =
396 Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
397 if (Imm != -1) {
398 unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
399 return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
400 }
401
402 // For the large code model materialize the FP constant in code.
403 if (TM.getCodeModel() == CodeModel::Large) {
404 unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
405 const TargetRegisterClass *RC = Is64Bit ?
406 &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
407
408 Register TmpReg = createResultReg(RC);
409 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg)
410 .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
411
412 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
413 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
414 TII.get(TargetOpcode::COPY), ResultReg)
415 .addReg(TmpReg, getKillRegState(true));
416
417 return ResultReg;
418 }
419
420 // Materialize via constant pool. MachineConstantPool wants an explicit
421 // alignment.
422 Align Alignment = DL.getPrefTypeAlign(CFP->getType());
423
424 unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
425 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
426 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
428
429 unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
430 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
431 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
432 .addReg(ADRPReg)
434 return ResultReg;
435}
436
437Register AArch64FastISel::materializeGV(const GlobalValue *GV) {
438 // We can't handle thread-local variables quickly yet.
439 if (GV->isThreadLocal())
440 return Register();
441
442 // MachO still uses GOT for large code-model accesses, but ELF requires
443 // movz/movk sequences, which FastISel doesn't handle yet.
444 if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
445 return Register();
446
447 if (FuncInfo.MF->getInfo<AArch64FunctionInfo>()->hasELFSignedGOT())
448 return Register();
449
450 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
451
452 EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
453 if (!DestEVT.isSimple())
454 return Register();
455
456 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
457 Register ResultReg;
458
459 if (OpFlags & AArch64II::MO_GOT) {
460 // ADRP + LDRX
461 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
462 ADRPReg)
463 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
464
465 unsigned LdrOpc;
466 if (Subtarget->isTargetILP32()) {
467 ResultReg = createResultReg(&AArch64::GPR32RegClass);
468 LdrOpc = AArch64::LDRWui;
469 } else {
470 ResultReg = createResultReg(&AArch64::GPR64RegClass);
471 LdrOpc = AArch64::LDRXui;
472 }
473 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc),
474 ResultReg)
475 .addReg(ADRPReg)
477 AArch64II::MO_NC | OpFlags);
478 if (!Subtarget->isTargetILP32())
479 return ResultReg;
480
481 // LDRWui produces a 32-bit register, but pointers in-register are 64-bits
482 // so we must extend the result on ILP32.
483 Register Result64 = createResultReg(&AArch64::GPR64RegClass);
484 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
485 TII.get(TargetOpcode::SUBREG_TO_REG))
486 .addDef(Result64)
487 .addReg(ResultReg, RegState::Kill)
488 .addImm(AArch64::sub_32);
489 return Result64;
490 } else {
491 // ADRP + ADDX
492 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
493 ADRPReg)
494 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
495
496 if (OpFlags & AArch64II::MO_TAGGED) {
497 // MO_TAGGED on the page indicates a tagged address. Set the tag now.
498 // We do so by creating a MOVK that sets bits 48-63 of the register to
499 // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
500 // the small code model so we can assume a binary size of <= 4GB, which
501 // makes the untagged PC relative offset positive. The binary must also be
502 // loaded into address range [0, 2^48). Both of these properties need to
503 // be ensured at runtime when using tagged addresses.
504 //
505 // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that
506 // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands
507 // are not exactly 1:1 with FastISel so we cannot easily abstract this
508 // out. At some point, it would be nice to find a way to not have this
509 // duplicate code.
510 Register DstReg = createResultReg(&AArch64::GPR64commonRegClass);
511 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi),
512 DstReg)
513 .addReg(ADRPReg)
514 .addGlobalAddress(GV, /*Offset=*/0x100000000,
516 .addImm(48);
517 ADRPReg = DstReg;
518 }
519
520 ResultReg = createResultReg(&AArch64::GPR64spRegClass);
521 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
522 ResultReg)
523 .addReg(ADRPReg)
524 .addGlobalAddress(GV, 0,
526 .addImm(0);
527 }
528 return ResultReg;
529}
530
531Register AArch64FastISel::fastMaterializeConstant(const Constant *C) {
532 EVT CEVT = TLI.getValueType(DL, C->getType(), true);
533
534 // Only handle simple types.
535 if (!CEVT.isSimple())
536 return Register();
537 MVT VT = CEVT.getSimpleVT();
538 // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
539 // 'null' pointers need to have a somewhat special treatment.
541 assert(VT == MVT::i64 && "Expected 64-bit pointers");
542 return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
543 }
544
545 if (const auto *CI = dyn_cast<ConstantInt>(C))
546 return materializeInt(CI, VT);
547 else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
548 return materializeFP(CFP, VT);
549 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
550 return materializeGV(GV);
551
552 return Register();
553}
554
555Register AArch64FastISel::fastMaterializeFloatZero(const ConstantFP *CFP) {
556 assert(CFP->isNullValue() &&
557 "Floating-point constant is not a positive zero.");
558 MVT VT;
559 if (!isTypeLegal(CFP->getType(), VT))
560 return Register();
561
562 if (VT != MVT::f32 && VT != MVT::f64)
563 return Register();
564
565 bool Is64Bit = (VT == MVT::f64);
566 unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
567 unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
568 return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
569}
570
571/// Check if the multiply is by a power-of-2 constant.
572static bool isMulPowOf2(const Value *I) {
573 if (const auto *MI = dyn_cast<MulOperator>(I)) {
574 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
575 if (C->getValue().isPowerOf2())
576 return true;
577 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
578 if (C->getValue().isPowerOf2())
579 return true;
580 }
581 return false;
582}
583
584// Computes the address to get to an object.
585bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
586{
587 const User *U = nullptr;
588 unsigned Opcode = Instruction::UserOp1;
589 if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
590 // Don't walk into other basic blocks unless the object is an alloca from
591 // another block, otherwise it may not have a virtual register assigned.
592 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
593 FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
594 Opcode = I->getOpcode();
595 U = I;
596 }
597 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
598 Opcode = C->getOpcode();
599 U = C;
600 }
601
602 if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
603 if (Ty->getAddressSpace() > 255)
604 // Fast instruction selection doesn't support the special
605 // address spaces.
606 return false;
607
608 switch (Opcode) {
609 default:
610 break;
611 case Instruction::BitCast:
612 // Look through bitcasts.
613 return computeAddress(U->getOperand(0), Addr, Ty);
614
615 case Instruction::IntToPtr:
616 // Look past no-op inttoptrs.
617 if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
618 TLI.getPointerTy(DL))
619 return computeAddress(U->getOperand(0), Addr, Ty);
620 break;
621
622 case Instruction::PtrToInt:
623 // Look past no-op ptrtoints.
624 if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
625 return computeAddress(U->getOperand(0), Addr, Ty);
626 break;
627
628 case Instruction::GetElementPtr: {
629 Address SavedAddr = Addr;
630 uint64_t TmpOffset = Addr.getOffset();
631
632 // Iterate through the GEP folding the constants into offsets where
633 // we can.
635 GTI != E; ++GTI) {
636 const Value *Op = GTI.getOperand();
637 if (StructType *STy = GTI.getStructTypeOrNull()) {
638 const StructLayout *SL = DL.getStructLayout(STy);
639 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
640 TmpOffset += SL->getElementOffset(Idx);
641 } else {
642 uint64_t S = GTI.getSequentialElementStride(DL);
643 while (true) {
644 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
645 // Constant-offset addressing.
646 TmpOffset += CI->getSExtValue() * S;
647 break;
648 }
649 if (canFoldAddIntoGEP(U, Op)) {
650 // A compatible add with a constant operand. Fold the constant.
651 ConstantInt *CI =
652 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
653 TmpOffset += CI->getSExtValue() * S;
654 // Iterate on the other operand.
655 Op = cast<AddOperator>(Op)->getOperand(0);
656 continue;
657 }
658 // Unsupported
659 goto unsupported_gep;
660 }
661 }
662 }
663
664 // Try to grab the base operand now.
665 Addr.setOffset(TmpOffset);
666 if (computeAddress(U->getOperand(0), Addr, Ty))
667 return true;
668
669 // We failed, restore everything and try the other options.
670 Addr = SavedAddr;
671
672 unsupported_gep:
673 break;
674 }
675 case Instruction::Alloca: {
676 const AllocaInst *AI = cast<AllocaInst>(Obj);
677 DenseMap<const AllocaInst *, int>::iterator SI =
678 FuncInfo.StaticAllocaMap.find(AI);
679 if (SI != FuncInfo.StaticAllocaMap.end()) {
680 Addr.setKind(Address::FrameIndexBase);
681 Addr.setFI(SI->second);
682 return true;
683 }
684 break;
685 }
686 case Instruction::Add: {
687 // Adds of constants are common and easy enough.
688 const Value *LHS = U->getOperand(0);
689 const Value *RHS = U->getOperand(1);
690
692 std::swap(LHS, RHS);
693
694 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
695 Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
696 return computeAddress(LHS, Addr, Ty);
697 }
698
699 Address Backup = Addr;
700 if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
701 return true;
702 Addr = Backup;
703
704 break;
705 }
706 case Instruction::Sub: {
707 // Subs of constants are common and easy enough.
708 const Value *LHS = U->getOperand(0);
709 const Value *RHS = U->getOperand(1);
710
711 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
712 Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
713 return computeAddress(LHS, Addr, Ty);
714 }
715 break;
716 }
717 case Instruction::Shl: {
718 if (Addr.getOffsetReg())
719 break;
720
721 const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
722 if (!CI)
723 break;
724
725 unsigned Val = CI->getZExtValue();
726 if (Val < 1 || Val > 3)
727 break;
728
729 uint64_t NumBytes = 0;
730 if (Ty && Ty->isSized()) {
731 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
732 NumBytes = NumBits / 8;
733 if (!isPowerOf2_64(NumBits))
734 NumBytes = 0;
735 }
736
737 if (NumBytes != (1ULL << Val))
738 break;
739
740 Addr.setShift(Val);
741 Addr.setExtendType(AArch64_AM::LSL);
742
743 const Value *Src = U->getOperand(0);
744 if (const auto *I = dyn_cast<Instruction>(Src)) {
745 if (FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
746 // Fold the zext or sext when it won't become a noop.
747 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
748 if (!isIntExtFree(ZE) &&
749 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
750 Addr.setExtendType(AArch64_AM::UXTW);
751 Src = ZE->getOperand(0);
752 }
753 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
754 if (!isIntExtFree(SE) &&
755 SE->getOperand(0)->getType()->isIntegerTy(32)) {
756 Addr.setExtendType(AArch64_AM::SXTW);
757 Src = SE->getOperand(0);
758 }
759 }
760 }
761 }
762
763 if (const auto *AI = dyn_cast<BinaryOperator>(Src))
764 if (AI->getOpcode() == Instruction::And) {
765 const Value *LHS = AI->getOperand(0);
766 const Value *RHS = AI->getOperand(1);
767
768 if (const auto *C = dyn_cast<ConstantInt>(LHS))
769 if (C->getValue() == 0xffffffff)
770 std::swap(LHS, RHS);
771
772 if (const auto *C = dyn_cast<ConstantInt>(RHS))
773 if (C->getValue() == 0xffffffff) {
774 Addr.setExtendType(AArch64_AM::UXTW);
775 Register Reg = getRegForValue(LHS);
776 if (!Reg)
777 return false;
778 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
779 Addr.setOffsetReg(Reg);
780 return true;
781 }
782 }
783
784 Register Reg = getRegForValue(Src);
785 if (!Reg)
786 return false;
787 Addr.setOffsetReg(Reg);
788 return true;
789 }
790 case Instruction::Mul: {
791 if (Addr.getOffsetReg())
792 break;
793
794 if (!isMulPowOf2(U))
795 break;
796
797 const Value *LHS = U->getOperand(0);
798 const Value *RHS = U->getOperand(1);
799
800 // Canonicalize power-of-2 value to the RHS.
801 if (const auto *C = dyn_cast<ConstantInt>(LHS))
802 if (C->getValue().isPowerOf2())
803 std::swap(LHS, RHS);
804
805 assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");
806 const auto *C = cast<ConstantInt>(RHS);
807 unsigned Val = C->getValue().logBase2();
808 if (Val < 1 || Val > 3)
809 break;
810
811 uint64_t NumBytes = 0;
812 if (Ty && Ty->isSized()) {
813 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
814 NumBytes = NumBits / 8;
815 if (!isPowerOf2_64(NumBits))
816 NumBytes = 0;
817 }
818
819 if (NumBytes != (1ULL << Val))
820 break;
821
822 Addr.setShift(Val);
823 Addr.setExtendType(AArch64_AM::LSL);
824
825 const Value *Src = LHS;
826 if (const auto *I = dyn_cast<Instruction>(Src)) {
827 if (FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
828 // Fold the zext or sext when it won't become a noop.
829 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
830 if (!isIntExtFree(ZE) &&
831 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
832 Addr.setExtendType(AArch64_AM::UXTW);
833 Src = ZE->getOperand(0);
834 }
835 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
836 if (!isIntExtFree(SE) &&
837 SE->getOperand(0)->getType()->isIntegerTy(32)) {
838 Addr.setExtendType(AArch64_AM::SXTW);
839 Src = SE->getOperand(0);
840 }
841 }
842 }
843 }
844
845 Register Reg = getRegForValue(Src);
846 if (!Reg)
847 return false;
848 Addr.setOffsetReg(Reg);
849 return true;
850 }
851 case Instruction::And: {
852 if (Addr.getOffsetReg())
853 break;
854
855 if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
856 break;
857
858 const Value *LHS = U->getOperand(0);
859 const Value *RHS = U->getOperand(1);
860
861 if (const auto *C = dyn_cast<ConstantInt>(LHS))
862 if (C->getValue() == 0xffffffff)
863 std::swap(LHS, RHS);
864
865 if (const auto *C = dyn_cast<ConstantInt>(RHS))
866 if (C->getValue() == 0xffffffff) {
867 Addr.setShift(0);
868 Addr.setExtendType(AArch64_AM::LSL);
869 Addr.setExtendType(AArch64_AM::UXTW);
870
871 Register Reg = getRegForValue(LHS);
872 if (!Reg)
873 return false;
874 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
875 Addr.setOffsetReg(Reg);
876 return true;
877 }
878 break;
879 }
880 case Instruction::SExt:
881 case Instruction::ZExt: {
882 if (!Addr.getReg() || Addr.getOffsetReg())
883 break;
884
885 const Value *Src = nullptr;
886 // Fold the zext or sext when it won't become a noop.
887 if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
888 if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
889 Addr.setExtendType(AArch64_AM::UXTW);
890 Src = ZE->getOperand(0);
891 }
892 } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
893 if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
894 Addr.setExtendType(AArch64_AM::SXTW);
895 Src = SE->getOperand(0);
896 }
897 }
898
899 if (!Src)
900 break;
901
902 Addr.setShift(0);
903 Register Reg = getRegForValue(Src);
904 if (!Reg)
905 return false;
906 Addr.setOffsetReg(Reg);
907 return true;
908 }
909 } // end switch
910
911 if (Addr.isRegBase() && !Addr.getReg()) {
912 Register Reg = getRegForValue(Obj);
913 if (!Reg)
914 return false;
915 Addr.setReg(Reg);
916 return true;
917 }
918
919 if (!Addr.getOffsetReg()) {
920 Register Reg = getRegForValue(Obj);
921 if (!Reg)
922 return false;
923 Addr.setOffsetReg(Reg);
924 return true;
925 }
926
927 return false;
928}
929
930bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
931 const User *U = nullptr;
932 unsigned Opcode = Instruction::UserOp1;
933 bool InMBB = true;
934
935 if (const auto *I = dyn_cast<Instruction>(V)) {
936 Opcode = I->getOpcode();
937 U = I;
938 InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
939 } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
940 Opcode = C->getOpcode();
941 U = C;
942 }
943
944 switch (Opcode) {
945 default: break;
946 case Instruction::BitCast:
947 // Look past bitcasts if its operand is in the same BB.
948 if (InMBB)
949 return computeCallAddress(U->getOperand(0), Addr);
950 break;
951 case Instruction::IntToPtr:
952 // Look past no-op inttoptrs if its operand is in the same BB.
953 if (InMBB &&
954 TLI.getValueType(DL, U->getOperand(0)->getType()) ==
955 TLI.getPointerTy(DL))
956 return computeCallAddress(U->getOperand(0), Addr);
957 break;
958 case Instruction::PtrToInt:
959 // Look past no-op ptrtoints if its operand is in the same BB.
960 if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
961 return computeCallAddress(U->getOperand(0), Addr);
962 break;
963 }
964
965 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
966 Addr.setGlobalValue(GV);
967 return true;
968 }
969
970 // If all else fails, try to materialize the value in a register.
971 if (!Addr.getGlobalValue()) {
972 Addr.setReg(getRegForValue(V));
973 return Addr.getReg().isValid();
974 }
975
976 return false;
977}
978
979bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
980 EVT evt = TLI.getValueType(DL, Ty, true);
981
982 if (Subtarget->isTargetILP32() && Ty->isPointerTy())
983 return false;
984
985 // Only handle simple types.
986 if (evt == MVT::Other || !evt.isSimple())
987 return false;
988 VT = evt.getSimpleVT();
989
990 // This is a legal type, but it's not something we handle in fast-isel.
991 if (VT == MVT::f128)
992 return false;
993
994 // Handle all other legal types, i.e. a register that will directly hold this
995 // value.
996 return TLI.isTypeLegal(VT);
997}
998
999/// Determine if the value type is supported by FastISel.
1000///
1001/// FastISel for AArch64 can handle more value types than are legal. This adds
1002/// simple value type such as i1, i8, and i16.
1003bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
1004 if (Ty->isVectorTy() && !IsVectorAllowed)
1005 return false;
1006
1007 if (isTypeLegal(Ty, VT))
1008 return true;
1009
1010 // If this is a type than can be sign or zero-extended to a basic operation
1011 // go ahead and accept it now.
1012 if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
1013 return true;
1014
1015 return false;
1016}
1017
1018bool AArch64FastISel::isValueAvailable(const Value *V) const {
1019 if (!isa<Instruction>(V))
1020 return true;
1021
1022 const auto *I = cast<Instruction>(V);
1023 return FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB;
1024}
1025
// Rewrite Addr into a form an AArch64 load/store can encode for type VT:
// either base + scaled unsigned 12-bit immediate, base + unscaled signed
// 9-bit immediate, or base + (possibly extended/shifted) offset register.
// Emits extra ADD/LSL instructions as needed and returns false when the
// address cannot be simplified.
bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
  if (Subtarget->isTargetILP32())
    return false;

  unsigned ScaleFactor = getImplicitScaleFactor(VT);
  if (!ScaleFactor)
    return false;

  bool ImmediateOffsetNeedsLowering = false;
  bool RegisterOffsetNeedsLowering = false;
  int64_t Offset = Addr.getOffset();
  // Negative or unaligned offsets must fit the unscaled signed 9-bit form;
  // aligned positive offsets must fit the scaled unsigned 12-bit form.
  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
    ImmediateOffsetNeedsLowering = true;
  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
           !isUInt<12>(Offset / ScaleFactor))
    ImmediateOffsetNeedsLowering = true;

  // Cannot encode an offset register and an immediate offset in the same
  // instruction. Fold the immediate offset into the load/store instruction and
  // emit an additional add to take care of the offset register.
  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
    RegisterOffsetNeedsLowering = true;

  // Cannot encode zero register as base.
  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
    RegisterOffsetNeedsLowering = true;

  // If this is a stack pointer and the offset needs to be simplified then put
  // the alloca address into a register, set the base type back to register and
  // continue. This should almost never happen.
  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
  {
    // Materialize the frame address with ADDXri (frame index, no offset).
    Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
            ResultReg)
      .addFrameIndex(Addr.getFI())
      .addImm(0)
      .addImm(0);
    Addr.setKind(Address::RegBase);
    Addr.setReg(ResultReg);
  }

  if (RegisterOffsetNeedsLowering) {
    // Fold the offset register (with its extend/shift) into an explicit
    // add/shift, leaving only a plain base register in Addr.
    Register ResultReg;
    if (Addr.getReg()) {
      if (Addr.getExtendType() == AArch64_AM::SXTW ||
          Addr.getExtendType() == AArch64_AM::UXTW )
        ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  Addr.getOffsetReg(), Addr.getExtendType(),
                                  Addr.getShift());
      else
        ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  Addr.getOffsetReg(), AArch64_AM::LSL,
                                  Addr.getShift());
    } else {
      // No base register: shift (and extend) the offset register on its own.
      if (Addr.getExtendType() == AArch64_AM::UXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               Addr.getShift(), /*IsZExt=*/true);
      else if (Addr.getExtendType() == AArch64_AM::SXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               Addr.getShift(), /*IsZExt=*/false);
      else
        ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
                               Addr.getShift());
    }
    if (!ResultReg)
      return false;

    Addr.setReg(ResultReg);
    Addr.setOffsetReg(0);
    Addr.setShift(0);
    Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
  }

  // Since the offset is too large for the load/store instruction get the
  // reg+offset into a register.
  if (ImmediateOffsetNeedsLowering) {
    Register ResultReg;
    if (Addr.getReg())
      // Try to fold the immediate into the add instruction.
      ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
    else
      ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);

    if (!ResultReg)
      return false;
    Addr.setReg(ResultReg);
    Addr.setOffset(0);
  }
  return true;
}
1117
1118void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1119 const MachineInstrBuilder &MIB,
1121 unsigned ScaleFactor,
1122 MachineMemOperand *MMO) {
1123 int64_t Offset = Addr.getOffset() / ScaleFactor;
1124 // Frame base works a bit differently. Handle it separately.
1125 if (Addr.isFIBase()) {
1126 int FI = Addr.getFI();
1127 // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1128 // and alignment should be based on the VT.
1129 MMO = FuncInfo.MF->getMachineMemOperand(
1130 MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1131 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
1132 // Now add the rest of the operands.
1133 MIB.addFrameIndex(FI).addImm(Offset);
1134 } else {
1135 assert(Addr.isRegBase() && "Unexpected address kind.");
1136 const MCInstrDesc &II = MIB->getDesc();
1137 unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1138 Addr.setReg(
1139 constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1140 Addr.setOffsetReg(
1141 constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1142 if (Addr.getOffsetReg()) {
1143 assert(Addr.getOffset() == 0 && "Unexpected offset");
1144 bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1145 Addr.getExtendType() == AArch64_AM::SXTX;
1146 MIB.addReg(Addr.getReg());
1147 MIB.addReg(Addr.getOffsetReg());
1148 MIB.addImm(IsSigned);
1149 MIB.addImm(Addr.getShift() != 0);
1150 } else
1151 MIB.addReg(Addr.getReg()).addImm(Offset);
1152 }
1153
1154 if (MMO)
1155 MIB.addMemOperand(MMO);
1156}
1157
1158Register AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1159 const Value *RHS, bool SetFlags,
1160 bool WantResult, bool IsZExt) {
1162 bool NeedExtend = false;
1163 switch (RetVT.SimpleTy) {
1164 default:
1165 return Register();
1166 case MVT::i1:
1167 NeedExtend = true;
1168 break;
1169 case MVT::i8:
1170 NeedExtend = true;
1171 ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1172 break;
1173 case MVT::i16:
1174 NeedExtend = true;
1175 ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1176 break;
1177 case MVT::i32: // fall-through
1178 case MVT::i64:
1179 break;
1180 }
1181 MVT SrcVT = RetVT;
1182 RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1183
1184 // Canonicalize immediates to the RHS first.
1185 if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1186 std::swap(LHS, RHS);
1187
1188 // Canonicalize mul by power of 2 to the RHS.
1189 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1190 if (isMulPowOf2(LHS))
1191 std::swap(LHS, RHS);
1192
1193 // Canonicalize shift immediate to the RHS.
1194 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1195 if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1196 if (isa<ConstantInt>(SI->getOperand(1)))
1197 if (SI->getOpcode() == Instruction::Shl ||
1198 SI->getOpcode() == Instruction::LShr ||
1199 SI->getOpcode() == Instruction::AShr )
1200 std::swap(LHS, RHS);
1201
1202 Register LHSReg = getRegForValue(LHS);
1203 if (!LHSReg)
1204 return Register();
1205
1206 if (NeedExtend)
1207 LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1208
1209 Register ResultReg;
1210 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1211 uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1212 if (C->isNegative())
1213 ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
1214 WantResult);
1215 else
1216 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
1217 WantResult);
1218 } else if (const auto *C = dyn_cast<Constant>(RHS))
1219 if (C->isNullValue())
1220 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
1221
1222 if (ResultReg)
1223 return ResultReg;
1224
1225 // Only extend the RHS within the instruction if there is a valid extend type.
1226 if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1227 isValueAvailable(RHS)) {
1228 Register RHSReg = getRegForValue(RHS);
1229 if (!RHSReg)
1230 return Register();
1231 return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
1232 SetFlags, WantResult);
1233 }
1234
1235 // Check if the mul can be folded into the instruction.
1236 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1237 if (isMulPowOf2(RHS)) {
1238 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1239 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1240
1241 if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1242 if (C->getValue().isPowerOf2())
1243 std::swap(MulLHS, MulRHS);
1244
1245 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1246 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1247 Register RHSReg = getRegForValue(MulLHS);
1248 if (!RHSReg)
1249 return Register();
1250 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
1251 ShiftVal, SetFlags, WantResult);
1252 if (ResultReg)
1253 return ResultReg;
1254 }
1255 }
1256
1257 // Check if the shift can be folded into the instruction.
1258 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1259 if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1260 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1262 switch (SI->getOpcode()) {
1263 default: break;
1264 case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1265 case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1266 case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1267 }
1268 uint64_t ShiftVal = C->getZExtValue();
1269 if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1270 Register RHSReg = getRegForValue(SI->getOperand(0));
1271 if (!RHSReg)
1272 return Register();
1273 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
1274 ShiftVal, SetFlags, WantResult);
1275 if (ResultReg)
1276 return ResultReg;
1277 }
1278 }
1279 }
1280 }
1281
1282 Register RHSReg = getRegForValue(RHS);
1283 if (!RHSReg)
1284 return Register();
1285
1286 if (NeedExtend)
1287 RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1288
1289 return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
1290}
1291
1292Register AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, Register LHSReg,
1293 Register RHSReg, bool SetFlags,
1294 bool WantResult) {
1295 assert(LHSReg && RHSReg && "Invalid register number.");
1296
1297 if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1298 RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1299 return Register();
1300
1301 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1302 return Register();
1303
1304 static const unsigned OpcTable[2][2][2] = {
1305 { { AArch64::SUBWrr, AArch64::SUBXrr },
1306 { AArch64::ADDWrr, AArch64::ADDXrr } },
1307 { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1308 { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1309 };
1310 bool Is64Bit = RetVT == MVT::i64;
1311 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1312 const TargetRegisterClass *RC =
1313 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1314 Register ResultReg;
1315 if (WantResult)
1316 ResultReg = createResultReg(RC);
1317 else
1318 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1319
1320 const MCInstrDesc &II = TII.get(Opc);
1321 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1322 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1323 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1324 .addReg(LHSReg)
1325 .addReg(RHSReg);
1326 return ResultReg;
1327}
1328
1329Register AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, Register LHSReg,
1330 uint64_t Imm, bool SetFlags,
1331 bool WantResult) {
1332 assert(LHSReg && "Invalid register number.");
1333
1334 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1335 return Register();
1336
1337 unsigned ShiftImm;
1338 if (isUInt<12>(Imm))
1339 ShiftImm = 0;
1340 else if ((Imm & 0xfff000) == Imm) {
1341 ShiftImm = 12;
1342 Imm >>= 12;
1343 } else
1344 return Register();
1345
1346 static const unsigned OpcTable[2][2][2] = {
1347 { { AArch64::SUBWri, AArch64::SUBXri },
1348 { AArch64::ADDWri, AArch64::ADDXri } },
1349 { { AArch64::SUBSWri, AArch64::SUBSXri },
1350 { AArch64::ADDSWri, AArch64::ADDSXri } }
1351 };
1352 bool Is64Bit = RetVT == MVT::i64;
1353 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1354 const TargetRegisterClass *RC;
1355 if (SetFlags)
1356 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1357 else
1358 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1359 Register ResultReg;
1360 if (WantResult)
1361 ResultReg = createResultReg(RC);
1362 else
1363 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1364
1365 const MCInstrDesc &II = TII.get(Opc);
1366 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1367 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1368 .addReg(LHSReg)
1369 .addImm(Imm)
1370 .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1371 return ResultReg;
1372}
1373
1374Register AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, Register LHSReg,
1375 Register RHSReg,
1377 uint64_t ShiftImm, bool SetFlags,
1378 bool WantResult) {
1379 assert(LHSReg && RHSReg && "Invalid register number.");
1380 assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1381 RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1382
1383 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1384 return Register();
1385
1386 // Don't deal with undefined shifts.
1387 if (ShiftImm >= RetVT.getSizeInBits())
1388 return Register();
1389
1390 static const unsigned OpcTable[2][2][2] = {
1391 { { AArch64::SUBWrs, AArch64::SUBXrs },
1392 { AArch64::ADDWrs, AArch64::ADDXrs } },
1393 { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1394 { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1395 };
1396 bool Is64Bit = RetVT == MVT::i64;
1397 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1398 const TargetRegisterClass *RC =
1399 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1400 Register ResultReg;
1401 if (WantResult)
1402 ResultReg = createResultReg(RC);
1403 else
1404 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1405
1406 const MCInstrDesc &II = TII.get(Opc);
1407 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1408 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1409 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1410 .addReg(LHSReg)
1411 .addReg(RHSReg)
1412 .addImm(getShifterImm(ShiftType, ShiftImm));
1413 return ResultReg;
1414}
1415
1416Register AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, Register LHSReg,
1417 Register RHSReg,
1419 uint64_t ShiftImm, bool SetFlags,
1420 bool WantResult) {
1421 assert(LHSReg && RHSReg && "Invalid register number.");
1422 assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1423 RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1424
1425 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1426 return Register();
1427
1428 if (ShiftImm >= 4)
1429 return Register();
1430
1431 static const unsigned OpcTable[2][2][2] = {
1432 { { AArch64::SUBWrx, AArch64::SUBXrx },
1433 { AArch64::ADDWrx, AArch64::ADDXrx } },
1434 { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1435 { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1436 };
1437 bool Is64Bit = RetVT == MVT::i64;
1438 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1439 const TargetRegisterClass *RC = nullptr;
1440 if (SetFlags)
1441 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1442 else
1443 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1444 Register ResultReg;
1445 if (WantResult)
1446 ResultReg = createResultReg(RC);
1447 else
1448 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1449
1450 const MCInstrDesc &II = TII.get(Opc);
1451 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1452 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1453 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1454 .addReg(LHSReg)
1455 .addReg(RHSReg)
1456 .addImm(getArithExtendImm(ExtType, ShiftImm));
1457 return ResultReg;
1458}
1459
1460bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1461 Type *Ty = LHS->getType();
1462 EVT EVT = TLI.getValueType(DL, Ty, true);
1463 if (!EVT.isSimple())
1464 return false;
1465 MVT VT = EVT.getSimpleVT();
1466
1467 switch (VT.SimpleTy) {
1468 default:
1469 return false;
1470 case MVT::i1:
1471 case MVT::i8:
1472 case MVT::i16:
1473 case MVT::i32:
1474 case MVT::i64:
1475 return emitICmp(VT, LHS, RHS, IsZExt);
1476 case MVT::f32:
1477 case MVT::f64:
1478 return emitFCmp(VT, LHS, RHS);
1479 }
1480}
1481
1482bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1483 bool IsZExt) {
1484 return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1485 IsZExt)
1486 .isValid();
1487}
1488
1489bool AArch64FastISel::emitICmp_ri(MVT RetVT, Register LHSReg, uint64_t Imm) {
1490 return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
1491 /*SetFlags=*/true, /*WantResult=*/false)
1492 .isValid();
1493}
1494
1495bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1496 if (RetVT != MVT::f32 && RetVT != MVT::f64)
1497 return false;
1498
1499 // Check to see if the 2nd operand is a constant that we can encode directly
1500 // in the compare.
1501 bool UseImm = false;
1502 if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1503 if (CFP->isZero() && !CFP->isNegative())
1504 UseImm = true;
1505
1506 Register LHSReg = getRegForValue(LHS);
1507 if (!LHSReg)
1508 return false;
1509
1510 if (UseImm) {
1511 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1512 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1513 .addReg(LHSReg);
1514 return true;
1515 }
1516
1517 Register RHSReg = getRegForValue(RHS);
1518 if (!RHSReg)
1519 return false;
1520
1521 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1522 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1523 .addReg(LHSReg)
1524 .addReg(RHSReg);
1525 return true;
1526}
1527
1528Register AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1529 bool SetFlags, bool WantResult, bool IsZExt) {
1530 return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1531 IsZExt);
1532}
1533
1534/// This method is a wrapper to simplify add emission.
1535///
1536/// First try to emit an add with an immediate operand using emitAddSub_ri. If
1537/// that fails, then try to materialize the immediate into a register and use
1538/// emitAddSub_rr instead.
1539Register AArch64FastISel::emitAdd_ri_(MVT VT, Register Op0, int64_t Imm) {
1540 Register ResultReg;
1541 if (Imm < 0)
1542 ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
1543 else
1544 ResultReg = emitAddSub_ri(true, VT, Op0, Imm);
1545
1546 if (ResultReg)
1547 return ResultReg;
1548
1549 Register CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1550 if (!CReg)
1551 return Register();
1552
1553 ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
1554 return ResultReg;
1555}
1556
1557Register AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1558 bool SetFlags, bool WantResult, bool IsZExt) {
1559 return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1560 IsZExt);
1561}
1562
1563Register AArch64FastISel::emitSubs_rr(MVT RetVT, Register LHSReg,
1564 Register RHSReg, bool WantResult) {
1565 return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
1566 /*SetFlags=*/true, WantResult);
1567}
1568
1569Register AArch64FastISel::emitSubs_rs(MVT RetVT, Register LHSReg,
1570 Register RHSReg,
1572 uint64_t ShiftImm, bool WantResult) {
1573 return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
1574 ShiftImm, /*SetFlags=*/true, WantResult);
1575}
1576
1577Register AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1578 const Value *LHS, const Value *RHS) {
1579 // Canonicalize immediates to the RHS first.
1581 std::swap(LHS, RHS);
1582
1583 // Canonicalize mul by power-of-2 to the RHS.
1584 if (LHS->hasOneUse() && isValueAvailable(LHS))
1585 if (isMulPowOf2(LHS))
1586 std::swap(LHS, RHS);
1587
1588 // Canonicalize shift immediate to the RHS.
1589 if (LHS->hasOneUse() && isValueAvailable(LHS))
1590 if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1591 if (isa<ConstantInt>(SI->getOperand(1)))
1592 std::swap(LHS, RHS);
1593
1594 Register LHSReg = getRegForValue(LHS);
1595 if (!LHSReg)
1596 return Register();
1597
1598 Register ResultReg;
1599 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1600 uint64_t Imm = C->getZExtValue();
1601 ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
1602 }
1603 if (ResultReg)
1604 return ResultReg;
1605
1606 // Check if the mul can be folded into the instruction.
1607 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1608 if (isMulPowOf2(RHS)) {
1609 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1610 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1611
1612 if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1613 if (C->getValue().isPowerOf2())
1614 std::swap(MulLHS, MulRHS);
1615
1616 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1617 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1618
1619 Register RHSReg = getRegForValue(MulLHS);
1620 if (!RHSReg)
1621 return Register();
1622 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1623 if (ResultReg)
1624 return ResultReg;
1625 }
1626 }
1627
1628 // Check if the shift can be folded into the instruction.
1629 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1630 if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1631 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1632 uint64_t ShiftVal = C->getZExtValue();
1633 Register RHSReg = getRegForValue(SI->getOperand(0));
1634 if (!RHSReg)
1635 return Register();
1636 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1637 if (ResultReg)
1638 return ResultReg;
1639 }
1640 }
1641
1642 Register RHSReg = getRegForValue(RHS);
1643 if (!RHSReg)
1644 return Register();
1645
1646 MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1647 ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
1648 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1649 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1650 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1651 }
1652 return ResultReg;
1653}
1654
1655Register AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1656 Register LHSReg, uint64_t Imm) {
1657 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1658 "ISD nodes are not consecutive!");
1659 static const unsigned OpcTable[3][2] = {
1660 { AArch64::ANDWri, AArch64::ANDXri },
1661 { AArch64::ORRWri, AArch64::ORRXri },
1662 { AArch64::EORWri, AArch64::EORXri }
1663 };
1664 const TargetRegisterClass *RC;
1665 unsigned Opc;
1666 unsigned RegSize;
1667 switch (RetVT.SimpleTy) {
1668 default:
1669 return Register();
1670 case MVT::i1:
1671 case MVT::i8:
1672 case MVT::i16:
1673 case MVT::i32: {
1674 unsigned Idx = ISDOpc - ISD::AND;
1675 Opc = OpcTable[Idx][0];
1676 RC = &AArch64::GPR32spRegClass;
1677 RegSize = 32;
1678 break;
1679 }
1680 case MVT::i64:
1681 Opc = OpcTable[ISDOpc - ISD::AND][1];
1682 RC = &AArch64::GPR64spRegClass;
1683 RegSize = 64;
1684 break;
1685 }
1686
1688 return Register();
1689
1690 Register ResultReg =
1691 fastEmitInst_ri(Opc, RC, LHSReg,
1693 if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1694 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1695 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1696 }
1697 return ResultReg;
1698}
1699
1700Register AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1701 Register LHSReg, Register RHSReg,
1702 uint64_t ShiftImm) {
1703 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1704 "ISD nodes are not consecutive!");
1705 static const unsigned OpcTable[3][2] = {
1706 { AArch64::ANDWrs, AArch64::ANDXrs },
1707 { AArch64::ORRWrs, AArch64::ORRXrs },
1708 { AArch64::EORWrs, AArch64::EORXrs }
1709 };
1710
1711 // Don't deal with undefined shifts.
1712 if (ShiftImm >= RetVT.getSizeInBits())
1713 return Register();
1714
1715 const TargetRegisterClass *RC;
1716 unsigned Opc;
1717 switch (RetVT.SimpleTy) {
1718 default:
1719 return Register();
1720 case MVT::i1:
1721 case MVT::i8:
1722 case MVT::i16:
1723 case MVT::i32:
1724 Opc = OpcTable[ISDOpc - ISD::AND][0];
1725 RC = &AArch64::GPR32RegClass;
1726 break;
1727 case MVT::i64:
1728 Opc = OpcTable[ISDOpc - ISD::AND][1];
1729 RC = &AArch64::GPR64RegClass;
1730 break;
1731 }
1732 Register ResultReg =
1733 fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
1735 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1736 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1737 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1738 }
1739 return ResultReg;
1740}
1741
1742Register AArch64FastISel::emitAnd_ri(MVT RetVT, Register LHSReg, uint64_t Imm) {
1743 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);
1744}
1745
// Emit a load of type VT from Addr, producing a value of type RetVT.
// WantZExt selects zero- vs sign-extension when RetVT is wider than VT.
// Returns the result register, or an invalid Register on failure.
Register AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
                                   bool WantZExt, MachineMemOperand *MMO) {
  // NOTE(review): bails out when misaligned accesses are NOT allowed for VT —
  // fast-isel does not track alignment here, so it conservatively gives up.
  if (!TLI.allowsMisalignedMemoryAccesses(VT))
    return Register();

  // Simplify this down to something we can handle.
  if (!simplifyAddress(Addr, VT))
    return Register();

  unsigned ScaleFactor = getImplicitScaleFactor(VT);
  if (!ScaleFactor)
    llvm_unreachable("Unexpected value type.");

  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
  bool UseScaled = true;
  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
    UseScaled = false;
    ScaleFactor = 1;
  }

  // Opcode table indexed by [WantZExt][2 * AddrForm + IsRet64Bit][SizeLog2],
  // where AddrForm is 0 = unscaled imm, 1 = scaled imm, 2 = X-reg offset,
  // 3 = extended W-reg offset.
  static const unsigned GPOpcTable[2][8][4] = {
    // Sign-extend.
    { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
        AArch64::LDURXi  },
      { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
        AArch64::LDURXi  },
      { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
        AArch64::LDRXui  },
      { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
        AArch64::LDRXui  },
      { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
        AArch64::LDRXroX },
      { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
        AArch64::LDRXroX },
      { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
        AArch64::LDRXroW },
      { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
        AArch64::LDRXroW }
    },
    // Zero-extend.
    { { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
        AArch64::LDURXi  },
      { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
        AArch64::LDURXi  },
      { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
        AArch64::LDRXui  },
      { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
        AArch64::LDRXui  },
      { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
        AArch64::LDRXroX },
      { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
        AArch64::LDRXroX },
      { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
        AArch64::LDRXroW },
      { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
        AArch64::LDRXroW }
    }
  };

  // FP loads, indexed by [AddrForm][IsF64].
  static const unsigned FPOpcTable[4][2] = {
    { AArch64::LDURSi,  AArch64::LDURDi },
    { AArch64::LDRSui,  AArch64::LDRDui },
    { AArch64::LDRSroX, AArch64::LDRDroX },
    { AArch64::LDRSroW, AArch64::LDRDroW }
  };

  unsigned Opc;
  const TargetRegisterClass *RC;
  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
                      Addr.getOffsetReg();
  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
  // A W-register offset (UXTW/SXTW extend) selects the roW variants.
  if (Addr.getExtendType() == AArch64_AM::UXTW ||
      Addr.getExtendType() == AArch64_AM::SXTW)
    Idx++;

  bool IsRet64Bit = RetVT == MVT::i64;
  switch (VT.SimpleTy) {
  default:
    llvm_unreachable("Unexpected value type.");
  case MVT::i1: // Intentional fall-through.
  case MVT::i8:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    break;
  case MVT::i16:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    break;
  case MVT::i32:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    break;
  case MVT::i64:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
    RC = &AArch64::GPR64RegClass;
    break;
  case MVT::f32:
    Opc = FPOpcTable[Idx][0];
    RC = &AArch64::FPR32RegClass;
    break;
  case MVT::f64:
    Opc = FPOpcTable[Idx][1];
    RC = &AArch64::FPR64RegClass;
    break;
  }

  // Create the base instruction, then add the operands.
  Register ResultReg = createResultReg(RC);
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                                    TII.get(Opc), ResultReg);
  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);

  // Loading an i1 requires special handling.
  if (VT == MVT::i1) {
    Register ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
    assert(ANDReg && "Unexpected AND instruction emission failure.");
    ResultReg = ANDReg;
  }

  // For zero-extending loads to 64bit we emit a 32bit load and then convert
  // the 32bit reg to a 64bit reg.
  if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
    Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::SUBREG_TO_REG), Reg64)
        .addReg(ResultReg, getKillRegState(true))
        .addImm(AArch64::sub_32);
    ResultReg = Reg64;
  }
  return ResultReg;
}
1881
1882bool AArch64FastISel::selectAddSub(const Instruction *I) {
1883 MVT VT;
1884 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1885 return false;
1886
1887 if (VT.isVector())
1888 return selectOperator(I, I->getOpcode());
1889
1890 Register ResultReg;
1891 switch (I->getOpcode()) {
1892 default:
1893 llvm_unreachable("Unexpected instruction.");
1894 case Instruction::Add:
1895 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1896 break;
1897 case Instruction::Sub:
1898 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1899 break;
1900 }
1901 if (!ResultReg)
1902 return false;
1903
1904 updateValueMap(I, ResultReg);
1905 return true;
1906}
1907
1908bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1909 MVT VT;
1910 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1911 return false;
1912
1913 if (VT.isVector())
1914 return selectOperator(I, I->getOpcode());
1915
1916 Register ResultReg;
1917 switch (I->getOpcode()) {
1918 default:
1919 llvm_unreachable("Unexpected instruction.");
1920 case Instruction::And:
1921 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1922 break;
1923 case Instruction::Or:
1924 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1925 break;
1926 case Instruction::Xor:
1927 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1928 break;
1929 }
1930 if (!ResultReg)
1931 return false;
1932
1933 updateValueMap(I, ResultReg);
1934 return true;
1935}
1936
1937bool AArch64FastISel::selectLoad(const Instruction *I) {
1938 MVT VT;
1939 // Verify we have a legal type before going any further. Currently, we handle
1940 // simple types that will directly fit in a register (i32/f32/i64/f64) or
1941 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1942 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1943 cast<LoadInst>(I)->isAtomic())
1944 return false;
1945
1946 const Value *SV = I->getOperand(0);
1947 if (TLI.supportSwiftError()) {
1948 // Swifterror values can come from either a function parameter with
1949 // swifterror attribute or an alloca with swifterror attribute.
1950 if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1951 if (Arg->hasSwiftErrorAttr())
1952 return false;
1953 }
1954
1955 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1956 if (Alloca->isSwiftError())
1957 return false;
1958 }
1959 }
1960
1961 // See if we can handle this address.
1962 Address Addr;
1963 if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1964 return false;
1965
1966 // Fold the following sign-/zero-extend into the load instruction.
1967 bool WantZExt = true;
1968 MVT RetVT = VT;
1969 const Value *IntExtVal = nullptr;
1970 if (I->hasOneUse()) {
1971 if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1972 if (isTypeSupported(ZE->getType(), RetVT))
1973 IntExtVal = ZE;
1974 else
1975 RetVT = VT;
1976 } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1977 if (isTypeSupported(SE->getType(), RetVT))
1978 IntExtVal = SE;
1979 else
1980 RetVT = VT;
1981 WantZExt = false;
1982 }
1983 }
1984
1985 Register ResultReg =
1986 emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1987 if (!ResultReg)
1988 return false;
1989
1990 // There are a few different cases we have to handle, because the load or the
1991 // sign-/zero-extend might not be selected by FastISel if we fall-back to
1992 // SelectionDAG. There is also an ordering issue when both instructions are in
1993 // different basic blocks.
1994 // 1.) The load instruction is selected by FastISel, but the integer extend
1995 // not. This usually happens when the integer extend is in a different
1996 // basic block and SelectionDAG took over for that basic block.
1997 // 2.) The load instruction is selected before the integer extend. This only
1998 // happens when the integer extend is in a different basic block.
1999 // 3.) The load instruction is selected by SelectionDAG and the integer extend
2000 // by FastISel. This happens if there are instructions between the load
2001 // and the integer extend that couldn't be selected by FastISel.
2002 if (IntExtVal) {
2003 // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2004 // could select it. Emit a copy to subreg if necessary. FastISel will remove
2005 // it when it selects the integer extend.
2006 Register Reg = lookUpRegForValue(IntExtVal);
2007 auto *MI = MRI.getUniqueVRegDef(Reg);
2008 if (!MI) {
2009 if (RetVT == MVT::i64 && VT <= MVT::i32) {
2010 if (WantZExt) {
2011 // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2012 MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2013 ResultReg = std::prev(I)->getOperand(0).getReg();
2014 removeDeadCode(I, std::next(I));
2015 } else
2016 ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2017 AArch64::sub_32);
2018 }
2019 updateValueMap(I, ResultReg);
2020 return true;
2021 }
2022
2023 // The integer extend has already been emitted - delete all the instructions
2024 // that have been emitted by the integer extend lowering code and use the
2025 // result from the load instruction directly.
2026 while (MI) {
2027 Reg = 0;
2028 for (auto &Opnd : MI->uses()) {
2029 if (Opnd.isReg()) {
2030 Reg = Opnd.getReg();
2031 break;
2032 }
2033 }
2035 removeDeadCode(I, std::next(I));
2036 MI = nullptr;
2037 if (Reg)
2038 MI = MRI.getUniqueVRegDef(Reg);
2039 }
2040 updateValueMap(IntExtVal, ResultReg);
2041 return true;
2042 }
2043
2044 updateValueMap(I, ResultReg);
2045 return true;
2046}
2047
2048bool AArch64FastISel::emitStoreRelease(MVT VT, Register SrcReg,
2049 Register AddrReg,
2050 MachineMemOperand *MMO) {
2051 unsigned Opc;
2052 switch (VT.SimpleTy) {
2053 default: return false;
2054 case MVT::i8: Opc = AArch64::STLRB; break;
2055 case MVT::i16: Opc = AArch64::STLRH; break;
2056 case MVT::i32: Opc = AArch64::STLRW; break;
2057 case MVT::i64: Opc = AArch64::STLRX; break;
2058 }
2059
2060 const MCInstrDesc &II = TII.get(Opc);
2061 SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2062 AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2063 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2064 .addReg(SrcReg)
2065 .addReg(AddrReg)
2066 .addMemOperand(MMO);
2067 return true;
2068}
2069
2070bool AArch64FastISel::emitStore(MVT VT, Register SrcReg, Address Addr,
2071 MachineMemOperand *MMO) {
2072 if (!TLI.allowsMisalignedMemoryAccesses(VT))
2073 return false;
2074
2075 // Simplify this down to something we can handle.
2076 if (!simplifyAddress(Addr, VT))
2077 return false;
2078
2079 unsigned ScaleFactor = getImplicitScaleFactor(VT);
2080 if (!ScaleFactor)
2081 llvm_unreachable("Unexpected value type.");
2082
2083 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2084 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2085 bool UseScaled = true;
2086 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2087 UseScaled = false;
2088 ScaleFactor = 1;
2089 }
2090
2091 static const unsigned OpcTable[4][6] = {
2092 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2093 AArch64::STURSi, AArch64::STURDi },
2094 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2095 AArch64::STRSui, AArch64::STRDui },
2096 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2097 AArch64::STRSroX, AArch64::STRDroX },
2098 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2099 AArch64::STRSroW, AArch64::STRDroW }
2100 };
2101
2102 unsigned Opc;
2103 bool VTIsi1 = false;
2104 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2105 Addr.getOffsetReg();
2106 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2107 if (Addr.getExtendType() == AArch64_AM::UXTW ||
2108 Addr.getExtendType() == AArch64_AM::SXTW)
2109 Idx++;
2110
2111 switch (VT.SimpleTy) {
2112 default: llvm_unreachable("Unexpected value type.");
2113 case MVT::i1: VTIsi1 = true; [[fallthrough]];
2114 case MVT::i8: Opc = OpcTable[Idx][0]; break;
2115 case MVT::i16: Opc = OpcTable[Idx][1]; break;
2116 case MVT::i32: Opc = OpcTable[Idx][2]; break;
2117 case MVT::i64: Opc = OpcTable[Idx][3]; break;
2118 case MVT::f32: Opc = OpcTable[Idx][4]; break;
2119 case MVT::f64: Opc = OpcTable[Idx][5]; break;
2120 }
2121
2122 // Storing an i1 requires special handling.
2123 if (VTIsi1 && SrcReg != AArch64::WZR) {
2124 Register ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
2125 assert(ANDReg && "Unexpected AND instruction emission failure.");
2126 SrcReg = ANDReg;
2127 }
2128 // Create the base instruction, then add the operands.
2129 const MCInstrDesc &II = TII.get(Opc);
2130 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2131 MachineInstrBuilder MIB =
2132 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(SrcReg);
2133 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2134
2135 return true;
2136}
2137
2138bool AArch64FastISel::selectStore(const Instruction *I) {
2139 MVT VT;
2140 const Value *Op0 = I->getOperand(0);
2141 // Verify we have a legal type before going any further. Currently, we handle
2142 // simple types that will directly fit in a register (i32/f32/i64/f64) or
2143 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2144 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2145 return false;
2146
2147 const Value *PtrV = I->getOperand(1);
2148 if (TLI.supportSwiftError()) {
2149 // Swifterror values can come from either a function parameter with
2150 // swifterror attribute or an alloca with swifterror attribute.
2151 if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2152 if (Arg->hasSwiftErrorAttr())
2153 return false;
2154 }
2155
2156 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2157 if (Alloca->isSwiftError())
2158 return false;
2159 }
2160 }
2161
2162 // Get the value to be stored into a register. Use the zero register directly
2163 // when possible to avoid an unnecessary copy and a wasted register.
2164 Register SrcReg;
2165 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2166 if (CI->isZero())
2167 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2168 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2169 if (CF->isZero() && !CF->isNegative()) {
2171 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2172 }
2173 }
2174
2175 if (!SrcReg)
2176 SrcReg = getRegForValue(Op0);
2177
2178 if (!SrcReg)
2179 return false;
2180
2181 auto *SI = cast<StoreInst>(I);
2182
2183 // Try to emit a STLR for seq_cst/release.
2184 if (SI->isAtomic()) {
2185 AtomicOrdering Ord = SI->getOrdering();
2186 // The non-atomic instructions are sufficient for relaxed stores.
2187 if (isReleaseOrStronger(Ord)) {
2188 // The STLR addressing mode only supports a base reg; pass that directly.
2189 Register AddrReg = getRegForValue(PtrV);
2190 if (!AddrReg)
2191 return false;
2192 return emitStoreRelease(VT, SrcReg, AddrReg,
2193 createMachineMemOperandFor(I));
2194 }
2195 }
2196
2197 // See if we can handle this address.
2198 Address Addr;
2199 if (!computeAddress(PtrV, Addr, Op0->getType()))
2200 return false;
2201
2202 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2203 return false;
2204 return true;
2205}
2206
2208 switch (Pred) {
2209 case CmpInst::FCMP_ONE:
2210 case CmpInst::FCMP_UEQ:
2211 default:
2212 // AL is our "false" for now. The other two need more compares.
2213 return AArch64CC::AL;
2214 case CmpInst::ICMP_EQ:
2215 case CmpInst::FCMP_OEQ:
2216 return AArch64CC::EQ;
2217 case CmpInst::ICMP_SGT:
2218 case CmpInst::FCMP_OGT:
2219 return AArch64CC::GT;
2220 case CmpInst::ICMP_SGE:
2221 case CmpInst::FCMP_OGE:
2222 return AArch64CC::GE;
2223 case CmpInst::ICMP_UGT:
2224 case CmpInst::FCMP_UGT:
2225 return AArch64CC::HI;
2226 case CmpInst::FCMP_OLT:
2227 return AArch64CC::MI;
2228 case CmpInst::ICMP_ULE:
2229 case CmpInst::FCMP_OLE:
2230 return AArch64CC::LS;
2231 case CmpInst::FCMP_ORD:
2232 return AArch64CC::VC;
2233 case CmpInst::FCMP_UNO:
2234 return AArch64CC::VS;
2235 case CmpInst::FCMP_UGE:
2236 return AArch64CC::PL;
2237 case CmpInst::ICMP_SLT:
2238 case CmpInst::FCMP_ULT:
2239 return AArch64CC::LT;
2240 case CmpInst::ICMP_SLE:
2241 case CmpInst::FCMP_ULE:
2242 return AArch64CC::LE;
2243 case CmpInst::FCMP_UNE:
2244 case CmpInst::ICMP_NE:
2245 return AArch64CC::NE;
2246 case CmpInst::ICMP_UGE:
2247 return AArch64CC::HS;
2248 case CmpInst::ICMP_ULT:
2249 return AArch64CC::LO;
2250 }
2251}
2252
2253/// Try to emit a combined compare-and-branch instruction.
2254bool AArch64FastISel::emitCompareAndBranch(const CondBrInst *BI) {
2255 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2256 // will not be produced, as they are conditional branch instructions that do
2257 // not set flags.
2258 if (FuncInfo.MF->getFunction().hasFnAttribute(
2259 Attribute::SpeculativeLoadHardening))
2260 return false;
2261
2262 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2263 const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2264 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2265
2266 const Value *LHS = CI->getOperand(0);
2267 const Value *RHS = CI->getOperand(1);
2268
2269 MVT VT;
2270 if (!isTypeSupported(LHS->getType(), VT))
2271 return false;
2272
2273 unsigned BW = VT.getSizeInBits();
2274 if (BW > 64)
2275 return false;
2276
2277 MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0));
2278 MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1));
2279
2280 // Try to take advantage of fallthrough opportunities.
2281 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2282 std::swap(TBB, FBB);
2284 }
2285
2286 int TestBit = -1;
2287 bool IsCmpNE;
2288 switch (Predicate) {
2289 default:
2290 return false;
2291 case CmpInst::ICMP_EQ:
2292 case CmpInst::ICMP_NE:
2293 if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2294 std::swap(LHS, RHS);
2295
2296 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2297 return false;
2298
2299 if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2300 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2301 const Value *AndLHS = AI->getOperand(0);
2302 const Value *AndRHS = AI->getOperand(1);
2303
2304 if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2305 if (C->getValue().isPowerOf2())
2306 std::swap(AndLHS, AndRHS);
2307
2308 if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2309 if (C->getValue().isPowerOf2()) {
2310 TestBit = C->getValue().logBase2();
2311 LHS = AndLHS;
2312 }
2313 }
2314
2315 if (VT == MVT::i1)
2316 TestBit = 0;
2317
2318 IsCmpNE = Predicate == CmpInst::ICMP_NE;
2319 break;
2320 case CmpInst::ICMP_SLT:
2321 case CmpInst::ICMP_SGE:
2322 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2323 return false;
2324
2325 TestBit = BW - 1;
2326 IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2327 break;
2328 case CmpInst::ICMP_SGT:
2329 case CmpInst::ICMP_SLE:
2330 if (!isa<ConstantInt>(RHS))
2331 return false;
2332
2333 if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2334 return false;
2335
2336 TestBit = BW - 1;
2337 IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2338 break;
2339 } // end switch
2340
2341 static const unsigned OpcTable[2][2][2] = {
2342 { {AArch64::CBZW, AArch64::CBZX },
2343 {AArch64::CBNZW, AArch64::CBNZX} },
2344 { {AArch64::TBZW, AArch64::TBZX },
2345 {AArch64::TBNZW, AArch64::TBNZX} }
2346 };
2347
2348 bool IsBitTest = TestBit != -1;
2349 bool Is64Bit = BW == 64;
2350 if (TestBit < 32 && TestBit >= 0)
2351 Is64Bit = false;
2352
2353 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2354 const MCInstrDesc &II = TII.get(Opc);
2355
2356 Register SrcReg = getRegForValue(LHS);
2357 if (!SrcReg)
2358 return false;
2359
2360 if (BW == 64 && !Is64Bit)
2361 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);
2362
2363 if ((BW < 32) && !IsBitTest)
2364 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2365
2366 // Emit the combined compare and branch instruction.
2367 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2368 MachineInstrBuilder MIB =
2369 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
2370 .addReg(SrcReg);
2371 if (IsBitTest)
2372 MIB.addImm(TestBit);
2373 MIB.addMBB(TBB);
2374
2375 finishCondBranch(BI->getParent(), TBB, FBB);
2376 return true;
2377}
2378
2379bool AArch64FastISel::selectBranch(const Instruction *I) {
2380 const CondBrInst *BI = cast<CondBrInst>(I);
2381
2382 MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0));
2383 MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1));
2384
2385 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2386 if (CI->hasOneUse() && isValueAvailable(CI)) {
2387 // Try to optimize or fold the cmp.
2388 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2389 switch (Predicate) {
2390 default:
2391 break;
2393 fastEmitBranch(FBB, MIMD.getDL());
2394 return true;
2395 case CmpInst::FCMP_TRUE:
2396 fastEmitBranch(TBB, MIMD.getDL());
2397 return true;
2398 }
2399
2400 // Try to emit a combined compare-and-branch first.
2401 if (emitCompareAndBranch(BI))
2402 return true;
2403
2404 // Try to take advantage of fallthrough opportunities.
2405 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2406 std::swap(TBB, FBB);
2408 }
2409
2410 // Emit the cmp.
2411 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2412 return false;
2413
2414 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2415 // instruction.
2416 AArch64CC::CondCode CC = getCompareCC(Predicate);
2418 switch (Predicate) {
2419 default:
2420 break;
2421 case CmpInst::FCMP_UEQ:
2422 ExtraCC = AArch64CC::EQ;
2423 CC = AArch64CC::VS;
2424 break;
2425 case CmpInst::FCMP_ONE:
2426 ExtraCC = AArch64CC::MI;
2427 CC = AArch64CC::GT;
2428 break;
2429 }
2430 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2431
2432 // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2433 if (ExtraCC != AArch64CC::AL) {
2434 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2435 .addImm(ExtraCC)
2436 .addMBB(TBB);
2437 }
2438
2439 // Emit the branch.
2440 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2441 .addImm(CC)
2442 .addMBB(TBB);
2443
2444 finishCondBranch(BI->getParent(), TBB, FBB);
2445 return true;
2446 }
2447 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2448 uint64_t Imm = CI->getZExtValue();
2449 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2450 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B))
2451 .addMBB(Target);
2452
2453 // Obtain the branch probability and add the target to the successor list.
2454 if (FuncInfo.BPI) {
2455 auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2456 BI->getParent(), Target->getBasicBlock());
2457 FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2458 } else
2459 FuncInfo.MBB->addSuccessorWithoutProb(Target);
2460 return true;
2461 } else {
2463 if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2464 // Fake request the condition, otherwise the intrinsic might be completely
2465 // optimized away.
2466 Register CondReg = getRegForValue(BI->getCondition());
2467 if (!CondReg)
2468 return false;
2469
2470 // Emit the branch.
2471 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2472 .addImm(CC)
2473 .addMBB(TBB);
2474
2475 finishCondBranch(BI->getParent(), TBB, FBB);
2476 return true;
2477 }
2478 }
2479
2480 Register CondReg = getRegForValue(BI->getCondition());
2481 if (!CondReg)
2482 return false;
2483
2484 // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2485 unsigned Opcode = AArch64::TBNZW;
2486 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2487 std::swap(TBB, FBB);
2488 Opcode = AArch64::TBZW;
2489 }
2490
2491 const MCInstrDesc &II = TII.get(Opcode);
2492 Register ConstrainedCondReg
2493 = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2494 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2495 .addReg(ConstrainedCondReg)
2496 .addImm(0)
2497 .addMBB(TBB);
2498
2499 finishCondBranch(BI->getParent(), TBB, FBB);
2500 return true;
2501}
2502
2503bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2504 const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2505 Register AddrReg = getRegForValue(BI->getOperand(0));
2506 if (!AddrReg)
2507 return false;
2508
2509 // Authenticated indirectbr is not implemented yet.
2510 if (FuncInfo.MF->getFunction().hasFnAttribute("ptrauth-indirect-gotos"))
2511 return false;
2512
2513 // Emit the indirect branch.
2514 const MCInstrDesc &II = TII.get(AArch64::BR);
2515 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2516 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(AddrReg);
2517
2518 // Make sure the CFG is up-to-date.
2519 for (const auto *Succ : BI->successors())
2520 FuncInfo.MBB->addSuccessor(FuncInfo.getMBB(Succ));
2521
2522 return true;
2523}
2524
2525bool AArch64FastISel::selectCmp(const Instruction *I) {
2526 const CmpInst *CI = cast<CmpInst>(I);
2527
2528 // Vectors of i1 are weird: bail out.
2529 if (CI->getType()->isVectorTy())
2530 return false;
2531
2532 // Try to optimize or fold the cmp.
2533 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2534 Register ResultReg;
2535 switch (Predicate) {
2536 default:
2537 break;
2539 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2540 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2541 TII.get(TargetOpcode::COPY), ResultReg)
2542 .addReg(AArch64::WZR, getKillRegState(true));
2543 break;
2544 case CmpInst::FCMP_TRUE:
2545 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2546 break;
2547 }
2548
2549 if (ResultReg) {
2550 updateValueMap(I, ResultReg);
2551 return true;
2552 }
2553
2554 // Emit the cmp.
2555 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2556 return false;
2557
2558 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2559
2560 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2561 // condition codes are inverted, because they are used by CSINC.
2562 static unsigned CondCodeTable[2][2] = {
2565 };
2566 unsigned *CondCodes = nullptr;
2567 switch (Predicate) {
2568 default:
2569 break;
2570 case CmpInst::FCMP_UEQ:
2571 CondCodes = &CondCodeTable[0][0];
2572 break;
2573 case CmpInst::FCMP_ONE:
2574 CondCodes = &CondCodeTable[1][0];
2575 break;
2576 }
2577
2578 if (CondCodes) {
2579 Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2580 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2581 TmpReg1)
2582 .addReg(AArch64::WZR, getKillRegState(true))
2583 .addReg(AArch64::WZR, getKillRegState(true))
2584 .addImm(CondCodes[0]);
2585 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2586 ResultReg)
2587 .addReg(TmpReg1, getKillRegState(true))
2588 .addReg(AArch64::WZR, getKillRegState(true))
2589 .addImm(CondCodes[1]);
2590
2591 updateValueMap(I, ResultReg);
2592 return true;
2593 }
2594
2595 // Now set a register based on the comparison.
2596 AArch64CC::CondCode CC = getCompareCC(Predicate);
2597 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2598 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2599 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2600 ResultReg)
2601 .addReg(AArch64::WZR, getKillRegState(true))
2602 .addReg(AArch64::WZR, getKillRegState(true))
2603 .addImm(invertedCC);
2604
2605 updateValueMap(I, ResultReg);
2606 return true;
2607}
2608
2609/// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2610/// value.
2611bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2612 if (!SI->getType()->isIntegerTy(1))
2613 return false;
2614
2615 const Value *Src1Val, *Src2Val;
2616 unsigned Opc = 0;
2617 bool NeedExtraOp = false;
2618 if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2619 if (CI->isOne()) {
2620 Src1Val = SI->getCondition();
2621 Src2Val = SI->getFalseValue();
2622 Opc = AArch64::ORRWrr;
2623 } else {
2624 assert(CI->isZero());
2625 Src1Val = SI->getFalseValue();
2626 Src2Val = SI->getCondition();
2627 Opc = AArch64::BICWrr;
2628 }
2629 } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2630 if (CI->isOne()) {
2631 Src1Val = SI->getCondition();
2632 Src2Val = SI->getTrueValue();
2633 Opc = AArch64::ORRWrr;
2634 NeedExtraOp = true;
2635 } else {
2636 assert(CI->isZero());
2637 Src1Val = SI->getCondition();
2638 Src2Val = SI->getTrueValue();
2639 Opc = AArch64::ANDWrr;
2640 }
2641 }
2642
2643 if (!Opc)
2644 return false;
2645
2646 Register Src1Reg = getRegForValue(Src1Val);
2647 if (!Src1Reg)
2648 return false;
2649
2650 Register Src2Reg = getRegForValue(Src2Val);
2651 if (!Src2Reg)
2652 return false;
2653
2654 if (NeedExtraOp)
2655 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);
2656
2657 Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2658 Src2Reg);
2659 updateValueMap(SI, ResultReg);
2660 return true;
2661}
2662
2663bool AArch64FastISel::selectSelect(const Instruction *I) {
2664 assert(isa<SelectInst>(I) && "Expected a select instruction.");
2665 MVT VT;
2666 if (!isTypeSupported(I->getType(), VT))
2667 return false;
2668
2669 unsigned Opc;
2670 const TargetRegisterClass *RC;
2671 switch (VT.SimpleTy) {
2672 default:
2673 return false;
2674 case MVT::i1:
2675 case MVT::i8:
2676 case MVT::i16:
2677 case MVT::i32:
2678 Opc = AArch64::CSELWr;
2679 RC = &AArch64::GPR32RegClass;
2680 break;
2681 case MVT::i64:
2682 Opc = AArch64::CSELXr;
2683 RC = &AArch64::GPR64RegClass;
2684 break;
2685 case MVT::f32:
2686 Opc = AArch64::FCSELSrrr;
2687 RC = &AArch64::FPR32RegClass;
2688 break;
2689 case MVT::f64:
2690 Opc = AArch64::FCSELDrrr;
2691 RC = &AArch64::FPR64RegClass;
2692 break;
2693 }
2694
2695 const SelectInst *SI = cast<SelectInst>(I);
2696 const Value *Cond = SI->getCondition();
2699
2700 if (optimizeSelect(SI))
2701 return true;
2702
2703 // Try to pickup the flags, so we don't have to emit another compare.
2704 if (foldXALUIntrinsic(CC, I, Cond)) {
2705 // Fake request the condition to force emission of the XALU intrinsic.
2706 Register CondReg = getRegForValue(Cond);
2707 if (!CondReg)
2708 return false;
2709 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2710 isValueAvailable(Cond)) {
2711 const auto *Cmp = cast<CmpInst>(Cond);
2712 // Try to optimize or fold the cmp.
2713 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2714 const Value *FoldSelect = nullptr;
2715 switch (Predicate) {
2716 default:
2717 break;
2719 FoldSelect = SI->getFalseValue();
2720 break;
2721 case CmpInst::FCMP_TRUE:
2722 FoldSelect = SI->getTrueValue();
2723 break;
2724 }
2725
2726 if (FoldSelect) {
2727 Register SrcReg = getRegForValue(FoldSelect);
2728 if (!SrcReg)
2729 return false;
2730
2731 updateValueMap(I, SrcReg);
2732 return true;
2733 }
2734
2735 // Emit the cmp.
2736 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2737 return false;
2738
2739 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2740 CC = getCompareCC(Predicate);
2741 switch (Predicate) {
2742 default:
2743 break;
2744 case CmpInst::FCMP_UEQ:
2745 ExtraCC = AArch64CC::EQ;
2746 CC = AArch64CC::VS;
2747 break;
2748 case CmpInst::FCMP_ONE:
2749 ExtraCC = AArch64CC::MI;
2750 CC = AArch64CC::GT;
2751 break;
2752 }
2753 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2754 } else {
2755 Register CondReg = getRegForValue(Cond);
2756 if (!CondReg)
2757 return false;
2758
2759 const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2760 CondReg = constrainOperandRegClass(II, CondReg, 1);
2761
2762 // Emit a TST instruction (ANDS wzr, reg, #imm).
2763 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II,
2764 AArch64::WZR)
2765 .addReg(CondReg)
2767 }
2768
2769 Register Src1Reg = getRegForValue(SI->getTrueValue());
2770 Register Src2Reg = getRegForValue(SI->getFalseValue());
2771
2772 if (!Src1Reg || !Src2Reg)
2773 return false;
2774
2775 if (ExtraCC != AArch64CC::AL)
2776 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);
2777
2778 Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
2779 updateValueMap(I, ResultReg);
2780 return true;
2781}
2782
2783bool AArch64FastISel::selectFPExt(const Instruction *I) {
2784 Value *V = I->getOperand(0);
2785 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2786 return false;
2787
2788 Register Op = getRegForValue(V);
2789 if (Op == 0)
2790 return false;
2791
2792 Register ResultReg = createResultReg(&AArch64::FPR64RegClass);
2793 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr),
2794 ResultReg).addReg(Op);
2795 updateValueMap(I, ResultReg);
2796 return true;
2797}
2798
2799bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2800 Value *V = I->getOperand(0);
2801 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2802 return false;
2803
2804 Register Op = getRegForValue(V);
2805 if (Op == 0)
2806 return false;
2807
2808 Register ResultReg = createResultReg(&AArch64::FPR32RegClass);
2809 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr),
2810 ResultReg).addReg(Op);
2811 updateValueMap(I, ResultReg);
2812 return true;
2813}
2814
2815// FPToUI and FPToSI
2816bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2817 MVT DestVT;
2818 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2819 return false;
2820
2821 Register SrcReg = getRegForValue(I->getOperand(0));
2822 if (!SrcReg)
2823 return false;
2824
2825 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2826 if (SrcVT == MVT::f128 || SrcVT == MVT::f16 || SrcVT == MVT::bf16)
2827 return false;
2828
2829 unsigned Opc;
2830 if (SrcVT == MVT::f64) {
2831 if (Signed)
2832 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2833 else
2834 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2835 } else {
2836 if (Signed)
2837 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2838 else
2839 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2840 }
2841 Register ResultReg = createResultReg(
2842 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2843 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
2844 .addReg(SrcReg);
2845 updateValueMap(I, ResultReg);
2846 return true;
2847}
2848
2849bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2850 MVT DestVT;
2851 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2852 return false;
2853 // Let regular ISEL handle FP16
2854 if (DestVT == MVT::f16 || DestVT == MVT::bf16)
2855 return false;
2856
2857 assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2858 "Unexpected value type.");
2859
2860 Register SrcReg = getRegForValue(I->getOperand(0));
2861 if (!SrcReg)
2862 return false;
2863
2864 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2865
2866 // Handle sign-extension.
2867 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2868 SrcReg =
2869 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2870 if (!SrcReg)
2871 return false;
2872 }
2873
2874 unsigned Opc;
2875 if (SrcVT == MVT::i64) {
2876 if (Signed)
2877 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2878 else
2879 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2880 } else {
2881 if (Signed)
2882 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2883 else
2884 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2885 }
2886
2887 Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
2888 updateValueMap(I, ResultReg);
2889 return true;
2890}
2891
// Try to lower the function's formal arguments directly in FastISel, without
// falling back to SelectionDAG. Only the simple case is handled: C/Swift
// calling convention, no varargs, and at most 8 GPR plus 8 FPR arguments.
// Returns false to request the SelectionDAG fallback.
bool AArch64FastISel::fastLowerArguments() {
  if (!FuncInfo.CanLowerReturn)
    return false;

  const Function *F = FuncInfo.Fn;
  if (F->isVarArg())
    return false;

  CallingConv::ID CC = F->getCallingConv();
  if (CC != CallingConv::C && CC != CallingConv::Swift)
    return false;

  if (Subtarget->hasCustomCallingConv())
    return false;

  // Only handle simple cases of up to 8 GPR and FPR each.
  unsigned GPRCnt = 0;
  unsigned FPRCnt = 0;
  for (auto const &Arg : F->args()) {
    // Any ABI-modifying attribute takes us off the fast path.
    if (Arg.hasAttribute(Attribute::ByVal) ||
        Arg.hasAttribute(Attribute::InReg) ||
        Arg.hasAttribute(Attribute::StructRet) ||
        Arg.hasAttribute(Attribute::SwiftSelf) ||
        Arg.hasAttribute(Attribute::SwiftAsync) ||
        Arg.hasAttribute(Attribute::SwiftError) ||
        Arg.hasAttribute(Attribute::Nest))
      return false;

    // Aggregates may be split across several locations; bail out.
    Type *ArgTy = Arg.getType();
    if (ArgTy->isStructTy() || ArgTy->isArrayTy())
      return false;

    EVT ArgVT = TLI.getValueType(DL, ArgTy);
    if (!ArgVT.isSimple())
      return false;

    MVT VT = ArgVT.getSimpleVT().SimpleTy;
    if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
      return false;

    // Vector arguments need NEON; big-endian vector lowering is not handled.
    if (VT.isVector() &&
        (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
      return false;

    // Classify into GPR (integer) or FPR (FP / vector) and count usage.
    if (VT >= MVT::i1 && VT <= MVT::i64)
      ++GPRCnt;
    else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
             VT.is128BitVector())
      ++FPRCnt;
    else
      return false;

    // Beyond 8 of either class, arguments spill to the stack; bail out.
    if (GPRCnt > 8 || FPRCnt > 8)
      return false;
  }

  // Argument register tables, indexed by [register class][argument slot]:
  // row 0 = 32-bit GPR, 1 = 64-bit GPR, 2-5 = 16/32/64/128-bit FPR.
  static const MCPhysReg Registers[6][8] = {
    { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
      AArch64::W5, AArch64::W6, AArch64::W7 },
    { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
      AArch64::X5, AArch64::X6, AArch64::X7 },
    { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
      AArch64::H5, AArch64::H6, AArch64::H7 },
    { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
      AArch64::S5, AArch64::S6, AArch64::S7 },
    { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
      AArch64::D5, AArch64::D6, AArch64::D7 },
    { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
      AArch64::Q5, AArch64::Q6, AArch64::Q7 }
  };

  // Second pass: assign each argument its physical register and emit a copy
  // from the live-in into a fresh virtual register.
  unsigned GPRIdx = 0;
  unsigned FPRIdx = 0;
  for (auto const &Arg : F->args()) {
    MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
    unsigned SrcReg;
    const TargetRegisterClass *RC;
    if (VT >= MVT::i1 && VT <= MVT::i32) {
      SrcReg = Registers[0][GPRIdx++];
      RC = &AArch64::GPR32RegClass;
      // Sub-i32 integers are handled in their 32-bit container register.
      VT = MVT::i32;
    } else if (VT == MVT::i64) {
      SrcReg = Registers[1][GPRIdx++];
      RC = &AArch64::GPR64RegClass;
    } else if (VT == MVT::f16 || VT == MVT::bf16) {
      SrcReg = Registers[2][FPRIdx++];
      RC = &AArch64::FPR16RegClass;
    } else if (VT == MVT::f32) {
      SrcReg = Registers[3][FPRIdx++];
      RC = &AArch64::FPR32RegClass;
    } else if ((VT == MVT::f64) || VT.is64BitVector()) {
      SrcReg = Registers[4][FPRIdx++];
      RC = &AArch64::FPR64RegClass;
    } else if (VT.is128BitVector()) {
      SrcReg = Registers[5][FPRIdx++];
      RC = &AArch64::FPR128RegClass;
    } else
      llvm_unreachable("Unexpected value type.");

    Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
    // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
    // Without this, EmitLiveInCopies may eliminate the livein if its only
    // use is a bitcast (which isn't turned into an instruction).
    Register ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(DstReg, getKillRegState(true));
    updateValueMap(&Arg, ResultReg);
  }
  return true;
}
3003
3004bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3005 SmallVectorImpl<MVT> &OutVTs,
3006 SmallVectorImpl<Type *> &OrigTys,
3007 unsigned &NumBytes) {
3008 CallingConv::ID CC = CLI.CallConv;
3010 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3011 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, OrigTys,
3012 CCAssignFnForCall(CC));
3013
3014 // Get a count of how many bytes are to be pushed on the stack.
3015 NumBytes = CCInfo.getStackSize();
3016
3017 // Issue CALLSEQ_START
3018 unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3019 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown))
3020 .addImm(NumBytes).addImm(0);
3021
3022 // Process the args.
3023 for (CCValAssign &VA : ArgLocs) {
3024 const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3025 MVT ArgVT = OutVTs[VA.getValNo()];
3026
3027 Register ArgReg = getRegForValue(ArgVal);
3028 if (!ArgReg)
3029 return false;
3030
3031 // Handle arg promotion: SExt, ZExt, AExt.
3032 switch (VA.getLocInfo()) {
3033 case CCValAssign::Full:
3034 break;
3035 case CCValAssign::SExt: {
3036 MVT DestVT = VA.getLocVT();
3037 MVT SrcVT = ArgVT;
3038 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3039 if (!ArgReg)
3040 return false;
3041 break;
3042 }
3043 case CCValAssign::AExt:
3044 // Intentional fall-through.
3045 case CCValAssign::ZExt: {
3046 MVT DestVT = VA.getLocVT();
3047 MVT SrcVT = ArgVT;
3048 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3049 if (!ArgReg)
3050 return false;
3051 break;
3052 }
3053 default:
3054 llvm_unreachable("Unknown arg promotion!");
3055 }
3056
3057 // Now copy/store arg to correct locations.
3058 if (VA.isRegLoc() && !VA.needsCustom()) {
3059 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3060 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3061 CLI.OutRegs.push_back(VA.getLocReg());
3062 } else if (VA.needsCustom()) {
3063 // FIXME: Handle custom args.
3064 return false;
3065 } else {
3066 assert(VA.isMemLoc() && "Assuming store on stack.");
3067
3068 // Don't emit stores for undef values.
3069 if (isa<UndefValue>(ArgVal))
3070 continue;
3071
3072 // Need to store on the stack.
3073 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3074
3075 unsigned BEAlign = 0;
3076 if (ArgSize < 8 && !Subtarget->isLittleEndian())
3077 BEAlign = 8 - ArgSize;
3078
3079 Address Addr;
3080 Addr.setKind(Address::RegBase);
3081 Addr.setReg(AArch64::SP);
3082 Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3083
3084 Align Alignment = DL.getABITypeAlign(ArgVal->getType());
3085 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3086 MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3087 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3088
3089 if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3090 return false;
3091 }
3092 }
3093 return true;
3094}
3095
3096bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) {
3097 CallingConv::ID CC = CLI.CallConv;
3098
3099 // Issue CALLSEQ_END
3100 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3101 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp))
3102 .addImm(NumBytes).addImm(0);
3103
3104 // Now the return values.
3106 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3107 CCInfo.AnalyzeCallResult(CLI.Ins, CCAssignFnForCall(CC));
3108
3109 Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
3110 for (unsigned i = 0; i != RVLocs.size(); ++i) {
3111 CCValAssign &VA = RVLocs[i];
3112 MVT CopyVT = VA.getValVT();
3113 Register CopyReg = ResultReg + i;
3114
3115 // TODO: Handle big-endian results
3116 if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3117 return false;
3118
3119 // Copy result out of their specified physreg.
3120 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
3121 CopyReg)
3122 .addReg(VA.getLocReg());
3123 CLI.InRegs.push_back(VA.getLocReg());
3124 }
3125
3126 CLI.ResultReg = ResultReg;
3127 CLI.NumResultRegs = RVLocs.size();
3128
3129 return true;
3130}
3131
3132bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3133 CallingConv::ID CC = CLI.CallConv;
3134 bool IsTailCall = CLI.IsTailCall;
3135 bool IsVarArg = CLI.IsVarArg;
3136 const Value *Callee = CLI.Callee;
3137 MCSymbol *Symbol = CLI.Symbol;
3138
3139 if (!Callee && !Symbol)
3140 return false;
3141
3142 // Allow SelectionDAG isel to handle calls to functions like setjmp that need
3143 // a bti instruction following the call.
3144 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
3145 !Subtarget->noBTIAtReturnTwice() &&
3146 MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
3147 return false;
3148
3149 // Allow SelectionDAG isel to handle indirect calls with KCFI checks.
3150 if (CLI.CB && CLI.CB->isIndirectCall() &&
3151 CLI.CB->getOperandBundle(LLVMContext::OB_kcfi))
3152 return false;
3153
3154 // Allow SelectionDAG isel to handle tail calls.
3155 if (IsTailCall)
3156 return false;
3157
3158 // FIXME: we could and should support this, but for now correctness at -O0 is
3159 // more important.
3160 if (Subtarget->isTargetILP32())
3161 return false;
3162
3163 CodeModel::Model CM = TM.getCodeModel();
3164 // Only support the small-addressing and large code models.
3165 if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3166 return false;
3167
3168 // FIXME: Add large code model support for ELF.
3169 if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3170 return false;
3171
3172 // ELF -fno-plt compiled intrinsic calls do not have the nonlazybind
3173 // attribute. Check "RtLibUseGOT" instead.
3174 if (MF->getFunction().getParent()->getRtLibUseGOT())
3175 return false;
3176
3177 // Let SDISel handle vararg functions.
3178 if (IsVarArg)
3179 return false;
3180
3181 if (Subtarget->isWindowsArm64EC())
3182 return false;
3183
3184 for (auto Flag : CLI.OutFlags)
3185 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3186 Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
3187 return false;
3188
3189 // Set up the argument vectors.
3190 SmallVector<MVT, 16> OutVTs;
3192 OutVTs.reserve(CLI.OutVals.size());
3193
3194 for (auto *Val : CLI.OutVals) {
3195 MVT VT;
3196 if (!isTypeLegal(Val->getType(), VT) &&
3197 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3198 return false;
3199
3200 // We don't handle vector parameters yet.
3201 if (VT.isVector() || VT.getSizeInBits() > 64)
3202 return false;
3203
3204 OutVTs.push_back(VT);
3205 OrigTys.push_back(Val->getType());
3206 }
3207
3208 Address Addr;
3209 if (Callee && !computeCallAddress(Callee, Addr))
3210 return false;
3211
3212 // The weak function target may be zero; in that case we must use indirect
3213 // addressing via a stub on windows as it may be out of range for a
3214 // PC-relative jump.
3215 if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
3216 Addr.getGlobalValue()->hasExternalWeakLinkage())
3217 return false;
3218
3219 // Handle the arguments now that we've gotten them.
3220 unsigned NumBytes;
3221 if (!processCallArgs(CLI, OutVTs, OrigTys, NumBytes))
3222 return false;
3223
3224 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3225 if (RegInfo->isAnyArgRegReserved(*MF))
3226 RegInfo->emitReservedArgRegCallError(*MF);
3227
3228 // Issue the call.
3229 MachineInstrBuilder MIB;
3230 if (Subtarget->useSmallAddressing()) {
3231 const MCInstrDesc &II =
3232 TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
3233 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II);
3234 if (Symbol)
3235 MIB.addSym(Symbol, 0);
3236 else if (Addr.getGlobalValue())
3237 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3238 else if (Addr.getReg()) {
3239 Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3240 MIB.addReg(Reg);
3241 } else
3242 return false;
3243 } else {
3244 Register CallReg;
3245 if (Symbol) {
3246 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3247 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
3248 ADRPReg)
3250
3251 CallReg = createResultReg(&AArch64::GPR64RegClass);
3252 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3253 TII.get(AArch64::LDRXui), CallReg)
3254 .addReg(ADRPReg)
3255 .addSym(Symbol,
3257 } else if (Addr.getGlobalValue())
3258 CallReg = materializeGV(Addr.getGlobalValue());
3259 else if (Addr.getReg())
3260 CallReg = Addr.getReg();
3261
3262 if (!CallReg)
3263 return false;
3264
3265 const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
3266 CallReg = constrainOperandRegClass(II, CallReg, 0);
3267 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(CallReg);
3268 }
3269
3270 // Add implicit physical register uses to the call.
3271 for (auto Reg : CLI.OutRegs)
3272 MIB.addReg(Reg, RegState::Implicit);
3273
3274 // Add a register mask with the call-preserved registers.
3275 // Proper defs for return values will be added by setPhysRegsDeadExcept().
3276 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3277
3278 CLI.Call = MIB;
3279
3280 // Finish off the call including any return values.
3281 return finishCall(CLI, NumBytes);
3282}
3283
3284bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) {
3285 if (Alignment)
3286 return Len / Alignment->value() <= 4;
3287 else
3288 return Len < 32;
3289}
3290
3291bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3292 uint64_t Len, MaybeAlign Alignment) {
3293 // Make sure we don't bloat code by inlining very large memcpy's.
3294 if (!isMemCpySmall(Len, Alignment))
3295 return false;
3296
3297 int64_t UnscaledOffset = 0;
3298 Address OrigDest = Dest;
3299 Address OrigSrc = Src;
3300
3301 while (Len) {
3302 MVT VT;
3303 if (!Alignment || *Alignment >= 8) {
3304 if (Len >= 8)
3305 VT = MVT::i64;
3306 else if (Len >= 4)
3307 VT = MVT::i32;
3308 else if (Len >= 2)
3309 VT = MVT::i16;
3310 else {
3311 VT = MVT::i8;
3312 }
3313 } else {
3314 assert(Alignment && "Alignment is set in this branch");
3315 // Bound based on alignment.
3316 if (Len >= 4 && *Alignment == 4)
3317 VT = MVT::i32;
3318 else if (Len >= 2 && *Alignment == 2)
3319 VT = MVT::i16;
3320 else {
3321 VT = MVT::i8;
3322 }
3323 }
3324
3325 Register ResultReg = emitLoad(VT, VT, Src);
3326 if (!ResultReg)
3327 return false;
3328
3329 if (!emitStore(VT, ResultReg, Dest))
3330 return false;
3331
3332 int64_t Size = VT.getSizeInBits() / 8;
3333 Len -= Size;
3334 UnscaledOffset += Size;
3335
3336 // We need to recompute the unscaled offset for each iteration.
3337 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3338 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3339 }
3340
3341 return true;
3342}
3343
3344/// Check if it is possible to fold the condition from the XALU intrinsic
3345/// into the user. The condition code will only be updated on success.
3346bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3347 const Instruction *I,
3348 const Value *Cond) {
3350 return false;
3351
3352 const auto *EV = cast<ExtractValueInst>(Cond);
3353 if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3354 return false;
3355
3356 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3357 MVT RetVT;
3358 const Function *Callee = II->getCalledFunction();
3359 Type *RetTy =
3360 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3361 if (!isTypeLegal(RetTy, RetVT))
3362 return false;
3363
3364 if (RetVT != MVT::i32 && RetVT != MVT::i64)
3365 return false;
3366
3367 const Value *LHS = II->getArgOperand(0);
3368 const Value *RHS = II->getArgOperand(1);
3369
3370 // Canonicalize immediate to the RHS.
3371 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3372 std::swap(LHS, RHS);
3373
3374 // Simplify multiplies.
3375 Intrinsic::ID IID = II->getIntrinsicID();
3376 switch (IID) {
3377 default:
3378 break;
3379 case Intrinsic::smul_with_overflow:
3380 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3381 if (C->getValue() == 2)
3382 IID = Intrinsic::sadd_with_overflow;
3383 break;
3384 case Intrinsic::umul_with_overflow:
3385 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3386 if (C->getValue() == 2)
3387 IID = Intrinsic::uadd_with_overflow;
3388 break;
3389 }
3390
3391 AArch64CC::CondCode TmpCC;
3392 switch (IID) {
3393 default:
3394 return false;
3395 case Intrinsic::sadd_with_overflow:
3396 case Intrinsic::ssub_with_overflow:
3397 TmpCC = AArch64CC::VS;
3398 break;
3399 case Intrinsic::uadd_with_overflow:
3400 TmpCC = AArch64CC::HS;
3401 break;
3402 case Intrinsic::usub_with_overflow:
3403 TmpCC = AArch64CC::LO;
3404 break;
3405 case Intrinsic::smul_with_overflow:
3406 case Intrinsic::umul_with_overflow:
3407 TmpCC = AArch64CC::NE;
3408 break;
3409 }
3410
3411 // Check if both instructions are in the same basic block.
3412 if (!isValueAvailable(II))
3413 return false;
3414
3415 // Make sure nothing is in the way
3418 for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3419 // We only expect extractvalue instructions between the intrinsic and the
3420 // instruction to be selected.
3421 if (!isa<ExtractValueInst>(Itr))
3422 return false;
3423
3424 // Check that the extractvalue operand comes from the intrinsic.
3425 const auto *EVI = cast<ExtractValueInst>(Itr);
3426 if (EVI->getAggregateOperand() != II)
3427 return false;
3428 }
3429
3430 CC = TmpCC;
3431 return true;
3432}
3433
3434bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3435 // FIXME: Handle more intrinsics.
3436 switch (II->getIntrinsicID()) {
3437 default: return false;
3438 case Intrinsic::frameaddress: {
3439 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3440 MFI.setFrameAddressIsTaken(true);
3441
3442 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3443 Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3444 Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3445 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3446 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3447 // Recursively load frame address
3448 // ldr x0, [fp]
3449 // ldr x0, [x0]
3450 // ldr x0, [x0]
3451 // ...
3452 Register DestReg;
3453 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3454 while (Depth--) {
3455 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3456 SrcReg, 0);
3457 assert(DestReg && "Unexpected LDR instruction emission failure.");
3458 SrcReg = DestReg;
3459 }
3460
3461 updateValueMap(II, SrcReg);
3462 return true;
3463 }
3464 case Intrinsic::sponentry: {
3465 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3466
3467 // SP = FP + Fixed Object + 16
3468 int FI = MFI.CreateFixedObject(4, 0, false);
3469 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3470 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3471 TII.get(AArch64::ADDXri), ResultReg)
3472 .addFrameIndex(FI)
3473 .addImm(0)
3474 .addImm(0);
3475
3476 updateValueMap(II, ResultReg);
3477 return true;
3478 }
3479 case Intrinsic::memcpy:
3480 case Intrinsic::memmove: {
3481 const auto *MTI = cast<MemTransferInst>(II);
3482 // Don't handle volatile.
3483 if (MTI->isVolatile())
3484 return false;
3485
3486 // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
3487 // we would emit dead code because we don't currently handle memmoves.
3488 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3489 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3490 // Small memcpy's are common enough that we want to do them without a call
3491 // if possible.
3492 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3493 MaybeAlign Alignment;
3494 if (MTI->getDestAlign() || MTI->getSourceAlign())
3495 Alignment = std::min(MTI->getDestAlign().valueOrOne(),
3496 MTI->getSourceAlign().valueOrOne());
3497 if (isMemCpySmall(Len, Alignment)) {
3498 Address Dest, Src;
3499 if (!computeAddress(MTI->getRawDest(), Dest) ||
3500 !computeAddress(MTI->getRawSource(), Src))
3501 return false;
3502 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3503 return true;
3504 }
3505 }
3506
3507 if (!MTI->getLength()->getType()->isIntegerTy(64))
3508 return false;
3509
3510 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3511 // Fast instruction selection doesn't support the special
3512 // address spaces.
3513 return false;
3514
3515 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3516 return lowerCallTo(II, IntrMemName, II->arg_size() - 1);
3517 }
3518 case Intrinsic::memset: {
3519 const MemSetInst *MSI = cast<MemSetInst>(II);
3520 // Don't handle volatile.
3521 if (MSI->isVolatile())
3522 return false;
3523
3524 if (!MSI->getLength()->getType()->isIntegerTy(64))
3525 return false;
3526
3527 if (MSI->getDestAddressSpace() > 255)
3528 // Fast instruction selection doesn't support the special
3529 // address spaces.
3530 return false;
3531
3532 return lowerCallTo(II, "memset", II->arg_size() - 1);
3533 }
3534 case Intrinsic::sin:
3535 case Intrinsic::cos:
3536 case Intrinsic::tan:
3537 case Intrinsic::pow: {
3538 MVT RetVT;
3539 if (!isTypeLegal(II->getType(), RetVT))
3540 return false;
3541
3542 if (RetVT != MVT::f32 && RetVT != MVT::f64)
3543 return false;
3544
3545 static const RTLIB::Libcall LibCallTable[4][2] = {
3546 {RTLIB::SIN_F32, RTLIB::SIN_F64},
3547 {RTLIB::COS_F32, RTLIB::COS_F64},
3548 {RTLIB::TAN_F32, RTLIB::TAN_F64},
3549 {RTLIB::POW_F32, RTLIB::POW_F64}};
3550 RTLIB::Libcall LC;
3551 bool Is64Bit = RetVT == MVT::f64;
3552 switch (II->getIntrinsicID()) {
3553 default:
3554 llvm_unreachable("Unexpected intrinsic.");
3555 case Intrinsic::sin:
3556 LC = LibCallTable[0][Is64Bit];
3557 break;
3558 case Intrinsic::cos:
3559 LC = LibCallTable[1][Is64Bit];
3560 break;
3561 case Intrinsic::tan:
3562 LC = LibCallTable[2][Is64Bit];
3563 break;
3564 case Intrinsic::pow:
3565 LC = LibCallTable[3][Is64Bit];
3566 break;
3567 }
3568
3569 ArgListTy Args;
3570 Args.reserve(II->arg_size());
3571
3572 // Populate the argument list.
3573 for (auto &Arg : II->args())
3574 Args.emplace_back(Arg);
3575
3576 CallLoweringInfo CLI;
3577 MCContext &Ctx = MF->getContext();
3578
3579 RTLIB::LibcallImpl LCImpl = LibcallLowering->getLibcallImpl(LC);
3580 if (LCImpl == RTLIB::Unsupported)
3581 return false;
3582
3583 CallingConv::ID CC = LibcallLowering->getLibcallImplCallingConv(LCImpl);
3584 StringRef FuncName = RTLIB::RuntimeLibcallsInfo::getLibcallImplName(LCImpl);
3585 CLI.setCallee(DL, Ctx, CC, II->getType(), FuncName, std::move(Args));
3586 if (!lowerCallTo(CLI))
3587 return false;
3588 updateValueMap(II, CLI.ResultReg);
3589 return true;
3590 }
3591 case Intrinsic::fabs: {
3592 MVT VT;
3593 if (!isTypeLegal(II->getType(), VT))
3594 return false;
3595
3596 unsigned Opc;
3597 switch (VT.SimpleTy) {
3598 default:
3599 return false;
3600 case MVT::f32:
3601 Opc = AArch64::FABSSr;
3602 break;
3603 case MVT::f64:
3604 Opc = AArch64::FABSDr;
3605 break;
3606 }
3607 Register SrcReg = getRegForValue(II->getOperand(0));
3608 if (!SrcReg)
3609 return false;
3610 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3611 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
3612 .addReg(SrcReg);
3613 updateValueMap(II, ResultReg);
3614 return true;
3615 }
3616 case Intrinsic::trap:
3617 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3618 .addImm(1);
3619 return true;
3620 case Intrinsic::debugtrap:
3621 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3622 .addImm(0xF000);
3623 return true;
3624
3625 case Intrinsic::sqrt: {
3626 Type *RetTy = II->getCalledFunction()->getReturnType();
3627
3628 MVT VT;
3629 if (!isTypeLegal(RetTy, VT))
3630 return false;
3631
3632 Register Op0Reg = getRegForValue(II->getOperand(0));
3633 if (!Op0Reg)
3634 return false;
3635
3636 Register ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);
3637 if (!ResultReg)
3638 return false;
3639
3640 updateValueMap(II, ResultReg);
3641 return true;
3642 }
3643 case Intrinsic::sadd_with_overflow:
3644 case Intrinsic::uadd_with_overflow:
3645 case Intrinsic::ssub_with_overflow:
3646 case Intrinsic::usub_with_overflow:
3647 case Intrinsic::smul_with_overflow:
3648 case Intrinsic::umul_with_overflow: {
3649 // This implements the basic lowering of the xalu with overflow intrinsics.
3650 const Function *Callee = II->getCalledFunction();
3651 auto *Ty = cast<StructType>(Callee->getReturnType());
3652 Type *RetTy = Ty->getTypeAtIndex(0U);
3653
3654 MVT VT;
3655 if (!isTypeLegal(RetTy, VT))
3656 return false;
3657
3658 if (VT != MVT::i32 && VT != MVT::i64)
3659 return false;
3660
3661 const Value *LHS = II->getArgOperand(0);
3662 const Value *RHS = II->getArgOperand(1);
3663 // Canonicalize immediate to the RHS.
3664 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3665 std::swap(LHS, RHS);
3666
3667 // Simplify multiplies.
3668 Intrinsic::ID IID = II->getIntrinsicID();
3669 switch (IID) {
3670 default:
3671 break;
3672 case Intrinsic::smul_with_overflow:
3673 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3674 if (C->getValue() == 2) {
3675 IID = Intrinsic::sadd_with_overflow;
3676 RHS = LHS;
3677 }
3678 break;
3679 case Intrinsic::umul_with_overflow:
3680 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3681 if (C->getValue() == 2) {
3682 IID = Intrinsic::uadd_with_overflow;
3683 RHS = LHS;
3684 }
3685 break;
3686 }
3687
3688 Register ResultReg1, ResultReg2, MulReg;
3690 switch (IID) {
3691 default: llvm_unreachable("Unexpected intrinsic!");
3692 case Intrinsic::sadd_with_overflow:
3693 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3694 CC = AArch64CC::VS;
3695 break;
3696 case Intrinsic::uadd_with_overflow:
3697 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3698 CC = AArch64CC::HS;
3699 break;
3700 case Intrinsic::ssub_with_overflow:
3701 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3702 CC = AArch64CC::VS;
3703 break;
3704 case Intrinsic::usub_with_overflow:
3705 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3706 CC = AArch64CC::LO;
3707 break;
3708 case Intrinsic::smul_with_overflow: {
3709 CC = AArch64CC::NE;
3710 Register LHSReg = getRegForValue(LHS);
3711 if (!LHSReg)
3712 return false;
3713
3714 Register RHSReg = getRegForValue(RHS);
3715 if (!RHSReg)
3716 return false;
3717
3718 if (VT == MVT::i32) {
3719 MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
3720 Register MulSubReg =
3721 fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3722 // cmp xreg, wreg, sxtw
3723 emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,
3724 AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,
3725 /*WantResult=*/false);
3726 MulReg = MulSubReg;
3727 } else {
3728 assert(VT == MVT::i64 && "Unexpected value type.");
3729 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3730 // reused in the next instruction.
3731 MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3732 Register SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);
3733 emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63,
3734 /*WantResult=*/false);
3735 }
3736 break;
3737 }
3738 case Intrinsic::umul_with_overflow: {
3739 CC = AArch64CC::NE;
3740 Register LHSReg = getRegForValue(LHS);
3741 if (!LHSReg)
3742 return false;
3743
3744 Register RHSReg = getRegForValue(RHS);
3745 if (!RHSReg)
3746 return false;
3747
3748 if (VT == MVT::i32) {
3749 MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
3750 // tst xreg, #0xffffffff00000000
3751 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3752 TII.get(AArch64::ANDSXri), AArch64::XZR)
3753 .addReg(MulReg)
3754 .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64));
3755 MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3756 } else {
3757 assert(VT == MVT::i64 && "Unexpected value type.");
3758 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3759 // reused in the next instruction.
3760 MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3761 Register UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
3762 emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
3763 }
3764 break;
3765 }
3766 }
3767
3768 if (MulReg) {
3769 ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3770 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3771 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3772 }
3773
3774 if (!ResultReg1)
3775 return false;
3776
3777 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3778 AArch64::WZR, AArch64::WZR,
3779 getInvertedCondCode(CC));
3780 (void)ResultReg2;
3781 assert((ResultReg1 + 1) == ResultReg2 &&
3782 "Nonconsecutive result registers.");
3783 updateValueMap(II, ResultReg1, 2);
3784 return true;
3785 }
3786 case Intrinsic::aarch64_crc32b:
3787 case Intrinsic::aarch64_crc32h:
3788 case Intrinsic::aarch64_crc32w:
3789 case Intrinsic::aarch64_crc32x:
3790 case Intrinsic::aarch64_crc32cb:
3791 case Intrinsic::aarch64_crc32ch:
3792 case Intrinsic::aarch64_crc32cw:
3793 case Intrinsic::aarch64_crc32cx: {
3794 if (!Subtarget->hasCRC())
3795 return false;
3796
3797 unsigned Opc;
3798 switch (II->getIntrinsicID()) {
3799 default:
3800 llvm_unreachable("Unexpected intrinsic!");
3801 case Intrinsic::aarch64_crc32b:
3802 Opc = AArch64::CRC32Brr;
3803 break;
3804 case Intrinsic::aarch64_crc32h:
3805 Opc = AArch64::CRC32Hrr;
3806 break;
3807 case Intrinsic::aarch64_crc32w:
3808 Opc = AArch64::CRC32Wrr;
3809 break;
3810 case Intrinsic::aarch64_crc32x:
3811 Opc = AArch64::CRC32Xrr;
3812 break;
3813 case Intrinsic::aarch64_crc32cb:
3814 Opc = AArch64::CRC32CBrr;
3815 break;
3816 case Intrinsic::aarch64_crc32ch:
3817 Opc = AArch64::CRC32CHrr;
3818 break;
3819 case Intrinsic::aarch64_crc32cw:
3820 Opc = AArch64::CRC32CWrr;
3821 break;
3822 case Intrinsic::aarch64_crc32cx:
3823 Opc = AArch64::CRC32CXrr;
3824 break;
3825 }
3826
3827 Register LHSReg = getRegForValue(II->getArgOperand(0));
3828 Register RHSReg = getRegForValue(II->getArgOperand(1));
3829 if (!LHSReg || !RHSReg)
3830 return false;
3831
3832 Register ResultReg =
3833 fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, LHSReg, RHSReg);
3834 updateValueMap(II, ResultReg);
3835 return true;
3836 }
3837 }
3838 return false;
3839}
3840
3841bool AArch64FastISel::selectRet(const Instruction *I) {
3842 const ReturnInst *Ret = cast<ReturnInst>(I);
3843 const Function &F = *I->getParent()->getParent();
3844
3845 if (!FuncInfo.CanLowerReturn)
3846 return false;
3847
3848 if (F.isVarArg())
3849 return false;
3850
3851 if (TLI.supportSwiftError() &&
3852 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3853 return false;
3854
3855 if (TLI.supportSplitCSR(FuncInfo.MF))
3856 return false;
3857
3858 // Build a list of return value registers.
3860
3861 if (Ret->getNumOperands() > 0) {
3862 CallingConv::ID CC = F.getCallingConv();
3864 GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3865
3866 // Analyze operands of the call, assigning locations to each operand.
3868 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3869 CCInfo.AnalyzeReturn(Outs, RetCC_AArch64_AAPCS);
3870
3871 // Only handle a single return value for now.
3872 if (ValLocs.size() != 1)
3873 return false;
3874
3875 CCValAssign &VA = ValLocs[0];
3876 const Value *RV = Ret->getOperand(0);
3877
3878 // Don't bother handling odd stuff for now.
3879 if ((VA.getLocInfo() != CCValAssign::Full) &&
3880 (VA.getLocInfo() != CCValAssign::BCvt))
3881 return false;
3882
3883 // Only handle register returns for now.
3884 if (!VA.isRegLoc())
3885 return false;
3886
3887 Register Reg = getRegForValue(RV);
3888 if (!Reg)
3889 return false;
3890
3891 Register SrcReg = Reg + VA.getValNo();
3892 Register DestReg = VA.getLocReg();
3893 // Avoid a cross-class copy. This is very unlikely.
3894 if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3895 return false;
3896
3897 EVT RVEVT = TLI.getValueType(DL, RV->getType());
3898 if (!RVEVT.isSimple())
3899 return false;
3900
3901 // Vectors (of > 1 lane) in big endian need tricky handling.
3902 if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
3903 !Subtarget->isLittleEndian())
3904 return false;
3905
3906 MVT RVVT = RVEVT.getSimpleVT();
3907 if (RVVT == MVT::f128)
3908 return false;
3909
3910 MVT DestVT = VA.getValVT();
3911 // Special handling for extended integers.
3912 if (RVVT != DestVT) {
3913 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3914 return false;
3915
3916 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3917 return false;
3918
3919 bool IsZExt = Outs[0].Flags.isZExt();
3920 SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3921 if (!SrcReg)
3922 return false;
3923 }
3924
3925 // "Callee" (i.e. value producer) zero extends pointers at function
3926 // boundary.
3927 if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
3928 SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);
3929
3930 // Make the copy.
3931 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3932 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3933
3934 // Add register to return instruction.
3935 RetRegs.push_back(VA.getLocReg());
3936 }
3937
3938 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3939 TII.get(AArch64::RET_ReallyLR));
3940 for (Register RetReg : RetRegs)
3941 MIB.addReg(RetReg, RegState::Implicit);
3942 return true;
3943}
3944
// Lower a scalar integer 'trunc'. Truncation needs no real instruction on
// AArch64: for legal results the high bits are simply left undefined (a
// plain COPY), while truncating i64 down to an illegal narrow type
// (i1/i8/i16) masks the low 32 bits with an AND.
bool AArch64FastISel::selectTrunc(const Instruction *I) {
  Type *DestTy = I->getType();
  Value *Op = I->getOperand(0);
  Type *SrcTy = Op->getType();

  EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
  EVT DestEVT = TLI.getValueType(DL, DestTy, true);
  if (!SrcEVT.isSimple())
    return false;
  if (!DestEVT.isSimple())
    return false;

  MVT SrcVT = SrcEVT.getSimpleVT();
  MVT DestVT = DestEVT.getSimpleVT();

  // Only scalar integer source/destination types are handled here; everything
  // else is left to SelectionDAG.
  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
      SrcVT != MVT::i8)
    return false;
  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
      DestVT != MVT::i1)
    return false;

  Register SrcReg = getRegForValue(Op);
  if (!SrcReg)
    return false;

  // If we're truncating from i64 to a smaller non-legal type then generate an
  // AND. Otherwise, we know the high bits are undefined and a truncate only
  // generate a COPY. We cannot mark the source register also as result
  // register, because this can incorrectly transfer the kill flag onto the
  // source register.
  Register ResultReg;
  if (SrcVT == MVT::i64) {
    uint64_t Mask = 0;
    switch (DestVT.SimpleTy) {
    default:
      // Trunc i64 to i32 is handled by the target-independent fast-isel.
      return false;
    case MVT::i1:
      Mask = 0x1;
      break;
    case MVT::i8:
      Mask = 0xff;
      break;
    case MVT::i16:
      Mask = 0xffff;
      break;
    }
    // Issue an extract_subreg to get the lower 32-bits.
    Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
                                                AArch64::sub_32);
    // Create the AND instruction which performs the actual truncation.
    ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
    assert(ResultReg && "Unexpected AND instruction emission failure.");
  } else {
    ResultReg = createResultReg(&AArch64::GPR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(SrcReg);
  }

  updateValueMap(I, ResultReg);
  return true;
}
4009
// Extend an i1 value in SrcReg to DestVT (i8/i16/i32/i64). Zero-extension is
// an AND with 1 (ANDWri also clears the upper 32 bits, so i64 only needs an
// extra SUBREG_TO_REG); sign-extension replicates bit 0 via SBFM. Returns an
// invalid Register on failure (SExt i1 -> i64 is unimplemented).
Register AArch64FastISel::emiti1Ext(Register SrcReg, MVT DestVT, bool IsZExt) {
  assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
          DestVT == MVT::i64) &&
         "Unexpected value type.");
  // Handle i8 and i16 as i32.
  if (DestVT == MVT::i8 || DestVT == MVT::i16)
    DestVT = MVT::i32;

  if (IsZExt) {
    Register ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
    assert(ResultReg && "Unexpected AND instruction emission failure.");
    if (DestVT == MVT::i64) {
      // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
      // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
      Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(AArch64::SUBREG_TO_REG), Reg64)
          .addReg(ResultReg)
          .addImm(AArch64::sub_32);
      ResultReg = Reg64;
    }
    return ResultReg;
  } else {
    if (DestVT == MVT::i64) {
      // FIXME: We're SExt i1 to i64.
      return Register();
    }
    // SBFM Wd, Wn, #0, #0 copies bit 0 into every bit of Wd, i.e. the i1
    // sign-extension.
    return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
                            0, 0);
  }
}
4041
4042Register AArch64FastISel::emitMul_rr(MVT RetVT, Register Op0, Register Op1) {
4043 unsigned Opc;
4044 Register ZReg;
4045 switch (RetVT.SimpleTy) {
4046 default:
4047 return Register();
4048 case MVT::i8:
4049 case MVT::i16:
4050 case MVT::i32:
4051 RetVT = MVT::i32;
4052 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
4053 case MVT::i64:
4054 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4055 }
4056
4057 const TargetRegisterClass *RC =
4058 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4059 return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);
4060}
4061
4062Register AArch64FastISel::emitSMULL_rr(MVT RetVT, Register Op0, Register Op1) {
4063 if (RetVT != MVT::i64)
4064 return Register();
4065
4066 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4067 Op0, Op1, AArch64::XZR);
4068}
4069
4070Register AArch64FastISel::emitUMULL_rr(MVT RetVT, Register Op0, Register Op1) {
4071 if (RetVT != MVT::i64)
4072 return Register();
4073
4074 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4075 Op0, Op1, AArch64::XZR);
4076}
4077
// Emit a logical shift left by a register amount. The illegal narrow types
// (i8/i16) are shifted in a 32-bit register; their shift amount and result
// are masked back down to the narrow width.
Register AArch64FastISel::emitLSL_rr(MVT RetVT, Register Op0Reg,
                                     Register Op1Reg) {
  unsigned ShiftOpc;
  uint64_t TruncMask = 0; // non-zero => narrow type needing masking
  switch (RetVT.SimpleTy) {
  case MVT::i8:
    ShiftOpc = AArch64::LSLVWr;
    TruncMask = 0xff;
    break;
  case MVT::i16:
    ShiftOpc = AArch64::LSLVWr;
    TruncMask = 0xffff;
    break;
  case MVT::i32:
    ShiftOpc = AArch64::LSLVWr;
    break;
  case MVT::i64:
    ShiftOpc = AArch64::LSLVXr;
    break;
  default:
    return Register();
  }

  const TargetRegisterClass *RC = (RetVT == MVT::i64)
                                      ? &AArch64::GPR64RegClass
                                      : &AArch64::GPR32RegClass;
  // Constrain the shift amount for the narrow types.
  if (TruncMask)
    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, TruncMask);

  Register ShiftedReg = fastEmitInst_rr(ShiftOpc, RC, Op0Reg, Op1Reg);
  // Truncate the result back to the narrow width.
  if (TruncMask)
    ShiftedReg = emitAnd_ri(MVT::i32, ShiftedReg, TruncMask);
  return ShiftedReg;
}
4102
// Emit a logical shift left by the immediate 'Shift', folding a pending
// zero-/sign-extension of the source (SrcVT -> RetVT) into a single
// UBFM/SBFM bitfield move. A zero shift degenerates into a copy (or a plain
// extend), and shifts of DstBits or more are rejected as undefined.
Register AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, Register Op0,
                                     uint64_t Shift, bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair.");
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type.");
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) && "Unexpected return value type.");

  bool Is64Bit = (RetVT == MVT::i64);
  unsigned RegSize = Is64Bit ? 64 : 32;
  unsigned DstBits = RetVT.getSizeInBits();
  unsigned SrcBits = SrcVT.getSizeInBits();
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // Just emit a copy for "zero" shifts.
  if (Shift == 0) {
    if (RetVT == SrcVT) {
      Register ResultReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), ResultReg)
          .addReg(Op0);
      return ResultReg;
    } else
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
  }

  // Don't deal with undefined shifts.
  if (Shift >= DstBits)
    return Register();

  // For immediate shifts we can fold the zero-/sign-extension into the shift.
  // {S|U}BFM Wd, Wn, #r, #s
  // Wd<32+s-r,32-r> = Wn<s:0> when r > s

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = shl i16 %1, 4
  // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
  // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
  // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
  // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = shl i16 %1, 8
  // Wd<32+7-24,32-24> = Wn<7:0>
  // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
  // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
  // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = shl i16 %1, 12
  // Wd<32+3-20,32-20> = Wn<3:0>
  // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
  // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext

  unsigned ImmR = RegSize - Shift;
  // Limit the width to the length of the source type.
  unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
  static const unsigned OpcTable[2][2] = {
    {AArch64::SBFMWri, AArch64::SBFMXri},
    {AArch64::UBFMWri, AArch64::UBFMXri}
  };
  unsigned Opc = OpcTable[IsZExt][Is64Bit];
  // A 64-bit bitfield move needs the 32-bit source widened into a 64-bit
  // register first; SUBREG_TO_REG asserts the upper bits are zero/ignored.
  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    Register TmpReg = MRI.createVirtualRegister(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
        .addReg(Op0)
        .addImm(AArch64::sub_32);
    Op0 = TmpReg;
  }
  return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
}
4179
4180Register AArch64FastISel::emitLSR_rr(MVT RetVT, Register Op0Reg,
4181 Register Op1Reg) {
4182 unsigned Opc = 0;
4183 bool NeedTrunc = false;
4184 uint64_t Mask = 0;
4185 switch (RetVT.SimpleTy) {
4186 default:
4187 return Register();
4188 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4189 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4190 case MVT::i32: Opc = AArch64::LSRVWr; break;
4191 case MVT::i64: Opc = AArch64::LSRVXr; break;
4192 }
4193
4194 const TargetRegisterClass *RC =
4195 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4196 if (NeedTrunc) {
4197 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
4198 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4199 }
4200 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4201 if (NeedTrunc)
4202 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4203 return ResultReg;
4204}
4205
// Emit a logical shift right by the immediate 'Shift', folding a pending
// zero-extension of the source (SrcVT -> RetVT) into a single UBFM. A
// sign-extension cannot be folded into a logical shift, so it is emitted
// separately first. A zero shift degenerates into a copy/extend, and shifting
// away all source bits of a zero-extended value yields the constant 0.
Register AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, Register Op0,
                                     uint64_t Shift, bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair.");
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type.");
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) && "Unexpected return value type.");

  bool Is64Bit = (RetVT == MVT::i64);
  unsigned RegSize = Is64Bit ? 64 : 32;
  unsigned DstBits = RetVT.getSizeInBits();
  unsigned SrcBits = SrcVT.getSizeInBits();
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // Just emit a copy for "zero" shifts.
  if (Shift == 0) {
    if (RetVT == SrcVT) {
      Register ResultReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), ResultReg)
          .addReg(Op0);
      return ResultReg;
    } else
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
  }

  // Don't deal with undefined shifts.
  if (Shift >= DstBits)
    return Register();

  // For immediate shifts we can fold the zero-/sign-extension into the shift.
  // {S|U}BFM Wd, Wn, #r, #s
  // Wd<s-r:0> = Wn<s:r> when r <= s

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = lshr i16 %1, 4
  // Wd<7-4:0> = Wn<7:4>
  // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = lshr i16 %1, 8
  // Wd<7-7,0> = Wn<7:7>
  // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = lshr i16 %1, 12
  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
  // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  // Shifting all source bits out of a zero-extended value produces 0.
  if (Shift >= SrcBits && IsZExt)
    return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);

  // It is not possible to fold a sign-extend into the LShr instruction. In this
  // case emit a sign-extend.
  if (!IsZExt) {
    Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
    if (!Op0)
      return Register();
    SrcVT = RetVT;
    SrcBits = SrcVT.getSizeInBits();
    IsZExt = true;
  }

  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
  unsigned ImmS = SrcBits - 1;
  static const unsigned OpcTable[2][2] = {
    {AArch64::SBFMWri, AArch64::SBFMXri},
    {AArch64::UBFMWri, AArch64::UBFMXri}
  };
  unsigned Opc = OpcTable[IsZExt][Is64Bit];
  // A 64-bit bitfield move needs the 32-bit source widened into a 64-bit
  // register first.
  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    Register TmpReg = MRI.createVirtualRegister(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
        .addReg(Op0)
        .addImm(AArch64::sub_32);
    Op0 = TmpReg;
  }
  return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
}
4295
// Emit an arithmetic shift right by a register amount. For the illegal
// narrow types (i8/i16) the value is first sign-extended to i32 so that the
// shift sees the correct sign bits; the shift amount and the result are
// masked back to the narrow width.
Register AArch64FastISel::emitASR_rr(MVT RetVT, Register Op0Reg,
                                     Register Op1Reg) {
  unsigned Opc = 0;
  bool NeedTrunc = false;
  uint64_t Mask = 0;
  switch (RetVT.SimpleTy) {
  default:
    return Register();
  case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
  case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
  case MVT::i32: Opc = AArch64::ASRVWr; break;
  case MVT::i64: Opc = AArch64::ASRVXr; break;
  }

  const TargetRegisterClass *RC =
      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  if (NeedTrunc) {
    // Sign-extend the value so the arithmetic shift propagates the right sign
    // bit, and constrain the shift amount to the narrow range.
    Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
  }
  Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
  if (NeedTrunc)
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
  return ResultReg;
}
4321
// Emit an arithmetic shift right by the immediate 'Shift', folding a pending
// zero-/sign-extension of the source (SrcVT -> RetVT) into a single
// SBFM/UBFM. A zero shift degenerates into a copy/extend, and shifting away
// all source bits of a zero-extended value yields the constant 0.
Register AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, Register Op0,
                                     uint64_t Shift, bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair.");
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type.");
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) && "Unexpected return value type.");

  bool Is64Bit = (RetVT == MVT::i64);
  unsigned RegSize = Is64Bit ? 64 : 32;
  unsigned DstBits = RetVT.getSizeInBits();
  unsigned SrcBits = SrcVT.getSizeInBits();
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // Just emit a copy for "zero" shifts.
  if (Shift == 0) {
    if (RetVT == SrcVT) {
      Register ResultReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), ResultReg)
          .addReg(Op0);
      return ResultReg;
    } else
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
  }

  // Don't deal with undefined shifts.
  if (Shift >= DstBits)
    return Register();

  // For immediate shifts we can fold the zero-/sign-extension into the shift.
  // {S|U}BFM Wd, Wn, #r, #s
  // Wd<s-r:0> = Wn<s:r> when r <= s

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = ashr i16 %1, 4
  // Wd<7-4:0> = Wn<7:4>
  // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = ashr i16 %1, 8
  // Wd<7-7,0> = Wn<7:7>
  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = ashr i16 %1, 12
  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  // Shifting all source bits out of a zero-extended value produces 0.
  if (Shift >= SrcBits && IsZExt)
    return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);

  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
  unsigned ImmS = SrcBits - 1;
  static const unsigned OpcTable[2][2] = {
    {AArch64::SBFMWri, AArch64::SBFMXri},
    {AArch64::UBFMWri, AArch64::UBFMXri}
  };
  unsigned Opc = OpcTable[IsZExt][Is64Bit];
  // A 64-bit bitfield move needs the 32-bit source widened into a 64-bit
  // register first.
  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    Register TmpReg = MRI.createVirtualRegister(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
        .addReg(Op0)
        .addImm(AArch64::sub_32);
    Op0 = TmpReg;
  }
  return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
}
4400
// Emit a zero-/sign-extension of SrcReg from SrcVT to DestVT using a single
// UBFM/SBFM bitfield move (i1 sources are delegated to emiti1Ext). Returns
// an invalid Register when the type pair is not handled, so callers can fall
// back to SelectionDAG.
Register AArch64FastISel::emitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT,
                                     bool IsZExt) {
  assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");

  // FastISel does not have plumbing to deal with extensions where the SrcVT or
  // DestVT are odd things, so test to make sure that they are both types we can
  // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
  // bail out to SelectionDAG.
  if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
       (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
      ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
       (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
    return Register();

  unsigned Opc;
  unsigned Imm = 0;

  // Imm is the 'ImmS' field of the bitfield move: the index of the highest
  // source bit to keep.
  switch (SrcVT.SimpleTy) {
  default:
    return Register();
  case MVT::i1:
    return emiti1Ext(SrcReg, DestVT, IsZExt);
  case MVT::i8:
    if (DestVT == MVT::i64)
      Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    else
      Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
    Imm = 7;
    break;
  case MVT::i16:
    if (DestVT == MVT::i64)
      Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    else
      Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
    Imm = 15;
    break;
  case MVT::i32:
    assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
    Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    Imm = 31;
    break;
  }

  // Handle i8 and i16 as i32.
  if (DestVT == MVT::i8 || DestVT == MVT::i16)
    DestVT = MVT::i32;
  else if (DestVT == MVT::i64) {
    // A 64-bit bitfield move needs the 32-bit source widened into a 64-bit
    // register first.
    Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::SUBREG_TO_REG), Src64)
        .addReg(SrcReg)
        .addImm(AArch64::sub_32);
    SrcReg = Src64;
  }

  const TargetRegisterClass *RC =
      (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
}
4460
4461static bool isZExtLoad(const MachineInstr *LI) {
4462 switch (LI->getOpcode()) {
4463 default:
4464 return false;
4465 case AArch64::LDURBBi:
4466 case AArch64::LDURHHi:
4467 case AArch64::LDURWi:
4468 case AArch64::LDRBBui:
4469 case AArch64::LDRHHui:
4470 case AArch64::LDRWui:
4471 case AArch64::LDRBBroX:
4472 case AArch64::LDRHHroX:
4473 case AArch64::LDRWroX:
4474 case AArch64::LDRBBroW:
4475 case AArch64::LDRHHroW:
4476 case AArch64::LDRWroW:
4477 return true;
4478 }
4479}
4480
4481static bool isSExtLoad(const MachineInstr *LI) {
4482 switch (LI->getOpcode()) {
4483 default:
4484 return false;
4485 case AArch64::LDURSBWi:
4486 case AArch64::LDURSHWi:
4487 case AArch64::LDURSBXi:
4488 case AArch64::LDURSHXi:
4489 case AArch64::LDURSWi:
4490 case AArch64::LDRSBWui:
4491 case AArch64::LDRSHWui:
4492 case AArch64::LDRSBXui:
4493 case AArch64::LDRSHXui:
4494 case AArch64::LDRSWui:
4495 case AArch64::LDRSBWroX:
4496 case AArch64::LDRSHWroX:
4497 case AArch64::LDRSBXroX:
4498 case AArch64::LDRSHXroX:
4499 case AArch64::LDRSWroX:
4500 case AArch64::LDRSBWroW:
4501 case AArch64::LDRSHWroW:
4502 case AArch64::LDRSBXroW:
4503 case AArch64::LDRSHXroW:
4504 case AArch64::LDRSWroW:
4505 return true;
4506 }
4507}
4508
// Try to fold a zext/sext instruction into an already-selected load of its
// sole operand: AArch64 loads are themselves extending, so the separate
// extend can be dropped. Returns true if the extend was folded.
//
// Fix: restores the missing MachineBasicBlock::iterator construction before
// the removeDeadCode() call — the visible code passed the const Instruction*
// parameter 'I' where an MI iterator is required. The iterator is named MII
// to avoid shadowing the parameter.
bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
                                         MVT SrcVT) {
  const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
  if (!LI || !LI->hasOneUse())
    return false;

  // Check if the load instruction has already been selected.
  Register Reg = lookUpRegForValue(LI);
  if (!Reg)
    return false;

  MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
  if (!MI)
    return false;

  // Check if the correct load instruction has been emitted - SelectionDAG might
  // have emitted a zero-extending load, but we need a sign-extending load.
  bool IsZExt = isa<ZExtInst>(I);
  const auto *LoadMI = MI;
  if (LoadMI->getOpcode() == TargetOpcode::COPY &&
      LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
    Register LoadReg = MI->getOperand(1).getReg();
    LoadMI = MRI.getUniqueVRegDef(LoadReg);
    assert(LoadMI && "Expected valid instruction");
  }
  if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
    return false;

  // Nothing to be done.
  if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
    updateValueMap(I, Reg);
    return true;
  }

  if (IsZExt) {
    // A 32-bit zero-extending load already clears the upper 32 bits; promote
    // the result to 64 bit with a SUBREG_TO_REG.
    Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::SUBREG_TO_REG), Reg64)
        .addReg(Reg, getKillRegState(true))
        .addImm(AArch64::sub_32);
    Reg = Reg64;
  } else {
    assert((MI->getOpcode() == TargetOpcode::COPY &&
            MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
           "Expected copy instruction");
    // The load already produced the sign-extended 64-bit value; use it
    // directly and delete the now-dead COPY to the 32-bit subregister.
    Reg = MI->getOperand(1).getReg();
    MachineBasicBlock::iterator MII(MI);
    removeDeadCode(MII, std::next(MII));
  }
  updateValueMap(I, Reg);
  return true;
}
4561
// Lower a zext/sext instruction, trying cheap foldings first (into a load,
// or into an already-extended function argument) before emitting an explicit
// extend.
//
// Fix: restores the missing first line of the assert — the visible code had
// a dangling string literal ("Unexpected integer extend instruction.") with
// no assert() around it, which does not compile.
bool AArch64FastISel::selectIntExt(const Instruction *I) {
  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
         "Unexpected integer extend instruction.");
  MVT RetVT;
  MVT SrcVT;
  if (!isTypeSupported(I->getType(), RetVT))
    return false;

  if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
    return false;

  // Try to optimize already sign-/zero-extended values from load instructions.
  if (optimizeIntExtLoad(I, RetVT, SrcVT))
    return true;

  Register SrcReg = getRegForValue(I->getOperand(0));
  if (!SrcReg)
    return false;

  // Try to optimize already sign-/zero-extended values from function arguments.
  bool IsZExt = isa<ZExtInst>(I);
  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
      if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
        // The ABI already extended the argument in its 32-bit register; just
        // widen it with a SUBREG_TO_REG instead of re-extending.
        Register ResultReg = createResultReg(&AArch64::GPR64RegClass);
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                TII.get(AArch64::SUBREG_TO_REG), ResultReg)
            .addReg(SrcReg)
            .addImm(AArch64::sub_32);
        SrcReg = ResultReg;
      }

      updateValueMap(I, SrcReg);
      return true;
    }
  }

  Register ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}
4606
// Lower srem/urem. AArch64 has no remainder instruction, so the remainder is
// computed as: rem = numerator - (numerator / denominator) * denominator,
// using a DIV followed by MSUB.
bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
  EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
  if (!DestEVT.isSimple())
    return false;

  MVT DestVT = DestEVT.getSimpleVT();
  if (DestVT != MVT::i64 && DestVT != MVT::i32)
    return false;

  const bool Is64bit = (DestVT == MVT::i64);
  unsigned DivOpc;
  if (ISDOpcode == ISD::SREM)
    DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
  else if (ISDOpcode == ISD::UREM)
    DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
  else
    return false;

  Register NumReg = getRegForValue(I->getOperand(0));
  if (!NumReg)
    return false;

  Register DenomReg = getRegForValue(I->getOperand(1));
  if (!DenomReg)
    return false;

  const unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
  const TargetRegisterClass *RC =
      Is64bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  Register QuotReg = fastEmitInst_rr(DivOpc, RC, NumReg, DenomReg);
  assert(QuotReg && "Unexpected DIV instruction emission failure.");
  // The remainder is computed as numerator - (quotient * denominator) using the
  // MSUB instruction.
  Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, DenomReg, NumReg);
  updateValueMap(I, ResultReg);
  return true;
}
4647
// Lower a scalar or vector mul. A multiply by a power of two is strength-
// reduced to a left shift, folding a free zext/sext of the other operand
// into the shift when possible; everything else becomes a MADD.
bool AArch64FastISel::selectMul(const Instruction *I) {
  MVT VT;
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
    return false;

  if (VT.isVector())
    return selectBinaryOp(I, ISD::MUL);

  const Value *Src0 = I->getOperand(0);
  const Value *Src1 = I->getOperand(1);
  // Canonicalize the power-of-two constant onto the RHS.
  if (const auto *C = dyn_cast<ConstantInt>(Src0))
    if (C->getValue().isPowerOf2())
      std::swap(Src0, Src1);

  // Try to simplify to a shift instruction.
  if (const auto *C = dyn_cast<ConstantInt>(Src1))
    if (C->getValue().isPowerOf2()) {
      uint64_t ShiftVal = C->getValue().logBase2();
      MVT SrcVT = VT;
      bool IsZExt = true;
      // If the shifted operand is itself a non-free extend, look through it so
      // emitLSL_ri can fold the extension into the shift.
      if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
        if (!isIntExtFree(ZExt)) {
          MVT VT;
          if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
            SrcVT = VT;
            IsZExt = true;
            Src0 = ZExt->getOperand(0);
          }
        }
      } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
        if (!isIntExtFree(SExt)) {
          MVT VT;
          if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
            SrcVT = VT;
            IsZExt = false;
            Src0 = SExt->getOperand(0);
          }
        }
      }

      Register Src0Reg = getRegForValue(Src0);
      if (!Src0Reg)
        return false;

      Register ResultReg = emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);

      if (ResultReg) {
        updateValueMap(I, ResultReg);
        return true;
      }
    }

  // General case: emit a MADD against the zero register.
  Register Src0Reg = getRegForValue(I->getOperand(0));
  if (!Src0Reg)
    return false;

  Register Src1Reg = getRegForValue(I->getOperand(1));
  if (!Src1Reg)
    return false;

  Register ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}
4716
// Lower shl/lshr/ashr. Immediate shifts can fold a preceding non-free
// zext/sext of the shifted value into a single bitfield move; variable
// shifts use the LSLV/LSRV/ASRV register forms. Vector shifts fall back to
// the generic operator selection.
bool AArch64FastISel::selectShift(const Instruction *I) {
  MVT RetVT;
  if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
    return false;

  if (RetVT.isVector())
    return selectOperator(I, I->getOpcode());

  if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
    Register ResultReg;
    uint64_t ShiftVal = C->getZExtValue();
    MVT SrcVT = RetVT;
    // lshr/shl treat the input as unsigned; only ashr needs the sign.
    bool IsZExt = I->getOpcode() != Instruction::AShr;
    const Value *Op0 = I->getOperand(0);
    // If the shifted operand is itself a non-free extend, look through it so
    // the emit*_ri helper can fold the extension into the shift.
    if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
      if (!isIntExtFree(ZExt)) {
        MVT TmpVT;
        if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
          SrcVT = TmpVT;
          IsZExt = true;
          Op0 = ZExt->getOperand(0);
        }
      }
    } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
      if (!isIntExtFree(SExt)) {
        MVT TmpVT;
        if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
          SrcVT = TmpVT;
          IsZExt = false;
          Op0 = SExt->getOperand(0);
        }
      }
    }

    Register Op0Reg = getRegForValue(Op0);
    if (!Op0Reg)
      return false;

    switch (I->getOpcode()) {
    default: llvm_unreachable("Unexpected instruction.");
    case Instruction::Shl:
      ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
      break;
    case Instruction::AShr:
      ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
      break;
    case Instruction::LShr:
      ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
      break;
    }
    if (!ResultReg)
      return false;

    updateValueMap(I, ResultReg);
    return true;
  }

  // Variable shift amount: use the register-shift instructions.
  Register Op0Reg = getRegForValue(I->getOperand(0));
  if (!Op0Reg)
    return false;

  Register Op1Reg = getRegForValue(I->getOperand(1));
  if (!Op1Reg)
    return false;

  Register ResultReg;
  switch (I->getOpcode()) {
  default: llvm_unreachable("Unexpected instruction.");
  case Instruction::Shl:
    ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
    break;
  case Instruction::AShr:
    ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
    break;
  case Instruction::LShr:
    ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
    break;
  }

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}
4802
4803bool AArch64FastISel::selectBitCast(const Instruction *I) {
4804 MVT RetVT, SrcVT;
4805
4806 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4807 return false;
4808 if (!isTypeLegal(I->getType(), RetVT))
4809 return false;
4810
4811 unsigned Opc;
4812 if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4813 Opc = AArch64::FMOVWSr;
4814 else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4815 Opc = AArch64::FMOVXDr;
4816 else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4817 Opc = AArch64::FMOVSWr;
4818 else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4819 Opc = AArch64::FMOVDXr;
4820 else
4821 return false;
4822
4823 const TargetRegisterClass *RC = nullptr;
4824 switch (RetVT.SimpleTy) {
4825 default: llvm_unreachable("Unexpected value type.");
4826 case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4827 case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4828 case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4829 case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4830 }
4831 Register Op0Reg = getRegForValue(I->getOperand(0));
4832 if (!Op0Reg)
4833 return false;
4834
4835 Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
4836 if (!ResultReg)
4837 return false;
4838
4839 updateValueMap(I, ResultReg);
4840 return true;
4841}
4842
// Lower frem. AArch64 has no remainder instruction for floating point, so it
// is lowered as a runtime-library call (fmod/fmodf).
bool AArch64FastISel::selectFRem(const Instruction *I) {
  MVT RetVT;
  if (!isTypeLegal(I->getType(), RetVT))
    return false;

  const RTLIB::LibcallImpl Impl =
      LibcallLowering->getLibcallImpl(RTLIB::getREM(RetVT));
  if (Impl == RTLIB::Unsupported)
    return false;

  // Both frem operands become call arguments.
  ArgListTy Args;
  Args.reserve(I->getNumOperands());
  for (auto &Op : I->operands())
    Args.emplace_back(Op);

  MCContext &Ctx = MF->getContext();
  CallLoweringInfo CLI;
  CLI.setCallee(DL, Ctx, LibcallLowering->getLibcallImplCallingConv(Impl),
                I->getType(),
                RTLIB::RuntimeLibcallsInfo::getLibcallImplName(Impl),
                std::move(Args));
  if (!lowerCallTo(CLI))
    return false;

  updateValueMap(I, CLI.ResultReg);
  return true;
}
4871
// Lower sdiv by a (possibly negated) power-of-two constant as shifts: an
// exact division is a plain arithmetic shift; otherwise the dividend is
// biased towards zero first (add Pow2-1 for negative values, selected via
// CSEL), and a negative divisor additionally negates the shifted result.
// Non-constant or non-power-of-two divisors fall back to generic selection.
//
// Fix: restores the missing condition-code argument of the CSEL emission —
// the visible code's fastEmitInst_rri call was left unterminated; the select
// condition is AArch64CC::LT (dividend < 0 picks the biased value).
bool AArch64FastISel::selectSDiv(const Instruction *I) {
  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  if (!isa<ConstantInt>(I->getOperand(1)))
    return selectBinaryOp(I, ISD::SDIV);

  const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
  if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
      !(C.isPowerOf2() || C.isNegatedPowerOf2()))
    return selectBinaryOp(I, ISD::SDIV);

  unsigned Lg2 = C.countr_zero();
  Register Src0Reg = getRegForValue(I->getOperand(0));
  if (!Src0Reg)
    return false;

  // An exact division by a power of two is just an arithmetic shift.
  if (cast<BinaryOperator>(I)->isExact()) {
    Register ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
    if (!ResultReg)
      return false;
    updateValueMap(I, ResultReg);
    return true;
  }

  // Bias negative dividends towards zero by adding Pow2 - 1 before shifting.
  int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
  Register AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
  if (!AddReg)
    return false;

  // (Src0 < 0) ? Pow2 - 1 : 0;
  if (!emitICmp_ri(VT, Src0Reg, 0))
    return false;

  unsigned SelectOpc;
  const TargetRegisterClass *RC;
  if (VT == MVT::i64) {
    SelectOpc = AArch64::CSELXr;
    RC = &AArch64::GPR64RegClass;
  } else {
    SelectOpc = AArch64::CSELWr;
    RC = &AArch64::GPR32RegClass;
  }
  // Select the biased value for negative dividends, the original otherwise.
  Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
                                        AArch64CC::LT);
  if (!SelectReg)
    return false;

  // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
  // negate the result.
  Register ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
  Register ResultReg;
  if (C.isNegative())
    ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
                              AArch64_AM::ASR, Lg2);
  else
    ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}
4937
4938/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4939/// have to duplicate it for AArch64, because otherwise we would fail during the
4940/// sign-extend emission.
4941Register AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4942 Register IdxN = getRegForValue(Idx);
4943 if (!IdxN)
4944 // Unhandled operand. Halt "fast" selection and bail.
4945 return Register();
4946
4947 // If the index is smaller or larger than intptr_t, truncate or extend it.
4948 MVT PtrVT = TLI.getPointerTy(DL);
4949 EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4950 if (IdxVT.bitsLT(PtrVT)) {
4951 IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
4952 } else if (IdxVT.bitsGT(PtrVT))
4953 llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4954 return IdxN;
4955}
4956
4957/// This is mostly a copy of the existing FastISel GEP code, but we have to
4958/// duplicate it for AArch64, because otherwise we would bail out even for
4959/// simple cases. This is because the standard fastEmit functions don't cover
4960/// MUL at all and ADD is lowered very inefficientily.
4961bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4962 if (Subtarget->isTargetILP32())
4963 return false;
4964
4965 Register N = getRegForValue(I->getOperand(0));
4966 if (!N)
4967 return false;
4968
4969 // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4970 // into a single N = N + TotalOffset.
4971 uint64_t TotalOffs = 0;
4972 MVT VT = TLI.getPointerTy(DL);
4974 GTI != E; ++GTI) {
4975 const Value *Idx = GTI.getOperand();
4976 if (auto *StTy = GTI.getStructTypeOrNull()) {
4977 unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4978 // N = N + Offset
4979 if (Field)
4980 TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4981 } else {
4982 // If this is a constant subscript, handle it quickly.
4983 if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4984 if (CI->isZero())
4985 continue;
4986 // N = N + Offset
4987 TotalOffs += GTI.getSequentialElementStride(DL) *
4988 cast<ConstantInt>(CI)->getSExtValue();
4989 continue;
4990 }
4991 if (TotalOffs) {
4992 N = emitAdd_ri_(VT, N, TotalOffs);
4993 if (!N)
4994 return false;
4995 TotalOffs = 0;
4996 }
4997
4998 // N = N + Idx * ElementSize;
4999 uint64_t ElementSize = GTI.getSequentialElementStride(DL);
5000 Register IdxN = getRegForGEPIndex(Idx);
5001 if (!IdxN)
5002 return false;
5003
5004 if (ElementSize != 1) {
5005 Register C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
5006 if (!C)
5007 return false;
5008 IdxN = emitMul_rr(VT, IdxN, C);
5009 if (!IdxN)
5010 return false;
5011 }
5012 N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
5013 if (!N)
5014 return false;
5015 }
5016 }
5017 if (TotalOffs) {
5018 N = emitAdd_ri_(VT, N, TotalOffs);
5019 if (!N)
5020 return false;
5021 }
5022 updateValueMap(I, N);
5023 return true;
5024}
5025
5026bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
5027 assert(TM.getOptLevel() == CodeGenOptLevel::None &&
5028 "cmpxchg survived AtomicExpand at optlevel > -O0");
5029
5030 auto *RetPairTy = cast<StructType>(I->getType());
5031 Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5032 assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5033 "cmpxchg has a non-i1 status result");
5034
5035 MVT VT;
5036 if (!isTypeLegal(RetTy, VT))
5037 return false;
5038
5039 const TargetRegisterClass *ResRC;
5040 unsigned Opc, CmpOpc;
5041 // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5042 // extractvalue selection doesn't support that.
5043 if (VT == MVT::i32) {
5044 Opc = AArch64::CMP_SWAP_32;
5045 CmpOpc = AArch64::SUBSWrs;
5046 ResRC = &AArch64::GPR32RegClass;
5047 } else if (VT == MVT::i64) {
5048 Opc = AArch64::CMP_SWAP_64;
5049 CmpOpc = AArch64::SUBSXrs;
5050 ResRC = &AArch64::GPR64RegClass;
5051 } else {
5052 return false;
5053 }
5054
5055 const MCInstrDesc &II = TII.get(Opc);
5056
5057 Register AddrReg = getRegForValue(I->getPointerOperand());
5058 Register DesiredReg = getRegForValue(I->getCompareOperand());
5059 Register NewReg = getRegForValue(I->getNewValOperand());
5060
5061 if (!AddrReg || !DesiredReg || !NewReg)
5062 return false;
5063
5064 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
5065 DesiredReg = constrainOperandRegClass(II, DesiredReg, II.getNumDefs() + 1);
5066 NewReg = constrainOperandRegClass(II, NewReg, II.getNumDefs() + 2);
5067
5068 const Register ResultReg1 = createResultReg(ResRC);
5069 const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5070 const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5071
5072 // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5073 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
5074 .addDef(ResultReg1)
5075 .addDef(ScratchReg)
5076 .addUse(AddrReg)
5077 .addUse(DesiredReg)
5078 .addUse(NewReg);
5079
5080 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc))
5081 .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5082 .addUse(ResultReg1)
5083 .addUse(DesiredReg)
5084 .addImm(0);
5085
5086 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr))
5087 .addDef(ResultReg2)
5088 .addUse(AArch64::WZR)
5089 .addUse(AArch64::WZR)
5091
5092 assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5093 updateValueMap(I, ResultReg1, 2);
5094 return true;
5095}
5096
5097bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5098 if (TLI.fallBackToDAGISel(*I))
5099 return false;
5100 switch (I->getOpcode()) {
5101 default:
5102 break;
5103 case Instruction::Add:
5104 case Instruction::Sub:
5105 return selectAddSub(I);
5106 case Instruction::Mul:
5107 return selectMul(I);
5108 case Instruction::SDiv:
5109 return selectSDiv(I);
5110 case Instruction::SRem:
5111 if (!selectBinaryOp(I, ISD::SREM))
5112 return selectRem(I, ISD::SREM);
5113 return true;
5114 case Instruction::URem:
5115 if (!selectBinaryOp(I, ISD::UREM))
5116 return selectRem(I, ISD::UREM);
5117 return true;
5118 case Instruction::Shl:
5119 case Instruction::LShr:
5120 case Instruction::AShr:
5121 return selectShift(I);
5122 case Instruction::And:
5123 case Instruction::Or:
5124 case Instruction::Xor:
5125 return selectLogicalOp(I);
5126 case Instruction::CondBr:
5127 return selectBranch(I);
5128 case Instruction::IndirectBr:
5129 return selectIndirectBr(I);
5130 case Instruction::BitCast:
5132 return selectBitCast(I);
5133 return true;
5134 case Instruction::FPToSI:
5135 if (!selectCast(I, ISD::FP_TO_SINT))
5136 return selectFPToInt(I, /*Signed=*/true);
5137 return true;
5138 case Instruction::FPToUI:
5139 return selectFPToInt(I, /*Signed=*/false);
5140 case Instruction::ZExt:
5141 case Instruction::SExt:
5142 return selectIntExt(I);
5143 case Instruction::Trunc:
5144 if (!selectCast(I, ISD::TRUNCATE))
5145 return selectTrunc(I);
5146 return true;
5147 case Instruction::FPExt:
5148 return selectFPExt(I);
5149 case Instruction::FPTrunc:
5150 return selectFPTrunc(I);
5151 case Instruction::SIToFP:
5152 if (!selectCast(I, ISD::SINT_TO_FP))
5153 return selectIntToFP(I, /*Signed=*/true);
5154 return true;
5155 case Instruction::UIToFP:
5156 return selectIntToFP(I, /*Signed=*/false);
5157 case Instruction::Load:
5158 return selectLoad(I);
5159 case Instruction::Store:
5160 return selectStore(I);
5161 case Instruction::FCmp:
5162 case Instruction::ICmp:
5163 return selectCmp(I);
5164 case Instruction::Select:
5165 return selectSelect(I);
5166 case Instruction::Ret:
5167 return selectRet(I);
5168 case Instruction::FRem:
5169 return selectFRem(I);
5170 case Instruction::GetElementPtr:
5171 return selectGetElementPtr(I);
5172 case Instruction::AtomicCmpXchg:
5173 return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5174 }
5175
5176 // fall-back to target-independent instruction selection.
5177 return selectOperator(I, I->getOpcode());
5178}
5179
5181 const TargetLibraryInfo *LibInfo,
5182 const LibcallLoweringInfo *LibcallLowering) {
5183
5184 SMEAttrs CallerAttrs =
5185 FuncInfo.MF->getInfo<AArch64FunctionInfo>()->getSMEFnAttrs();
5186 if (CallerAttrs.hasZAState() || CallerAttrs.hasZT0State() ||
5187 CallerAttrs.hasStreamingInterfaceOrBody() ||
5188 CallerAttrs.hasStreamingCompatibleInterface() ||
5189 CallerAttrs.hasAgnosticZAInterface())
5190 return nullptr;
5191 return new AArch64FastISel(FuncInfo, LibInfo, LibcallLowering);
5192}
static bool isIntExtFree(const Instruction *I)
Check if the sign-/zero-extend will be a noop.
static bool isSExtLoad(const MachineInstr *LI)
static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred)
static bool isMulPowOf2(const Value *I)
Check if the multiply is by a power-of-2 constant.
static unsigned getImplicitScaleFactor(MVT VT)
Determine the implicit scale factor that is applied by a memory operation for a given value type.
static bool isZExtLoad(const MachineInstr *LI)
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the basic binary operation GenericOpc (such as G_OR or G_SDIV),...
static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos, const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2, int Offset, bool IsPostDec)
Emit a load-pair instruction for frame-destroy.
static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos, const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2, int Offset, bool IsPreDec)
Emit a store-pair instruction for frame-setup.
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
basic Basic Alias true
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
This file defines the FastISel class.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
uint64_t IntrinsicInst * II
OptimizedStructLayoutField Field
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
SI Pre allocate WWM Registers
This file defines the SmallVector class.
static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
Value * RHS
Value * LHS
static const unsigned FramePtr
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
bool isAnyArgRegReserved(const MachineFunction &MF) const
void emitReservedArgRegCallError(const MachineFunction &MF) const
Register getFrameRegister(const MachineFunction &MF) const override
const AArch64RegisterInfo * getRegisterInfo() const override
unsigned ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const
ClassifyGlobalReference - Find the target operand flags that describe how a global value should be re...
bool hasCustomCallingConv() const
PointerType * getType() const
Overload to return most specific pointer type.
InstListType::const_iterator const_iterator
Definition BasicBlock.h:171
Register getLocReg() const
LocInfo getLocInfo() const
unsigned getValNo() const
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:679
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition InstrTypes.h:693
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:691
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:680
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:681
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:690
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:684
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:687
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:688
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition InstrTypes.h:685
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:692
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:689
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition InstrTypes.h:678
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:686
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
bool isUnsigned() const
Definition InstrTypes.h:936
Value * getCondition() const
BasicBlock * getSuccessor(unsigned i) const
const APFloat & getValueAPF() const
Definition Constants.h:463
bool isNegative() const
Return true if the sign bit is set.
Definition Constants.h:470
bool isZero() const
Return true if the value is positive or negative zero.
Definition Constants.h:467
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition Constants.h:174
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:74
constexpr bool isVector() const
One or more elements.
Definition TypeSize.h:324
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
bool selectBitCast(const User *I)
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
PointerType * getType() const
Global values are always pointers.
iterator_range< succ_iterator > successors()
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Tracks which library functions to use for a particular subtarget.
Machine Value Type.
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
bool isVector() const
Return true if this is a vector value type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
bool is64BitVector() const
Return true if this is a 64-bit vector type.
MachineInstrBundleIterator< MachineInstr > iterator
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setFrameAddressIsTaken(bool T)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Flags
Flags values. These may be or'd together.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
Value * getLength() const
unsigned getDestAddressSpace() const
bool isVolatile() const
constexpr unsigned id() const
Definition Register.h:100
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasStreamingCompatibleInterface() const
bool hasAgnosticZAInterface() const
bool hasStreamingInterfaceOrBody() const
void reserve(size_type N)
void push_back(const T &Elt)
TypeSize getElementOffset(unsigned Idx) const
Definition DataLayout.h:767
Provides information about what library functions are available for the current target.
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:290
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition Type.h:281
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:284
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:278
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:328
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:440
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo, const LibcallLoweringInfo *libcallLowering)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
LLVM_ABI Libcall getREM(EVT VT)
@ User
could "use" a pointer
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
Definition Utils.cpp:56
LLVM_ABI void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
bool CC_AArch64_Win64PCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
bool CC_AArch64_DarwinPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
gep_type_iterator gep_type_end(const User *GEP)
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
bool CC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
generic_gep_type_iterator<> gep_type_iterator
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool CC_AArch64_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
bool RetCC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
gep_type_iterator gep_type_begin(const User *GEP)
bool CC_AArch64_Win64_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:292
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:308
ElementCount getVectorElementCount() const
Definition ValueTypes.h:358
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl)
Get the libcall routine name for the specified libcall implementation.