#include "llvm/IR/IntrinsicsAArch64.h"

class AArch64FastISel final : public FastISel {
  class Address {
  public:
    using BaseKind = enum {
      RegBase,
      FrameIndexBase
    };

  private:
    BaseKind Kind = RegBase;
    union {
      unsigned Reg;
      int FI;
    } Base;
    unsigned OffsetReg = 0;
    unsigned Shift = 0;
    int64_t Offset = 0;

  public:
    void setKind(BaseKind K) { Kind = K; }
    BaseKind getKind() const { return Kind; }
    bool isRegBase() const { return Kind == RegBase; }
    bool isFIBase() const { return Kind == FrameIndexBase; }

    void setReg(unsigned Reg) {
      assert(isRegBase() && "Invalid base register access!");
      Base.Reg = Reg;
    }

    unsigned getReg() const {
      assert(isRegBase() && "Invalid base register access!");
      return Base.Reg;
    }

    void setOffsetReg(unsigned Reg) { OffsetReg = Reg; }
    unsigned getOffsetReg() const { return OffsetReg; }

    void setFI(unsigned FI) {
      assert(isFIBase() && "Invalid base frame index access!");
      Base.FI = FI;
    }

    unsigned getFI() const {
      assert(isFIBase() && "Invalid base frame index access!");
      return Base.FI;
    }

    void setOffset(int64_t O) { Offset = O; }
    void setShift(unsigned S) { Shift = S; }
    unsigned getShift() { return Shift; }
  };
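  // Note: in rough terms, an Address describes either a register base or a
  // frame-index base, optionally combined with an immediate offset and/or an
  // offset register that may be shifted (and extended), mirroring the AArch64
  // [base, #imm] and [base, Xm{, extend {#shift}}] addressing modes used by
  // the load/store emission below.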
  bool selectRem(const Instruction *I, unsigned ISDOpcode);

  bool isTypeLegal(Type *Ty, MVT &VT);
  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
  bool isValueAvailable(const Value *V) const;
  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
  bool computeCallAddress(const Value *V, Address &Addr);
  bool simplifyAddress(Address &Addr, MVT VT);
  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                          MaybeAlign Alignment);

  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                      const Value *RHS, bool SetFlags = false,
                      bool WantResult = true, bool IsZExt = false);
  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         unsigned RHSReg, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg, uint64_t Imm,
                         bool SetFlags = false, bool WantResult = true);
  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);
  bool emitCompareAndBranch(const BranchInst *BI);
  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
  bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
                        MachineMemOperand *MMO = nullptr);
  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
                       bool WantResult = true);
  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
                       AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
                       bool WantResult = true);
  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
                         const Value *RHS);
  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            uint64_t Imm);
  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            unsigned RHSReg, uint64_t ShiftImm);
  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
  unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
                      bool IsZExt = true);
  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
                      bool IsZExt = true);
  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
                      bool IsZExt = false);

  bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes);
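  // Naming convention used above (a rough summary): the _rr, _ri, _rs and _rx
  // suffixes select the register-register, register-immediate,
  // shifted-register and extended-register instruction forms, SetFlags chooses
  // the flag-setting variant (ADDS/SUBS), and WantResult indicates whether the
  // value is needed or only the flags (e.g. for a compare).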
#include "AArch64GenFastISel.inc"

  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
         "Unexpected integer extend instruction.");
  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
         "Unexpected value type.");
  bool IsZExt = isa<ZExtInst>(I);

  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))

  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
         "Alloca should always return a pointer.");

  if (!FuncInfo.StaticAllocaMap.count(AI))

  if (SI != FuncInfo.StaticAllocaMap.end()) {
    Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);

unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
                                                   : &AArch64::GPR32RegClass;
  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
  Register ResultReg = createResultReg(RC);

unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
    return fastMaterializeFloatZero(CFP);

  if (VT != MVT::f32 && VT != MVT::f64)

  bool Is64Bit = (VT == MVT::f64);
    unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
    return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);

  unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
      &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  Register TmpReg = createResultReg(RC);

  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
          TII.get(TargetOpcode::COPY), ResultReg)

  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
  Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);

  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
  if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())

  unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);

  EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);

  Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);

    if (Subtarget->isTargetILP32()) {
      ResultReg = createResultReg(&AArch64::GPR32RegClass);
      LdrOpc = AArch64::LDRWui;
      ResultReg = createResultReg(&AArch64::GPR64RegClass);
      LdrOpc = AArch64::LDRXui;

    if (!Subtarget->isTargetILP32())

    Register Result64 = createResultReg(&AArch64::GPR64RegClass);
            TII.get(TargetOpcode::SUBREG_TO_REG))

    unsigned DstReg = createResultReg(&AArch64::GPR64commonRegClass);
    ResultReg = createResultReg(&AArch64::GPR64spRegClass);
unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(DL, C->getType(), true);

  if (isa<ConstantPointerNull>(C)) {
    assert(VT == MVT::i64 && "Expected 64-bit pointers");

  if (const auto *CI = dyn_cast<ConstantInt>(C))
    return materializeInt(CI, VT);
  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return materializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return materializeGV(GV);

unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
         "Floating-point constant is not a positive zero.");

  if (!isTypeLegal(CFP->getType(), VT))

  if (VT != MVT::f32 && VT != MVT::f64)

  bool Is64Bit = (VT == MVT::f64);
  unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
  unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
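// Note: positive floating-point zero is materialized with an integer-to-FP
// FMOV from the zero register (XZR/WZR) rather than a constant-pool load,
// which is why this case is split out of the general materializeFP path above.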
  if (const auto *MI = dyn_cast<MulOperator>(I)) {
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
      if (C->getValue().isPowerOf2())
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
      if (C->getValue().isPowerOf2())
bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
{
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    Opcode = I->getOpcode();
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();

  if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
    if (Ty->getAddressSpace() > 255)

  case Instruction::BitCast:
    return computeAddress(U->getOperand(0), Addr, Ty);

  case Instruction::IntToPtr:
    if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
        TLI.getPointerTy(DL))
      return computeAddress(U->getOperand(0), Addr, Ty);

  case Instruction::PtrToInt:
    if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return computeAddress(U->getOperand(0), Addr, Ty);

  case Instruction::GetElementPtr: {
      const Value *Op = GTI.getOperand();
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
          if (canFoldAddIntoGEP(U, Op)) {
                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
            Op = cast<AddOperator>(Op)->getOperand(0);
          goto unsupported_gep;

    Addr.setOffset(TmpOffset);
    if (computeAddress(U->getOperand(0), Addr, Ty))

  case Instruction::Alloca: {
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      Addr.setKind(Address::FrameIndexBase);

  case Instruction::Add: {
    if (isa<ConstantInt>(LHS))

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
      return computeAddress(LHS, Addr, Ty);

    if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))

  case Instruction::Sub: {
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
      return computeAddress(LHS, Addr, Ty);

  case Instruction::Shl: {
    if (Addr.getOffsetReg())

    const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));

    if (Val < 1 || Val > 3)

    NumBytes = NumBits / 8;

    if (NumBytes != (1ULL << Val))

    const Value *Src = U->getOperand(0);
    if (const auto *I = dyn_cast<Instruction>(Src)) {
      if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
            ZE->getOperand(0)->getType()->isIntegerTy(32)) {
          Src = ZE->getOperand(0);
      } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
            SE->getOperand(0)->getType()->isIntegerTy(32)) {
          Src = SE->getOperand(0);

    if (const auto *AI = dyn_cast<BinaryOperator>(Src))
      if (AI->getOpcode() == Instruction::And) {
        if (const auto *C = dyn_cast<ConstantInt>(LHS))
          if (C->getValue() == 0xffffffff)
        if (const auto *C = dyn_cast<ConstantInt>(RHS))
          if (C->getValue() == 0xffffffff) {
            Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
            Addr.setOffsetReg(Reg);

    Addr.setOffsetReg(Reg);
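    // Roughly: a left shift by 1-3 that matches the access size is folded into
    // the addressing mode as a scaled offset register, and a free zero/sign
    // extend (or an explicit mask with 0xffffffff) lets the 32-bit index be
    // used directly as an extended register operand.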
  case Instruction::Mul: {
    if (Addr.getOffsetReg())

    if (const auto *C = dyn_cast<ConstantInt>(LHS))
      if (C->getValue().isPowerOf2())

    assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
    const auto *C = cast<ConstantInt>(RHS);
    unsigned Val = C->getValue().logBase2();
    if (Val < 1 || Val > 3)

    NumBytes = NumBits / 8;

    if (NumBytes != (1ULL << Val))

    if (const auto *I = dyn_cast<Instruction>(Src)) {
      if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
            ZE->getOperand(0)->getType()->isIntegerTy(32)) {
          Src = ZE->getOperand(0);
      } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
            SE->getOperand(0)->getType()->isIntegerTy(32)) {
          Src = SE->getOperand(0);

    Addr.setOffsetReg(Reg);

  case Instruction::And: {
    if (Addr.getOffsetReg())

    if (!Ty || DL.getTypeSizeInBits(Ty) != 8)

    if (const auto *C = dyn_cast<ConstantInt>(LHS))
      if (C->getValue() == 0xffffffff)
    if (const auto *C = dyn_cast<ConstantInt>(RHS))
      if (C->getValue() == 0xffffffff) {
        Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
        Addr.setOffsetReg(Reg);

  case Instruction::SExt:
  case Instruction::ZExt: {
    if (!Addr.getReg() || Addr.getOffsetReg())

    const Value *Src = nullptr;
    if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
      if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
        Src = ZE->getOperand(0);
    } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
      if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
        Src = SE->getOperand(0);

    Addr.setOffsetReg(Reg);

  if (Addr.isRegBase() && !Addr.getReg()) {

  if (!Addr.getOffsetReg()) {

    Addr.setOffsetReg(Reg);
bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const auto *I = dyn_cast<Instruction>(V)) {
    Opcode = I->getOpcode();
    InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();

  case Instruction::BitCast:
      return computeCallAddress(U->getOperand(0), Addr);

  case Instruction::IntToPtr:
        TLI.getValueType(DL, U->getOperand(0)->getType()) ==
            TLI.getPointerTy(DL))
      return computeCallAddress(U->getOperand(0), Addr);

  case Instruction::PtrToInt:
    if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return computeCallAddress(U->getOperand(0), Addr);

  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    Addr.setGlobalValue(GV);

  if (!Addr.getGlobalValue()) {
    Addr.setReg(getRegForValue(V));
    return Addr.getReg() != 0;
bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(DL, Ty, true);

  if (Subtarget->isTargetILP32() && Ty->isPointerTy())

  if (evt == MVT::Other || !evt.isSimple())

  return TLI.isTypeLegal(VT);

bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
  if (isTypeLegal(Ty, VT))

  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)

bool AArch64FastISel::isValueAvailable(const Value *V) const {
  if (!isa<Instruction>(V))

  const auto *I = cast<Instruction>(V);
bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
  if (Subtarget->isTargetILP32())

  bool ImmediateOffsetNeedsLowering = false;
  bool RegisterOffsetNeedsLowering = false;
    ImmediateOffsetNeedsLowering = true;
  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
           !isUInt<12>(Offset / ScaleFactor))
    ImmediateOffsetNeedsLowering = true;

  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
    RegisterOffsetNeedsLowering = true;

  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
    RegisterOffsetNeedsLowering = true;

  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
    Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    Addr.setKind(Address::RegBase);
    Addr.setReg(ResultReg);

  if (RegisterOffsetNeedsLowering) {
    unsigned ResultReg = 0;
    if (Addr.getReg()) {
        ResultReg = emitAddSub_rx(true, MVT::i64, Addr.getReg(),
                                  Addr.getOffsetReg(), Addr.getExtendType(),
        ResultReg = emitAddSub_rs(true, MVT::i64, Addr.getReg(),
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               Addr.getShift(), true);
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               Addr.getShift(), false);
        ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),

    Addr.setReg(ResultReg);
    Addr.setOffsetReg(0);

  if (ImmediateOffsetNeedsLowering) {
      ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);

    Addr.setReg(ResultReg);
void AArch64FastISel::addLoadStoreOperands(Address &Addr,
                                           unsigned ScaleFactor,
  int64_t Offset = Addr.getOffset() / ScaleFactor;

  if (Addr.isFIBase()) {
    int FI = Addr.getFI();
    MMO = FuncInfo.MF->getMachineMemOperand(
        MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
    assert(Addr.isRegBase() && "Unexpected address kind.");
    if (Addr.getOffsetReg()) {
      assert(Addr.getOffset() == 0 && "Unexpected offset");
unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                                     const Value *RHS, bool SetFlags,
                                     bool WantResult, bool IsZExt) {
  bool NeedExtend = false;

  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))

  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))

  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        if (SI->getOpcode() == Instruction::Shl ||
            SI->getOpcode() == Instruction::LShr ||
            SI->getOpcode() == Instruction::AShr)

  Register LHSReg = getRegForValue(LHS);
    LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);

  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
    if (C->isNegative())
      ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
  } else if (const auto *C = dyn_cast<Constant>(RHS))
    if (C->isNullValue())
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);

      isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
        if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
          Register RHSReg = getRegForValue(SI->getOperand(0));
          return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType,
                               C->getZExtValue(), SetFlags, WantResult);
    Register RHSReg = getRegForValue(RHS);
    return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
                         SetFlags, WantResult);

    const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
    const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
    if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
      if (C->getValue().isPowerOf2())
    assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
    uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
    Register RHSReg = getRegForValue(MulLHS);
    ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
                              ShiftVal, SetFlags, WantResult);

  if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
    if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
      switch (SI->getOpcode()) {
        Register RHSReg = getRegForValue(SI->getOperand(0));
        ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
                                  ShiftVal, SetFlags, WantResult);

  Register RHSReg = getRegForValue(RHS);
    RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);

  return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        unsigned RHSReg, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");

  if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
      RHSReg == AArch64::SP || RHSReg == AArch64::WSP)

  if (RetVT != MVT::i32 && RetVT != MVT::i64)

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrr,  AArch64::SUBXrr  },
      { AArch64::ADDWrr,  AArch64::ADDXrr  } },
    { { AArch64::SUBSWrr, AArch64::SUBSXrr },
      { AArch64::ADDSWrr, AArch64::ADDSXrr } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    ResultReg = createResultReg(RC);
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
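  // The opcode table above is indexed as OpcTable[SetFlags][UseAdd][Is64Bit];
  // for example, a flag-setting 64-bit add selects ADDSXrr. When only the
  // flags are wanted, the result is written to the zero register instead of a
  // fresh virtual register.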
unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        uint64_t Imm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && "Invalid register number.");

  if (RetVT != MVT::i32 && RetVT != MVT::i64)

  if (isUInt<12>(Imm))
  else if ((Imm & 0xfff000) == Imm) {

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWri,  AArch64::SUBXri  },
      { AArch64::ADDWri,  AArch64::ADDXri  } },
    { { AArch64::SUBSWri, AArch64::SUBSXri },
      { AArch64::ADDSWri, AArch64::ADDSXri } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
    ResultReg = createResultReg(RC);
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        unsigned RHSReg,
                                        AArch64_AM::ShiftExtendType ShiftType,
                                        uint64_t ShiftImm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");
  assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
         RHSReg != AArch64::SP && RHSReg != AArch64::WSP);

  if (RetVT != MVT::i32 && RetVT != MVT::i64)

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrs,  AArch64::SUBXrs  },
      { AArch64::ADDWrs,  AArch64::ADDXrs  } },
    { { AArch64::SUBSWrs, AArch64::SUBSXrs },
      { AArch64::ADDSWrs, AArch64::ADDSXrs } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    ResultReg = createResultReg(RC);
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
      .addImm(getShifterImm(ShiftType, ShiftImm));

unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        unsigned RHSReg,
                                        AArch64_AM::ShiftExtendType ExtType,
                                        uint64_t ShiftImm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");
  assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
         RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);

  if (RetVT != MVT::i32 && RetVT != MVT::i64)

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrx,  AArch64::SUBXrx  },
      { AArch64::ADDWrx,  AArch64::ADDXrx  } },
    { { AArch64::SUBSWrx, AArch64::SUBSXrx },
      { AArch64::ADDSWrx, AArch64::ADDSXrx } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
    ResultReg = createResultReg(RC);
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
      .addImm(getArithExtendImm(ExtType, ShiftImm));
bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
  EVT EVT = TLI.getValueType(DL, Ty, true);
    return emitICmp(VT, LHS, RHS, IsZExt);
    return emitFCmp(VT, LHS, RHS);

bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
                               bool IsZExt) {
  return emitSub(RetVT, LHS, RHS, true, false,

bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
  return emitAddSub_ri(false, RetVT, LHSReg, Imm,

bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
  if (RetVT != MVT::f32 && RetVT != MVT::f64)

  bool UseImm = false;
  if (const auto *CFP = dyn_cast<ConstantFP>(RHS))

  Register LHSReg = getRegForValue(LHS);
    unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
  Register RHSReg = getRegForValue(RHS);
  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                                  bool SetFlags, bool WantResult, bool IsZExt) {
  return emitAddSub(true, RetVT, LHS, RHS, SetFlags, WantResult, IsZExt);

unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {
    ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
    ResultReg = emitAddSub_ri(true, VT, Op0, Imm);
    ResultReg = emitAddSub_rr(true, VT, Op0, CReg);

unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                                  bool SetFlags, bool WantResult, bool IsZExt) {
  return emitAddSub(false, RetVT, LHS, RHS, SetFlags, WantResult, IsZExt);

unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
                                      unsigned RHSReg, bool WantResult) {
  return emitAddSub_rr(false, RetVT, LHSReg, RHSReg,

unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
                                      unsigned RHSReg,
                                      AArch64_AM::ShiftExtendType ShiftType,
                                      uint64_t ShiftImm, bool WantResult) {
  return emitAddSub_rs(false, RetVT, LHSReg, RHSReg, ShiftType,
                       ShiftImm, true, WantResult);
unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
                                        const Value *LHS, const Value *RHS) {
  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))

  if (const auto *SI = dyn_cast<ShlOperator>(LHS))
    if (isa<ConstantInt>(SI->getOperand(1)))

  Register LHSReg = getRegForValue(LHS);

  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);

    const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
    const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
    if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
      if (C->getValue().isPowerOf2())
    assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
    uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
    Register RHSReg = getRegForValue(MulLHS);
    ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);

  if (const auto *SI = dyn_cast<ShlOperator>(RHS))
    if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
      Register RHSReg = getRegForValue(SI->getOperand(0));
      ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);

  Register RHSReg = getRegForValue(RHS);
  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);

unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
                                           unsigned LHSReg, uint64_t Imm) {
                "ISD nodes are not consecutive!");
  static const unsigned OpcTable[3][2] = {
    { AArch64::ANDWri, AArch64::ANDXri },
    { AArch64::ORRWri, AArch64::ORRXri },
    { AArch64::EORWri, AArch64::EORXri }
  };
    Opc = OpcTable[Idx][0];
    RC = &AArch64::GPR32spRegClass;
    Opc = OpcTable[ISDOpc - ISD::AND][1];
    RC = &AArch64::GPR64spRegClass;
      fastEmitInst_ri(Opc, RC, LHSReg,
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);

unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
                                           unsigned LHSReg, unsigned RHSReg,
                                           uint64_t ShiftImm) {
                "ISD nodes are not consecutive!");
  static const unsigned OpcTable[3][2] = {
    { AArch64::ANDWrs, AArch64::ANDXrs },
    { AArch64::ORRWrs, AArch64::ORRXrs },
    { AArch64::EORWrs, AArch64::EORXrs }
  };
    Opc = OpcTable[ISDOpc - ISD::AND][0];
    RC = &AArch64::GPR32RegClass;
    Opc = OpcTable[ISDOpc - ISD::AND][1];
    RC = &AArch64::GPR64RegClass;
      fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);

unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
  return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);
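// Note: i8 and i16 results are produced in a 32-bit register, so the helpers
// above mask the result with 0xff/0xffff (via emitAnd_ri) to keep the upper
// bits zero, except when the operation itself is already an AND.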
unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
  if (!TLI.allowsMisalignedMemoryAccesses(VT))

  if (!simplifyAddress(Addr, VT))

  bool UseScaled = true;
  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {

  static const unsigned GPOpcTable[2][8][4] = {
    { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
      { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
      { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
      { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
      { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
      { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
      { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
      { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,

    { { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
      { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
      { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
      { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
      { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
      { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
      { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
      { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,

  static const unsigned FPOpcTable[4][2] = {
    { AArch64::LDURSi,  AArch64::LDURDi  },
    { AArch64::LDRSui,  AArch64::LDRDui  },
    { AArch64::LDRSroX, AArch64::LDRDroX },
    { AArch64::LDRSroW, AArch64::LDRDroW }
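  // Rough indexing scheme: GPOpcTable is selected by sign- versus
  // zero-extension (WantZExt), by the addressing form chosen below (unscaled
  // immediate, scaled immediate, or register offset) combined with the
  // destination width, and by the access size column; FPOpcTable only needs
  // the addressing form and the f32/f64 choice.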
  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
                      Addr.getOffsetReg();
  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;

  bool IsRet64Bit = RetVT == MVT::i64;
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
    RC = &AArch64::GPR64RegClass;
    Opc = FPOpcTable[Idx][0];
    RC = &AArch64::FPR32RegClass;
    Opc = FPOpcTable[Idx][1];
    RC = &AArch64::FPR64RegClass;

  Register ResultReg = createResultReg(RC);
                                    TII.get(Opc), ResultReg);

  if (VT == MVT::i1) {
    unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
    assert(ANDReg && "Unexpected AND instruction emission failure.");

  if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
    Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
            TII.get(AArch64::SUBREG_TO_REG), Reg64)
        .addImm(AArch64::sub_32);
bool AArch64FastISel::selectAddSub(const Instruction *I) {
  if (!isTypeSupported(I->getType(), VT, true))

    return selectOperator(I, I->getOpcode());

  switch (I->getOpcode()) {
  case Instruction::Add:
    ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
  case Instruction::Sub:
    ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));

  updateValueMap(I, ResultReg);

bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
  if (!isTypeSupported(I->getType(), VT, true))

    return selectOperator(I, I->getOpcode());

  switch (I->getOpcode()) {
  case Instruction::And:
    ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
  case Instruction::Or:
    ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
  case Instruction::Xor:
    ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));

  updateValueMap(I, ResultReg);
bool AArch64FastISel::selectLoad(const Instruction *I) {
  if (!isTypeSupported(I->getType(), VT, true) ||
      cast<LoadInst>(I)->isAtomic())

  const Value *SV = I->getOperand(0);
  if (TLI.supportSwiftError()) {
    if (const Argument *Arg = dyn_cast<Argument>(SV)) {
      if (Arg->hasSwiftErrorAttr())
    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
      if (Alloca->isSwiftError())

  if (!computeAddress(I->getOperand(0), Addr, I->getType()))

  bool WantZExt = true;
  const Value *IntExtVal = nullptr;
  if (I->hasOneUse()) {
    if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
      if (isTypeSupported(ZE->getType(), RetVT))
    } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
      if (isTypeSupported(SE->getType(), RetVT))

  unsigned ResultReg =
      emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));

    auto *MI = MRI.getUniqueVRegDef(Reg);
      if (RetVT == MVT::i64 && VT <= MVT::i32) {
          ResultReg = std::prev(I)->getOperand(0).getReg();
          removeDeadCode(I, std::next(I));
          ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
        updateValueMap(I, ResultReg);

      for (auto &Opnd : MI->uses()) {
          Reg = Opnd.getReg();
      removeDeadCode(I, std::next(I));
      MI = MRI.getUniqueVRegDef(Reg);
    updateValueMap(IntExtVal, ResultReg);

  updateValueMap(I, ResultReg);
bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
                                       unsigned AddrReg,
                                       MachineMemOperand *MMO) {
  default: return false;
  case MVT::i8:  Opc = AArch64::STLRB; break;
  case MVT::i16: Opc = AArch64::STLRH; break;
  case MVT::i32: Opc = AArch64::STLRW; break;
  case MVT::i64: Opc = AArch64::STLRX; break;

bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
                                MachineMemOperand *MMO) {
  if (!TLI.allowsMisalignedMemoryAccesses(VT))

  if (!simplifyAddress(Addr, VT))

  bool UseScaled = true;
  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {

  static const unsigned OpcTable[4][6] = {
    { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
      AArch64::STURSi,   AArch64::STURDi },
    { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
      AArch64::STRSui,   AArch64::STRDui },
    { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
      AArch64::STRSroX,  AArch64::STRDroX },
    { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
      AArch64::STRSroW,  AArch64::STRDroW }
  };

  bool VTIsi1 = false;
  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
                      Addr.getOffsetReg();
  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;

  case MVT::i1:  VTIsi1 = true; [[fallthrough]];
  case MVT::i8:  Opc = OpcTable[Idx][0]; break;
  case MVT::i16: Opc = OpcTable[Idx][1]; break;
  case MVT::i32: Opc = OpcTable[Idx][2]; break;
  case MVT::i64: Opc = OpcTable[Idx][3]; break;
  case MVT::f32: Opc = OpcTable[Idx][4]; break;
  case MVT::f64: Opc = OpcTable[Idx][5]; break;

  if (VTIsi1 && SrcReg != AArch64::WZR) {
    unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
    assert(ANDReg && "Unexpected AND instruction emission failure.");
bool AArch64FastISel::selectStore(const Instruction *I) {
  const Value *Op0 = I->getOperand(0);
  if (!isTypeSupported(Op0->getType(), VT, true))

  const Value *PtrV = I->getOperand(1);
  if (TLI.supportSwiftError()) {
    if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
      if (Arg->hasSwiftErrorAttr())
    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
      if (Alloca->isSwiftError())

  unsigned SrcReg = 0;
  if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
      SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
  } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
    if (CF->isZero() && !CF->isNegative()) {
      SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;

    SrcReg = getRegForValue(Op0);

  auto *SI = cast<StoreInst>(I);
  if (SI->isAtomic()) {
    Register AddrReg = getRegForValue(PtrV);
    return emitStoreRelease(VT, SrcReg, AddrReg,
                            createMachineMemOperandFor(I));
bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
  if (FuncInfo.MF->getFunction().hasFnAttribute(
          Attribute::SpeculativeLoadHardening))

  switch (Predicate) {
    if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
    if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())

    if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
      if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
        if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
          if (C->getValue().isPowerOf2())
        if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
          if (C->getValue().isPowerOf2()) {
            TestBit = C->getValue().logBase2();

    if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())

    if (!isa<ConstantInt>(RHS))

    if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))

  static const unsigned OpcTable[2][2][2] = {
    { {AArch64::CBZW,  AArch64::CBZX },
      {AArch64::CBNZW, AArch64::CBNZX} },
    { {AArch64::TBZW,  AArch64::TBZX },
      {AArch64::TBNZW, AArch64::TBNZX} }

  bool IsBitTest = TestBit != -1;
  bool Is64Bit = BW == 64;
  if (TestBit < 32 && TestBit >= 0)

  unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];

  Register SrcReg = getRegForValue(LHS);

  if (BW == 64 && !Is64Bit)
    SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);

  if ((BW < 32) && !IsBitTest)
    SrcReg = emitIntExt(VT, SrcReg, MVT::i32, true);
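  // In outline: compares against zero become CBZ/CBNZ, while single-bit tests
  // (an AND with a power of two, or a sign-bit check) become TBZ/TBNZ; the
  // source register is then truncated or zero-extended to the width expected
  // by the chosen branch opcode.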
bool AArch64FastISel::selectBranch(const Instruction *I) {
  if (CI->hasOneUse() && isValueAvailable(CI)) {
    switch (Predicate) {
      fastEmitBranch(FBB, MIMD.getDL());
      fastEmitBranch(TBB, MIMD.getDL());

    if (emitCompareAndBranch(BI))

    switch (Predicate) {
  } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {

  unsigned Opcode = AArch64::TBNZW;
    Opcode = AArch64::TBZW;
      .addReg(ConstrainedCondReg)

bool AArch64FastISel::selectIndirectBr(const Instruction *I) {

  const CmpInst *CI = cast<CmpInst>(I);

  unsigned ResultReg = 0;
  switch (Predicate) {
    ResultReg = createResultReg(&AArch64::GPR32RegClass);
            TII.get(TargetOpcode::COPY), ResultReg)
    ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
    updateValueMap(I, ResultReg);

  ResultReg = createResultReg(&AArch64::GPR32RegClass);

  static unsigned CondCodeTable[2][2] = {
  switch (Predicate) {
    Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
    updateValueMap(I, ResultReg);

  updateValueMap(I, ResultReg);
bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
  if (!SI->getType()->isIntegerTy(1))

  const Value *Src1Val, *Src2Val;
  bool NeedExtraOp = false;
  if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
      Src1Val = SI->getCondition();
      Src2Val = SI->getFalseValue();
      Opc = AArch64::ORRWrr;
      Src1Val = SI->getFalseValue();
      Src2Val = SI->getCondition();
      Opc = AArch64::BICWrr;
  } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
      Src1Val = SI->getCondition();
      Src2Val = SI->getTrueValue();
      Opc = AArch64::ORRWrr;
      Src1Val = SI->getCondition();
      Src2Val = SI->getTrueValue();
      Opc = AArch64::ANDWrr;

  Register Src1Reg = getRegForValue(Src1Val);
  Register Src2Reg = getRegForValue(Src2Val);
    Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);

  Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
  updateValueMap(SI, ResultReg);
bool AArch64FastISel::selectSelect(const Instruction *I) {
  assert(isa<SelectInst>(I) && "Expected a select instruction.");
  if (!isTypeSupported(I->getType(), VT))

    Opc = AArch64::CSELWr;
    RC = &AArch64::GPR32RegClass;
    Opc = AArch64::CSELXr;
    RC = &AArch64::GPR64RegClass;
    Opc = AArch64::FCSELSrrr;
    RC = &AArch64::FPR32RegClass;
    Opc = AArch64::FCSELDrrr;
    RC = &AArch64::FPR64RegClass;

  if (optimizeSelect(SI))

  if (foldXALUIntrinsic(CC, I, Cond)) {
  } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
             isValueAvailable(Cond)) {
    const auto *Cmp = cast<CmpInst>(Cond);

    const Value *FoldSelect = nullptr;
    switch (Predicate) {
      FoldSelect = SI->getFalseValue();
      FoldSelect = SI->getTrueValue();

      Register SrcReg = getRegForValue(FoldSelect);
      updateValueMap(I, SrcReg);

    switch (Predicate) {

  Register Src1Reg = getRegForValue(SI->getTrueValue());
  Register Src2Reg = getRegForValue(SI->getFalseValue());

  if (!Src1Reg || !Src2Reg)

    Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);

  Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
  updateValueMap(I, ResultReg);
bool AArch64FastISel::selectFPExt(const Instruction *I) {
  if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())

  Register ResultReg = createResultReg(&AArch64::FPR64RegClass);
  updateValueMap(I, ResultReg);

bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
  if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())

  Register ResultReg = createResultReg(&AArch64::FPR32RegClass);
  updateValueMap(I, ResultReg);

  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())

  Register SrcReg = getRegForValue(I->getOperand(0));

  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
  if (SrcVT == MVT::f128 || SrcVT == MVT::f16)

  if (SrcVT == MVT::f64) {
      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;

  Register ResultReg = createResultReg(
      DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
  updateValueMap(I, ResultReg);

  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())

  if (DestVT == MVT::f16)

  assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
         "Unexpected value type.");

  Register SrcReg = getRegForValue(I->getOperand(0));

  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);

  if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {

  if (SrcVT == MVT::i64) {
      Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
      Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
      Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
      Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;

  Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
  updateValueMap(I, ResultReg);
bool AArch64FastISel::fastLowerArguments() {
  if (Subtarget->hasCustomCallingConv())

  unsigned GPRCnt = 0;
  unsigned FPRCnt = 0;
  for (auto const &Arg : F->args()) {
    if (Arg.hasAttribute(Attribute::ByVal) ||
        Arg.hasAttribute(Attribute::InReg) ||
        Arg.hasAttribute(Attribute::StructRet) ||
        Arg.hasAttribute(Attribute::SwiftSelf) ||
        Arg.hasAttribute(Attribute::SwiftAsync) ||
        Arg.hasAttribute(Attribute::SwiftError) ||
        Arg.hasAttribute(Attribute::Nest))

    EVT ArgVT = TLI.getValueType(DL, ArgTy);

        (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))

    if (VT >= MVT::i1 && VT <= MVT::i64)
    else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||

  if (GPRCnt > 8 || FPRCnt > 8)

    { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
      AArch64::W5, AArch64::W6, AArch64::W7 },
    { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
      AArch64::X5, AArch64::X6, AArch64::X7 },
    { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
      AArch64::H5, AArch64::H6, AArch64::H7 },
    { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
      AArch64::S5, AArch64::S6, AArch64::S7 },
    { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
      AArch64::D5, AArch64::D6, AArch64::D7 },
    { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
      AArch64::Q5, AArch64::Q6, AArch64::Q7 }
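  // These tables list the first eight AAPCS64 argument registers for each
  // register class (W/X for integers, H/S/D/Q for floating point and vectors);
  // fast argument lowering only handles arguments that fit entirely in them,
  // which is why the GPRCnt/FPRCnt > 8 check above bails out.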
  unsigned FPRIdx = 0;
  for (auto const &Arg : F->args()) {
    MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
    if (VT >= MVT::i1 && VT <= MVT::i32) {
      RC = &AArch64::GPR32RegClass;
    } else if (VT == MVT::i64) {
      RC = &AArch64::GPR64RegClass;
    } else if (VT == MVT::f16) {
      RC = &AArch64::FPR16RegClass;
    } else if (VT == MVT::f32) {
      RC = &AArch64::FPR32RegClass;
      RC = &AArch64::FPR64RegClass;
      RC = &AArch64::FPR128RegClass;

    Register ResultReg = createResultReg(RC);
            TII.get(TargetOpcode::COPY), ResultReg)
    updateValueMap(&Arg, ResultReg);
bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
                                      SmallVectorImpl<MVT> &OutVTs,
                                      unsigned &NumBytes) {
  CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));

  NumBytes = CCInfo.getStackSize();

  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();

    const Value *ArgVal = CLI.OutVals[VA.getValNo()];
    MVT ArgVT = OutVTs[VA.getValNo()];

    Register ArgReg = getRegForValue(ArgVal);

    switch (VA.getLocInfo()) {
      MVT DestVT = VA.getLocVT();
      ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, false);
      MVT DestVT = VA.getLocVT();
      ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, true);

    if (VA.isRegLoc() && !VA.needsCustom()) {
              TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
      CLI.OutRegs.push_back(VA.getLocReg());
    } else if (VA.needsCustom()) {
      assert(VA.isMemLoc() && "Assuming store on stack.");

      if (isa<UndefValue>(ArgVal))

      unsigned BEAlign = 0;
      if (ArgSize < 8 && !Subtarget->isLittleEndian())
        BEAlign = 8 - ArgSize;

      Addr.setKind(Address::RegBase);
      Addr.setReg(AArch64::SP);
      Addr.setOffset(VA.getLocMemOffset() + BEAlign);

bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) {
  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();

  CCInfo.AnalyzeCallResult(CLI.Ins, CCAssignFnForCall(CC));

  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    unsigned CopyReg = ResultReg + i;

    if (CopyVT.isVector() && !Subtarget->isLittleEndian())

  CLI.ResultReg = ResultReg;
  CLI.NumResultRegs = RVLocs.size();
bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
  bool IsTailCall = CLI.IsTailCall;
  bool IsVarArg = CLI.IsVarArg;

  if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
      !Subtarget->noBTIAtReturnTwice() &&

  if (CLI.CB && CLI.CB->isIndirectCall() &&

  if (Subtarget->isTargetILP32())

  for (auto Flag : CLI.OutFlags)
        Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())

  OutVTs.reserve(CLI.OutVals.size());

  for (auto *Val : CLI.OutVals) {
    if (!isTypeLegal(Val->getType(), VT) &&
        !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))

  if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
      Addr.getGlobalValue()->hasExternalWeakLinkage())

  if (!processCallArgs(CLI, OutVTs, NumBytes))

  if (RegInfo->isAnyArgRegReserved(*MF))
    RegInfo->emitReservedArgRegCallError(*MF);

  if (Subtarget->useSmallAddressing()) {
    else if (Addr.getGlobalValue())
    else if (Addr.getReg()) {
    unsigned CallReg = 0;
      Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
      CallReg = createResultReg(&AArch64::GPR64RegClass);
              TII.get(AArch64::LDRXui), CallReg)
    } else if (Addr.getGlobalValue())
      CallReg = materializeGV(Addr.getGlobalValue());
    else if (Addr.getReg())
      CallReg = Addr.getReg();

  for (auto Reg : CLI.OutRegs)

  return finishCall(CLI, NumBytes);

  return Len / Alignment->value() <= 4;
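// Heuristic (roughly): a memcpy is considered "small" when it can be lowered
// into at most four aligned load/store pairs, i.e. Len / alignment <= 4.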
bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
                                         uint64_t Len, MaybeAlign Alignment) {
  if (!isMemCpySmall(Len, Alignment))

  int64_t UnscaledOffset = 0;

    if (!Alignment || *Alignment >= 8) {
      assert(Alignment && "Alignment is set in this branch");
      if (Len >= 4 && *Alignment == 4)
      else if (Len >= 2 && *Alignment == 2)

    unsigned ResultReg = emitLoad(VT, VT, Src);

    UnscaledOffset += Size;

      Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
      Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
  if (!isa<ExtractValueInst>(Cond))

  const auto *EV = cast<ExtractValueInst>(Cond);
  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))

  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
      cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
  if (!isTypeLegal(RetTy, RetVT))

  if (RetVT != MVT::i32 && RetVT != MVT::i64)

  const Value *LHS = II->getArgOperand(0);
  const Value *RHS = II->getArgOperand(1);

  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())

  case Intrinsic::smul_with_overflow:
    if (const auto *C = dyn_cast<ConstantInt>(RHS))
      if (C->getValue() == 2)
        IID = Intrinsic::sadd_with_overflow;
  case Intrinsic::umul_with_overflow:
    if (const auto *C = dyn_cast<ConstantInt>(RHS))
      if (C->getValue() == 2)
        IID = Intrinsic::uadd_with_overflow;

  case Intrinsic::sadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:

  if (!isValueAvailable(II))

  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
    if (!isa<ExtractValueInst>(Itr))

    const auto *EVI = cast<ExtractValueInst>(Itr);
    if (EVI->getAggregateOperand() != II)
bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
  default: return false;
  case Intrinsic::frameaddress: {
    Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
      DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
      assert(DestReg && "Unexpected LDR instruction emission failure.");

    updateValueMap(II, SrcReg);
  case Intrinsic::sponentry: {
    Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
            TII.get(AArch64::ADDXri), ResultReg)
    updateValueMap(II, ResultReg);
  case Intrinsic::memcpy:
  case Intrinsic::memmove: {
    const auto *MTI = cast<MemTransferInst>(II);
    if (MTI->isVolatile())

    if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
      uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
      if (MTI->getDestAlign() || MTI->getSourceAlign())
        Alignment = std::min(MTI->getDestAlign().valueOrOne(),
                             MTI->getSourceAlign().valueOrOne());
      if (isMemCpySmall(Len, Alignment)) {
        if (!computeAddress(MTI->getRawDest(), Dest) ||
            !computeAddress(MTI->getRawSource(), Src))
        if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))

    if (!MTI->getLength()->getType()->isIntegerTy(64))

    if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)

    const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
    return lowerCallTo(II, IntrMemName, II->arg_size() - 1);
  case Intrinsic::memset: {
    const MemSetInst *MSI = cast<MemSetInst>(II);
    return lowerCallTo(II, "memset", II->arg_size() - 1);
  case Intrinsic::sin:
  case Intrinsic::cos:
  case Intrinsic::pow: {
    if (!isTypeLegal(II->getType(), RetVT))

    if (RetVT != MVT::f32 && RetVT != MVT::f64)

      { RTLIB::SIN_F32, RTLIB::SIN_F64 },
      { RTLIB::COS_F32, RTLIB::COS_F64 },
      { RTLIB::POW_F32, RTLIB::POW_F64 }

    bool Is64Bit = RetVT == MVT::f64;
    case Intrinsic::sin:
      LC = LibCallTable[0][Is64Bit];
    case Intrinsic::cos:
      LC = LibCallTable[1][Is64Bit];
    case Intrinsic::pow:
      LC = LibCallTable[2][Is64Bit];

    for (auto &Arg : II->args()) {
      Entry.Ty = Arg->getType();
      Args.push_back(Entry);

    CallLoweringInfo CLI;
    CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
                  TLI.getLibcallName(LC), std::move(Args));
    if (!lowerCallTo(CLI))
    updateValueMap(II, CLI.ResultReg);
  case Intrinsic::fabs: {
    if (!isTypeLegal(II->getType(), VT))

      Opc = AArch64::FABSSr;
      Opc = AArch64::FABSDr;
    Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
    updateValueMap(II, ResultReg);
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
  case Intrinsic::sqrt: {
    if (!isTypeLegal(RetTy, VT))

    unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);

    updateValueMap(II, ResultReg);
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: {
    auto *Ty = cast<StructType>(Callee->getReturnType());
    if (!isTypeLegal(RetTy, VT))

    if (VT != MVT::i32 && VT != MVT::i64)

    if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())

    case Intrinsic::smul_with_overflow:
      if (const auto *C = dyn_cast<ConstantInt>(RHS))
        if (C->getValue() == 2) {
          IID = Intrinsic::sadd_with_overflow;
    case Intrinsic::umul_with_overflow:
      if (const auto *C = dyn_cast<ConstantInt>(RHS))
        if (C->getValue() == 2) {
          IID = Intrinsic::uadd_with_overflow;

    unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
    case Intrinsic::sadd_with_overflow:
      ResultReg1 = emitAdd(VT, LHS, RHS, true);
    case Intrinsic::uadd_with_overflow:
      ResultReg1 = emitAdd(VT, LHS, RHS, true);
    case Intrinsic::ssub_with_overflow:
      ResultReg1 = emitSub(VT, LHS, RHS, true);
    case Intrinsic::usub_with_overflow:
      ResultReg1 = emitSub(VT, LHS, RHS, true);
    case Intrinsic::smul_with_overflow: {
      Register LHSReg = getRegForValue(LHS);
      Register RHSReg = getRegForValue(RHS);

      if (VT == MVT::i32) {
        MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
            fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
        emitAddSub_rx(false, MVT::i64, MulReg, MulSubReg,
        assert(VT == MVT::i64 && "Unexpected value type.");
        MulReg = emitMul_rr(VT, LHSReg, RHSReg);
        unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);
    case Intrinsic::umul_with_overflow: {
      Register LHSReg = getRegForValue(LHS);
      Register RHSReg = getRegForValue(RHS);

      if (VT == MVT::i32) {
        MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
                TII.get(AArch64::ANDSXri), AArch64::XZR)
        MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
        assert(VT == MVT::i64 && "Unexpected value type.");
        MulReg = emitMul_rr(VT, LHSReg, RHSReg);
        unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
        emitSubs_rr(VT, AArch64::XZR, UMULHReg, false);

      ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
              TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);

    ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
                                  AArch64::WZR, AArch64::WZR,
                                  getInvertedCondCode(CC));

    assert((ResultReg1 + 1) == ResultReg2 &&
           "Nonconsecutive result registers.");
    updateValueMap(II, ResultReg1, 2);
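    // Rough shape of the overflow lowering: add/sub overflow uses the
    // flag-setting emitAdd/emitSub and then materializes the overflow bit with
    // a CSINC on the inverted condition code, while the multiply cases widen
    // (SMULL/UMULL) or compare the high half (MULHS/MULHU) to detect overflow.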
  case Intrinsic::aarch64_crc32b:
  case Intrinsic::aarch64_crc32h:
  case Intrinsic::aarch64_crc32w:
  case Intrinsic::aarch64_crc32x:
  case Intrinsic::aarch64_crc32cb:
  case Intrinsic::aarch64_crc32ch:
  case Intrinsic::aarch64_crc32cw:
  case Intrinsic::aarch64_crc32cx: {
    if (!Subtarget->hasCRC())

    case Intrinsic::aarch64_crc32b:
      Opc = AArch64::CRC32Brr;
    case Intrinsic::aarch64_crc32h:
      Opc = AArch64::CRC32Hrr;
    case Intrinsic::aarch64_crc32w:
      Opc = AArch64::CRC32Wrr;
    case Intrinsic::aarch64_crc32x:
      Opc = AArch64::CRC32Xrr;
    case Intrinsic::aarch64_crc32cb:
      Opc = AArch64::CRC32CBrr;
    case Intrinsic::aarch64_crc32ch:
      Opc = AArch64::CRC32CHrr;
    case Intrinsic::aarch64_crc32cw:
      Opc = AArch64::CRC32CWrr;
    case Intrinsic::aarch64_crc32cx:
      Opc = AArch64::CRC32CXrr;

    if (!LHSReg || !RHSReg)

        fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, LHSReg, RHSReg);
    updateValueMap(II, ResultReg);
3837 const Function &
F = *
I->getParent()->getParent();
3845 if (TLI.supportSwiftError() &&
3846 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3849 if (TLI.supportSplitCSR(
FuncInfo.MF))
3855 if (
Ret->getNumOperands() > 0) {
3865 CCInfo.AnalyzeReturn(Outs, RetCC);
3868 if (ValLocs.
size() != 1)
3872 const Value *RV =
Ret->getOperand(0);
3890 if (!
MRI.getRegClass(SrcReg)->contains(DestReg))
3899 !Subtarget->isLittleEndian())
3903 if (RVVT == MVT::f128)
3908 if (RVVT != DestVT) {
3909 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3912 if (!Outs[0].
Flags.isZExt() && !Outs[0].Flags.isSExt())
3915 bool IsZExt = Outs[0].Flags.isZExt();
3916 SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3924 SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);
3928 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3935 TII.get(AArch64::RET_ReallyLR));
3936 for (unsigned RetReg : RetRegs)
3941bool AArch64FastISel::selectTrunc(const Instruction *I) {
3942 Type *DestTy = I->getType();
3944 Type *SrcTy = Op->getType();
3946 EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3947 EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3956 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3959 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3963 Register SrcReg = getRegForValue(Op);
3973 if (SrcVT == MVT::i64) {
3990 Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
3993 ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
3994 assert(ResultReg && "Unexpected AND instruction emission failure.");
3996 ResultReg = createResultReg(&AArch64::GPR32RegClass);
3998 TII.get(TargetOpcode::COPY), ResultReg)
4002 updateValueMap(I, ResultReg);
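// emiti1Ext extends an i1 value: zero-extension is an AND with 1, sign
// extension uses SBFM to replicate bit 0, and a 64-bit destination goes
// through SUBREG_TO_REG to widen the 32-bit result.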
4006unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
4007 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
4008 DestVT == MVT::i64) &&
4009 "Unexpected value type.");
4011 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4015 unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
4016 assert(ResultReg && "Unexpected AND instruction emission failure.");
4017 if (DestVT == MVT::i64) {
4020 Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4022 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4025 .addImm(AArch64::sub_32);
4030 if (DestVT == MVT::i64) {
4034 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
4039unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4047 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
4049 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4053 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4054 return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);
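// emitSMULL_rr / emitUMULL_rr produce the full 64-bit product of two 32-bit
// operands by emitting SMADDL / UMADDL with XZR as the addend.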
4057unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4058 if (RetVT != MVT::i64)
4061 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4062 Op0, Op1, AArch64::XZR);
4065unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4066 if (RetVT != MVT::i64)
4069 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4070 Op0, Op1, AArch64::XZR);
4073unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg) {
4076 bool NeedTrunc = false;
4080 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4081 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4082 case MVT::i32: Opc = AArch64::LSLVWr; break;
4083 case MVT::i64: Opc = AArch64::LSLVXr; break;
4087 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4089 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4091 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4093 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
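// The *_ri shift helpers below fold a constant shift amount and any pending
// zero/sign extension into a single UBFM/SBFM bitfield move, choosing the W
// or X form from the return type and widening the source with SUBREG_TO_REG
// when a 32-bit value feeds a 64-bit result.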
4097unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0, uint64_t Shift, bool IsZExt) {
4100 "Unexpected source/return type pair.");
4101 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4102 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4103 "Unexpected source value type.");
4104 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4105 RetVT == MVT::i64) && "Unexpected return value type.");
4107 bool Is64Bit = (RetVT == MVT::i64);
4108 unsigned RegSize = Is64Bit ? 64 : 32;
4112 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4116 if (RetVT == SrcVT) {
4117 Register ResultReg = createResultReg(RC);
4119 TII.get(TargetOpcode::COPY), ResultReg)
4123 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4127 if (Shift >= DstBits)
4155 unsigned ImmR = RegSize - Shift;
4157 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4158 static const unsigned OpcTable[2][2] = {
4159 {AArch64::SBFMWri, AArch64::SBFMXri},
4160 {AArch64::UBFMWri, AArch64::UBFMXri}
4162 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4163 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4166 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4169 .addImm(AArch64::sub_32);
4172 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4175unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg) {
4178 bool NeedTrunc = false;
4182 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4183 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4184 case MVT::i32: Opc = AArch64::LSRVWr; break;
4185 case MVT::i64: Opc = AArch64::LSRVXr; break;
4189 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4191 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
4192 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4194 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4196 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4200unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, uint64_t Shift, bool IsZExt) {
4203 "Unexpected source/return type pair.");
4204 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4205 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4206 "Unexpected source value type.");
4207 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4208 RetVT == MVT::i64) && "Unexpected return value type.");
4210 bool Is64Bit = (RetVT == MVT::i64);
4211 unsigned RegSize = Is64Bit ? 64 : 32;
4215 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4219 if (RetVT == SrcVT) {
4220 Register ResultReg = createResultReg(RC);
4222 TII.get(TargetOpcode::COPY), ResultReg)
4226 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4230 if (Shift >= DstBits)
4258 if (Shift >= SrcBits && IsZExt)
4264 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4272 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4273 unsigned ImmS = SrcBits - 1;
4274 static const unsigned OpcTable[2][2] = {
4275 {AArch64::SBFMWri, AArch64::SBFMXri},
4276 {AArch64::UBFMWri, AArch64::UBFMXri}
4278 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4279 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4282 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4285 .addImm(AArch64::sub_32);
4288 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4291unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg) {
4294 bool NeedTrunc = false;
4298 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4299 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4300 case MVT::i32: Opc = AArch64::ASRVWr; break;
4301 case MVT::i64: Opc = AArch64::ASRVXr; break;
4305 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4307 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4308 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4310 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4312 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4316unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, uint64_t Shift, bool IsZExt) {
4319 "Unexpected source/return type pair.");
4320 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4321 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4322 "Unexpected source value type.");
4323 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4324 RetVT == MVT::i64) && "Unexpected return value type.");
4326 bool Is64Bit = (RetVT == MVT::i64);
4327 unsigned RegSize = Is64Bit ? 64 : 32;
4331 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4335 if (RetVT == SrcVT) {
4336 Register ResultReg = createResultReg(RC);
4338 TII.get(TargetOpcode::COPY), ResultReg)
4342 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4346 if (Shift >= DstBits)
4374 if (Shift >= SrcBits && IsZExt)
4377 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4378 unsigned ImmS = SrcBits - 1;
4379 static const unsigned OpcTable[2][2] = {
4380 {AArch64::SBFMWri, AArch64::SBFMXri},
4381 {AArch64::UBFMWri, AArch64::UBFMXri}
4383 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4384 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4387 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4390 .addImm(AArch64::sub_32);
4393 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4396unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool IsZExt) {
4398 assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4404 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4405 (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4406 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4407 (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4417 return emiti1Ext(SrcReg, DestVT, IsZExt);
4419 if (DestVT == MVT::i64)
4420 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4422 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4426 if (DestVT == MVT::i64)
4427 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4429 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4433 assert(DestVT == MVT::i64 &&
"IntExt i32 to i32?!?");
4434 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4440 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4442 else if (DestVT == MVT::i64) {
4443 Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4445 TII.get(AArch64::SUBREG_TO_REG), Src64)
4448 .addImm(AArch64::sub_32);
4453 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4454 return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
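// The opcode lists below classify the loads FastISel may have emitted: the
// first group zero-extends its result, the second sign-extends it.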
4461 case AArch64::LDURBBi:
4462 case AArch64::LDURHHi:
4463 case AArch64::LDURWi:
4464 case AArch64::LDRBBui:
4465 case AArch64::LDRHHui:
4466 case AArch64::LDRWui:
4467 case AArch64::LDRBBroX:
4468 case AArch64::LDRHHroX:
4469 case AArch64::LDRWroX:
4470 case AArch64::LDRBBroW:
4471 case AArch64::LDRHHroW:
4472 case AArch64::LDRWroW:
4481 case AArch64::LDURSBWi:
4482 case AArch64::LDURSHWi:
4483 case AArch64::LDURSBXi:
4484 case AArch64::LDURSHXi:
4485 case AArch64::LDURSWi:
4486 case AArch64::LDRSBWui:
4487 case AArch64::LDRSHWui:
4488 case AArch64::LDRSBXui:
4489 case AArch64::LDRSHXui:
4490 case AArch64::LDRSWui:
4491 case AArch64::LDRSBWroX:
4492 case AArch64::LDRSHWroX:
4493 case AArch64::LDRSBXroX:
4494 case AArch64::LDRSHXroX:
4495 case AArch64::LDRSWroX:
4496 case AArch64::LDRSBWroW:
4497 case AArch64::LDRSHWroW:
4498 case AArch64::LDRSBXroW:
4499 case AArch64::LDRSHXroW:
4500 case AArch64::LDRSWroW:
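// optimizeIntExtLoad folds a zext/sext into its single-use load: when the
// load already produces a suitably extended value, the extend is replaced by
// a SUBREG_TO_REG (for i32 -> i64) or simply reuses the load's register, and
// the now-dead extend code is removed.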
4505bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT) {
4507 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4508 if (!LI || !LI->hasOneUse())
4522 bool IsZExt = isa<ZExtInst>(I);
4523 const auto *LoadMI = MI;
4524 if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4525 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4526 Register LoadReg = MI->getOperand(1).getReg();
4527 LoadMI = MRI.getUniqueVRegDef(LoadReg);
4528 assert(LoadMI && "Expected valid instruction");
4534 if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4535 updateValueMap(I, Reg);
4540 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
4542 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4545 .addImm(AArch64::sub_32);
4548 assert((MI->getOpcode() == TargetOpcode::COPY &&
4549 MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4550 "Expected copy instruction");
4551 Reg = MI->getOperand(1).getReg();
4553 removeDeadCode(I, std::next(I));
4555 updateValueMap(I, Reg);
4559bool AArch64FastISel::selectIntExt(const Instruction *I) {
4560 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4561 "Unexpected integer extend instruction.");
4564 if (!isTypeSupported(I->getType(), RetVT))
4567 if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4571 if (optimizeIntExtLoad(I, RetVT, SrcVT))
4574 Register SrcReg = getRegForValue(I->getOperand(0));
4579 bool IsZExt = isa<ZExtInst>(I);
4580 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4581 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4582 if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4583 Register ResultReg = createResultReg(&AArch64::GPR64RegClass);
4585 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4588 .addImm(AArch64::sub_32);
4592 updateValueMap(I, SrcReg);
4597 unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4601 updateValueMap(I, ResultReg);
4605bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4606 EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4611 if (DestVT != MVT::i64 && DestVT != MVT::i32)
4615 bool Is64bit = (DestVT == MVT::i64);
4616 switch (ISDOpcode) {
4620 DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4623 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4626 unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4627 Register Src0Reg = getRegForValue(I->getOperand(0));
4631 Register Src1Reg = getRegForValue(I->getOperand(1));
4636 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4637 Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
4638 assert(QuotReg && "Unexpected DIV instruction emission failure.");
4641 Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
4642 updateValueMap(I, ResultReg);
4648 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4654 const Value *Src0 = I->getOperand(0);
4655 const Value *Src1 = I->getOperand(1);
4656 if (const auto *C = dyn_cast<ConstantInt>(Src0))
4657 if (C->getValue().isPowerOf2())
4661 if (const auto *C = dyn_cast<ConstantInt>(Src1))
4662 if (C->getValue().isPowerOf2()) {
4663 uint64_t ShiftVal = C->getValue().logBase2();
4666 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4669 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4672 Src0 = ZExt->getOperand(0);
4675 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4678 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4681 Src0 = SExt->getOperand(0);
4686 Register Src0Reg = getRegForValue(Src0);
4690 unsigned ResultReg =
4691 emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);
4694 updateValueMap(I, ResultReg);
4699 Register Src0Reg = getRegForValue(I->getOperand(0));
4703 Register Src1Reg = getRegForValue(I->getOperand(1));
4707 unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);
4712 updateValueMap(I, ResultReg);
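// selectShift uses the immediate bitfield forms (emitLSL_ri / emitASR_ri /
// emitLSR_ri) when the shift amount is a constant, folding a zext/sext of
// the shifted value where possible, and otherwise falls back to the
// register-shift forms (emit*_rr).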
4716bool AArch64FastISel::selectShift(const Instruction *I) {
4718 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4722 return selectOperator(I, I->getOpcode());
4724 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4725 unsigned ResultReg = 0;
4728 bool IsZExt = I->getOpcode() != Instruction::AShr;
4729 const Value *Op0 = I->getOperand(0);
4730 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4733 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4736 Op0 = ZExt->getOperand(0);
4739 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4742 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4745 Op0 = SExt->getOperand(0);
4750 Register Op0Reg = getRegForValue(Op0);
4754 switch (I->getOpcode()) {
4756 case Instruction::Shl:
4757 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4759 case Instruction::AShr:
4760 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4762 case Instruction::LShr:
4763 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4769 updateValueMap(I, ResultReg);
4773 Register Op0Reg = getRegForValue(I->getOperand(0));
4777 Register Op1Reg = getRegForValue(I->getOperand(1));
4781 unsigned ResultReg = 0;
4782 switch (I->getOpcode()) {
4784 case Instruction::Shl:
4785 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
4787 case Instruction::AShr:
4788 ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
4790 case Instruction::LShr:
4791 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
4798 updateValueMap(I, ResultReg);
4802bool AArch64FastISel::selectBitCast(const Instruction *I) {
4805 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4807 if (!isTypeLegal(I->getType(), RetVT))
4811 if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4812 Opc = AArch64::FMOVWSr;
4813 else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4814 Opc = AArch64::FMOVXDr;
4815 else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4816 Opc = AArch64::FMOVSWr;
4817 else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4818 Opc = AArch64::FMOVDXr;
4825 case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4826 case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4827 case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4828 case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4830 Register Op0Reg = getRegForValue(I->getOperand(0));
4834 Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
4838 updateValueMap(I, ResultReg);
4842bool AArch64FastISel::selectFRem(const Instruction *I) {
4844 if (!isTypeLegal(I->getType(), RetVT))
4852 LC = RTLIB::REM_F32;
4855 LC = RTLIB::REM_F64;
4860 Args.reserve(I->getNumOperands());
4863 for (auto &Arg : I->operands()) {
4866 Entry.Ty = Arg->getType();
4867 Args.push_back(Entry);
4870 CallLoweringInfo CLI;
4872 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4873 TLI.getLibcallName(LC), std::move(Args));
4874 if (!lowerCallTo(CLI))
4876 updateValueMap(I, CLI.ResultReg);
4880bool AArch64FastISel::selectSDiv(const Instruction *I) {
4882 if (!isTypeLegal(I->getType(), VT))
4885 if (!isa<ConstantInt>(I->getOperand(1)))
4888 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4889 if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4890 !(C.isPowerOf2() || C.isNegatedPowerOf2()))
4893 unsigned Lg2 = C.countr_zero();
4894 Register Src0Reg = getRegForValue(I->getOperand(0));
4898 if (cast<BinaryOperator>(I)->isExact()) {
4899 unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
4902 updateValueMap(I, ResultReg);
4906 int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4907 unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
4912 if (!emitICmp_ri(VT, Src0Reg, 0))
4917 if (VT == MVT::i64) {
4918 SelectOpc = AArch64::CSELXr;
4919 RC = &AArch64::GPR64RegClass;
4921 SelectOpc = AArch64::CSELWr;
4922 RC = &AArch64::GPR32RegClass;
4924 Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
4931 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4934 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
4937 ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);
4942 updateValueMap(I, ResultReg);
4949unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4956 MVT PtrVT = TLI.getPointerTy(DL);
4958 if (IdxVT.bitsLT(PtrVT)) {
4959 IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
4960 } else if (IdxVT.bitsGT(PtrVT))
4961 llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4969bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4970 if (Subtarget->isTargetILP32())
4973 Register N = getRegForValue(I->getOperand(0));
4980 MVT VT = TLI.getPointerTy(DL);
4983 const Value *Idx = GTI.getOperand();
4984 if (auto *StTy = GTI.getStructTypeOrNull()) {
4985 unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4988 TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4990 Type *Ty = GTI.getIndexedType();
4993 if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4998 DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
5002 N = emitAdd_ri_(VT, N, TotalOffs);
5009 uint64_t ElementSize = DL.getTypeAllocSize(Ty);
5010 unsigned IdxN = getRegForGEPIndex(Idx);
5014 if (ElementSize != 1) {
5018 IdxN = emitMul_rr(VT, IdxN, C);
5028 N = emitAdd_ri_(VT, N, TotalOffs);
5032 updateValueMap(I, N);
5038 "cmpxchg survived AtomicExpand at optlevel > -O0");
5040 auto *RetPairTy = cast<StructType>(I->getType());
5041 Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5042 assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5043 "cmpxchg has a non-i1 status result");
5046 if (!isTypeLegal(RetTy, VT))
5050 unsigned Opc, CmpOpc;
5053 if (VT == MVT::i32) {
5054 Opc = AArch64::CMP_SWAP_32;
5055 CmpOpc = AArch64::SUBSWrs;
5056 ResRC = &AArch64::GPR32RegClass;
5057 } else if (VT == MVT::i64) {
5058 Opc = AArch64::CMP_SWAP_64;
5059 CmpOpc = AArch64::SUBSXrs;
5060 ResRC = &AArch64::GPR64RegClass;
5068 II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5070 II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5072 II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5074 const Register ResultReg1 = createResultReg(ResRC);
5075 const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5076 const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5087 .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5098 assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5099 updateValueMap(I, ResultReg1, 2);
5103bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5104 if (TLI.fallBackToDAGISel(*I))
5106 switch (I->getOpcode()) {
5109 case Instruction::Add:
5110 case Instruction::Sub:
5111 return selectAddSub(I);
5112 case Instruction::Mul:
5113 return selectMul(I);
5114 case Instruction::SDiv:
5115 return selectSDiv(I);
5116 case Instruction::SRem:
5120 case Instruction::URem:
5124 case Instruction::Shl:
5125 case Instruction::LShr:
5126 case Instruction::AShr:
5127 return selectShift(I);
5128 case Instruction::And:
5129 case Instruction::Or:
5130 case Instruction::Xor:
5131 return selectLogicalOp(I);
5132 case Instruction::Br:
5133 return selectBranch(I);
5134 case Instruction::IndirectBr:
5135 return selectIndirectBr(I);
5136 case Instruction::BitCast:
5138 return selectBitCast(I);
5140 case Instruction::FPToSI:
5142 return selectFPToInt(I, true);
5144 case Instruction::FPToUI:
5145 return selectFPToInt(I, false);
5146 case Instruction::ZExt:
5147 case Instruction::SExt:
5148 return selectIntExt(I);
5149 case Instruction::Trunc:
5151 return selectTrunc(I);
5153 case Instruction::FPExt:
5154 return selectFPExt(I);
5155 case Instruction::FPTrunc:
5156 return selectFPTrunc(I);
5157 case Instruction::SIToFP:
5159 return selectIntToFP(I, true);
5161 case Instruction::UIToFP:
5162 return selectIntToFP(I, false);