44#include "llvm/IR/IntrinsicsAArch64.h"
52#define DEBUG_TYPE "aarch64-isel"
55using namespace MIPatternMatch;
56using namespace AArch64GISelUtils;
65#define GET_GLOBALISEL_PREDICATE_BITSET
66#include "AArch64GenGlobalISel.inc"
67#undef GET_GLOBALISEL_PREDICATE_BITSET
82 InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
87 ProduceNonFlagSettingCondBr =
135 bool tryOptAndIntoCompareBranch(
MachineInstr &AndInst,
bool Invert,
214 bool selectVectorLoadIntrinsic(
unsigned Opc,
unsigned NumVecs,
216 bool selectVectorLoadLaneIntrinsic(
unsigned Opc,
unsigned NumVecs,
218 void selectVectorStoreIntrinsic(
MachineInstr &
I,
unsigned NumVecs,
220 bool selectVectorStoreLaneIntrinsic(
MachineInstr &
I,
unsigned NumVecs,
237 unsigned emitConstantPoolEntry(
const Constant *CPVal,
256 std::optional<CmpInst::Predicate> = std::nullopt)
const;
259 emitInstr(
unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
260 std::initializer_list<llvm::SrcOp> SrcOps,
262 const ComplexRendererFns &RenderFns = std::nullopt)
const;
297 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
318 MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
340 std::pair<MachineInstr *, AArch64CC::CondCode>
375 ComplexRendererFns selectShiftA_32(
const MachineOperand &Root)
const;
376 ComplexRendererFns selectShiftB_32(
const MachineOperand &Root)
const;
377 ComplexRendererFns selectShiftA_64(
const MachineOperand &Root)
const;
378 ComplexRendererFns selectShiftB_64(
const MachineOperand &Root)
const;
380 ComplexRendererFns select12BitValueWithLeftShift(
uint64_t Immed)
const;
382 ComplexRendererFns selectNegArithImmed(
MachineOperand &Root)
const;
385 unsigned Size)
const;
387 ComplexRendererFns selectAddrModeUnscaled8(
MachineOperand &Root)
const {
388 return selectAddrModeUnscaled(Root, 1);
390 ComplexRendererFns selectAddrModeUnscaled16(
MachineOperand &Root)
const {
391 return selectAddrModeUnscaled(Root, 2);
393 ComplexRendererFns selectAddrModeUnscaled32(
MachineOperand &Root)
const {
394 return selectAddrModeUnscaled(Root, 4);
396 ComplexRendererFns selectAddrModeUnscaled64(
MachineOperand &Root)
const {
397 return selectAddrModeUnscaled(Root, 8);
399 ComplexRendererFns selectAddrModeUnscaled128(
MachineOperand &Root)
const {
400 return selectAddrModeUnscaled(Root, 16);
405 ComplexRendererFns tryFoldAddLowIntoImm(
MachineInstr &RootDef,
unsigned Size,
409 unsigned Size)
const;
411 ComplexRendererFns selectAddrModeIndexed(
MachineOperand &Root)
const {
412 return selectAddrModeIndexed(Root, Width / 8);
419 unsigned SizeInBytes)
const;
427 bool WantsExt)
const;
428 ComplexRendererFns selectAddrModeRegisterOffset(
MachineOperand &Root)
const;
430 unsigned SizeInBytes)
const;
432 ComplexRendererFns selectAddrModeXRO(
MachineOperand &Root)
const {
433 return selectAddrModeXRO(Root, Width / 8);
437 unsigned SizeInBytes)
const;
439 ComplexRendererFns selectAddrModeWRO(
MachineOperand &Root)
const {
440 return selectAddrModeWRO(Root, Width / 8);
444 bool AllowROR =
false)
const;
446 ComplexRendererFns selectArithShiftedRegister(
MachineOperand &Root)
const {
447 return selectShiftedRegister(Root);
450 ComplexRendererFns selectLogicalShiftedRegister(
MachineOperand &Root)
const {
451 return selectShiftedRegister(Root,
true);
461 bool IsLoadStore =
false)
const;
472 ComplexRendererFns selectArithExtendedRegister(
MachineOperand &Root)
const;
477 int OpIdx = -1)
const;
479 int OpIdx = -1)
const;
481 int OpIdx = -1)
const;
485 int OpIdx = -1)
const;
487 int OpIdx = -1)
const;
489 int OpIdx = -1)
const;
492 int OpIdx = -1)
const;
498 bool tryOptSelect(
GSelect &Sel);
505 bool isLoadStoreOfNumBytes(
const MachineInstr &
MI,
unsigned NumBytes)
const;
518 bool ProduceNonFlagSettingCondBr =
false;
527#define GET_GLOBALISEL_PREDICATES_DECL
528#include "AArch64GenGlobalISel.inc"
529#undef GET_GLOBALISEL_PREDICATES_DECL
533#define GET_GLOBALISEL_TEMPORARIES_DECL
534#include "AArch64GenGlobalISel.inc"
535#undef GET_GLOBALISEL_TEMPORARIES_DECL
540#define GET_GLOBALISEL_IMPL
541#include "AArch64GenGlobalISel.inc"
542#undef GET_GLOBALISEL_IMPL
544AArch64InstructionSelector::AArch64InstructionSelector(
547 :
TM(
TM), STI(STI),
TII(*STI.getInstrInfo()),
TRI(*STI.getRegisterInfo()),
550#include
"AArch64GenGlobalISel.inc"
553#include
"AArch64GenGlobalISel.inc"
565 bool GetAllRegSet =
false) {
566 if (RB.
getID() == AArch64::GPRRegBankID) {
568 return GetAllRegSet ? &AArch64::GPR32allRegClass
569 : &AArch64::GPR32RegClass;
571 return GetAllRegSet ? &AArch64::GPR64allRegClass
572 : &AArch64::GPR64RegClass;
574 return &AArch64::XSeqPairsClassRegClass;
578 if (RB.
getID() == AArch64::FPRRegBankID) {
581 return &AArch64::FPR8RegClass;
583 return &AArch64::FPR16RegClass;
585 return &AArch64::FPR32RegClass;
587 return &AArch64::FPR64RegClass;
589 return &AArch64::FPR128RegClass;
601 bool GetAllRegSet =
false) {
602 unsigned RegBankID = RB.
getID();
604 if (RegBankID == AArch64::GPRRegBankID) {
605 if (SizeInBits <= 32)
606 return GetAllRegSet ? &AArch64::GPR32allRegClass
607 : &AArch64::GPR32RegClass;
608 if (SizeInBits == 64)
609 return GetAllRegSet ? &AArch64::GPR64allRegClass
610 : &AArch64::GPR64RegClass;
611 if (SizeInBits == 128)
612 return &AArch64::XSeqPairsClassRegClass;
615 if (RegBankID == AArch64::FPRRegBankID) {
616 switch (SizeInBits) {
620 return &AArch64::FPR8RegClass;
622 return &AArch64::FPR16RegClass;
624 return &AArch64::FPR32RegClass;
626 return &AArch64::FPR64RegClass;
628 return &AArch64::FPR128RegClass;
638 switch (
TRI.getRegSizeInBits(*RC)) {
646 if (RC != &AArch64::FPR32RegClass)
656 dbgs() <<
"Couldn't find appropriate subregister for register class.");
665 switch (RB.
getID()) {
666 case AArch64::GPRRegBankID:
668 case AArch64::FPRRegBankID:
691 const unsigned RegClassIDs[],
693 unsigned NumRegs = Regs.
size();
696 assert(NumRegs >= 2 && NumRegs <= 4 &&
697 "Only support between two and 4 registers in a tuple!");
699 auto *DesiredClass =
TRI->getRegClass(RegClassIDs[NumRegs - 2]);
701 MIB.
buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
702 for (
unsigned I = 0, E = Regs.
size();
I < E; ++
I) {
703 RegSequence.addUse(Regs[
I]);
704 RegSequence.addImm(SubRegs[
I]);
706 return RegSequence.getReg(0);
711 static const unsigned RegClassIDs[] = {
712 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
713 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
714 AArch64::dsub2, AArch64::dsub3};
715 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
720 static const unsigned RegClassIDs[] = {
721 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
722 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
723 AArch64::qsub2, AArch64::qsub3};
724 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
729 auto &
MBB = *
MI.getParent();
731 auto &
MRI = MF.getRegInfo();
737 else if (Root.
isReg()) {
742 Immed = ValAndVReg->Value.getSExtValue();
758 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
765 for (
auto &MO :
I.operands()) {
768 LLVM_DEBUG(
dbgs() <<
"Generic inst non-reg operands are unsupported\n");
776 if (!MO.getReg().isVirtual()) {
777 LLVM_DEBUG(
dbgs() <<
"Generic inst has physical register operand\n");
787 if (PrevOpBank && OpBank != PrevOpBank) {
788 LLVM_DEBUG(
dbgs() <<
"Generic inst operands have different banks\n");
803 case AArch64::GPRRegBankID:
805 switch (GenericOpc) {
806 case TargetOpcode::G_SHL:
807 return AArch64::LSLVWr;
808 case TargetOpcode::G_LSHR:
809 return AArch64::LSRVWr;
810 case TargetOpcode::G_ASHR:
811 return AArch64::ASRVWr;
815 }
else if (OpSize == 64) {
816 switch (GenericOpc) {
817 case TargetOpcode::G_PTR_ADD:
818 return AArch64::ADDXrr;
819 case TargetOpcode::G_SHL:
820 return AArch64::LSLVXr;
821 case TargetOpcode::G_LSHR:
822 return AArch64::LSRVXr;
823 case TargetOpcode::G_ASHR:
824 return AArch64::ASRVXr;
830 case AArch64::FPRRegBankID:
833 switch (GenericOpc) {
834 case TargetOpcode::G_FADD:
835 return AArch64::FADDSrr;
836 case TargetOpcode::G_FSUB:
837 return AArch64::FSUBSrr;
838 case TargetOpcode::G_FMUL:
839 return AArch64::FMULSrr;
840 case TargetOpcode::G_FDIV:
841 return AArch64::FDIVSrr;
846 switch (GenericOpc) {
847 case TargetOpcode::G_FADD:
848 return AArch64::FADDDrr;
849 case TargetOpcode::G_FSUB:
850 return AArch64::FSUBDrr;
851 case TargetOpcode::G_FMUL:
852 return AArch64::FMULDrr;
853 case TargetOpcode::G_FDIV:
854 return AArch64::FDIVDrr;
855 case TargetOpcode::G_OR:
856 return AArch64::ORRv8i8;
873 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
875 case AArch64::GPRRegBankID:
878 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
880 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
882 return isStore ? AArch64::STRWui : AArch64::LDRWui;
884 return isStore ? AArch64::STRXui : AArch64::LDRXui;
887 case AArch64::FPRRegBankID:
890 return isStore ? AArch64::STRBui : AArch64::LDRBui;
892 return isStore ? AArch64::STRHui : AArch64::LDRHui;
894 return isStore ? AArch64::STRSui : AArch64::LDRSui;
896 return isStore ? AArch64::STRDui : AArch64::LDRDui;
898 return isStore ? AArch64::STRQui : AArch64::LDRQui;
912 assert(SrcReg.
isValid() &&
"Expected a valid source register?");
913 assert(To &&
"Destination register class cannot be null");
920 RegOp.
setReg(SubRegCopy.getReg(0));
924 if (!
I.getOperand(0).getReg().isPhysical())
934static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
938 Register DstReg =
I.getOperand(0).getReg();
939 Register SrcReg =
I.getOperand(1).getReg();
953 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
954 SrcSize = DstSize = 32;
971 if (Reg.isPhysical())
973 LLT Ty =
MRI.getType(Reg);
979 RC = getRegClassForTypeOnBank(Ty, RB);
982 dbgs() <<
"Warning: DBG_VALUE operand has unexpected size/bank\n");
995 Register DstReg =
I.getOperand(0).getReg();
996 Register SrcReg =
I.getOperand(1).getReg();
1015 LLVM_DEBUG(
dbgs() <<
"Couldn't determine source register class\n");
1019 unsigned SrcSize =
TRI.getRegSizeInBits(*SrcRC);
1020 unsigned DstSize =
TRI.getRegSizeInBits(*DstRC);
1031 auto Copy = MIB.
buildCopy({DstTempRC}, {SrcReg});
1033 }
else if (SrcSize > DstSize) {
1040 }
else if (DstSize > SrcSize) {
1047 Register PromoteReg =
MRI.createVirtualRegister(PromotionRC);
1049 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1054 RegOp.
setReg(PromoteReg);
1073 if (
I.getOpcode() == TargetOpcode::G_ZEXT) {
1074 I.setDesc(
TII.get(AArch64::COPY));
1075 assert(SrcRegBank.
getID() == AArch64::GPRRegBankID);
1079 I.setDesc(
TII.get(AArch64::COPY));
1094 switch (GenericOpc) {
1095 case TargetOpcode::G_SITOFP:
1096 return AArch64::SCVTFUWSri;
1097 case TargetOpcode::G_UITOFP:
1098 return AArch64::UCVTFUWSri;
1099 case TargetOpcode::G_FPTOSI:
1100 return AArch64::FCVTZSUWSr;
1101 case TargetOpcode::G_FPTOUI:
1102 return AArch64::FCVTZUUWSr;
1107 switch (GenericOpc) {
1108 case TargetOpcode::G_SITOFP:
1109 return AArch64::SCVTFUXSri;
1110 case TargetOpcode::G_UITOFP:
1111 return AArch64::UCVTFUXSri;
1112 case TargetOpcode::G_FPTOSI:
1113 return AArch64::FCVTZSUWDr;
1114 case TargetOpcode::G_FPTOUI:
1115 return AArch64::FCVTZUUWDr;
1125 switch (GenericOpc) {
1126 case TargetOpcode::G_SITOFP:
1127 return AArch64::SCVTFUWDri;
1128 case TargetOpcode::G_UITOFP:
1129 return AArch64::UCVTFUWDri;
1130 case TargetOpcode::G_FPTOSI:
1131 return AArch64::FCVTZSUXSr;
1132 case TargetOpcode::G_FPTOUI:
1133 return AArch64::FCVTZUUXSr;
1138 switch (GenericOpc) {
1139 case TargetOpcode::G_SITOFP:
1140 return AArch64::SCVTFUXDri;
1141 case TargetOpcode::G_UITOFP:
1142 return AArch64::UCVTFUXDri;
1143 case TargetOpcode::G_FPTOSI:
1144 return AArch64::FCVTZSUXDr;
1145 case TargetOpcode::G_FPTOUI:
1146 return AArch64::FCVTZUUXDr;
1165 RBI.getRegBank(True,
MRI,
TRI)->getID() &&
1166 "Expected both select operands to have the same regbank?");
1167 LLT Ty =
MRI.getType(True);
1172 "Expected 32 bit or 64 bit select only?");
1173 const bool Is32Bit =
Size == 32;
1174 if (RBI.getRegBank(True,
MRI,
TRI)->getID() != AArch64::GPRRegBankID) {
1175 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1176 auto FCSel = MIB.
buildInstr(Opc, {Dst}, {True, False}).addImm(
CC);
1182 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1184 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &
CC, &
MRI,
1199 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1216 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1235 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1251 auto TryOptSelectCst = [&Opc, &True, &False, &
CC, Is32Bit, &
MRI,
1257 if (!TrueCst && !FalseCst)
1260 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1261 if (TrueCst && FalseCst) {
1262 int64_t
T = TrueCst->Value.getSExtValue();
1263 int64_t
F = FalseCst->Value.getSExtValue();
1265 if (
T == 0 &&
F == 1) {
1267 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1273 if (
T == 0 &&
F == -1) {
1275 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1283 int64_t
T = TrueCst->Value.getSExtValue();
1286 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1295 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1304 int64_t
F = FalseCst->Value.getSExtValue();
1307 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1314 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1322 Optimized |= TryFoldBinOpIntoSelect(False, True,
false);
1323 Optimized |= TryFoldBinOpIntoSelect(True, False,
true);
1444 assert(Reg.isValid() &&
"Expected valid register!");
1445 bool HasZext =
false;
1447 unsigned Opc =
MI->getOpcode();
1449 if (!
MI->getOperand(0).isReg() ||
1450 !
MRI.hasOneNonDBGUse(
MI->getOperand(0).getReg()))
1457 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1458 Opc == TargetOpcode::G_TRUNC) {
1459 if (Opc == TargetOpcode::G_ZEXT)
1462 Register NextReg =
MI->getOperand(1).getReg();
1464 if (!NextReg.
isValid() || !
MRI.hasOneNonDBGUse(NextReg))
1473 std::optional<uint64_t>
C;
1478 case TargetOpcode::G_AND:
1479 case TargetOpcode::G_XOR: {
1480 TestReg =
MI->getOperand(1).getReg();
1481 Register ConstantReg =
MI->getOperand(2).getReg();
1492 C = VRegAndVal->Value.getZExtValue();
1494 C = VRegAndVal->Value.getSExtValue();
1498 case TargetOpcode::G_ASHR:
1499 case TargetOpcode::G_LSHR:
1500 case TargetOpcode::G_SHL: {
1501 TestReg =
MI->getOperand(1).getReg();
1505 C = VRegAndVal->Value.getSExtValue();
1517 unsigned TestRegSize =
MRI.getType(TestReg).getSizeInBits();
1521 case TargetOpcode::G_AND:
1523 if ((*
C >> Bit) & 1)
1526 case TargetOpcode::G_SHL:
1529 if (*
C <= Bit && (Bit - *
C) < TestRegSize) {
1534 case TargetOpcode::G_ASHR:
1539 if (Bit >= TestRegSize)
1540 Bit = TestRegSize - 1;
1542 case TargetOpcode::G_LSHR:
1544 if ((Bit + *
C) < TestRegSize) {
1549 case TargetOpcode::G_XOR:
1558 if ((*
C >> Bit) & 1)
1577 assert(ProduceNonFlagSettingCondBr &&
1578 "Cannot emit TB(N)Z with speculation tracking!");
1583 LLT Ty =
MRI.getType(TestReg);
1586 assert(Bit < 64 &&
"Bit is too large!");
1590 bool UseWReg =
Bit < 32;
1591 unsigned NecessarySize = UseWReg ? 32 : 64;
1592 if (
Size != NecessarySize)
1593 TestReg = moveScalarRegClass(
1594 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1597 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1598 {AArch64::TBZW, AArch64::TBNZW}};
1599 unsigned Opc = OpcTable[UseWReg][IsNegative];
1606bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1609 assert(AndInst.
getOpcode() == TargetOpcode::G_AND &&
"Expected G_AND only?");
1636 int32_t
Bit = MaybeBit->Value.exactLogBase2();
1643 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1651 assert(ProduceNonFlagSettingCondBr &&
"CBZ does not set flags!");
1653 assert(RBI.getRegBank(CompareReg,
MRI,
TRI)->getID() ==
1654 AArch64::GPRRegBankID &&
1655 "Expected GPRs only?");
1656 auto Ty =
MRI.getType(CompareReg);
1659 assert(Width <= 64 &&
"Expected width to be at most 64?");
1660 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1661 {AArch64::CBNZW, AArch64::CBNZX}};
1662 unsigned Opc = OpcTable[IsNegative][Width == 64];
1663 auto BranchMI = MIB.
buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1668bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1671 assert(
I.getOpcode() == TargetOpcode::G_BRCOND);
1683 I.eraseFromParent();
1687bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1690 assert(
I.getOpcode() == TargetOpcode::G_BRCOND);
1696 if (!ProduceNonFlagSettingCondBr)
1715 if (VRegAndVal && !AndInst) {
1716 int64_t
C = VRegAndVal->Value.getSExtValue();
1722 emitTestBit(LHS, Bit,
false, DestMBB, MIB);
1723 I.eraseFromParent();
1731 emitTestBit(LHS, Bit,
true, DestMBB, MIB);
1732 I.eraseFromParent();
1740 emitTestBit(LHS, Bit,
false, DestMBB, MIB);
1741 I.eraseFromParent();
1755 if (VRegAndVal && VRegAndVal->Value == 0) {
1763 tryOptAndIntoCompareBranch(
1765 I.eraseFromParent();
1770 auto LHSTy =
MRI.getType(LHS);
1771 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1773 I.eraseFromParent();
1782bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1785 assert(
I.getOpcode() == TargetOpcode::G_BRCOND);
1786 if (tryOptCompareBranchFedByICmp(
I, ICmp, MIB))
1796 I.eraseFromParent();
1800bool AArch64InstructionSelector::selectCompareBranch(
1802 Register CondReg =
I.getOperand(0).getReg();
1807 if (CCMIOpc == TargetOpcode::G_FCMP)
1808 return selectCompareBranchFedByFCmp(
I, *CCMI, MIB);
1809 if (CCMIOpc == TargetOpcode::G_ICMP)
1810 return selectCompareBranchFedByICmp(
I, *CCMI, MIB);
1815 if (ProduceNonFlagSettingCondBr) {
1816 emitTestBit(CondReg, 0,
true,
1817 I.getOperand(1).getMBB(), MIB);
1818 I.eraseFromParent();
1828 .
addMBB(
I.getOperand(1).getMBB());
1829 I.eraseFromParent();
1837 assert(
MRI.getType(Reg).isVector() &&
"Expected a *vector* shift operand");
1848 return std::nullopt;
1850 int64_t Imm = *ShiftImm;
1852 return std::nullopt;
1856 return std::nullopt;
1859 return std::nullopt;
1863 return std::nullopt;
1867 return std::nullopt;
1871 return std::nullopt;
1877bool AArch64InstructionSelector::selectVectorSHL(
MachineInstr &
I,
1879 assert(
I.getOpcode() == TargetOpcode::G_SHL);
1880 Register DstReg =
I.getOperand(0).getReg();
1881 const LLT Ty =
MRI.getType(DstReg);
1882 Register Src1Reg =
I.getOperand(1).getReg();
1883 Register Src2Reg =
I.getOperand(2).getReg();
1894 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1896 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1898 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1900 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1902 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1904 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1906 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1912 auto Shl = MIB.
buildInstr(Opc, {DstReg}, {Src1Reg});
1918 I.eraseFromParent();
1922bool AArch64InstructionSelector::selectVectorAshrLshr(
1924 assert(
I.getOpcode() == TargetOpcode::G_ASHR ||
1925 I.getOpcode() == TargetOpcode::G_LSHR);
1926 Register DstReg =
I.getOperand(0).getReg();
1927 const LLT Ty =
MRI.getType(DstReg);
1928 Register Src1Reg =
I.getOperand(1).getReg();
1929 Register Src2Reg =
I.getOperand(2).getReg();
1934 bool IsASHR =
I.getOpcode() == TargetOpcode::G_ASHR;
1944 unsigned NegOpc = 0;
1946 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
1948 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1949 NegOpc = AArch64::NEGv2i64;
1951 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1952 NegOpc = AArch64::NEGv4i32;
1954 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1955 NegOpc = AArch64::NEGv2i32;
1957 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1958 NegOpc = AArch64::NEGv4i16;
1960 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1961 NegOpc = AArch64::NEGv8i16;
1963 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1964 NegOpc = AArch64::NEGv16i8;
1966 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1967 NegOpc = AArch64::NEGv8i8;
1973 auto Neg = MIB.
buildInstr(NegOpc, {RC}, {Src2Reg});
1975 auto SShl = MIB.
buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1977 I.eraseFromParent();
1981bool AArch64InstructionSelector::selectVaStartAAPCS(
1986bool AArch64InstructionSelector::selectVaStartDarwin(
1989 Register ListReg =
I.getOperand(0).getReg();
1991 Register ArgsAddrReg =
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2002 BuildMI(*
I.getParent(),
I,
I.getDebugLoc(),
TII.get(AArch64::ADDXri))
2010 MIB =
BuildMI(*
I.getParent(),
I,
I.getDebugLoc(),
TII.get(AArch64::STRXui))
2017 I.eraseFromParent();
2021void AArch64InstructionSelector::materializeLargeCMVal(
2027 auto MovZ = MIB.
buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
2038 :
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2040 if (
auto *GV = dyn_cast<GlobalValue>(V)) {
2042 GV, MovZ->getOperand(1).getOffset(), Flags));
2046 MovZ->getOperand(1).getOffset(), Flags));
2052 Register DstReg = BuildMovK(MovZ.getReg(0),
2058bool AArch64InstructionSelector::preISelLower(
MachineInstr &
I) {
2063 switch (
I.getOpcode()) {
2064 case TargetOpcode::G_STORE: {
2065 bool Changed = contractCrossBankCopyIntoStore(
I,
MRI);
2073 SrcOp.setReg(NewSrc);
2074 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass,
MRI);
2079 case TargetOpcode::G_PTR_ADD:
2080 return convertPtrAddToAdd(
I,
MRI);
2081 case TargetOpcode::G_LOAD: {
2086 Register DstReg =
I.getOperand(0).getReg();
2087 const LLT DstTy =
MRI.getType(DstReg);
2093 case AArch64::G_DUP: {
2095 LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
2099 MRI.setType(
I.getOperand(0).getReg(),
2101 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2102 I.getOperand(1).setReg(NewSrc.getReg(0));
2105 case TargetOpcode::G_UITOFP:
2106 case TargetOpcode::G_SITOFP: {
2111 Register SrcReg =
I.getOperand(1).getReg();
2112 LLT SrcTy =
MRI.getType(SrcReg);
2113 LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
2117 if (RBI.getRegBank(SrcReg,
MRI,
TRI)->getID() == AArch64::FPRRegBankID) {
2118 if (
I.getOpcode() == TargetOpcode::G_SITOFP)
2119 I.setDesc(
TII.get(AArch64::G_SITOF));
2121 I.setDesc(
TII.get(AArch64::G_UITOF));
2139bool AArch64InstructionSelector::convertPtrAddToAdd(
2141 assert(
I.getOpcode() == TargetOpcode::G_PTR_ADD &&
"Expected G_PTR_ADD");
2142 Register DstReg =
I.getOperand(0).getReg();
2143 Register AddOp1Reg =
I.getOperand(1).getReg();
2144 const LLT PtrTy =
MRI.getType(DstReg);
2148 const LLT CastPtrTy =
2153 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2155 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2159 I.setDesc(
TII.get(TargetOpcode::G_ADD));
2160 MRI.setType(DstReg, CastPtrTy);
2161 I.getOperand(1).setReg(PtrToInt.getReg(0));
2162 if (!select(*PtrToInt)) {
2163 LLVM_DEBUG(
dbgs() <<
"Failed to select G_PTRTOINT in convertPtrAddToAdd");
2172 I.getOperand(2).setReg(NegatedReg);
2173 I.setDesc(
TII.get(TargetOpcode::G_SUB));
2177bool AArch64InstructionSelector::earlySelectSHL(
MachineInstr &
I,
2182 assert(
I.getOpcode() == TargetOpcode::G_SHL &&
"unexpected op");
2183 const auto &MO =
I.getOperand(2);
2188 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
2192 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2193 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2195 if (!Imm1Fn || !Imm2Fn)
2199 MIB.
buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2202 for (
auto &RenderFn : *Imm1Fn)
2204 for (
auto &RenderFn : *Imm2Fn)
2207 I.eraseFromParent();
2211bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2213 assert(
I.getOpcode() == TargetOpcode::G_STORE &&
"Expected G_STORE");
2231 LLT DefDstTy =
MRI.getType(DefDstReg);
2232 Register StoreSrcReg =
I.getOperand(0).getReg();
2233 LLT StoreSrcTy =
MRI.getType(StoreSrcReg);
2244 if (RBI.getRegBank(StoreSrcReg,
MRI,
TRI) ==
2245 RBI.getRegBank(DefDstReg,
MRI,
TRI))
2249 I.getOperand(0).setReg(DefDstReg);
2253bool AArch64InstructionSelector::earlySelect(
MachineInstr &
I) {
2254 assert(
I.getParent() &&
"Instruction should be in a basic block!");
2255 assert(
I.getParent()->getParent() &&
"Instruction should be in a function!");
2261 switch (
I.getOpcode()) {
2262 case AArch64::G_DUP: {
2265 Register Src =
I.getOperand(1).getReg();
2270 Register Dst =
I.getOperand(0).getReg();
2272 MRI.getType(Dst).getNumElements(),
2274 ValAndVReg->Value));
2275 if (!emitConstantVector(Dst, CV, MIB,
MRI))
2277 I.eraseFromParent();
2280 case TargetOpcode::G_SEXT:
2283 if (selectUSMovFromExtend(
I,
MRI))
2286 case TargetOpcode::G_BR:
2288 case TargetOpcode::G_SHL:
2289 return earlySelectSHL(
I,
MRI);
2290 case TargetOpcode::G_CONSTANT: {
2291 bool IsZero =
false;
2292 if (
I.getOperand(1).isCImm())
2293 IsZero =
I.getOperand(1).getCImm()->isZero();
2294 else if (
I.getOperand(1).isImm())
2295 IsZero =
I.getOperand(1).getImm() == 0;
2300 Register DefReg =
I.getOperand(0).getReg();
2301 LLT Ty =
MRI.getType(DefReg);
2303 I.getOperand(1).ChangeToRegister(AArch64::XZR,
false);
2304 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
MRI);
2306 I.getOperand(1).ChangeToRegister(AArch64::WZR,
false);
2307 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass,
MRI);
2311 I.setDesc(
TII.get(TargetOpcode::COPY));
2315 case TargetOpcode::G_ADD: {
2324 Register AddDst =
I.getOperand(0).getReg();
2325 Register AddLHS =
I.getOperand(1).getReg();
2326 Register AddRHS =
I.getOperand(2).getReg();
2328 LLT Ty =
MRI.getType(AddLHS);
2337 if (!
MRI.hasOneNonDBGUse(Reg))
2351 MRI.getType(
Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2361 Cmp = MatchCmp(AddRHS);
2365 auto &PredOp =
Cmp->getOperand(1);
2370 emitIntegerCompare(
Cmp->getOperand(2),
2371 Cmp->getOperand(3), PredOp, MIB);
2372 emitCSINC(AddDst, AddLHS, AddLHS, InvCC, MIB);
2373 I.eraseFromParent();
2376 case TargetOpcode::G_OR: {
2380 Register Dst =
I.getOperand(0).getReg();
2381 LLT Ty =
MRI.getType(Dst);
2400 if (ShiftImm >
Size || ((1ULL << ShiftImm) - 1ULL) !=
uint64_t(MaskImm))
2403 int64_t Immr =
Size - ShiftImm;
2404 int64_t Imms =
Size - ShiftImm - 1;
2405 unsigned Opc =
Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2406 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2407 I.eraseFromParent();
2410 case TargetOpcode::G_FENCE: {
2411 if (
I.getOperand(1).getImm() == 0)
2415 .
addImm(
I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
2416 I.eraseFromParent();
2425 assert(
I.getParent() &&
"Instruction should be in a basic block!");
2426 assert(
I.getParent()->getParent() &&
"Instruction should be in a function!");
2433 if (Subtarget->requiresStrictAlign()) {
2435 LLVM_DEBUG(
dbgs() <<
"AArch64 GISel does not support strict-align yet\n");
2441 unsigned Opcode =
I.getOpcode();
2443 if (!
I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2446 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2449 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2450 const Register DefReg =
I.getOperand(0).getReg();
2451 const LLT DefTy =
MRI.getType(DefReg);
2454 MRI.getRegClassOrRegBank(DefReg);
2464 DefRC = getRegClassForTypeOnBank(DefTy, RB);
2471 I.setDesc(
TII.get(TargetOpcode::PHI));
2473 return RBI.constrainGenericRegister(DefReg, *DefRC,
MRI);
2479 if (
I.isDebugInstr())
2486 if (
I.getNumOperands() !=
I.getNumExplicitOperands()) {
2488 dbgs() <<
"Generic instruction has unexpected implicit operands\n");
2495 if (preISelLower(
I)) {
2496 Opcode =
I.getOpcode();
2507 if (selectImpl(
I, *CoverageInfo))
2511 I.getOperand(0).isReg() ?
MRI.getType(
I.getOperand(0).getReg()) :
LLT{};
2514 case TargetOpcode::G_SBFX:
2515 case TargetOpcode::G_UBFX: {
2516 static const unsigned OpcTable[2][2] = {
2517 {AArch64::UBFMWri, AArch64::UBFMXri},
2518 {AArch64::SBFMWri, AArch64::SBFMXri}};
2519 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2521 unsigned Opc = OpcTable[IsSigned][
Size == 64];
2524 assert(Cst1 &&
"Should have gotten a constant for src 1?");
2527 assert(Cst2 &&
"Should have gotten a constant for src 2?");
2528 auto LSB = Cst1->Value.getZExtValue();
2529 auto Width = Cst2->Value.getZExtValue();
2531 MIB.
buildInstr(Opc, {
I.getOperand(0)}, {
I.getOperand(1)})
2533 .
addImm(LSB + Width - 1);
2534 I.eraseFromParent();
2537 case TargetOpcode::G_BRCOND:
2538 return selectCompareBranch(
I, MF,
MRI);
2540 case TargetOpcode::G_BRINDIRECT: {
2541 I.setDesc(
TII.get(AArch64::BR));
2545 case TargetOpcode::G_BRJT:
2546 return selectBrJT(
I,
MRI);
2548 case AArch64::G_ADD_LOW: {
2554 if (BaseMI->
getOpcode() != AArch64::ADRP) {
2555 I.setDesc(
TII.get(AArch64::ADDXri));
2560 "Expected small code model");
2562 auto Op2 =
I.getOperand(2);
2563 auto MovAddr = MIB.
buildInstr(AArch64::MOVaddr, {
I.getOperand(0)}, {})
2564 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2565 Op1.getTargetFlags())
2567 Op2.getTargetFlags());
2568 I.eraseFromParent();
2572 case TargetOpcode::G_FCONSTANT:
2573 case TargetOpcode::G_CONSTANT: {
2574 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2583 const Register DefReg =
I.getOperand(0).getReg();
2584 const LLT DefTy =
MRI.getType(DefReg);
2590 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2592 <<
" constant, expected: " << s16 <<
" or " << s32
2593 <<
" or " << s64 <<
" or " << s128 <<
'\n');
2597 if (RB.
getID() != AArch64::FPRRegBankID) {
2599 <<
" constant on bank: " << RB
2600 <<
", expected: FPR\n");
2608 if (DefSize != 128 &&
I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2612 if (Ty != p0 && Ty != s8 && Ty != s16) {
2614 <<
" constant, expected: " << s32 <<
", " << s64
2615 <<
", or " << p0 <<
'\n');
2619 if (RB.
getID() != AArch64::GPRRegBankID) {
2621 <<
" constant on bank: " << RB
2622 <<
", expected: GPR\n");
2639 if (TLI->isFPImmLegal(
I.getOperand(1).getFPImm()->getValueAPF(),
2646 auto *FPImm =
I.getOperand(1).getFPImm();
2649 LLVM_DEBUG(
dbgs() <<
"Failed to load double constant pool entry\n");
2653 I.eraseFromParent();
2654 return RBI.constrainGenericRegister(DefReg, FPRRC,
MRI);
2658 assert((DefSize == 32 || DefSize == 64) &&
"Unexpected const def size");
2660 const Register DefGPRReg =
MRI.createVirtualRegister(
2661 DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2667 if (!RBI.constrainGenericRegister(DefReg, FPRRC,
MRI)) {
2668 LLVM_DEBUG(
dbgs() <<
"Failed to constrain G_FCONSTANT def operand\n");
2676 }
else if (
I.getOperand(1).isCImm()) {
2677 uint64_t Val =
I.getOperand(1).getCImm()->getZExtValue();
2678 I.getOperand(1).ChangeToImmediate(Val);
2679 }
else if (
I.getOperand(1).isImm()) {
2680 uint64_t Val =
I.getOperand(1).getImm();
2681 I.getOperand(1).ChangeToImmediate(Val);
2684 const unsigned MovOpc =
2685 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2686 I.setDesc(
TII.get(MovOpc));
2690 case TargetOpcode::G_EXTRACT: {
2691 Register DstReg =
I.getOperand(0).getReg();
2692 Register SrcReg =
I.getOperand(1).getReg();
2693 LLT SrcTy =
MRI.getType(SrcReg);
2694 LLT DstTy =
MRI.getType(DstReg);
2706 unsigned Offset =
I.getOperand(2).getImm();
2715 if (SrcRB.
getID() == AArch64::GPRRegBankID) {
2717 MIB.
buildInstr(TargetOpcode::COPY, {DstReg}, {})
2719 Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2721 AArch64::GPR64RegClass, NewI->getOperand(0));
2722 I.eraseFromParent();
2728 unsigned LaneIdx =
Offset / 64;
2730 DstReg, DstRB,
LLT::scalar(64), SrcReg, LaneIdx, MIB);
2733 I.eraseFromParent();
2737 I.setDesc(
TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2743 "unexpected G_EXTRACT types");
2750 .addReg(DstReg, 0, AArch64::sub_32);
2751 RBI.constrainGenericRegister(
I.getOperand(0).getReg(),
2752 AArch64::GPR32RegClass,
MRI);
2753 I.getOperand(0).setReg(DstReg);
2758 case TargetOpcode::G_INSERT: {
2759 LLT SrcTy =
MRI.getType(
I.getOperand(2).getReg());
2760 LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
2767 I.setDesc(
TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2768 unsigned LSB =
I.getOperand(3).getImm();
2769 unsigned Width =
MRI.getType(
I.getOperand(2).getReg()).getSizeInBits();
2770 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2775 "unexpected G_INSERT types");
2781 TII.get(AArch64::SUBREG_TO_REG))
2784 .
addUse(
I.getOperand(2).getReg())
2785 .
addImm(AArch64::sub_32);
2786 RBI.constrainGenericRegister(
I.getOperand(2).getReg(),
2787 AArch64::GPR32RegClass,
MRI);
2788 I.getOperand(2).setReg(SrcReg);
2792 case TargetOpcode::G_FRAME_INDEX: {
2799 I.setDesc(
TII.get(AArch64::ADDXri));
2808 case TargetOpcode::G_GLOBAL_VALUE: {
2811 if (
I.getOperand(1).isSymbol()) {
2812 OpFlags =
I.getOperand(1).getTargetFlags();
2816 GV =
I.getOperand(1).getGlobal();
2818 return selectTLSGlobalValue(
I,
MRI);
2819 OpFlags = STI.ClassifyGlobalReference(GV,
TM);
2823 I.setDesc(
TII.get(AArch64::LOADgot));
2824 I.getOperand(1).setTargetFlags(OpFlags);
2826 !
TM.isPositionIndependent()) {
2828 materializeLargeCMVal(
I, GV, OpFlags);
2829 I.eraseFromParent();
2832 I.setDesc(
TII.get(AArch64::ADR));
2833 I.getOperand(1).setTargetFlags(OpFlags);
2835 I.setDesc(
TII.get(AArch64::MOVaddr));
2838 MIB.addGlobalAddress(GV,
I.getOperand(1).getOffset(),
2844 case TargetOpcode::G_ZEXTLOAD:
2845 case TargetOpcode::G_LOAD:
2846 case TargetOpcode::G_STORE: {
2848 bool IsZExtLoad =
I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2862 if (Order != AtomicOrdering::NotAtomic &&
2863 Order != AtomicOrdering::Unordered &&
2864 Order != AtomicOrdering::Monotonic) {
2865 assert(!isa<GZExtLoad>(LdSt));
2866 if (MemSizeInBytes > 64)
2869 if (isa<GLoad>(LdSt)) {
2870 static constexpr unsigned LDAPROpcodes[] = {
2871 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2872 static constexpr unsigned LDAROpcodes[] = {
2873 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2875 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
2878 I.setDesc(
TII.get(Opcodes[
Log2_32(MemSizeInBytes)]));
2880 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2881 AArch64::STLRW, AArch64::STLRX};
2883 if (
MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2885 Register NewVal =
MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2886 MIB.
buildInstr(TargetOpcode::COPY, {NewVal}, {})
2887 .addReg(
I.getOperand(0).getReg(), 0, AArch64::sub_32);
2888 I.getOperand(0).setReg(NewVal);
2890 I.setDesc(
TII.get(Opcodes[
Log2_32(MemSizeInBytes)]));
2901 "Load/Store pointer operand isn't a GPR");
2902 assert(
MRI.getType(PtrReg).isPointer() &&
2903 "Load/Store pointer operand isn't a pointer");
2907 const LLT ValTy =
MRI.getType(ValReg);
2912 if (isa<GStore>(LdSt) && ValTy.
getSizeInBits() > MemSizeInBits) {
2915 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2921 .addReg(ValReg, 0,
SubReg)
2923 RBI.constrainGenericRegister(Copy, *RC,
MRI);
2925 }
else if (isa<GLoad>(LdSt) && ValTy.
getSizeInBits() > MemSizeInBits) {
2928 if (RB.
getID() == AArch64::FPRRegBankID) {
2931 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2938 MRI.setRegBank(NewDst, RB);
2941 MIB.
buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
2945 auto SubRegRC = getRegClassForTypeOnBank(
MRI.getType(OldDst), RB);
2946 RBI.constrainGenericRegister(OldDst, *SubRegRC,
MRI);
2953 auto SelectLoadStoreAddressingMode = [&]() ->
MachineInstr * {
2954 bool IsStore = isa<GStore>(
I);
2955 const unsigned NewOpc =
2957 if (NewOpc ==
I.getOpcode())
2961 selectAddrModeIndexed(
I.getOperand(1), MemSizeInBytes);
2964 I.setDesc(
TII.get(NewOpc));
2970 auto NewInst = MIB.
buildInstr(NewOpc, {}, {},
I.getFlags());
2971 Register CurValReg =
I.getOperand(0).getReg();
2972 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
2973 NewInst.cloneMemRefs(
I);
2974 for (
auto &Fn : *AddrModeFns)
2976 I.eraseFromParent();
2985 if (Opcode == TargetOpcode::G_STORE) {
2988 if (CVal && CVal->Value == 0) {
2990 case AArch64::STRWui:
2991 case AArch64::STRHHui:
2992 case AArch64::STRBBui:
2993 LoadStore->getOperand(0).setReg(AArch64::WZR);
2995 case AArch64::STRXui:
2996 LoadStore->getOperand(0).setReg(AArch64::XZR);
3002 if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
3003 ValTy ==
LLT::scalar(64) && MemSizeInBits == 32)) {
3006 if (
MRI.getType(
LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
3010 Register LdReg =
MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3015 MIB.
buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
3018 .
addImm(AArch64::sub_32);
3020 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
3026 case TargetOpcode::G_INDEXED_ZEXTLOAD:
3027 case TargetOpcode::G_INDEXED_SEXTLOAD:
3028 return selectIndexedExtLoad(
I,
MRI);
3029 case TargetOpcode::G_INDEXED_LOAD:
3030 return selectIndexedLoad(
I,
MRI);
3031 case TargetOpcode::G_INDEXED_STORE:
3032 return selectIndexedStore(cast<GIndexedStore>(
I),
MRI);
3034 case TargetOpcode::G_LSHR:
3035 case TargetOpcode::G_ASHR:
3036 if (
MRI.getType(
I.getOperand(0).getReg()).isVector())
3037 return selectVectorAshrLshr(
I,
MRI);
3039 case TargetOpcode::G_SHL:
3040 if (Opcode == TargetOpcode::G_SHL &&
3041 MRI.getType(
I.getOperand(0).getReg()).isVector())
3042 return selectVectorSHL(
I,
MRI);
3049 Register SrcReg =
I.getOperand(1).getReg();
3050 Register ShiftReg =
I.getOperand(2).getReg();
3051 const LLT ShiftTy =
MRI.getType(ShiftReg);
3052 const LLT SrcTy =
MRI.getType(SrcReg);
3057 auto Trunc = MIB.
buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3058 .addReg(ShiftReg, 0, AArch64::sub_32);
3059 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3060 I.getOperand(2).setReg(Trunc.getReg(0));
3064 case TargetOpcode::G_OR: {
3071 const Register DefReg =
I.getOperand(0).getReg();
3075 if (NewOpc ==
I.getOpcode())
3078 I.setDesc(
TII.get(NewOpc));
3086 case TargetOpcode::G_PTR_ADD: {
3087 emitADD(
I.getOperand(0).getReg(),
I.getOperand(1),
I.getOperand(2), MIB);
3088 I.eraseFromParent();
3092 case TargetOpcode::G_SADDE:
3093 case TargetOpcode::G_UADDE:
3094 case TargetOpcode::G_SSUBE:
3095 case TargetOpcode::G_USUBE:
3096 case TargetOpcode::G_SADDO:
3097 case TargetOpcode::G_UADDO:
3098 case TargetOpcode::G_SSUBO:
3099 case TargetOpcode::G_USUBO:
3100 return selectOverflowOp(
I,
MRI);
3102 case TargetOpcode::G_PTRMASK: {
3103 Register MaskReg =
I.getOperand(2).getReg();
3110 I.setDesc(
TII.get(AArch64::ANDXri));
3111 I.getOperand(2).ChangeToImmediate(
3116 case TargetOpcode::G_PTRTOINT:
3117 case TargetOpcode::G_TRUNC: {
3118 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
3119 const LLT SrcTy =
MRI.getType(
I.getOperand(1).getReg());
3121 const Register DstReg =
I.getOperand(0).getReg();
3122 const Register SrcReg =
I.getOperand(1).getReg();
3129 dbgs() <<
"G_TRUNC/G_PTRTOINT input/output on different banks\n");
3133 if (DstRB.
getID() == AArch64::GPRRegBankID) {
3142 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC,
MRI) ||
3143 !RBI.constrainGenericRegister(DstReg, *DstRC,
MRI)) {
3144 LLVM_DEBUG(
dbgs() <<
"Failed to constrain G_TRUNC/G_PTRTOINT\n");
3148 if (DstRC == SrcRC) {
3150 }
else if (Opcode == TargetOpcode::G_TRUNC && DstTy ==
LLT::scalar(32) &&
3154 }
else if (DstRC == &AArch64::GPR32RegClass &&
3155 SrcRC == &AArch64::GPR64RegClass) {
3156 I.getOperand(1).setSubReg(AArch64::sub_32);
3159 dbgs() <<
"Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3163 I.setDesc(
TII.get(TargetOpcode::COPY));
3165 }
else if (DstRB.
getID() == AArch64::FPRRegBankID) {
3168 I.setDesc(
TII.get(AArch64::XTNv4i16));
3178 I.eraseFromParent();
3183 if (Opcode == TargetOpcode::G_PTRTOINT) {
3184 assert(DstTy.
isVector() &&
"Expected an FPR ptrtoint to be a vector");
3185 I.setDesc(
TII.get(TargetOpcode::COPY));
3193 case TargetOpcode::G_ANYEXT: {
3194 if (selectUSMovFromExtend(
I,
MRI))
3197 const Register DstReg =
I.getOperand(0).getReg();
3198 const Register SrcReg =
I.getOperand(1).getReg();
3201 if (RBDst.
getID() != AArch64::GPRRegBankID) {
3203 <<
", expected: GPR\n");
3208 if (RBSrc.
getID() != AArch64::GPRRegBankID) {
3210 <<
", expected: GPR\n");
3214 const unsigned DstSize =
MRI.getType(DstReg).getSizeInBits();
3217 LLVM_DEBUG(
dbgs() <<
"G_ANYEXT operand has no size, not a gvreg?\n");
3221 if (DstSize != 64 && DstSize > 32) {
3223 <<
", expected: 32 or 64\n");
3229 Register ExtSrc =
MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3234 .
addImm(AArch64::sub_32);
3235 I.getOperand(1).setReg(ExtSrc);
3240 case TargetOpcode::G_ZEXT:
3241 case TargetOpcode::G_SEXT_INREG:
3242 case TargetOpcode::G_SEXT: {
3243 if (selectUSMovFromExtend(
I,
MRI))
3246 unsigned Opcode =
I.getOpcode();
3247 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3248 const Register DefReg =
I.getOperand(0).getReg();
3249 Register SrcReg =
I.getOperand(1).getReg();
3250 const LLT DstTy =
MRI.getType(DefReg);
3251 const LLT SrcTy =
MRI.getType(SrcReg);
3257 if (Opcode == TargetOpcode::G_SEXT_INREG)
3258 SrcSize =
I.getOperand(2).getImm();
3264 AArch64::GPRRegBankID &&
3265 "Unexpected ext regbank");
3278 RBI.getRegBank(SrcReg,
MRI,
TRI)->getID() == AArch64::GPRRegBankID;
3279 if (LoadMI && IsGPR) {
3281 unsigned BytesLoaded =
MemOp->getSize().getValue();
3288 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3290 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3291 const Register ZReg = AArch64::WZR;
3292 MIB.
buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
3295 MIB.
buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3298 .
addImm(AArch64::sub_32);
3300 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3302 LLVM_DEBUG(
dbgs() <<
"Failed to constrain G_ZEXT destination\n");
3306 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3312 I.eraseFromParent();
3317 if (DstSize == 64) {
3318 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3320 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3326 SrcReg = MIB.
buildInstr(AArch64::SUBREG_TO_REG,
3327 {&AArch64::GPR64RegClass}, {})
3334 ExtI = MIB.
buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3338 }
else if (DstSize <= 32) {
3339 ExtI = MIB.
buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3348 I.eraseFromParent();
3352 case TargetOpcode::G_SITOFP:
3353 case TargetOpcode::G_UITOFP:
3354 case TargetOpcode::G_FPTOSI:
3355 case TargetOpcode::G_FPTOUI: {
3356 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg()),
3357 SrcTy =
MRI.getType(
I.getOperand(1).getReg());
3359 if (NewOpc == Opcode)
3362 I.setDesc(
TII.get(NewOpc));
3369 case TargetOpcode::G_FREEZE:
3372 case TargetOpcode::G_INTTOPTR:
3377 case TargetOpcode::G_BITCAST:
3385 case TargetOpcode::G_SELECT: {
3386 auto &Sel = cast<GSelect>(
I);
3387 const Register CondReg = Sel.getCondReg();
3388 const Register TReg = Sel.getTrueReg();
3389 const Register FReg = Sel.getFalseReg();
3391 if (tryOptSelect(Sel))
3396 Register DeadVReg =
MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3397 auto TstMI = MIB.
buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3400 if (!emitSelect(Sel.getReg(0), TReg, FReg,
AArch64CC::NE, MIB))
3402 Sel.eraseFromParent();
3405 case TargetOpcode::G_ICMP: {
3407 return selectVectorICmp(
I,
MRI);
3418 emitIntegerCompare(
I.getOperand(2),
I.getOperand(3),
I.getOperand(1), MIB);
3419 emitCSINC(
I.getOperand(0).getReg(), AArch64::WZR,
3420 AArch64::WZR, InvCC, MIB);
3421 I.eraseFromParent();
3425 case TargetOpcode::G_FCMP: {
3428 if (!emitFPCompare(
I.getOperand(2).getReg(),
I.getOperand(3).getReg(), MIB,
3430 !emitCSetForFCmp(
I.getOperand(0).getReg(), Pred, MIB))
3432 I.eraseFromParent();
3435 case TargetOpcode::G_VASTART:
3436 return STI.isTargetDarwin() ? selectVaStartDarwin(
I, MF,
MRI)
3437 : selectVaStartAAPCS(
I, MF,
MRI);
3438 case TargetOpcode::G_INTRINSIC:
3439 return selectIntrinsic(
I,
MRI);
3440 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3441 return selectIntrinsicWithSideEffects(
I,
MRI);
3442 case TargetOpcode::G_IMPLICIT_DEF: {
3443 I.setDesc(
TII.get(TargetOpcode::IMPLICIT_DEF));
3444 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
3445 const Register DstReg =
I.getOperand(0).getReg();
3448 RBI.constrainGenericRegister(DstReg, *DstRC,
MRI);
3451 case TargetOpcode::G_BLOCK_ADDR: {
3453 materializeLargeCMVal(
I,
I.getOperand(1).getBlockAddress(), 0);
3454 I.eraseFromParent();
3457 I.setDesc(
TII.get(AArch64::MOVaddrBA));
3458 auto MovMI =
BuildMI(
MBB,
I,
I.getDebugLoc(),
TII.get(AArch64::MOVaddrBA),
3459 I.getOperand(0).getReg())
3463 I.getOperand(1).getBlockAddress(), 0,
3465 I.eraseFromParent();
3469 case AArch64::G_DUP: {
3475 if (RBI.getRegBank(
I.getOperand(1).getReg(),
MRI,
TRI)->getID() !=
3476 AArch64::GPRRegBankID)
3478 LLT VecTy =
MRI.getType(
I.getOperand(0).getReg());
3480 I.setDesc(
TII.get(AArch64::DUPv8i8gpr));
3482 I.setDesc(
TII.get(AArch64::DUPv16i8gpr));
3484 I.setDesc(
TII.get(AArch64::DUPv4i16gpr));
3486 I.setDesc(
TII.get(AArch64::DUPv8i16gpr));
3491 case TargetOpcode::G_BUILD_VECTOR:
3492 return selectBuildVector(
I,
MRI);
3493 case TargetOpcode::G_MERGE_VALUES:
3495 case TargetOpcode::G_UNMERGE_VALUES:
3497 case TargetOpcode::G_SHUFFLE_VECTOR:
3498 return selectShuffleVector(
I,
MRI);
3499 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3500 return selectExtractElt(
I,
MRI);
3501 case TargetOpcode::G_INSERT_VECTOR_ELT:
3502 return selectInsertElt(
I,
MRI);
3503 case TargetOpcode::G_CONCAT_VECTORS:
3504 return selectConcatVectors(
I,
MRI);
3505 case TargetOpcode::G_JUMP_TABLE:
3506 return selectJumpTable(
I,
MRI);
3507 case TargetOpcode::G_MEMCPY:
3508 case TargetOpcode::G_MEMCPY_INLINE:
3509 case TargetOpcode::G_MEMMOVE:
3510 case TargetOpcode::G_MEMSET:
3511 assert(STI.hasMOPS() &&
"Shouldn't get here without +mops feature");
3512 return selectMOPS(
I,
MRI);
3518bool AArch64InstructionSelector::selectAndRestoreState(
MachineInstr &
I) {
3525bool AArch64InstructionSelector::selectMOPS(
MachineInstr &GI,
3529 case TargetOpcode::G_MEMCPY:
3530 case TargetOpcode::G_MEMCPY_INLINE:
3531 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3533 case TargetOpcode::G_MEMMOVE:
3534 Mopcode = AArch64::MOPSMemoryMovePseudo;
3536 case TargetOpcode::G_MEMSET:
3538 Mopcode = AArch64::MOPSMemorySetPseudo;
3547 const Register DstPtrCopy =
MRI.cloneVirtualRegister(DstPtr.getReg());
3548 const Register SrcValCopy =
MRI.cloneVirtualRegister(SrcOrVal.getReg());
3551 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3552 const auto &SrcValRegClass =
3553 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3556 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass,
MRI);
3557 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass,
MRI);
3558 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass,
MRI);
3568 Register DefDstPtr =
MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3569 Register DefSize =
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3571 MIB.
buildInstr(Mopcode, {DefDstPtr, DefSize},
3572 {DstPtrCopy, SizeCopy, SrcValCopy});
3574 Register DefSrcPtr =
MRI.createVirtualRegister(&SrcValRegClass);
3575 MIB.
buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3576 {DstPtrCopy, SrcValCopy, SizeCopy});
3585 assert(
I.getOpcode() == TargetOpcode::G_BRJT &&
"Expected G_BRJT");
3586 Register JTAddr =
I.getOperand(0).getReg();
3587 unsigned JTI =
I.getOperand(1).getIndex();
3590 Register TargetReg =
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3591 Register ScratchReg =
MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3594 auto JumpTableInst = MIB.
buildInstr(AArch64::JumpTableDest32,
3595 {TargetReg, ScratchReg}, {JTAddr,
Index})
3596 .addJumpTableIndex(JTI);
3598 MIB.
buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},
3599 {
static_cast<int64_t
>(JTI)});
3601 MIB.
buildInstr(AArch64::BR, {}, {TargetReg});
3602 I.eraseFromParent();
3606bool AArch64InstructionSelector::selectJumpTable(
MachineInstr &
I,
3608 assert(
I.getOpcode() == TargetOpcode::G_JUMP_TABLE &&
"Expected jump table");
3609 assert(
I.getOperand(1).isJTI() &&
"Jump table op should have a JTI!");
3611 Register DstReg =
I.getOperand(0).getReg();
3612 unsigned JTI =
I.getOperand(1).getIndex();
3615 MIB.
buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3618 I.eraseFromParent();
3622bool AArch64InstructionSelector::selectTLSGlobalValue(
3624 if (!STI.isTargetMachO())
3629 const auto &GlobalOp =
I.getOperand(1);
3630 assert(GlobalOp.getOffset() == 0 &&
3631 "Shouldn't have an offset on TLS globals!");
3635 MIB.
buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3638 auto Load = MIB.
buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3639 {LoadGOT.getReg(0)})
3652 RBI.constrainGenericRegister(
I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3654 I.eraseFromParent();
3658bool AArch64InstructionSelector::selectVectorICmp(
3660 Register DstReg =
I.getOperand(0).getReg();
3661 LLT DstTy =
MRI.getType(DstReg);
3662 Register SrcReg =
I.getOperand(2).getReg();
3663 Register Src2Reg =
I.getOperand(3).getReg();
3664 LLT SrcTy =
MRI.getType(SrcReg);
3689 unsigned PredIdx = 0;
3690 bool SwapOperands =
false;
3705 SwapOperands =
true;
3709 SwapOperands =
true;
3719 SwapOperands =
true;
3723 SwapOperands =
true;
3733 static const unsigned OpcTable[4][4][9] = {
3741 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3742 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3743 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3744 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3745 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3746 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3752 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3753 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3754 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3755 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3756 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3757 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3763 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3764 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3765 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3766 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3767 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3768 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3777 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3778 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3779 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3791 unsigned EltIdx =
Log2_32(SrcEltSize / 8);
3792 unsigned NumEltsIdx =
Log2_32(NumElts / 2);
3793 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
3801 getRegClassForTypeOnBank(SrcTy, VecRB,
true);
3803 LLVM_DEBUG(
dbgs() <<
"Could not determine source register class.\n");
3807 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3809 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3824 RBI.constrainGenericRegister(DstReg, *SrcRC,
MRI);
3825 I.eraseFromParent();
3829MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3832 auto Undef = MIRBuilder.
buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3834 auto BuildFn = [&](
unsigned SubregIndex) {
3838 .addImm(SubregIndex);
3846 return BuildFn(AArch64::bsub);
3848 return BuildFn(AArch64::hsub);
3850 return BuildFn(AArch64::ssub);
3852 return BuildFn(AArch64::dsub);
3859AArch64InstructionSelector::emitNarrowVector(
Register DstReg,
Register SrcReg,
3862 LLT DstTy =
MRI.getType(DstReg);
3864 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg,
MRI,
TRI));
3865 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3872 if (
SubReg != AArch64::ssub &&
SubReg != AArch64::dsub) {
3878 .addReg(SrcReg, 0,
SubReg);
3879 RBI.constrainGenericRegister(DstReg, *RC,
MRI);
3883bool AArch64InstructionSelector::selectMergeValues(
3885 assert(
I.getOpcode() == TargetOpcode::G_MERGE_VALUES &&
"unexpected opcode");
3886 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
3887 const LLT SrcTy =
MRI.getType(
I.getOperand(1).getReg());
3891 if (
I.getNumOperands() != 3)
3898 Register DstReg =
I.getOperand(0).getReg();
3899 Register Src1Reg =
I.getOperand(1).getReg();
3900 Register Src2Reg =
I.getOperand(2).getReg();
3901 auto Tmp = MIB.
buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3902 MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
3907 Src2Reg, 1, RB, MIB);
3912 I.eraseFromParent();
3916 if (RB.
getID() != AArch64::GPRRegBankID)
3922 auto *DstRC = &AArch64::GPR64RegClass;
3923 Register SubToRegDef =
MRI.createVirtualRegister(DstRC);
3925 TII.get(TargetOpcode::SUBREG_TO_REG))
3928 .
addUse(
I.getOperand(1).getReg())
3929 .
addImm(AArch64::sub_32);
3930 Register SubToRegDef2 =
MRI.createVirtualRegister(DstRC);
3933 TII.get(TargetOpcode::SUBREG_TO_REG))
3936 .
addUse(
I.getOperand(2).getReg())
3937 .
addImm(AArch64::sub_32);
3939 *
BuildMI(*
I.getParent(),
I,
I.getDebugLoc(),
TII.get(AArch64::BFMXri))
3940 .
addDef(
I.getOperand(0).getReg())
3948 I.eraseFromParent();
3953 const unsigned EltSize) {
3958 CopyOpc = AArch64::DUPi8;
3959 ExtractSubReg = AArch64::bsub;
3962 CopyOpc = AArch64::DUPi16;
3963 ExtractSubReg = AArch64::hsub;
3966 CopyOpc = AArch64::DUPi32;
3967 ExtractSubReg = AArch64::ssub;
3970 CopyOpc = AArch64::DUPi64;
3971 ExtractSubReg = AArch64::dsub;
3975 LLVM_DEBUG(
dbgs() <<
"Elt size '" << EltSize <<
"' unsupported.\n");
3981MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3982 std::optional<Register> DstReg,
const RegisterBank &DstRB,
LLT ScalarTy,
3985 unsigned CopyOpc = 0;
3986 unsigned ExtractSubReg = 0;
3989 dbgs() <<
"Couldn't determine lane copy opcode for instruction.\n");
3994 getRegClassForTypeOnBank(ScalarTy, DstRB,
true);
3996 LLVM_DEBUG(
dbgs() <<
"Could not determine destination register class.\n");
4001 const LLT &VecTy =
MRI.getType(VecReg);
4003 getRegClassForTypeOnBank(VecTy, VecRB,
true);
4005 LLVM_DEBUG(
dbgs() <<
"Could not determine source register class.\n");
4012 DstReg =
MRI.createVirtualRegister(DstRC);
4015 auto Copy = MIRBuilder.
buildInstr(TargetOpcode::COPY, {*DstReg}, {})
4016 .addReg(VecReg, 0, ExtractSubReg);
4017 RBI.constrainGenericRegister(*DstReg, *DstRC,
MRI);
4026 VecTy.
getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
4027 if (!ScalarToVector)
4033 MIRBuilder.
buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
4037 RBI.constrainGenericRegister(*DstReg, *DstRC,
MRI);
4041bool AArch64InstructionSelector::selectExtractElt(
4043 assert(
I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
4044 "unexpected opcode!");
4045 Register DstReg =
I.getOperand(0).getReg();
4046 const LLT NarrowTy =
MRI.getType(DstReg);
4047 const Register SrcReg =
I.getOperand(1).getReg();
4048 const LLT WideTy =
MRI.getType(SrcReg);
4051 "source register size too small!");
4052 assert(!NarrowTy.
isVector() &&
"cannot extract vector into vector!");
4056 assert(LaneIdxOp.
isReg() &&
"Lane index operand was not a register?");
4058 if (RBI.getRegBank(DstReg,
MRI,
TRI)->getID() != AArch64::FPRRegBankID) {
4067 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4071 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4076 I.eraseFromParent();
4080bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4082 unsigned NumElts =
I.getNumOperands() - 1;
4083 Register SrcReg =
I.getOperand(NumElts).getReg();
4084 const LLT NarrowTy =
MRI.getType(
I.getOperand(0).getReg());
4085 const LLT SrcTy =
MRI.getType(SrcReg);
4087 assert(NarrowTy.
isVector() &&
"Expected an unmerge into vectors");
4089 LLVM_DEBUG(
dbgs() <<
"Unexpected vector type for vec split unmerge");
4096 *RBI.getRegBank(
I.getOperand(0).getReg(),
MRI,
TRI);
4097 for (
unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4098 Register Dst =
I.getOperand(OpIdx).getReg();
4100 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4104 I.eraseFromParent();
4108bool AArch64InstructionSelector::selectUnmergeValues(
MachineInstr &
I,
4110 assert(
I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4111 "unexpected opcode");
4114 if (RBI.getRegBank(
I.getOperand(0).getReg(),
MRI,
TRI)->getID() !=
4115 AArch64::FPRRegBankID ||
4116 RBI.getRegBank(
I.getOperand(1).getReg(),
MRI,
TRI)->getID() !=
4117 AArch64::FPRRegBankID) {
4118 LLVM_DEBUG(
dbgs() <<
"Unmerging vector-to-gpr and scalar-to-scalar "
4119 "currently unsupported.\n");
4125 unsigned NumElts =
I.getNumOperands() - 1;
4126 Register SrcReg =
I.getOperand(NumElts).getReg();
4127 const LLT NarrowTy =
MRI.getType(
I.getOperand(0).getReg());
4128 const LLT WideTy =
MRI.getType(SrcReg);
4131 "can only unmerge from vector or s128 types!");
4133 "source register size too small!");
4136 return selectSplitVectorUnmerge(
I,
MRI);
4140 unsigned CopyOpc = 0;
4141 unsigned ExtractSubReg = 0;
4152 unsigned NumInsertRegs = NumElts - 1;
4164 *RBI.getRegBank(SrcReg,
MRI,
TRI));
4168 assert(Found &&
"expected to find last operand's subeg idx");
4169 for (
unsigned Idx = 0;
Idx < NumInsertRegs; ++
Idx) {
4170 Register ImpDefReg =
MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4172 *
BuildMI(
MBB,
I,
I.getDebugLoc(),
TII.get(TargetOpcode::IMPLICIT_DEF),
4176 Register InsertReg =
MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4179 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4196 Register CopyTo =
I.getOperand(0).getReg();
4197 auto FirstCopy = MIB.
buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4198 .addReg(InsertRegs[0], 0, ExtractSubReg);
4202 unsigned LaneIdx = 1;
4203 for (
Register InsReg : InsertRegs) {
4204 Register CopyTo =
I.getOperand(LaneIdx).getReg();
4217 MRI.getRegClassOrNull(
I.getOperand(1).getReg());
4223 RBI.constrainGenericRegister(CopyTo, *RC,
MRI);
4224 I.eraseFromParent();
4228bool AArch64InstructionSelector::selectConcatVectors(
4230 assert(
I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4231 "Unexpected opcode");
4232 Register Dst =
I.getOperand(0).getReg();
4233 Register Op1 =
I.getOperand(1).getReg();
4234 Register Op2 =
I.getOperand(2).getReg();
4235 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4238 I.eraseFromParent();
4243AArch64InstructionSelector::emitConstantPoolEntry(
const Constant *CPVal,
4252MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4260 RC = &AArch64::FPR128RegClass;
4261 Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
4264 RC = &AArch64::FPR64RegClass;
4265 Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
4268 RC = &AArch64::FPR32RegClass;
4269 Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
4272 RC = &AArch64::FPR16RegClass;
4273 Opc = AArch64::LDRHui;
4276 LLVM_DEBUG(
dbgs() <<
"Could not load from constant pool of type "
4282 auto &MF = MIRBuilder.
getMF();
4283 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4284 if (IsTiny && (
Size == 16 ||
Size == 8 ||
Size == 4)) {
4286 LoadMI = &*MIRBuilder.
buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
4289 MIRBuilder.
buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4292 LoadMI = &*MIRBuilder.
buildInstr(Opc, {RC}, {Adrp})
4293 .addConstantPoolIndex(
4309static std::pair<unsigned, unsigned>
4311 unsigned Opc, SubregIdx;
4312 if (RB.
getID() == AArch64::GPRRegBankID) {
4314 Opc = AArch64::INSvi8gpr;
4315 SubregIdx = AArch64::bsub;
4316 }
else if (EltSize == 16) {
4317 Opc = AArch64::INSvi16gpr;
4318 SubregIdx = AArch64::ssub;
4319 }
else if (EltSize == 32) {
4320 Opc = AArch64::INSvi32gpr;
4321 SubregIdx = AArch64::ssub;
4322 }
else if (EltSize == 64) {
4323 Opc = AArch64::INSvi64gpr;
4324 SubregIdx = AArch64::dsub;
4330 Opc = AArch64::INSvi8lane;
4331 SubregIdx = AArch64::bsub;
4332 }
else if (EltSize == 16) {
4333 Opc = AArch64::INSvi16lane;
4334 SubregIdx = AArch64::hsub;
4335 }
else if (EltSize == 32) {
4336 Opc = AArch64::INSvi32lane;
4337 SubregIdx = AArch64::ssub;
4338 }
else if (EltSize == 64) {
4339 Opc = AArch64::INSvi64lane;
4340 SubregIdx = AArch64::dsub;
4345 return std::make_pair(Opc, SubregIdx);
4349 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4351 const ComplexRendererFns &RenderFns)
const {
4352 assert(Opcode &&
"Expected an opcode?");
4354 "Function should only be used to produce selected instructions!");
4355 auto MI = MIRBuilder.
buildInstr(Opcode, DstOps, SrcOps);
4357 for (
auto &Fn : *RenderFns)
4364 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4368 assert(
LHS.isReg() &&
RHS.isReg() &&
"Expected register operands?");
4369 auto Ty =
MRI.getType(
LHS.getReg());
4372 assert((
Size == 32 ||
Size == 64) &&
"Expected a 32-bit or 64-bit type only");
4373 bool Is32Bit =
Size == 32;
4376 if (
auto Fns = selectArithImmed(RHS))
4377 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {
LHS},
4381 if (
auto Fns = selectNegArithImmed(RHS))
4382 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {
LHS},
4386 if (
auto Fns = selectArithExtendedRegister(RHS))
4387 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {
LHS},
4391 if (
auto Fns = selectShiftedRegister(RHS))
4392 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {
LHS},
4394 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {
LHS,
RHS},
4402 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4403 {{AArch64::ADDXri, AArch64::ADDWri},
4404 {AArch64::ADDXrs, AArch64::ADDWrs},
4405 {AArch64::ADDXrr, AArch64::ADDWrr},
4406 {AArch64::SUBXri, AArch64::SUBWri},
4407 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4408 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4415 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4416 {{AArch64::ADDSXri, AArch64::ADDSWri},
4417 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4418 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4419 {AArch64::SUBSXri, AArch64::SUBSWri},
4420 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4421 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4428 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4429 {{AArch64::SUBSXri, AArch64::SUBSWri},
4430 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4431 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4432 {AArch64::ADDSXri, AArch64::ADDSWri},
4433 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4434 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4441 assert(
LHS.isReg() &&
RHS.isReg() &&
"Expected register operands?");
4443 bool Is32Bit = (
MRI->getType(
LHS.getReg()).getSizeInBits() == 32);
4444 static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
4445 return emitInstr(OpcTable[Is32Bit], {Dst}, {
LHS,
RHS}, MIRBuilder);
4452 assert(
LHS.isReg() &&
RHS.isReg() &&
"Expected register operands?");
4454 bool Is32Bit = (
MRI->getType(
LHS.getReg()).getSizeInBits() == 32);
4455 static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
4456 return emitInstr(OpcTable[Is32Bit], {Dst}, {
LHS,
RHS}, MIRBuilder);
4463 bool Is32Bit = (
MRI.getType(
LHS.getReg()).getSizeInBits() == 32);
4464 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4465 return emitADDS(
MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4471 assert(
LHS.isReg() &&
RHS.isReg() &&
"Expected register operands?");
4475 bool Is32Bit = (
RegSize == 32);
4476 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4477 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4478 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4482 int64_t
Imm = ValAndVReg->Value.getSExtValue();
4485 auto TstMI = MIRBuilder.
buildInstr(OpcTable[0][Is32Bit], {Ty}, {
LHS});
4492 if (
auto Fns = selectLogicalShiftedRegister(RHS))
4493 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {
LHS}, MIRBuilder, Fns);
4494 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {
LHS,
RHS}, MIRBuilder);
4497MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4500 assert(
LHS.isReg() &&
RHS.isReg() &&
"Expected LHS and RHS to be registers!");
4507 assert((
Size == 32 ||
Size == 64) &&
"Expected a 32-bit or 64-bit LHS/RHS?");
4509 if (
auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4511 auto Dst =
MRI.cloneVirtualRegister(
LHS.getReg());
4512 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4515MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4519 LLT Ty =
MRI.getType(Dst);
4521 "Expected a 32-bit scalar register?");
4523 const Register ZReg = AArch64::WZR;
4528 return emitCSINC(Dst, ZReg, ZReg, InvCC1,
4534 emitCSINC(Def1Reg, ZReg, ZReg, InvCC1, MIRBuilder);
4535 emitCSINC(Def2Reg, ZReg, ZReg, InvCC2, MIRBuilder);
4536 auto OrMI = MIRBuilder.
buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4541MachineInstr *AArch64InstructionSelector::emitFPCompare(
4543 std::optional<CmpInst::Predicate> Pred)
const {
4545 LLT Ty =
MRI.getType(LHS);
4549 assert(OpSize == 16 || OpSize == 32 || OpSize == 64);
4560 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4564 ShouldUseImm =
true;
4568 unsigned CmpOpcTbl[2][3] = {
4569 {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
4570 {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
4572 CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];
4584MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4593 const LLT Op1Ty =
MRI.getType(Op1);
4594 const LLT Op2Ty =
MRI.getType(Op2);
4596 if (Op1Ty != Op2Ty) {
4597 LLVM_DEBUG(
dbgs() <<
"Could not do vector concat of differing vector tys");
4600 assert(Op1Ty.
isVector() &&
"Expected a vector for vector concat");
4603 LLVM_DEBUG(
dbgs() <<
"Vector concat not supported for full size vectors");
4619 emitScalarToVector(ScalarTy.
getSizeInBits(), DstRC, Op1, MIRBuilder);
4621 emitScalarToVector(ScalarTy.
getSizeInBits(), DstRC, Op2, MIRBuilder);
4622 if (!WidenedOp1 || !WidenedOp2) {
4623 LLVM_DEBUG(
dbgs() <<
"Could not emit a vector from scalar value");
4628 unsigned InsertOpc, InsSubRegIdx;
4629 std::tie(InsertOpc, InsSubRegIdx) =
4633 Dst =
MRI.createVirtualRegister(DstRC);
4654 Size =
TRI.getRegSizeInBits(*RC);
4656 Size =
MRI.getType(Dst).getSizeInBits();
4658 assert(
Size <= 64 &&
"Expected 64 bits or less only!");
4659 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4660 unsigned Opc = OpcTable[
Size == 64];
4661 auto CSINC = MIRBuilder.
buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4669 unsigned Opcode =
I.getOpcode();
4673 bool NeedsNegatedCarry =
4674 (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);
4684 if (SrcMI ==
I.getPrevNode()) {
4685 if (
auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
4686 bool ProducesNegatedCarry = CarrySrcMI->isSub();
4687 if (NeedsNegatedCarry == ProducesNegatedCarry &&
4688 CarrySrcMI->isUnsigned() &&
4689 CarrySrcMI->getCarryOutReg() == CarryReg &&
4690 selectAndRestoreState(*SrcMI))
4695 Register DeadReg =
MRI->createVirtualRegister(&AArch64::GPR32RegClass);
4697 if (NeedsNegatedCarry) {
4700 return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
4704 auto Fns = select12BitValueWithLeftShift(1);
4705 return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
4708bool AArch64InstructionSelector::selectOverflowOp(
MachineInstr &
I,
4710 auto &CarryMI = cast<GAddSubCarryOut>(
I);
4712 if (
auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&
I)) {
4714 emitCarryIn(
I, CarryInMI->getCarryInReg());
4718 auto OpAndCC = emitOverflowOp(
I.getOpcode(), CarryMI.getDstReg(),
4719 CarryMI.getLHS(), CarryMI.getRHS(), MIB);
4721 Register CarryOutReg = CarryMI.getCarryOutReg();
4724 if (!
MRI.use_nodbg_empty(CarryOutReg)) {
4730 emitCSINC(CarryOutReg, ZReg, ZReg,
4731 getInvertedCondCode(OpAndCC.second), MIB);
4734 I.eraseFromParent();
4738std::pair<MachineInstr *, AArch64CC::CondCode>
4739AArch64InstructionSelector::emitOverflowOp(
unsigned Opcode,
Register Dst,
4746 case TargetOpcode::G_SADDO:
4747 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder),
AArch64CC::VS);
4748 case TargetOpcode::G_UADDO:
4749 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder),
AArch64CC::HS);
4750 case TargetOpcode::G_SSUBO:
4751 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder),
AArch64CC::VS);
4752 case TargetOpcode::G_USUBO:
4753 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder),
AArch64CC::LO);
4754 case TargetOpcode::G_SADDE:
4755 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder),
AArch64CC::VS);
4756 case TargetOpcode::G_UADDE:
4757 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder),
AArch64CC::HS);
4758 case TargetOpcode::G_SSUBE:
4759 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder),
AArch64CC::VS);
4760 case TargetOpcode::G_USUBE:
4761 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder),
AArch64CC::LO);
4781 unsigned Depth = 0) {
4782 if (!
MRI.hasOneNonDBGUse(Val))
4786 if (isa<GAnyCmp>(ValDef)) {
4788 MustBeFirst =
false;
4794 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4795 bool IsOR = Opcode == TargetOpcode::G_OR;
4807 if (MustBeFirstL && MustBeFirstR)
4813 if (!CanNegateL && !CanNegateR)
4817 CanNegate = WillNegate && CanNegateL && CanNegateR;
4820 MustBeFirst = !CanNegate;
4822 assert(Opcode == TargetOpcode::G_AND &&
"Must be G_AND");
4825 MustBeFirst = MustBeFirstL || MustBeFirstR;
4832MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4838 LLT OpTy =
MRI.getType(LHS);
4840 std::optional<ValueAndVReg>
C;
4844 if (
C &&
C->Value.ult(32))
4845 CCmpOpc = OpTy.
getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
4847 CCmpOpc = OpTy.
getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4853 assert(STI.hasFullFP16() &&
"Expected Full FP16 for fp16 comparisons");
4854 CCmpOpc = AArch64::FCCMPHrr;
4857 CCmpOpc = AArch64::FCCMPSrr;
4860 CCmpOpc = AArch64::FCCMPDrr;
4870 if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
4871 CCmp.
addImm(
C->Value.getZExtValue());
4879MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4886 if (
auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
4892 if (isa<GICmp>(Cmp)) {
4903 ExtraCmp = emitFPCompare(LHS, RHS, MIB,
CC);
4914 auto Dst =
MRI.cloneVirtualRegister(LHS);
4915 if (isa<GICmp>(Cmp))
4916 return emitSUBS(Dst,
Cmp->getOperand(2),
Cmp->getOperand(3), MIB);
4917 return emitFPCompare(
Cmp->getOperand(2).getReg(),
4918 Cmp->getOperand(3).getReg(), MIB);
4923 assert(
MRI.hasOneNonDBGUse(Val) &&
"Valid conjunction/disjunction tree");
4925 bool IsOR = Opcode == TargetOpcode::G_OR;
4931 assert(ValidL &&
"Valid conjunction/disjunction tree");
4938 assert(ValidR &&
"Valid conjunction/disjunction tree");
4943 assert(!MustBeFirstR &&
"Valid conjunction/disjunction tree");
4952 bool NegateAfterAll;
4953 if (Opcode == TargetOpcode::G_OR) {
4956 assert(CanNegateR &&
"at least one side must be negatable");
4957 assert(!MustBeFirstR &&
"invalid conjunction/disjunction tree");
4961 NegateAfterR =
true;
4964 NegateR = CanNegateR;
4965 NegateAfterR = !CanNegateR;
4968 NegateAfterAll = !Negate;
4970 assert(Opcode == TargetOpcode::G_AND &&
4971 "Valid conjunction/disjunction tree");
4972 assert(!Negate &&
"Valid conjunction/disjunction tree");
4976 NegateAfterR =
false;
4977 NegateAfterAll =
false;
4993MachineInstr *AArch64InstructionSelector::emitConjunction(
4995 bool DummyCanNegate;
4996 bool DummyMustBeFirst;
5003bool AArch64InstructionSelector::tryOptSelectConjunction(
GSelect &SelI,
5015bool AArch64InstructionSelector::tryOptSelect(
GSelect &
I) {
5039 if (!
MRI.hasOneNonDBGUse(CondDefReg)) {
5041 for (
const MachineInstr &UI :
MRI.use_nodbg_instructions(CondDefReg)) {
5044 if (UI.getOpcode() != TargetOpcode::G_SELECT)
5050 unsigned CondOpc = CondDef->
getOpcode();
5051 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
5052 if (tryOptSelectConjunction(
I, *CondDef))
5058 if (CondOpc == TargetOpcode::G_ICMP) {
5086 emitSelect(
I.getOperand(0).getReg(),
I.getOperand(2).getReg(),
5087 I.getOperand(3).getReg(), CondCode, MIB);
5088 I.eraseFromParent();
5092MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
5096 "Unexpected MachineOperand");
5133 return emitCMN(LHS, RHSDef->
getOperand(2), MIRBuilder);
5144 LHSDef->
getOpcode() == TargetOpcode::G_AND) {
5147 if (!ValAndVReg || ValAndVReg->Value != 0)
5157bool AArch64InstructionSelector::selectShuffleVector(
5159 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
5160 Register Src1Reg =
I.getOperand(1).getReg();
5161 const LLT Src1Ty =
MRI.getType(Src1Reg);
5162 Register Src2Reg =
I.getOperand(2).getReg();
5163 const LLT Src2Ty =
MRI.getType(Src2Reg);
5174 LLVM_DEBUG(
dbgs() <<
"Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
5181 for (
int Val : Mask) {
5184 Val = Val < 0 ? 0 : Val;
5185 for (
unsigned Byte = 0;
Byte < BytesPerElt; ++
Byte) {
5203 emitVectorConcat(std::nullopt, Src1Reg, Src2Reg, MIB);
5210 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
5214 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
5220 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
5221 RBI.constrainGenericRegister(
Copy.getReg(0), AArch64::FPR64RegClass,
MRI);
5222 I.eraseFromParent();
5230 auto TBL2 = MIB.
buildInstr(AArch64::TBLv16i8Two, {
I.getOperand(0)},
5233 I.eraseFromParent();
5237MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5247 DstReg =
MRI.createVirtualRegister(DstRC);
5249 unsigned EltSize =
MRI.getType(EltReg).getSizeInBits();
5252 if (RB.
getID() == AArch64::FPRRegBankID) {
5253 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
5254 InsElt = MIRBuilder.
buildInstr(Opc, {*DstReg}, {SrcReg})
5256 .
addUse(InsSub->getOperand(0).getReg())
5259 InsElt = MIRBuilder.
buildInstr(Opc, {*DstReg}, {SrcReg})
5268bool AArch64InstructionSelector::selectUSMovFromExtend(
5270 if (
MI.getOpcode() != TargetOpcode::G_SEXT &&
5271 MI.getOpcode() != TargetOpcode::G_ZEXT &&
5272 MI.getOpcode() != TargetOpcode::G_ANYEXT)
5274 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SEXT;
5275 const Register DefReg =
MI.getOperand(0).getReg();
5276 const LLT DstTy =
MRI.getType(DefReg);
5279 if (DstSize != 32 && DstSize != 64)
5283 MI.getOperand(1).getReg(),
MRI);
5289 const LLT &VecTy =
MRI.getType(Src0);
5292 const MachineInstr *ScalarToVector = emitScalarToVector(
5293 VecTy.
getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
5294 assert(ScalarToVector &&
"Didn't expect emitScalarToVector to fail!");
5300 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5302 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5304 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5306 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5308 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5317 if (DstSize == 64 && !IsSigned) {
5318 Register NewReg =
MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5319 MIB.
buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
5320 ExtI = MIB.
buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
5323 .
addImm(AArch64::sub_32);
5324 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
MRI);
5326 ExtI = MIB.
buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
5329 MI.eraseFromParent();
5333bool AArch64InstructionSelector::selectInsertElt(
MachineInstr &
I,
5335 assert(
I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
5338 Register DstReg =
I.getOperand(0).getReg();
5339 const LLT DstTy =
MRI.getType(DstReg);
5343 Register EltReg =
I.getOperand(2).getReg();
5344 const LLT EltTy =
MRI.getType(EltReg);
5346 if (EltSize < 8 || EltSize > 64)
5351 Register IdxReg =
I.getOperand(3).getReg();
5355 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
5358 Register SrcReg =
I.getOperand(1).getReg();
5361 if (VecSize < 128) {
5365 emitScalarToVector(VecSize, &AArch64::FPR128RegClass, SrcReg, MIB);
5375 emitLaneInsert(std::nullopt, SrcReg, EltReg, LaneIdx, EltRB, MIB);
5377 if (VecSize < 128) {
5388 I.eraseFromParent();
5392MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
5395 if (DstSize == 128) {
5396 if (
Bits.getHiBits(64) !=
Bits.getLoBits(64))
5398 Op = AArch64::MOVIv16b_ns;
5400 Op = AArch64::MOVIv8b_ns;
5407 auto Mov = Builder.
buildInstr(
Op, {Dst}, {}).addImm(Val);
5414MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
5419 if (DstSize == 128) {
5420 if (
Bits.getHiBits(64) !=
Bits.getLoBits(64))
5422 Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
5424 Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
5444MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
5449 if (DstSize == 128) {
5450 if (
Bits.getHiBits(64) !=
Bits.getLoBits(64))
5452 Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
5454 Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
5480MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
5484 if (DstSize == 128) {
5485 if (
Bits.getHiBits(64) !=
Bits.getLoBits(64))
5487 Op = AArch64::MOVIv2d_ns;
5489 Op = AArch64::MOVID;
5495 auto Mov = Builder.
buildInstr(
Op, {Dst}, {}).addImm(Val);
5502MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
5507 if (DstSize == 128) {
5508 if (
Bits.getHiBits(64) !=
Bits.getLoBits(64))
5510 Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
5512 Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
5532MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
5536 bool IsWide =
false;
5537 if (DstSize == 128) {
5538 if (
Bits.getHiBits(64) !=
Bits.getLoBits(64))
5540 Op = AArch64::FMOVv4f32_ns;
5543 Op = AArch64::FMOVv2f32_ns;
5552 Op = AArch64::FMOVv2f64_ns;
5556 auto Mov = Builder.
buildInstr(
Op, {Dst}, {}).addImm(Val);
5561bool AArch64InstructionSelector::selectIndexedExtLoad(
5563 auto &ExtLd = cast<GIndexedAnyExtLoad>(
MI);
5565 Register WriteBack = ExtLd.getWritebackReg();
5568 LLT Ty =
MRI.getType(Dst);
5570 unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
5571 bool IsPre = ExtLd.isPre();
5572 bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
5573 bool InsertIntoXReg =
false;
5581 if (MemSizeBits == 8) {
5584 Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
5586 Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
5587 NewLdDstTy = IsDst64 ? s64 : s32;
5589 Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
5590 InsertIntoXReg = IsDst64;
5593 }
else if (MemSizeBits == 16) {
5596 Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
5598 Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
5599 NewLdDstTy = IsDst64 ? s64 : s32;
5601 Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
5602 InsertIntoXReg = IsDst64;
5605 }
else if (MemSizeBits == 32) {
5607 Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
5610 Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
5611 InsertIntoXReg = IsDst64;
5618 if (RBI.getRegBank(Dst,
MRI,
TRI)->getID() == AArch64::FPRRegBankID)
5626 .addImm(Cst->getSExtValue());
5631 if (InsertIntoXReg) {
5633 auto SubToReg = MIB.
buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
5636 .
addImm(AArch64::sub_32);
5637 RBI.constrainGenericRegister(SubToReg.getReg(0), AArch64::GPR64RegClass,
5643 MI.eraseFromParent();
5648bool AArch64InstructionSelector::selectIndexedLoad(
MachineInstr &
MI,
5650 auto &Ld = cast<GIndexedLoad>(
MI);
5652 Register WriteBack = Ld.getWritebackReg();
5655 assert(
MRI.getType(Dst).getSizeInBits() <= 128 &&
5656 "Unexpected type for indexed load");
5657 unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
5659 if (MemSize <
MRI.getType(Dst).getSizeInBytes())
5660 return selectIndexedExtLoad(
MI,
MRI);
5664 static constexpr unsigned GPROpcodes[] = {
5665 AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
5667 static constexpr unsigned FPROpcodes[] = {
5668 AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
5670 if (RBI.getRegBank(Dst,
MRI,
TRI)->getID() == AArch64::FPRRegBankID)
5671 Opc = FPROpcodes[
Log2_32(MemSize)];
5673 Opc = GPROpcodes[
Log2_32(MemSize)];
5675 static constexpr unsigned GPROpcodes[] = {
5676 AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
5678 static constexpr unsigned FPROpcodes[] = {
5679 AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
5680 AArch64::LDRDpost, AArch64::LDRQpost};
5681 if (RBI.getRegBank(Dst,
MRI,
TRI)->getID() == AArch64::FPRRegBankID)
5682 Opc = FPROpcodes[
Log2_32(MemSize)];
5684 Opc = GPROpcodes[
Log2_32(MemSize)];
5690 MIB.
buildInstr(Opc, {WriteBack, Dst}, {
Base}).addImm(Cst->getSExtValue());
5693 MI.eraseFromParent();
5697bool AArch64InstructionSelector::selectIndexedStore(
GIndexedStore &
I,
5703 LLT ValTy =
MRI.getType(Val);
5708 static constexpr unsigned GPROpcodes[] = {
5709 AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
5711 static constexpr unsigned FPROpcodes[] = {
5712 AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
5715 if (RBI.getRegBank(Val,
MRI,
TRI)->getID() == AArch64::FPRRegBankID)
5720 static constexpr unsigned GPROpcodes[] = {
5721 AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
5723 static constexpr unsigned FPROpcodes[] = {
5724 AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
5725 AArch64::STRDpost, AArch64::STRQpost};
5727 if (RBI.getRegBank(Val,
MRI,
TRI)->getID() == AArch64::FPRRegBankID)
5737 MIB.
buildInstr(Opc, {Dst}, {Val,
Base}).addImm(Cst->getSExtValue());
5738 Str.cloneMemRefs(
I);
5740 I.eraseFromParent();
5748 LLT DstTy =
MRI.getType(Dst);
5751 if (DstSize == 128) {
5753 MIRBuilder.
buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5758 if (DstSize == 64) {
5761 .
buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5764 .addReg(Mov.getReg(0), 0, AArch64::dsub);
5765 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass,
MRI);
5797 if (
auto *NewOp = TryMOVIWithBits(DefBits))
5801 auto TryWithFNeg = [&](
APInt DefBits,
int NumBits,
5805 APInt NegBits(DstSize, 0);
5806 unsigned NumElts = DstSize / NumBits;
5807 for (
unsigned i = 0; i < NumElts; i++)
5808 NegBits |= Neg << (NumBits * i);
5809 NegBits = DefBits ^ NegBits;
5813 if (
auto *NewOp = TryMOVIWithBits(NegBits)) {
5814 Register NewDst =
MRI.createVirtualRegister(&AArch64::FPR128RegClass);
5816 return MIRBuilder.
buildInstr(NegOpc, {Dst}, {NewDst});
5821 if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
5822 (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
5823 (STI.hasFullFP16() &&
5824 (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
5830 LLVM_DEBUG(
dbgs() <<
"Could not generate cp load for constant vector!");
5834 auto Copy = MIRBuilder.
buildCopy(Dst, CPLoad->getOperand(0));
5835 RBI.constrainGenericRegister(
5836 Dst, *
MRI.getRegClass(CPLoad->getOperand(0).getReg()),
MRI);
5840bool AArch64InstructionSelector::tryOptConstantBuildVec(
5842 assert(
I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5844 assert(DstSize <= 128 &&
"Unexpected build_vec type!");
5850 for (
unsigned Idx = 1;
Idx <
I.getNumOperands(); ++
Idx) {
5856 const_cast<ConstantInt *
>(OpMI->getOperand(1).getCImm()));
5857 else if ((OpMI =
getOpcodeDef(TargetOpcode::G_FCONSTANT,
5858 I.getOperand(
Idx).getReg(),
MRI)))
5860 const_cast<ConstantFP *
>(OpMI->getOperand(1).getFPImm()));
5865 if (!emitConstantVector(
I.getOperand(0).getReg(), CV, MIB,
MRI))
5867 I.eraseFromParent();
5871bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5877 Register Dst =
I.getOperand(0).getReg();
5878 Register EltReg =
I.getOperand(1).getReg();
5879 LLT EltTy =
MRI.getType(EltReg);
5887 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(), MRI);
5895 getRegClassForTypeOnBank(
MRI.getType(Dst), DstRB);
5900 auto SubregToReg = MIB.
buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5904 I.eraseFromParent();
5906 return RBI.constrainGenericRegister(Dst, *DstRC,
MRI);
5909bool AArch64InstructionSelector::selectBuildVector(
MachineInstr &
I,
5911 assert(
I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5914 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
5915 const LLT EltTy =
MRI.getType(
I.getOperand(1).getReg());
5918 if (tryOptConstantBuildVec(
I, DstTy,
MRI))
5920 if (tryOptBuildVecToSubregToReg(
I,
MRI))
5923 if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
5930 I.getOperand(1).getReg(), MIB);
5940 for (
unsigned i = 2, e = DstSize / EltSize + 1; i <
e; ++i) {
5943 Register OpReg =
I.getOperand(i).getReg();
5945 if (!getOpcodeDef<GImplicitDef>(OpReg,
MRI)) {
5946 PrevMI = &*emitLaneInsert(std::nullopt, DstVec, OpReg, i - 1, RB, MIB);
5953 if (DstSize < 128) {
5956 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec,
MRI,
TRI));
5959 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5967 if (
SubReg != AArch64::ssub &&
SubReg != AArch64::dsub) {
5968 LLVM_DEBUG(
dbgs() <<
"Unsupported destination size! (" << DstSize
5974 Register DstReg =
I.getOperand(0).getReg();
5976 MIB.
buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0,
SubReg);
5979 RBI.constrainGenericRegister(DstReg, *RC,
MRI);
5997 if (PrevMI == ScalarToVec && DstReg.
isVirtual()) {
5999 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec,
MRI,
TRI));
6000 RBI.constrainGenericRegister(DstReg, *RC,
MRI);
6004 I.eraseFromParent();
6008bool AArch64InstructionSelector::selectVectorLoadIntrinsic(
unsigned Opc,
6011 assert(
I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
6012 assert(Opc &&
"Expected an opcode?");
6013 assert(NumVecs > 1 && NumVecs < 5 &&
"Only support 2, 3, or 4 vectors");
6015 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6018 "Destination must be 64 bits or 128 bits?");
6019 unsigned SubReg =
Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
6020 auto Ptr =
I.getOperand(
I.getNumOperands() - 1).getReg();
6021 assert(
MRI.getType(
Ptr).isPointer() &&
"Expected a pointer type?");
6023 Load.cloneMemRefs(
I);
6025 Register SelectedLoadDst =
Load->getOperand(0).getReg();
6026 for (
unsigned Idx = 0;
Idx < NumVecs; ++
Idx) {
6027 auto Vec = MIB.
buildInstr(TargetOpcode::COPY, {
I.getOperand(
Idx)}, {})
6028 .addReg(SelectedLoadDst, 0,
SubReg +
Idx);
6037bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
6039 assert(
I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
6040 assert(Opc &&
"Expected an opcode?");
6041 assert(NumVecs > 1 && NumVecs < 5 &&
"Only support 2, 3, or 4 vectors");
6043 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6046 auto FirstSrcRegIt =
I.operands_begin() + NumVecs + 1;
6048 std::transform(FirstSrcRegIt, FirstSrcRegIt + NumVecs, Regs.
begin(),
6049 [](
auto MO) { return MO.getReg(); });
6053 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
6068 .
addImm(LaneNo->getZExtValue())
6070 Load.cloneMemRefs(
I);
6072 Register SelectedLoadDst =
Load->getOperand(0).getReg();
6073 unsigned SubReg = AArch64::qsub0;
6074 for (
unsigned Idx = 0;
Idx < NumVecs; ++
Idx) {
6075 auto Vec = MIB.
buildInstr(TargetOpcode::COPY,
6076 {Narrow ?
DstOp(&AArch64::FPR128RegClass)
6079 .addReg(SelectedLoadDst, 0,
SubReg +
Idx);
6084 !emitNarrowVector(
I.getOperand(
Idx).getReg(), WideReg, MIB,
MRI))
6090void AArch64InstructionSelector::selectVectorStoreIntrinsic(
MachineInstr &
I,
6094 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6098 std::transform(
I.operands_begin() + 1,
I.operands_begin() + 1 + NumVecs,
6099 Regs.
begin(), [](
auto MO) { return MO.getReg(); });
6108bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
6111 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6115 std::transform(
I.operands_begin() + 1,
I.operands_begin() + 1 + NumVecs,
6116 Regs.
begin(), [](
auto MO) { return MO.getReg(); });
6120 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
6133 .
addImm(LaneNo->getZExtValue())
6140bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
6143 unsigned IntrinID = cast<GIntrinsic>(
I).getIntrinsicID();
6154 case Intrinsic::aarch64_ldxp:
6155 case Intrinsic::aarch64_ldaxp: {
6157 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
6158 {
I.getOperand(0).
getReg(),
I.getOperand(1).getReg()},
6164 case Intrinsic::aarch64_neon_ld1x2: {
6165 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6168 Opc = AArch64::LD1Twov8b;
6170 Opc = AArch64::LD1Twov16b;
6172 Opc = AArch64::LD1Twov4h;
6174 Opc = AArch64::LD1Twov8h;
6176 Opc = AArch64::LD1Twov2s;
6178 Opc = AArch64::LD1Twov4s;
6180 Opc = AArch64::LD1Twov2d;
6181 else if (Ty ==
S64 || Ty == P0)
6182 Opc = AArch64::LD1Twov1d;
6185 selectVectorLoadIntrinsic(Opc, 2,
I);
6188 case Intrinsic::aarch64_neon_ld1x3: {
6189 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6192 Opc = AArch64::LD1Threev8b;
6194 Opc = AArch64::LD1Threev16b;
6196 Opc = AArch64::LD1Threev4h;
6198 Opc = AArch64::LD1Threev8h;
6200 Opc = AArch64::LD1Threev2s;
6202 Opc = AArch64::LD1Threev4s;
6204 Opc = AArch64::LD1Threev2d;
6205 else if (Ty ==
S64 || Ty == P0)
6206 Opc = AArch64::LD1Threev1d;
6209 selectVectorLoadIntrinsic(Opc, 3,
I);
6212 case Intrinsic::aarch64_neon_ld1x4: {
6213 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6216 Opc = AArch64::LD1Fourv8b;
6218 Opc = AArch64::LD1Fourv16b;
6220 Opc = AArch64::LD1Fourv4h;
6222 Opc = AArch64::LD1Fourv8h;
6224 Opc = AArch64::LD1Fourv2s;
6226 Opc = AArch64::LD1Fourv4s;
6228 Opc = AArch64::LD1Fourv2d;
6229 else if (Ty ==
S64 || Ty == P0)
6230 Opc = AArch64::LD1Fourv1d;
6233 selectVectorLoadIntrinsic(Opc, 4,
I);
6236 case Intrinsic::aarch64_neon_ld2: {
6237 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6240 Opc = AArch64::LD2Twov8b;
6242 Opc = AArch64::LD2Twov16b;
6244 Opc = AArch64::LD2Twov4h;
6246 Opc = AArch64::LD2Twov8h;
6248 Opc = AArch64::LD2Twov2s;
6250 Opc = AArch64::LD2Twov4s;
6252 Opc = AArch64::LD2Twov2d;
6253 else if (Ty ==
S64 || Ty == P0)
6254 Opc = AArch64::LD1Twov1d;
6257 selectVectorLoadIntrinsic(Opc, 2,
I);
6260 case Intrinsic::aarch64_neon_ld2lane: {
6261 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6264 Opc = AArch64::LD2i8;
6266 Opc = AArch64::LD2i16;
6268 Opc = AArch64::LD2i32;
6271 Opc = AArch64::LD2i64;
6274 if (!selectVectorLoadLaneIntrinsic(Opc, 2,
I))
6278 case Intrinsic::aarch64_neon_ld2r: {
6279 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6282 Opc = AArch64::LD2Rv8b;
6284 Opc = AArch64::LD2Rv16b;
6286 Opc = AArch64::LD2Rv4h;
6288 Opc = AArch64::LD2Rv8h;
6290 Opc = AArch64::LD2Rv2s;
6292 Opc = AArch64::LD2Rv4s;
6294 Opc = AArch64::LD2Rv2d;
6295 else if (Ty ==
S64 || Ty == P0)
6296 Opc = AArch64::LD2Rv1d;
6299 selectVectorLoadIntrinsic(Opc, 2,
I);
6302 case Intrinsic::aarch64_neon_ld3: {
6303 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6306 Opc = AArch64::LD3Threev8b;
6308 Opc = AArch64::LD3Threev16b;
6310 Opc = AArch64::LD3Threev4h;
6312 Opc = AArch64::LD3Threev8h;
6314 Opc = AArch64::LD3Threev2s;
6316 Opc = AArch64::LD3Threev4s;
6318 Opc = AArch64::LD3Threev2d;
6319 else if (Ty ==
S64 || Ty == P0)
6320 Opc = AArch64::LD1Threev1d;
6323 selectVectorLoadIntrinsic(Opc, 3,
I);
6326 case Intrinsic::aarch64_neon_ld3lane: {
6327 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6330 Opc = AArch64::LD3i8;
6332 Opc = AArch64::LD3i16;
6334 Opc = AArch64::LD3i32;
6337 Opc = AArch64::LD3i64;
6340 if (!selectVectorLoadLaneIntrinsic(Opc, 3,
I))
6344 case Intrinsic::aarch64_neon_ld3r: {
6345 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6348 Opc = AArch64::LD3Rv8b;
6350 Opc = AArch64::LD3Rv16b;
6352 Opc = AArch64::LD3Rv4h;
6354 Opc = AArch64::LD3Rv8h;
6356 Opc = AArch64::LD3Rv2s;
6358 Opc = AArch64::LD3Rv4s;
6360 Opc = AArch64::LD3Rv2d;
6361 else if (Ty ==
S64 || Ty == P0)
6362 Opc = AArch64::LD3Rv1d;
6365 selectVectorLoadIntrinsic(Opc, 3,
I);
6368 case Intrinsic::aarch64_neon_ld4: {
6369 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6372 Opc = AArch64::LD4Fourv8b;
6374 Opc = AArch64::LD4Fourv16b;
6376 Opc = AArch64::LD4Fourv4h;
6378 Opc = AArch64::LD4Fourv8h;
6380 Opc = AArch64::LD4Fourv2s;
6382 Opc = AArch64::LD4Fourv4s;
6384 Opc = AArch64::LD4Fourv2d;
6385 else if (Ty ==
S64 || Ty == P0)
6386 Opc = AArch64::LD1Fourv1d;
6389 selectVectorLoadIntrinsic(Opc, 4,
I);
6392 case Intrinsic::aarch64_neon_ld4lane: {
6393 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6396 Opc = AArch64::LD4i8;
6398 Opc = AArch64::LD4i16;
6400 Opc = AArch64::LD4i32;
6403 Opc = AArch64::LD4i64;
6406 if (!selectVectorLoadLaneIntrinsic(Opc, 4,
I))
6410 case Intrinsic::aarch64_neon_ld4r: {
6411 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6414 Opc = AArch64::LD4Rv8b;
6416 Opc = AArch64::LD4Rv16b;
6418 Opc = AArch64::LD4Rv4h;
6420 Opc = AArch64::LD4Rv8h;
6422 Opc = AArch64::LD4Rv2s;
6424 Opc = AArch64::LD4Rv4s;
6426 Opc = AArch64::LD4Rv2d;
6427 else if (Ty ==
S64 || Ty == P0)
6428 Opc = AArch64::LD4Rv1d;
6431 selectVectorLoadIntrinsic(Opc, 4,
I);
6434 case Intrinsic::aarch64_neon_st1x2: {
6435 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6438 Opc = AArch64::ST1Twov8b;
6440 Opc = AArch64::ST1Twov16b;
6442 Opc = AArch64::ST1Twov4h;
6444 Opc = AArch64::ST1Twov8h;
6446 Opc = AArch64::ST1Twov2s;
6448 Opc = AArch64::ST1Twov4s;
6450 Opc = AArch64::ST1Twov2d;
6451 else if (Ty ==
S64 || Ty == P0)
6452 Opc = AArch64::ST1Twov1d;
6455 selectVectorStoreIntrinsic(
I, 2, Opc);
6458 case Intrinsic::aarch64_neon_st1x3: {
6459 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6462 Opc = AArch64::ST1Threev8b;
6464 Opc = AArch64::ST1Threev16b;
6466 Opc = AArch64::ST1Threev4h;
6468 Opc = AArch64::ST1Threev8h;
6470 Opc = AArch64::ST1Threev2s;
6472 Opc = AArch64::ST1Threev4s;
6474 Opc = AArch64::ST1Threev2d;
6475 else if (Ty ==
S64 || Ty == P0)
6476 Opc = AArch64::ST1Threev1d;
6479 selectVectorStoreIntrinsic(
I, 3, Opc);
6482 case Intrinsic::aarch64_neon_st1x4: {
6483 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6486 Opc = AArch64::ST1Fourv8b;
6488 Opc = AArch64::ST1Fourv16b;
6490 Opc = AArch64::ST1Fourv4h;
6492 Opc = AArch64::ST1Fourv8h;
6494 Opc = AArch64::ST1Fourv2s;
6496 Opc = AArch64::ST1Fourv4s;
6498 Opc = AArch64::ST1Fourv2d;
6499 else if (Ty ==
S64 || Ty == P0)
6500 Opc = AArch64::ST1Fourv1d;
6503 selectVectorStoreIntrinsic(
I, 4, Opc);
6506 case Intrinsic::aarch64_neon_st2: {
6507 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6510 Opc = AArch64::ST2Twov8b;
6512 Opc = AArch64::ST2Twov16b;
6514 Opc = AArch64::ST2Twov4h;
6516 Opc = AArch64::ST2Twov8h;
6518 Opc = AArch64::ST2Twov2s;
6520 Opc = AArch64::ST2Twov4s;
6522 Opc = AArch64::ST2Twov2d;
6523 else if (Ty ==
S64 || Ty == P0)
6524 Opc = AArch64::ST1Twov1d;
6527 selectVectorStoreIntrinsic(
I, 2, Opc);
6530 case Intrinsic::aarch64_neon_st3: {
6531 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6534 Opc = AArch64::ST3Threev8b;
6536 Opc = AArch64::ST3Threev16b;
6538 Opc = AArch64::ST3Threev4h;
6540 Opc = AArch64::ST3Threev8h;
6542 Opc = AArch64::ST3Threev2s;
6544 Opc = AArch64::ST3Threev4s;
6546 Opc = AArch64::ST3Threev2d;
6547 else if (Ty ==
S64 || Ty == P0)
6548 Opc = AArch64::ST1Threev1d;
6551 selectVectorStoreIntrinsic(
I, 3, Opc);
6554 case Intrinsic::aarch64_neon_st4: {
6555 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6558 Opc = AArch64::ST4Fourv8b;
6560 Opc = AArch64::ST4Fourv16b;
6562 Opc = AArch64::ST4Fourv4h;
6564 Opc = AArch64::ST4Fourv8h;
6566 Opc = AArch64::ST4Fourv2s;
6568 Opc = AArch64::ST4Fourv4s;
6570 Opc = AArch64::ST4Fourv2d;
6571 else if (Ty ==
S64 || Ty == P0)
6572 Opc = AArch64::ST1Fourv1d;
6575 selectVectorStoreIntrinsic(
I, 4, Opc);
6578 case Intrinsic::aarch64_neon_st2lane: {
6579 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6582 Opc = AArch64::ST2i8;
6584 Opc = AArch64::ST2i16;
6586 Opc = AArch64::ST2i32;
6589 Opc = AArch64::ST2i64;
6592 if (!selectVectorStoreLaneIntrinsic(
I, 2, Opc))
6596 case Intrinsic::aarch64_neon_st3lane: {
6597 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6600 Opc = AArch64::ST3i8;
6602 Opc = AArch64::ST3i16;
6604 Opc = AArch64::ST3i32;
6607 Opc = AArch64::ST3i64;
6610 if (!selectVectorStoreLaneIntrinsic(
I, 3, Opc))
6614 case Intrinsic::aarch64_neon_st4lane: {
6615 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6618 Opc = AArch64::ST4i8;
6620 Opc = AArch64::ST4i16;
6622 Opc = AArch64::ST4i32;
6625 Opc = AArch64::ST4i64;
6628 if (!selectVectorStoreLaneIntrinsic(
I, 4, Opc))
6632 case Intrinsic::aarch64_mops_memset_tag: {
6645 Register DstDef =
I.getOperand(0).getReg();
6647 Register DstUse =
I.getOperand(2).getReg();
6648 Register ValUse =
I.getOperand(3).getReg();
6649 Register SizeUse =
I.getOperand(4).getReg();
6656 auto Memset = MIB.
buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
6657 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
6664 I.eraseFromParent();
6668bool AArch64InstructionSelector::selectIntrinsic(
MachineInstr &
I,
6670 unsigned IntrinID = cast<GIntrinsic>(
I).getIntrinsicID();
6675 case Intrinsic::aarch64_crypto_sha1h: {
6676 Register DstReg =
I.getOperand(0).getReg();
6677 Register SrcReg =
I.getOperand(2).getReg();
6680 if (
MRI.getType(DstReg).getSizeInBits() != 32 ||
6681 MRI.getType(SrcReg).getSizeInBits() != 32)
6686 if (RBI.getRegBank(SrcReg,
MRI,
TRI)->getID() != AArch64::FPRRegBankID) {
6687 SrcReg =
MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6691 RBI.constrainGenericRegister(
I.getOperand(2).getReg(),
6692 AArch64::GPR32RegClass,
MRI);
6695 if (RBI.getRegBank(DstReg,
MRI,
TRI)->getID() != AArch64::FPRRegBankID)
6696 DstReg =
MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6699 auto SHA1Inst = MIB.
buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
6703 if (DstReg !=
I.getOperand(0).getReg()) {
6707 RBI.constrainGenericRegister(
I.getOperand(0).getReg(),
6708 AArch64::GPR32RegClass,
MRI);
6711 I.eraseFromParent();
6714 case Intrinsic::frameaddress:
6715 case Intrinsic::returnaddress: {
6719 unsigned Depth =
I.getOperand(2).getImm();
6720 Register DstReg =
I.getOperand(0).getReg();
6721 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass,
MRI);
6723 if (
Depth == 0 && IntrinID == Intrinsic::returnaddress) {
6724 if (!MFReturnAddr) {
6729 MF,
TII, AArch64::LR, AArch64::GPR64RegClass,
I.getDebugLoc());
6732 if (STI.hasPAuth()) {
6733 MIB.
buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
6740 I.eraseFromParent();
6747 Register NextFrame =
MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
6749 MIB.
buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
6751 FrameAddr = NextFrame;
6754 if (IntrinID == Intrinsic::frameaddress)
6759 if (STI.hasPAuth()) {
6760 Register TmpReg =
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
6761 MIB.
buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
6762 MIB.
buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
6771 I.eraseFromParent();
6774 case Intrinsic::swift_async_context_addr:
6775 auto Sub = MIB.
buildInstr(AArch64::SUBXri, {
I.getOperand(0).getReg()},
6783 I.eraseFromParent();
6790AArch64InstructionSelector::selectShiftA_32(
const MachineOperand &Root)
const {
6792 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6793 return std::nullopt;
6794 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
6799AArch64InstructionSelector::selectShiftB_32(
const MachineOperand &Root)
const {
6801 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6802 return std::nullopt;
6808AArch64InstructionSelector::selectShiftA_64(
const MachineOperand &Root)
const {
6810 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6811 return std::nullopt;
6812 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
6817AArch64InstructionSelector::selectShiftB_64(
const MachineOperand &Root)
const {
6819 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6820 return std::nullopt;
6831AArch64InstructionSelector::select12BitValueWithLeftShift(
6834 if (Immed >> 12 == 0) {
6836 }
else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
6838 Immed = Immed >> 12;
6840 return std::nullopt;
6853AArch64InstructionSelector::selectArithImmed(
MachineOperand &Root)
const {
6860 if (MaybeImmed == std::nullopt)
6861 return std::nullopt;
6862 return select12BitValueWithLeftShift(*MaybeImmed);
6868AArch64InstructionSelector::selectNegArithImmed(
MachineOperand &Root)
const {
6872 return std::nullopt;
6874 if (MaybeImmed == std::nullopt)
6875 return std::nullopt;
6882 return std::nullopt;
6887 if (
MRI.getType(Root.
getReg()).getSizeInBits() == 32)
6890 Immed = ~Immed + 1ULL;
6892 if (Immed & 0xFFFFFFFFFF000000ULL)
6893 return std::nullopt;
6895 Immed &= 0xFFFFFFULL;
6896 return select12BitValueWithLeftShift(Immed);
6902bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
6906 if (
MRI.hasOneNonDBGUse(DefReg) ||
6907 MI.getParent()->getParent()->getFunction().hasOptSize())
6912 if (!STI.hasAddrLSLFast())
6918 return all_of(
MRI.use_nodbg_instructions(DefReg),
6934AArch64InstructionSelector::selectExtendedSHL(
6936 unsigned SizeInBytes,
bool WantsExt)
const {
6937 assert(
Base.isReg() &&
"Expected base to be a register operand");
6938 assert(
Offset.isReg() &&
"Expected offset to be a register operand");
6943 unsigned OffsetOpc = OffsetInst->
getOpcode();
6944 bool LookedThroughZExt =
false;
6945 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
6947 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
6948 return std::nullopt;
6952 LookedThroughZExt =
true;
6954 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
6955 return std::nullopt;
6958 int64_t LegalShiftVal =
Log2_32(SizeInBytes);
6959 if (LegalShiftVal == 0)
6960 return std::nullopt;
6961 if (!isWorthFoldingIntoExtendedReg(*OffsetInst,
MRI))
6962 return std::nullopt;
6973 if (OffsetOpc == TargetOpcode::G_SHL)
6974 return std::nullopt;
6980 return std::nullopt;
6985 int64_t ImmVal = ValAndVReg->Value.getSExtValue();
6989 if (OffsetOpc == TargetOpcode::G_MUL) {
6990 if (!llvm::has_single_bit<uint32_t>(ImmVal))
6991 return std::nullopt;
6997 if ((ImmVal & 0x7) != ImmVal)
6998 return std::nullopt;
7002 if (ImmVal != LegalShiftVal)
7003 return std::nullopt;
7005 unsigned SignExtend = 0;
7009 if (!LookedThroughZExt) {
7011 auto Ext = getExtendTypeForInst(*ExtInst,
MRI,
true);
7013 return std::nullopt;
7018 return std::nullopt;
7024 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
7034 MIB.addImm(SignExtend);
7048AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
7051 return std::nullopt;
7068 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd,
MRI))
7069 return std::nullopt;
7075 return selectExtendedSHL(Root, PtrAdd->
getOperand(1),
7089AArch64InstructionSelector::selectAddrModeRegisterOffset(
7095 if (Gep->
getOpcode() != TargetOpcode::G_PTR_ADD)
7096 return std::nullopt;
7102 return std::nullopt;
7122AArch64InstructionSelector::selectAddrModeXRO(
MachineOperand &Root,
7123 unsigned SizeInBytes)
const {
7126 return std::nullopt;
7130 return std::nullopt;
7148 unsigned Scale =
Log2_32(SizeInBytes);
7149 int64_t ImmOff = ValAndVReg->Value.getSExtValue();
7153 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
7154 ImmOff < (0x1000 << Scale))
7155 return std::nullopt;
7160 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
7164 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
7170 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
7171 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
7176 return std::nullopt;
7180 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
7186 return selectAddrModeRegisterOffset(Root);
7196AArch64InstructionSelector::selectAddrModeWRO(
MachineOperand &Root,
7197 unsigned SizeInBytes)
const {
7202 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd,
MRI))
7203 return std::nullopt;
7224 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->
getOperand(0),
7233 if (!isWorthFoldingIntoExtendedReg(*OffsetInst,
MRI))
7234 return std::nullopt;
7238 getExtendTypeForInst(*OffsetInst,
MRI,
true);
7240 return std::nullopt;
7245 AArch64::GPR32RegClass, MIB);
7252 MIB.addImm(SignExtend);
7263AArch64InstructionSelector::selectAddrModeUnscaled(
MachineOperand &Root,
7264 unsigned Size)
const {
7269 return std::nullopt;
7271 if (!isBaseWithConstantOffset(Root,
MRI))
7272 return std::nullopt;
7277 if (!OffImm.
isReg())
7278 return std::nullopt;
7280 if (
RHS->getOpcode() != TargetOpcode::G_CONSTANT)
7281 return std::nullopt;
7285 return std::nullopt;
7288 if (RHSC >= -256 && RHSC < 256) {
7295 return std::nullopt;
7299AArch64InstructionSelector::tryFoldAddLowIntoImm(
MachineInstr &RootDef,
7302 if (RootDef.
getOpcode() != AArch64::G_ADD_LOW)
7303 return std::nullopt;
7306 return std::nullopt;
7311 return std::nullopt;
7315 return std::nullopt;
7319 return std::nullopt;
7321 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.
getTarget());
7326 MIB.addGlobalAddress(GV,
Offset,
7336AArch64InstructionSelector::selectAddrModeIndexed(
MachineOperand &Root,
7337 unsigned Size)
const {
7342 return std::nullopt;
7345 if (RootDef->
getOpcode() == TargetOpcode::G_FRAME_INDEX) {
7355 auto OpFns = tryFoldAddLowIntoImm(*RootDef,
Size,
MRI);
7360 if (isBaseWithConstantOffset(Root,
MRI)) {
7368 if ((RHSC & (
Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
7369 if (LHSDef->
getOpcode() == TargetOpcode::G_FRAME_INDEX)
7384 if (selectAddrModeUnscaled(Root,
Size))
7385 return std::nullopt;
7396 switch (
MI.getOpcode()) {
7399 case TargetOpcode::G_SHL:
7401 case TargetOpcode::G_LSHR:
7403 case TargetOpcode::G_ASHR:
7405 case TargetOpcode::G_ROTR:
7413AArch64InstructionSelector::selectShiftedRegister(
MachineOperand &Root,
7414 bool AllowROR)
const {
7416 return std::nullopt;
7425 return std::nullopt;
7427 return std::nullopt;
7428 if (!isWorthFoldingIntoExtendedReg(*ShiftInst,
MRI))
7429 return std::nullopt;
7435 return std::nullopt;
7442 unsigned NumBits =
MRI.getType(ShiftReg).getSizeInBits();
7443 unsigned Val = *Immed & (NumBits - 1);
7452 unsigned Opc =
MI.getOpcode();
7455 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
7457 if (Opc == TargetOpcode::G_SEXT)
7458 Size =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
7460 Size =
MI.getOperand(2).getImm();
7461 assert(
Size != 64 &&
"Extend from 64 bits?");
7474 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
7475 unsigned Size =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
7476 assert(
Size != 64 &&
"Extend from 64 bits?");
7491 if (Opc != TargetOpcode::G_AND)
7510Register AArch64InstructionSelector::moveScalarRegClass(
7513 auto Ty =
MRI.getType(Reg);
7522 return Copy.getReg(0);
7528AArch64InstructionSelector::selectArithExtendedRegister(
7531 return std::nullopt;
7540 return std::nullopt;
7542 if (!isWorthFoldingIntoExtendedReg(*RootDef,
MRI))
7543 return std::nullopt;
7546 if (RootDef->
getOpcode() == TargetOpcode::G_SHL) {
7551 return std::nullopt;
7552 ShiftVal = *MaybeShiftVal;
7554 return std::nullopt;
7559 return std::nullopt;
7560 Ext = getExtendTypeForInst(*ExtDef,
MRI);
7562 return std::nullopt;
7566 Ext = getExtendTypeForInst(*RootDef,
MRI);
7568 return std::nullopt;
7577 if (isDef32(*ExtInst))
7578 return std::nullopt;
7585 ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
7589 MIB.addImm(getArithExtendImm(Ext, ShiftVal));
7594AArch64InstructionSelector::selectExtractHigh(
MachineOperand &Root)
const {
7596 return std::nullopt;
7601 while (Extract && Extract->MI->
getOpcode() == TargetOpcode::G_BITCAST &&
7602 STI.isLittleEndian())
7606 return std::nullopt;
7608 if (Extract->MI->
getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
7614 if (Extract->MI->
getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) {
7619 LaneIdx->Value.getSExtValue() == 1) {
7625 return std::nullopt;
7632 assert(
MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7633 "Expected G_CONSTANT");
7634 std::optional<int64_t> CstVal =
7636 assert(CstVal &&
"Expected constant value");
7640void AArch64InstructionSelector::renderLogicalImm32(
7642 assert(
I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7643 "Expected G_CONSTANT");
7644 uint64_t CstVal =
I.getOperand(1).getCImm()->getZExtValue();
7649void AArch64InstructionSelector::renderLogicalImm64(
7651 assert(
I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7652 "Expected G_CONSTANT");
7653 uint64_t CstVal =
I.getOperand(1).getCImm()->getZExtValue();
7661 assert(
MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == 0 &&
7662 "Expected G_UBSANTRAP");
7663 MIB.
addImm(
MI.getOperand(0).getImm() | (
'U' << 8));
7669 assert(
MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7670 "Expected G_FCONSTANT");
7678 assert(
MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7679 "Expected G_FCONSTANT");
7687 assert(
MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7688 "Expected G_FCONSTANT");
7693void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
7695 assert(
MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7696 "Expected G_FCONSTANT");
7704bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
7706 if (!
MI.mayLoadOrStore())
7709 "Expected load/store to have only one mem op!");
7710 return (*
MI.memoperands_begin())->getSize() == NumBytes;
7713bool AArch64InstructionSelector::isDef32(
const MachineInstr &
MI)
const {
7715 if (
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits() != 32)
7722 switch (
MI.getOpcode()) {
7725 case TargetOpcode::COPY:
7726 case TargetOpcode::G_BITCAST:
7727 case TargetOpcode::G_TRUNC:
7728 case TargetOpcode::G_PHI:
7738 assert(
MI.getOpcode() == TargetOpcode::G_PHI &&
"Expected a G_PHI");
7741 assert(DstRB &&
"Expected PHI dst to have regbank assigned");
7752 auto *OpDef =
MRI.getVRegDef(OpReg);
7753 const LLT &Ty =
MRI.getType(OpReg);
7759 if (InsertPt != OpDefBB.
end() && InsertPt->isPHI())
7763 MRI.setRegBank(Copy.getReg(0), *DstRB);
7764 MO.setReg(Copy.getReg(0));
7773 for (
auto &BB : MF) {
7774 for (
auto &
MI : BB) {
7775 if (
MI.getOpcode() == TargetOpcode::G_PHI)
7780 for (
auto *
MI : Phis) {
7802 bool HasGPROp =
false, HasFPROp =
false;
7806 const LLT &Ty =
MRI.getType(MO.getReg());
7816 if (RB->
getID() == AArch64::GPRRegBankID)
7822 if (HasGPROp && HasFPROp)
7832 return new AArch64InstructionSelector(
TM, Subtarget, RBI);
unsigned const MachineRegisterInfo * MRI
static bool isPreferredADD(int64_t ImmOff)
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue CCOp, AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC, const SDLoc &DL, SelectionDAG &DAG)
can be transformed to: not (and (not (and (setCC (cmp C)) (setCD (cmp D)))) (and (not (setCA (cmp A))...
static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp, AArch64CC::CondCode Predicate)
Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain of CCMP/CFCMP ops.
static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static bool isCMN(SDValue Op, ISD::CondCode CC)
static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC)
Emit expression as a conjunction (a series of CCMP/CFCMP ops).
static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy)
#define GET_GLOBALISEL_PREDICATES_INIT
static std::pair< const TargetRegisterClass *, const TargetRegisterClass * > getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Helper function to get the source and destination register classes for a copy.
#define GET_GLOBALISEL_TEMPORARIES_INIT
static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Convert an IR fp condition code to an AArch64 CC.
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert, MachineRegisterInfo &MRI)
Return a register which can be used as a bit to test in a TB(N)Z.
static unsigned getMinSizeForRegBank(const RegisterBank &RB)
Returns the minimum size the given register bank can hold.
static std::optional< int64_t > getVectorShiftImm(Register Reg, MachineRegisterInfo &MRI)
Returns the element immediate value of a vector shift operand if found.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the G_LOAD or G_STORE operation GenericOpc, appropriate for the (value)...
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P)
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the basic binary operation GenericOpc (such as G_OR or G_SDIV),...
static bool getSubRegForClass(const TargetRegisterClass *RC, const TargetRegisterInfo &TRI, unsigned &SubReg)
Returns the correct subregister to use for a given register class.
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI, Register SrcReg, const TargetRegisterClass *To, unsigned SubReg)
Helper function for selectCopy.
static const TargetRegisterClass * getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits, bool GetAllRegSet=false)
Given a register bank, and size in bits, return the smallest register class that can represent that c...
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI, const AArch64RegisterBankInfo &RBI)
static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI)
static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type)
static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI)
Given a shift instruction, return the correct shift type for that instruction.
static bool unsupportedBinOp(const MachineInstr &I, const AArch64RegisterBankInfo &RBI, const MachineRegisterInfo &MRI, const AArch64RegisterInfo &TRI)
Check whether I is a currently unsupported binary operation:
static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst, bool WillNegate, MachineRegisterInfo &MRI, unsigned Depth=0)
Returns true if Val is a tree of AND/OR/CMP operations that can be expressed as a conjunction.
static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg, const unsigned EltSize)
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static std::optional< uint64_t > getImmedFromMO(const MachineOperand &Root)
static std::pair< unsigned, unsigned > getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize)
Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given size and RB.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
static std::optional< int64_t > getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI)
Matches and returns the shift immediate value for a SHL instruction given a shift operand.
static void changeFPCCToORAArch64CC(CmpInst::Predicate CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
This file declares the targeting of the RegisterBankInfo class for AArch64.
static bool isStore(int Opcode)
static bool selectMergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static bool selectUnmergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file contains constants used for implementing Dwarf debug support.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
const char LLVMTargetMachineRef TM
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static constexpr int Concat[]
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
int getVarArgsStackIndex() const
int getVarArgsGPRIndex() const
unsigned getVarArgsGPRSize() const
This class provides the information for the target register banks.
bool isCallingConvWin64(CallingConv::ID CC) const
APInt bitcastToAPInt() const
Class for arbitrary precision integers.
APInt zext(unsigned width) const
Zero extend to a new width.
uint64_t getZExtValue() const
Get zero extended value.
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ ICMP_ULE
unsigned less or equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
bool isIntPredicate() const
static Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
ConstantFP - Floating Point Values [float, double].
const APFloat & getValueAPF() const
bool isNegative() const
Return true if the sign bit is set.
bool isZero() const
Return true if the value is positive or negative zero.
This is the shared class of boolean and integer constants.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Constant * getSplatValue(bool AllowUndefs=false) const
If all elements of the vector constant have the same value, return that value.
const APInt & getUniqueInteger() const
If C is a constant integer then return its value, otherwise C must be a vector of constant integers,...
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
This class represents an Operation in the Expression.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
std::optional< SmallVector< std::function< void(MachineInstrBuilder &)>, 4 > > ComplexRendererFns
Represents indexed stores.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
Register getCondReg() const
Register getFalseReg() const
Register getTrueReg() const
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
bool isEquality() const
Return true if this predicate is either EQ or NE.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
constexpr LLT multiplyElements(int Factor) const
Produce a vector type that is Factor times bigger, preserving the element type.
constexpr bool isPointerVector() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
This is an important class for using LLVM in a threaded context.
TypeSize getValue() const
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setAdjustsStack(bool V)
void setFrameAddressIsTaken(bool T)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
Helper class to build MachineInstr.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineFunction & getMF()
Getter for the function we currently build.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineIRBuilderState & getState()
Getter for the State.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
void setState(const MachineIRBuilderState &NewState)
Setter for the State.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addBlockAddress(const BlockAddress *BA, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
void addMemOperand(MachineFunction &MF, MachineMemOperand *MO)
Add a MachineMemOperand to the machine instruction.
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
@ MOLoad
The memory access reads data.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
const ConstantInt * getCImm() const
bool isCImm() const
isCImm - Test if this is a MO_CImmediate operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
static MachineOperand CreateBA(const BlockAddress *BA, int64_t Offset, unsigned TargetFlags=0)
const ConstantFP * getFPImm() const
unsigned getPredicate() const
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
T get() const
Returns the value of the specified pointer type.
T dyn_cast() const
Returns the current pointer if it is of the specified pointer type, otherwise returns null.
Analysis providing profile information.
Holds all the information related to register banks.
static const TargetRegisterClass * constrainGenericRegister(Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI)
Constrain the (possibly generic) virtual register Reg to RC.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
TypeSize getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
This class represents the LLVM 'select' instruction.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static IntegerType * getInt8Ty(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
void changeFCMPPredToAArch64CC(const CmpInst::Predicate P, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Find the AArch64 condition codes necessary to represent P for a scalar floating point comparison.
std::optional< int64_t > getAArch64VectorSplatScalar(const MachineInstr &MI, const MachineRegisterInfo &MRI)
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint8_t encodeAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType9(uint64_t Imm)
static bool isAdvSIMDModImmType4(uint64_t Imm)
static bool isAdvSIMDModImmType5(uint64_t Imm)
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType10(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType9(uint64_t Imm)
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static bool isAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType5(uint64_t Imm)
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static bool isAdvSIMDModImmType10(uint64_t Imm)
static int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType8(uint64_t Imm)
static bool isAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType11(uint64_t Imm)
static bool isAdvSIMDModImmType11(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType6(uint64_t Imm)
static bool isAdvSIMDModImmType8(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType4(uint64_t Imm)
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
static bool isAdvSIMDModImmType6(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType1(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType1(uint64_t Imm)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(int64_t RequestedValue)
Matches a constant equal to RequestedValue.
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
This is an optimization pass for GlobalISel generic memory operations.
Register getFunctionLiveInPhysReg(MachineFunction &MF, const TargetInstrInfo &TII, MCRegister PhysReg, const TargetRegisterClass &RC, const DebugLoc &DL, LLT RegTy=LLT())
Return a virtual register corresponding to the incoming argument register PhysReg.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
bool isPreISelGenericOpcode(unsigned Opcode)
Check whether the given Opcode is a generic opcode that is not supposed to appear after ISel.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
std::optional< int64_t > getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT whose value fits in int64_t, returns it.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
bool shouldOptForSize(const MachineBasicBlock &MBB, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
Returns true if the given block should be optimized for size.
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
std::optional< ValueAndVReg > getAnyConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true, bool LookThroughAnyExt=false)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT or G_FCONST...
AtomicOrdering
Atomic ordering for LLVM's memory model.
InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &, AArch64Subtarget &, AArch64RegisterBankInfo &)
DWARFExpression::Operation Op
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg and the underlying value Register, folding away any copies.
Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Class which stores all the state required in a MachineIRBuilder.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.