44#include "llvm/IR/IntrinsicsAArch64.h"
52#define DEBUG_TYPE "aarch64-isel"
55using namespace MIPatternMatch;
56using namespace AArch64GISelUtils;
65#define GET_GLOBALISEL_PREDICATE_BITSET
66#include "AArch64GenGlobalISel.inc"
67#undef GET_GLOBALISEL_PREDICATE_BITSET
82 InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
87 ProduceNonFlagSettingCondBr =
135 bool tryOptAndIntoCompareBranch(
MachineInstr &AndInst,
bool Invert,
213 bool selectVectorLoadIntrinsic(
unsigned Opc,
unsigned NumVecs,
215 bool selectVectorLoadLaneIntrinsic(
unsigned Opc,
unsigned NumVecs,
217 void selectVectorStoreIntrinsic(
MachineInstr &
I,
unsigned NumVecs,
219 bool selectVectorStoreLaneIntrinsic(
MachineInstr &
I,
unsigned NumVecs,
236 unsigned emitConstantPoolEntry(
const Constant *CPVal,
255 std::optional<CmpInst::Predicate> = std::nullopt)
const;
258 emitInstr(
unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
259 std::initializer_list<llvm::SrcOp> SrcOps,
261 const ComplexRendererFns &RenderFns = std::nullopt)
const;
296 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
317 MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
339 std::pair<MachineInstr *, AArch64CC::CondCode>
374 ComplexRendererFns selectShiftA_32(
const MachineOperand &Root)
const;
375 ComplexRendererFns selectShiftB_32(
const MachineOperand &Root)
const;
376 ComplexRendererFns selectShiftA_64(
const MachineOperand &Root)
const;
377 ComplexRendererFns selectShiftB_64(
const MachineOperand &Root)
const;
379 ComplexRendererFns select12BitValueWithLeftShift(
uint64_t Immed)
const;
381 ComplexRendererFns selectNegArithImmed(
MachineOperand &Root)
const;
384 unsigned Size)
const;
386 ComplexRendererFns selectAddrModeUnscaled8(
MachineOperand &Root)
const {
387 return selectAddrModeUnscaled(Root, 1);
389 ComplexRendererFns selectAddrModeUnscaled16(
MachineOperand &Root)
const {
390 return selectAddrModeUnscaled(Root, 2);
392 ComplexRendererFns selectAddrModeUnscaled32(
MachineOperand &Root)
const {
393 return selectAddrModeUnscaled(Root, 4);
395 ComplexRendererFns selectAddrModeUnscaled64(
MachineOperand &Root)
const {
396 return selectAddrModeUnscaled(Root, 8);
398 ComplexRendererFns selectAddrModeUnscaled128(
MachineOperand &Root)
const {
399 return selectAddrModeUnscaled(Root, 16);
404 ComplexRendererFns tryFoldAddLowIntoImm(
MachineInstr &RootDef,
unsigned Size,
408 unsigned Size)
const;
410 ComplexRendererFns selectAddrModeIndexed(
MachineOperand &Root)
const {
411 return selectAddrModeIndexed(Root, Width / 8);
418 unsigned SizeInBytes)
const;
426 bool WantsExt)
const;
427 ComplexRendererFns selectAddrModeRegisterOffset(
MachineOperand &Root)
const;
429 unsigned SizeInBytes)
const;
431 ComplexRendererFns selectAddrModeXRO(
MachineOperand &Root)
const {
432 return selectAddrModeXRO(Root, Width / 8);
436 unsigned SizeInBytes)
const;
438 ComplexRendererFns selectAddrModeWRO(
MachineOperand &Root)
const {
439 return selectAddrModeWRO(Root, Width / 8);
443 bool AllowROR =
false)
const;
445 ComplexRendererFns selectArithShiftedRegister(
MachineOperand &Root)
const {
446 return selectShiftedRegister(Root);
449 ComplexRendererFns selectLogicalShiftedRegister(
MachineOperand &Root)
const {
450 return selectShiftedRegister(Root,
true);
460 bool IsLoadStore =
false)
const;
471 ComplexRendererFns selectArithExtendedRegister(
MachineOperand &Root)
const;
476 int OpIdx = -1)
const;
478 int OpIdx = -1)
const;
480 int OpIdx = -1)
const;
484 int OpIdx = -1)
const;
486 int OpIdx = -1)
const;
488 int OpIdx = -1)
const;
491 int OpIdx = -1)
const;
497 bool tryOptSelect(
GSelect &Sel);
504 bool isLoadStoreOfNumBytes(
const MachineInstr &
MI,
unsigned NumBytes)
const;
517 bool ProduceNonFlagSettingCondBr =
false;
526#define GET_GLOBALISEL_PREDICATES_DECL
527#include "AArch64GenGlobalISel.inc"
528#undef GET_GLOBALISEL_PREDICATES_DECL
532#define GET_GLOBALISEL_TEMPORARIES_DECL
533#include "AArch64GenGlobalISel.inc"
534#undef GET_GLOBALISEL_TEMPORARIES_DECL
539#define GET_GLOBALISEL_IMPL
540#include "AArch64GenGlobalISel.inc"
541#undef GET_GLOBALISEL_IMPL
543AArch64InstructionSelector::AArch64InstructionSelector(
546 :
TM(
TM), STI(STI),
TII(*STI.getInstrInfo()),
TRI(*STI.getRegisterInfo()),
549#include
"AArch64GenGlobalISel.inc"
552#include
"AArch64GenGlobalISel.inc"
564 bool GetAllRegSet =
false) {
565 if (RB.
getID() == AArch64::GPRRegBankID) {
567 return GetAllRegSet ? &AArch64::GPR32allRegClass
568 : &AArch64::GPR32RegClass;
570 return GetAllRegSet ? &AArch64::GPR64allRegClass
571 : &AArch64::GPR64RegClass;
573 return &AArch64::XSeqPairsClassRegClass;
577 if (RB.
getID() == AArch64::FPRRegBankID) {
580 return &AArch64::FPR8RegClass;
582 return &AArch64::FPR16RegClass;
584 return &AArch64::FPR32RegClass;
586 return &AArch64::FPR64RegClass;
588 return &AArch64::FPR128RegClass;
600 bool GetAllRegSet =
false) {
601 unsigned RegBankID = RB.
getID();
603 if (RegBankID == AArch64::GPRRegBankID) {
604 if (SizeInBits <= 32)
605 return GetAllRegSet ? &AArch64::GPR32allRegClass
606 : &AArch64::GPR32RegClass;
607 if (SizeInBits == 64)
608 return GetAllRegSet ? &AArch64::GPR64allRegClass
609 : &AArch64::GPR64RegClass;
610 if (SizeInBits == 128)
611 return &AArch64::XSeqPairsClassRegClass;
614 if (RegBankID == AArch64::FPRRegBankID) {
615 switch (SizeInBits) {
619 return &AArch64::FPR8RegClass;
621 return &AArch64::FPR16RegClass;
623 return &AArch64::FPR32RegClass;
625 return &AArch64::FPR64RegClass;
627 return &AArch64::FPR128RegClass;
637 switch (
TRI.getRegSizeInBits(*RC)) {
645 if (RC != &AArch64::FPR32RegClass)
655 dbgs() <<
"Couldn't find appropriate subregister for register class.");
664 switch (RB.
getID()) {
665 case AArch64::GPRRegBankID:
667 case AArch64::FPRRegBankID:
690 const unsigned RegClassIDs[],
692 unsigned NumRegs = Regs.
size();
695 assert(NumRegs >= 2 && NumRegs <= 4 &&
696 "Only support between two and 4 registers in a tuple!");
698 auto *DesiredClass =
TRI->getRegClass(RegClassIDs[NumRegs - 2]);
700 MIB.
buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
701 for (
unsigned I = 0, E = Regs.
size();
I < E; ++
I) {
702 RegSequence.addUse(Regs[
I]);
703 RegSequence.addImm(SubRegs[
I]);
705 return RegSequence.getReg(0);
710 static const unsigned RegClassIDs[] = {
711 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
712 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
713 AArch64::dsub2, AArch64::dsub3};
714 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
719 static const unsigned RegClassIDs[] = {
720 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
721 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
722 AArch64::qsub2, AArch64::qsub3};
723 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
728 auto &
MBB = *
MI.getParent();
730 auto &
MRI = MF.getRegInfo();
736 else if (Root.
isReg()) {
741 Immed = ValAndVReg->Value.getSExtValue();
757 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
764 for (
auto &MO :
I.operands()) {
767 LLVM_DEBUG(
dbgs() <<
"Generic inst non-reg operands are unsupported\n");
775 if (!MO.getReg().isVirtual()) {
776 LLVM_DEBUG(
dbgs() <<
"Generic inst has physical register operand\n");
786 if (PrevOpBank && OpBank != PrevOpBank) {
787 LLVM_DEBUG(
dbgs() <<
"Generic inst operands have different banks\n");
802 case AArch64::GPRRegBankID:
804 switch (GenericOpc) {
805 case TargetOpcode::G_SHL:
806 return AArch64::LSLVWr;
807 case TargetOpcode::G_LSHR:
808 return AArch64::LSRVWr;
809 case TargetOpcode::G_ASHR:
810 return AArch64::ASRVWr;
814 }
else if (OpSize == 64) {
815 switch (GenericOpc) {
816 case TargetOpcode::G_PTR_ADD:
817 return AArch64::ADDXrr;
818 case TargetOpcode::G_SHL:
819 return AArch64::LSLVXr;
820 case TargetOpcode::G_LSHR:
821 return AArch64::LSRVXr;
822 case TargetOpcode::G_ASHR:
823 return AArch64::ASRVXr;
829 case AArch64::FPRRegBankID:
832 switch (GenericOpc) {
833 case TargetOpcode::G_FADD:
834 return AArch64::FADDSrr;
835 case TargetOpcode::G_FSUB:
836 return AArch64::FSUBSrr;
837 case TargetOpcode::G_FMUL:
838 return AArch64::FMULSrr;
839 case TargetOpcode::G_FDIV:
840 return AArch64::FDIVSrr;
845 switch (GenericOpc) {
846 case TargetOpcode::G_FADD:
847 return AArch64::FADDDrr;
848 case TargetOpcode::G_FSUB:
849 return AArch64::FSUBDrr;
850 case TargetOpcode::G_FMUL:
851 return AArch64::FMULDrr;
852 case TargetOpcode::G_FDIV:
853 return AArch64::FDIVDrr;
854 case TargetOpcode::G_OR:
855 return AArch64::ORRv8i8;
872 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
874 case AArch64::GPRRegBankID:
877 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
879 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
881 return isStore ? AArch64::STRWui : AArch64::LDRWui;
883 return isStore ? AArch64::STRXui : AArch64::LDRXui;
886 case AArch64::FPRRegBankID:
889 return isStore ? AArch64::STRBui : AArch64::LDRBui;
891 return isStore ? AArch64::STRHui : AArch64::LDRHui;
893 return isStore ? AArch64::STRSui : AArch64::LDRSui;
895 return isStore ? AArch64::STRDui : AArch64::LDRDui;
897 return isStore ? AArch64::STRQui : AArch64::LDRQui;
911 assert(SrcReg.
isValid() &&
"Expected a valid source register?");
912 assert(To &&
"Destination register class cannot be null");
919 RegOp.
setReg(SubRegCopy.getReg(0));
923 if (!
I.getOperand(0).getReg().isPhysical())
933static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
937 Register DstReg =
I.getOperand(0).getReg();
938 Register SrcReg =
I.getOperand(1).getReg();
952 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
953 SrcSize = DstSize = 32;
970 if (Reg.isPhysical())
972 LLT Ty =
MRI.getType(Reg);
978 RC = getRegClassForTypeOnBank(Ty, RB);
981 dbgs() <<
"Warning: DBG_VALUE operand has unexpected size/bank\n");
994 Register DstReg =
I.getOperand(0).getReg();
995 Register SrcReg =
I.getOperand(1).getReg();
1014 LLVM_DEBUG(
dbgs() <<
"Couldn't determine source register class\n");
1018 unsigned SrcSize =
TRI.getRegSizeInBits(*SrcRC);
1019 unsigned DstSize =
TRI.getRegSizeInBits(*DstRC);
1030 auto Copy = MIB.
buildCopy({DstTempRC}, {SrcReg});
1032 }
else if (SrcSize > DstSize) {
1039 }
else if (DstSize > SrcSize) {
1046 Register PromoteReg =
MRI.createVirtualRegister(PromotionRC);
1048 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1053 RegOp.
setReg(PromoteReg);
1072 if (
I.getOpcode() == TargetOpcode::G_ZEXT) {
1073 I.setDesc(
TII.get(AArch64::COPY));
1074 assert(SrcRegBank.
getID() == AArch64::GPRRegBankID);
1078 I.setDesc(
TII.get(AArch64::COPY));
1093 switch (GenericOpc) {
1094 case TargetOpcode::G_SITOFP:
1095 return AArch64::SCVTFUWSri;
1096 case TargetOpcode::G_UITOFP:
1097 return AArch64::UCVTFUWSri;
1098 case TargetOpcode::G_FPTOSI:
1099 return AArch64::FCVTZSUWSr;
1100 case TargetOpcode::G_FPTOUI:
1101 return AArch64::FCVTZUUWSr;
1106 switch (GenericOpc) {
1107 case TargetOpcode::G_SITOFP:
1108 return AArch64::SCVTFUXSri;
1109 case TargetOpcode::G_UITOFP:
1110 return AArch64::UCVTFUXSri;
1111 case TargetOpcode::G_FPTOSI:
1112 return AArch64::FCVTZSUWDr;
1113 case TargetOpcode::G_FPTOUI:
1114 return AArch64::FCVTZUUWDr;
1124 switch (GenericOpc) {
1125 case TargetOpcode::G_SITOFP:
1126 return AArch64::SCVTFUWDri;
1127 case TargetOpcode::G_UITOFP:
1128 return AArch64::UCVTFUWDri;
1129 case TargetOpcode::G_FPTOSI:
1130 return AArch64::FCVTZSUXSr;
1131 case TargetOpcode::G_FPTOUI:
1132 return AArch64::FCVTZUUXSr;
1137 switch (GenericOpc) {
1138 case TargetOpcode::G_SITOFP:
1139 return AArch64::SCVTFUXDri;
1140 case TargetOpcode::G_UITOFP:
1141 return AArch64::UCVTFUXDri;
1142 case TargetOpcode::G_FPTOSI:
1143 return AArch64::FCVTZSUXDr;
1144 case TargetOpcode::G_FPTOUI:
1145 return AArch64::FCVTZUUXDr;
1164 RBI.getRegBank(True,
MRI,
TRI)->getID() &&
1165 "Expected both select operands to have the same regbank?");
1166 LLT Ty =
MRI.getType(True);
1171 "Expected 32 bit or 64 bit select only?");
1172 const bool Is32Bit =
Size == 32;
1173 if (RBI.getRegBank(True,
MRI,
TRI)->getID() != AArch64::GPRRegBankID) {
1174 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1175 auto FCSel = MIB.
buildInstr(Opc, {Dst}, {True, False}).addImm(
CC);
1181 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1183 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &
CC, &
MRI,
1198 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1215 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1234 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1250 auto TryOptSelectCst = [&Opc, &True, &False, &
CC, Is32Bit, &
MRI,
1256 if (!TrueCst && !FalseCst)
1259 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1260 if (TrueCst && FalseCst) {
1261 int64_t
T = TrueCst->Value.getSExtValue();
1262 int64_t
F = FalseCst->Value.getSExtValue();
1264 if (
T == 0 &&
F == 1) {
1266 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1272 if (
T == 0 &&
F == -1) {
1274 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1282 int64_t
T = TrueCst->Value.getSExtValue();
1285 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1294 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1303 int64_t
F = FalseCst->Value.getSExtValue();
1306 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1313 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1321 Optimized |= TryFoldBinOpIntoSelect(False, True,
false);
1322 Optimized |= TryFoldBinOpIntoSelect(True, False,
true);
1443 assert(Reg.isValid() &&
"Expected valid register!");
1444 bool HasZext =
false;
1446 unsigned Opc =
MI->getOpcode();
1448 if (!
MI->getOperand(0).isReg() ||
1449 !
MRI.hasOneNonDBGUse(
MI->getOperand(0).getReg()))
1456 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1457 Opc == TargetOpcode::G_TRUNC) {
1458 if (Opc == TargetOpcode::G_ZEXT)
1461 Register NextReg =
MI->getOperand(1).getReg();
1463 if (!NextReg.
isValid() || !
MRI.hasOneNonDBGUse(NextReg))
1472 std::optional<uint64_t>
C;
1477 case TargetOpcode::G_AND:
1478 case TargetOpcode::G_XOR: {
1479 TestReg =
MI->getOperand(1).getReg();
1480 Register ConstantReg =
MI->getOperand(2).getReg();
1491 C = VRegAndVal->Value.getZExtValue();
1493 C = VRegAndVal->Value.getSExtValue();
1497 case TargetOpcode::G_ASHR:
1498 case TargetOpcode::G_LSHR:
1499 case TargetOpcode::G_SHL: {
1500 TestReg =
MI->getOperand(1).getReg();
1504 C = VRegAndVal->Value.getSExtValue();
1516 unsigned TestRegSize =
MRI.getType(TestReg).getSizeInBits();
1520 case TargetOpcode::G_AND:
1522 if ((*
C >> Bit) & 1)
1525 case TargetOpcode::G_SHL:
1528 if (*
C <= Bit && (Bit - *
C) < TestRegSize) {
1533 case TargetOpcode::G_ASHR:
1538 if (Bit >= TestRegSize)
1539 Bit = TestRegSize - 1;
1541 case TargetOpcode::G_LSHR:
1543 if ((Bit + *
C) < TestRegSize) {
1548 case TargetOpcode::G_XOR:
1557 if ((*
C >> Bit) & 1)
1576 assert(ProduceNonFlagSettingCondBr &&
1577 "Cannot emit TB(N)Z with speculation tracking!");
1582 LLT Ty =
MRI.getType(TestReg);
1585 assert(Bit < 64 &&
"Bit is too large!");
1589 bool UseWReg =
Bit < 32;
1590 unsigned NecessarySize = UseWReg ? 32 : 64;
1591 if (
Size != NecessarySize)
1592 TestReg = moveScalarRegClass(
1593 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1596 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1597 {AArch64::TBZW, AArch64::TBNZW}};
1598 unsigned Opc = OpcTable[UseWReg][IsNegative];
1605bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1608 assert(AndInst.
getOpcode() == TargetOpcode::G_AND &&
"Expected G_AND only?");
1635 int32_t
Bit = MaybeBit->Value.exactLogBase2();
1642 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1650 assert(ProduceNonFlagSettingCondBr &&
"CBZ does not set flags!");
1652 assert(RBI.getRegBank(CompareReg,
MRI,
TRI)->getID() ==
1653 AArch64::GPRRegBankID &&
1654 "Expected GPRs only?");
1655 auto Ty =
MRI.getType(CompareReg);
1658 assert(Width <= 64 &&
"Expected width to be at most 64?");
1659 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1660 {AArch64::CBNZW, AArch64::CBNZX}};
1661 unsigned Opc = OpcTable[IsNegative][Width == 64];
1662 auto BranchMI = MIB.
buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1667bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1670 assert(
I.getOpcode() == TargetOpcode::G_BRCOND);
1682 I.eraseFromParent();
1686bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1689 assert(
I.getOpcode() == TargetOpcode::G_BRCOND);
1695 if (!ProduceNonFlagSettingCondBr)
1714 if (VRegAndVal && !AndInst) {
1715 int64_t
C = VRegAndVal->Value.getSExtValue();
1721 emitTestBit(LHS, Bit,
false, DestMBB, MIB);
1722 I.eraseFromParent();
1730 emitTestBit(LHS, Bit,
true, DestMBB, MIB);
1731 I.eraseFromParent();
1739 emitTestBit(LHS, Bit,
false, DestMBB, MIB);
1740 I.eraseFromParent();
1754 if (VRegAndVal && VRegAndVal->Value == 0) {
1762 tryOptAndIntoCompareBranch(
1764 I.eraseFromParent();
1769 auto LHSTy =
MRI.getType(LHS);
1770 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1772 I.eraseFromParent();
1781bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1784 assert(
I.getOpcode() == TargetOpcode::G_BRCOND);
1785 if (tryOptCompareBranchFedByICmp(
I, ICmp, MIB))
1795 I.eraseFromParent();
1799bool AArch64InstructionSelector::selectCompareBranch(
1801 Register CondReg =
I.getOperand(0).getReg();
1806 if (CCMIOpc == TargetOpcode::G_FCMP)
1807 return selectCompareBranchFedByFCmp(
I, *CCMI, MIB);
1808 if (CCMIOpc == TargetOpcode::G_ICMP)
1809 return selectCompareBranchFedByICmp(
I, *CCMI, MIB);
1814 if (ProduceNonFlagSettingCondBr) {
1815 emitTestBit(CondReg, 0,
true,
1816 I.getOperand(1).getMBB(), MIB);
1817 I.eraseFromParent();
1827 .
addMBB(
I.getOperand(1).getMBB());
1828 I.eraseFromParent();
1836 assert(
MRI.getType(Reg).isVector() &&
"Expected a *vector* shift operand");
1847 return std::nullopt;
1849 int64_t Imm = *ShiftImm;
1851 return std::nullopt;
1855 return std::nullopt;
1858 return std::nullopt;
1862 return std::nullopt;
1866 return std::nullopt;
1870 return std::nullopt;
1876bool AArch64InstructionSelector::selectVectorSHL(
MachineInstr &
I,
1878 assert(
I.getOpcode() == TargetOpcode::G_SHL);
1879 Register DstReg =
I.getOperand(0).getReg();
1880 const LLT Ty =
MRI.getType(DstReg);
1881 Register Src1Reg =
I.getOperand(1).getReg();
1882 Register Src2Reg =
I.getOperand(2).getReg();
1893 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1895 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1897 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1899 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1901 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1903 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1905 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1911 auto Shl = MIB.
buildInstr(Opc, {DstReg}, {Src1Reg});
1917 I.eraseFromParent();
1921bool AArch64InstructionSelector::selectVectorAshrLshr(
1923 assert(
I.getOpcode() == TargetOpcode::G_ASHR ||
1924 I.getOpcode() == TargetOpcode::G_LSHR);
1925 Register DstReg =
I.getOperand(0).getReg();
1926 const LLT Ty =
MRI.getType(DstReg);
1927 Register Src1Reg =
I.getOperand(1).getReg();
1928 Register Src2Reg =
I.getOperand(2).getReg();
1933 bool IsASHR =
I.getOpcode() == TargetOpcode::G_ASHR;
1943 unsigned NegOpc = 0;
1945 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
1947 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1948 NegOpc = AArch64::NEGv2i64;
1950 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1951 NegOpc = AArch64::NEGv4i32;
1953 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1954 NegOpc = AArch64::NEGv2i32;
1956 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1957 NegOpc = AArch64::NEGv4i16;
1959 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1960 NegOpc = AArch64::NEGv8i16;
1962 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1963 NegOpc = AArch64::NEGv16i8;
1965 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1966 NegOpc = AArch64::NEGv8i8;
1972 auto Neg = MIB.
buildInstr(NegOpc, {RC}, {Src2Reg});
1974 auto SShl = MIB.
buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1976 I.eraseFromParent();
1980bool AArch64InstructionSelector::selectVaStartAAPCS(
1985bool AArch64InstructionSelector::selectVaStartDarwin(
1988 Register ListReg =
I.getOperand(0).getReg();
1990 Register ArgsAddrReg =
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2001 BuildMI(*
I.getParent(),
I,
I.getDebugLoc(),
TII.get(AArch64::ADDXri))
2009 MIB =
BuildMI(*
I.getParent(),
I,
I.getDebugLoc(),
TII.get(AArch64::STRXui))
2016 I.eraseFromParent();
2020void AArch64InstructionSelector::materializeLargeCMVal(
2026 auto MovZ = MIB.
buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
2037 :
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2039 if (
auto *GV = dyn_cast<GlobalValue>(V)) {
2041 GV, MovZ->getOperand(1).getOffset(), Flags));
2045 MovZ->getOperand(1).getOffset(), Flags));
2051 Register DstReg = BuildMovK(MovZ.getReg(0),
2057bool AArch64InstructionSelector::preISelLower(
MachineInstr &
I) {
2062 switch (
I.getOpcode()) {
2063 case TargetOpcode::G_STORE: {
2064 bool Changed = contractCrossBankCopyIntoStore(
I,
MRI);
2072 SrcOp.setReg(NewSrc);
2073 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass,
MRI);
2078 case TargetOpcode::G_PTR_ADD:
2079 return convertPtrAddToAdd(
I,
MRI);
2080 case TargetOpcode::G_LOAD: {
2085 Register DstReg =
I.getOperand(0).getReg();
2086 const LLT DstTy =
MRI.getType(DstReg);
2092 case AArch64::G_DUP: {
2094 LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
2098 MRI.setType(
I.getOperand(0).getReg(),
2100 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2101 I.getOperand(1).setReg(NewSrc.getReg(0));
2104 case TargetOpcode::G_UITOFP:
2105 case TargetOpcode::G_SITOFP: {
2110 Register SrcReg =
I.getOperand(1).getReg();
2111 LLT SrcTy =
MRI.getType(SrcReg);
2112 LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
2116 if (RBI.getRegBank(SrcReg,
MRI,
TRI)->getID() == AArch64::FPRRegBankID) {
2117 if (
I.getOpcode() == TargetOpcode::G_SITOFP)
2118 I.setDesc(
TII.get(AArch64::G_SITOF));
2120 I.setDesc(
TII.get(AArch64::G_UITOF));
2138bool AArch64InstructionSelector::convertPtrAddToAdd(
2140 assert(
I.getOpcode() == TargetOpcode::G_PTR_ADD &&
"Expected G_PTR_ADD");
2141 Register DstReg =
I.getOperand(0).getReg();
2142 Register AddOp1Reg =
I.getOperand(1).getReg();
2143 const LLT PtrTy =
MRI.getType(DstReg);
2147 const LLT CastPtrTy =
2152 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2154 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2158 I.setDesc(
TII.get(TargetOpcode::G_ADD));
2159 MRI.setType(DstReg, CastPtrTy);
2160 I.getOperand(1).setReg(PtrToInt.getReg(0));
2161 if (!select(*PtrToInt)) {
2162 LLVM_DEBUG(
dbgs() <<
"Failed to select G_PTRTOINT in convertPtrAddToAdd");
2171 I.getOperand(2).setReg(NegatedReg);
2172 I.setDesc(
TII.get(TargetOpcode::G_SUB));
2176bool AArch64InstructionSelector::earlySelectSHL(
MachineInstr &
I,
2181 assert(
I.getOpcode() == TargetOpcode::G_SHL &&
"unexpected op");
2182 const auto &MO =
I.getOperand(2);
2187 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
2191 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2192 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2194 if (!Imm1Fn || !Imm2Fn)
2198 MIB.
buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2201 for (
auto &RenderFn : *Imm1Fn)
2203 for (
auto &RenderFn : *Imm2Fn)
2206 I.eraseFromParent();
2210bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2212 assert(
I.getOpcode() == TargetOpcode::G_STORE &&
"Expected G_STORE");
2230 LLT DefDstTy =
MRI.getType(DefDstReg);
2231 Register StoreSrcReg =
I.getOperand(0).getReg();
2232 LLT StoreSrcTy =
MRI.getType(StoreSrcReg);
2243 if (RBI.getRegBank(StoreSrcReg,
MRI,
TRI) ==
2244 RBI.getRegBank(DefDstReg,
MRI,
TRI))
2248 I.getOperand(0).setReg(DefDstReg);
2252bool AArch64InstructionSelector::earlySelect(
MachineInstr &
I) {
2253 assert(
I.getParent() &&
"Instruction should be in a basic block!");
2254 assert(
I.getParent()->getParent() &&
"Instruction should be in a function!");
2260 switch (
I.getOpcode()) {
2261 case AArch64::G_DUP: {
2264 Register Src =
I.getOperand(1).getReg();
2269 Register Dst =
I.getOperand(0).getReg();
2271 MRI.getType(Dst).getNumElements(),
2273 ValAndVReg->Value));
2274 if (!emitConstantVector(Dst, CV, MIB,
MRI))
2276 I.eraseFromParent();
2279 case TargetOpcode::G_SEXT:
2282 if (selectUSMovFromExtend(
I,
MRI))
2285 case TargetOpcode::G_BR:
2287 case TargetOpcode::G_SHL:
2288 return earlySelectSHL(
I,
MRI);
2289 case TargetOpcode::G_CONSTANT: {
2290 bool IsZero =
false;
2291 if (
I.getOperand(1).isCImm())
2292 IsZero =
I.getOperand(1).getCImm()->isZero();
2293 else if (
I.getOperand(1).isImm())
2294 IsZero =
I.getOperand(1).getImm() == 0;
2299 Register DefReg =
I.getOperand(0).getReg();
2300 LLT Ty =
MRI.getType(DefReg);
2302 I.getOperand(1).ChangeToRegister(AArch64::XZR,
false);
2303 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
MRI);
2305 I.getOperand(1).ChangeToRegister(AArch64::WZR,
false);
2306 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass,
MRI);
2310 I.setDesc(
TII.get(TargetOpcode::COPY));
2314 case TargetOpcode::G_ADD: {
2323 Register AddDst =
I.getOperand(0).getReg();
2324 Register AddLHS =
I.getOperand(1).getReg();
2325 Register AddRHS =
I.getOperand(2).getReg();
2327 LLT Ty =
MRI.getType(AddLHS);
2336 if (!
MRI.hasOneNonDBGUse(Reg))
2350 MRI.getType(
Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2360 Cmp = MatchCmp(AddRHS);
2364 auto &PredOp =
Cmp->getOperand(1);
2369 emitIntegerCompare(
Cmp->getOperand(2),
2370 Cmp->getOperand(3), PredOp, MIB);
2371 emitCSINC(AddDst, AddLHS, AddLHS, InvCC, MIB);
2372 I.eraseFromParent();
2375 case TargetOpcode::G_OR: {
2379 Register Dst =
I.getOperand(0).getReg();
2380 LLT Ty =
MRI.getType(Dst);
2399 if (ShiftImm >
Size || ((1ULL << ShiftImm) - 1ULL) !=
uint64_t(MaskImm))
2402 int64_t Immr =
Size - ShiftImm;
2403 int64_t Imms =
Size - ShiftImm - 1;
2404 unsigned Opc =
Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2405 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2406 I.eraseFromParent();
2409 case TargetOpcode::G_FENCE: {
2410 if (
I.getOperand(1).getImm() == 0)
2414 .
addImm(
I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
2415 I.eraseFromParent();
2424 assert(
I.getParent() &&
"Instruction should be in a basic block!");
2425 assert(
I.getParent()->getParent() &&
"Instruction should be in a function!");
2432 if (Subtarget->requiresStrictAlign()) {
2434 LLVM_DEBUG(
dbgs() <<
"AArch64 GISel does not support strict-align yet\n");
2440 unsigned Opcode =
I.getOpcode();
2442 if (!
I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2445 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2448 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2449 const Register DefReg =
I.getOperand(0).getReg();
2450 const LLT DefTy =
MRI.getType(DefReg);
2453 MRI.getRegClassOrRegBank(DefReg);
2463 DefRC = getRegClassForTypeOnBank(DefTy, RB);
2470 I.setDesc(
TII.get(TargetOpcode::PHI));
2472 return RBI.constrainGenericRegister(DefReg, *DefRC,
MRI);
2478 if (
I.isDebugInstr())
2485 if (
I.getNumOperands() !=
I.getNumExplicitOperands()) {
2487 dbgs() <<
"Generic instruction has unexpected implicit operands\n");
2494 if (preISelLower(
I)) {
2495 Opcode =
I.getOpcode();
2506 if (selectImpl(
I, *CoverageInfo))
2510 I.getOperand(0).isReg() ?
MRI.getType(
I.getOperand(0).getReg()) :
LLT{};
2513 case TargetOpcode::G_SBFX:
2514 case TargetOpcode::G_UBFX: {
2515 static const unsigned OpcTable[2][2] = {
2516 {AArch64::UBFMWri, AArch64::UBFMXri},
2517 {AArch64::SBFMWri, AArch64::SBFMXri}};
2518 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2520 unsigned Opc = OpcTable[IsSigned][
Size == 64];
2523 assert(Cst1 &&
"Should have gotten a constant for src 1?");
2526 assert(Cst2 &&
"Should have gotten a constant for src 2?");
2527 auto LSB = Cst1->Value.getZExtValue();
2528 auto Width = Cst2->Value.getZExtValue();
2530 MIB.
buildInstr(Opc, {
I.getOperand(0)}, {
I.getOperand(1)})
2532 .
addImm(LSB + Width - 1);
2533 I.eraseFromParent();
2536 case TargetOpcode::G_BRCOND:
2537 return selectCompareBranch(
I, MF,
MRI);
2539 case TargetOpcode::G_BRINDIRECT: {
2540 I.setDesc(
TII.get(AArch64::BR));
2544 case TargetOpcode::G_BRJT:
2545 return selectBrJT(
I,
MRI);
2547 case AArch64::G_ADD_LOW: {
2553 if (BaseMI->
getOpcode() != AArch64::ADRP) {
2554 I.setDesc(
TII.get(AArch64::ADDXri));
2559 "Expected small code model");
2561 auto Op2 =
I.getOperand(2);
2562 auto MovAddr = MIB.
buildInstr(AArch64::MOVaddr, {
I.getOperand(0)}, {})
2563 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2564 Op1.getTargetFlags())
2566 Op2.getTargetFlags());
2567 I.eraseFromParent();
2571 case TargetOpcode::G_FCONSTANT:
2572 case TargetOpcode::G_CONSTANT: {
2573 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2582 const Register DefReg =
I.getOperand(0).getReg();
2583 const LLT DefTy =
MRI.getType(DefReg);
2589 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2591 <<
" constant, expected: " << s16 <<
" or " << s32
2592 <<
" or " << s64 <<
" or " << s128 <<
'\n');
2596 if (RB.
getID() != AArch64::FPRRegBankID) {
2598 <<
" constant on bank: " << RB
2599 <<
", expected: FPR\n");
2607 if (DefSize != 128 &&
I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2611 if (Ty != p0 && Ty != s8 && Ty != s16) {
2613 <<
" constant, expected: " << s32 <<
", " << s64
2614 <<
", or " << p0 <<
'\n');
2618 if (RB.
getID() != AArch64::GPRRegBankID) {
2620 <<
" constant on bank: " << RB
2621 <<
", expected: GPR\n");
2638 if (TLI->isFPImmLegal(
I.getOperand(1).getFPImm()->getValueAPF(),
2645 auto *FPImm =
I.getOperand(1).getFPImm();
2648 LLVM_DEBUG(
dbgs() <<
"Failed to load double constant pool entry\n");
2652 I.eraseFromParent();
2653 return RBI.constrainGenericRegister(DefReg, FPRRC,
MRI);
2657 assert((DefSize == 32 || DefSize == 64) &&
"Unexpected const def size");
2659 const Register DefGPRReg =
MRI.createVirtualRegister(
2660 DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2666 if (!RBI.constrainGenericRegister(DefReg, FPRRC,
MRI)) {
2667 LLVM_DEBUG(
dbgs() <<
"Failed to constrain G_FCONSTANT def operand\n");
2675 }
else if (
I.getOperand(1).isCImm()) {
2676 uint64_t Val =
I.getOperand(1).getCImm()->getZExtValue();
2677 I.getOperand(1).ChangeToImmediate(Val);
2678 }
else if (
I.getOperand(1).isImm()) {
2679 uint64_t Val =
I.getOperand(1).getImm();
2680 I.getOperand(1).ChangeToImmediate(Val);
2683 const unsigned MovOpc =
2684 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2685 I.setDesc(
TII.get(MovOpc));
2689 case TargetOpcode::G_EXTRACT: {
2690 Register DstReg =
I.getOperand(0).getReg();
2691 Register SrcReg =
I.getOperand(1).getReg();
2692 LLT SrcTy =
MRI.getType(SrcReg);
2693 LLT DstTy =
MRI.getType(DstReg);
2705 unsigned Offset =
I.getOperand(2).getImm();
2714 if (SrcRB.
getID() == AArch64::GPRRegBankID) {
2716 MIB.
buildInstr(TargetOpcode::COPY, {DstReg}, {})
2718 Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2720 AArch64::GPR64RegClass, NewI->getOperand(0));
2721 I.eraseFromParent();
2727 unsigned LaneIdx =
Offset / 64;
2729 DstReg, DstRB,
LLT::scalar(64), SrcReg, LaneIdx, MIB);
2732 I.eraseFromParent();
2736 I.setDesc(
TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2742 "unexpected G_EXTRACT types");
2749 .addReg(DstReg, 0, AArch64::sub_32);
2750 RBI.constrainGenericRegister(
I.getOperand(0).getReg(),
2751 AArch64::GPR32RegClass,
MRI);
2752 I.getOperand(0).setReg(DstReg);
2757 case TargetOpcode::G_INSERT: {
2758 LLT SrcTy =
MRI.getType(
I.getOperand(2).getReg());
2759 LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
2766 I.setDesc(
TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2767 unsigned LSB =
I.getOperand(3).getImm();
2768 unsigned Width =
MRI.getType(
I.getOperand(2).getReg()).getSizeInBits();
2769 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2774 "unexpected G_INSERT types");
2780 TII.get(AArch64::SUBREG_TO_REG))
2783 .
addUse(
I.getOperand(2).getReg())
2784 .
addImm(AArch64::sub_32);
2785 RBI.constrainGenericRegister(
I.getOperand(2).getReg(),
2786 AArch64::GPR32RegClass,
MRI);
2787 I.getOperand(2).setReg(SrcReg);
2791 case TargetOpcode::G_FRAME_INDEX: {
2798 I.setDesc(
TII.get(AArch64::ADDXri));
2807 case TargetOpcode::G_GLOBAL_VALUE: {
2810 if (
I.getOperand(1).isSymbol()) {
2811 OpFlags =
I.getOperand(1).getTargetFlags();
2815 GV =
I.getOperand(1).getGlobal();
2817 return selectTLSGlobalValue(
I,
MRI);
2818 OpFlags = STI.ClassifyGlobalReference(GV,
TM);
2822 I.setDesc(
TII.get(AArch64::LOADgot));
2823 I.getOperand(1).setTargetFlags(OpFlags);
2825 !
TM.isPositionIndependent()) {
2827 materializeLargeCMVal(
I, GV, OpFlags);
2828 I.eraseFromParent();
2831 I.setDesc(
TII.get(AArch64::ADR));
2832 I.getOperand(1).setTargetFlags(OpFlags);
2834 I.setDesc(
TII.get(AArch64::MOVaddr));
2837 MIB.addGlobalAddress(GV,
I.getOperand(1).getOffset(),
2843 case TargetOpcode::G_ZEXTLOAD:
2844 case TargetOpcode::G_LOAD:
2845 case TargetOpcode::G_STORE: {
2847 bool IsZExtLoad =
I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2861 if (Order != AtomicOrdering::NotAtomic &&
2862 Order != AtomicOrdering::Unordered &&
2863 Order != AtomicOrdering::Monotonic) {
2864 assert(!isa<GZExtLoad>(LdSt));
2865 if (MemSizeInBytes > 64)
2868 if (isa<GLoad>(LdSt)) {
2869 static constexpr unsigned LDAPROpcodes[] = {
2870 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2871 static constexpr unsigned LDAROpcodes[] = {
2872 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2874 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
2877 I.setDesc(
TII.get(Opcodes[
Log2_32(MemSizeInBytes)]));
2879 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2880 AArch64::STLRW, AArch64::STLRX};
2882 if (
MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2884 Register NewVal =
MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2885 MIB.
buildInstr(TargetOpcode::COPY, {NewVal}, {})
2886 .addReg(
I.getOperand(0).getReg(), 0, AArch64::sub_32);
2887 I.getOperand(0).setReg(NewVal);
2889 I.setDesc(
TII.get(Opcodes[
Log2_32(MemSizeInBytes)]));
2900 "Load/Store pointer operand isn't a GPR");
2901 assert(
MRI.getType(PtrReg).isPointer() &&
2902 "Load/Store pointer operand isn't a pointer");
2906 const LLT ValTy =
MRI.getType(ValReg);
2911 if (isa<GStore>(LdSt) && ValTy.
getSizeInBits() > MemSizeInBits) {
2914 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2920 .addReg(ValReg, 0,
SubReg)
2922 RBI.constrainGenericRegister(Copy, *RC,
MRI);
2924 }
else if (isa<GLoad>(LdSt) && ValTy.
getSizeInBits() > MemSizeInBits) {
2927 if (RB.
getID() == AArch64::FPRRegBankID) {
2930 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2937 MRI.setRegBank(NewDst, RB);
2940 MIB.
buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
2944 auto SubRegRC = getRegClassForTypeOnBank(
MRI.getType(OldDst), RB);
2945 RBI.constrainGenericRegister(OldDst, *SubRegRC,
MRI);
2952 auto SelectLoadStoreAddressingMode = [&]() ->
MachineInstr * {
2953 bool IsStore = isa<GStore>(
I);
2954 const unsigned NewOpc =
2956 if (NewOpc ==
I.getOpcode())
2960 selectAddrModeIndexed(
I.getOperand(1), MemSizeInBytes);
2963 I.setDesc(
TII.get(NewOpc));
2969 auto NewInst = MIB.
buildInstr(NewOpc, {}, {},
I.getFlags());
2970 Register CurValReg =
I.getOperand(0).getReg();
2971 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
2972 NewInst.cloneMemRefs(
I);
2973 for (
auto &Fn : *AddrModeFns)
2975 I.eraseFromParent();
2984 if (Opcode == TargetOpcode::G_STORE) {
2987 if (CVal && CVal->Value == 0) {
2989 case AArch64::STRWui:
2990 case AArch64::STRHHui:
2991 case AArch64::STRBBui:
2992 LoadStore->getOperand(0).setReg(AArch64::WZR);
2994 case AArch64::STRXui:
2995 LoadStore->getOperand(0).setReg(AArch64::XZR);
3001 if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
3002 ValTy ==
LLT::scalar(64) && MemSizeInBits == 32)) {
3005 if (
MRI.getType(
LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
3009 Register LdReg =
MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3014 MIB.
buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
3017 .
addImm(AArch64::sub_32);
3019 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
3025 case TargetOpcode::G_INDEXED_ZEXTLOAD:
3026 case TargetOpcode::G_INDEXED_SEXTLOAD:
3027 return selectIndexedExtLoad(
I,
MRI);
3028 case TargetOpcode::G_INDEXED_LOAD:
3029 return selectIndexedLoad(
I,
MRI);
3030 case TargetOpcode::G_INDEXED_STORE:
3031 return selectIndexedStore(cast<GIndexedStore>(
I),
MRI);
3033 case TargetOpcode::G_LSHR:
3034 case TargetOpcode::G_ASHR:
3035 if (
MRI.getType(
I.getOperand(0).getReg()).isVector())
3036 return selectVectorAshrLshr(
I,
MRI);
3038 case TargetOpcode::G_SHL:
3039 if (Opcode == TargetOpcode::G_SHL &&
3040 MRI.getType(
I.getOperand(0).getReg()).isVector())
3041 return selectVectorSHL(
I,
MRI);
3048 Register SrcReg =
I.getOperand(1).getReg();
3049 Register ShiftReg =
I.getOperand(2).getReg();
3050 const LLT ShiftTy =
MRI.getType(ShiftReg);
3051 const LLT SrcTy =
MRI.getType(SrcReg);
3056 auto Trunc = MIB.
buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3057 .addReg(ShiftReg, 0, AArch64::sub_32);
3058 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3059 I.getOperand(2).setReg(Trunc.getReg(0));
3063 case TargetOpcode::G_OR: {
3070 const Register DefReg =
I.getOperand(0).getReg();
3074 if (NewOpc ==
I.getOpcode())
3077 I.setDesc(
TII.get(NewOpc));
3085 case TargetOpcode::G_PTR_ADD: {
3086 emitADD(
I.getOperand(0).getReg(),
I.getOperand(1),
I.getOperand(2), MIB);
3087 I.eraseFromParent();
3091 case TargetOpcode::G_SADDE:
3092 case TargetOpcode::G_UADDE:
3093 case TargetOpcode::G_SSUBE:
3094 case TargetOpcode::G_USUBE:
3095 case TargetOpcode::G_SADDO:
3096 case TargetOpcode::G_UADDO:
3097 case TargetOpcode::G_SSUBO:
3098 case TargetOpcode::G_USUBO:
3099 return selectOverflowOp(
I,
MRI);
3101 case TargetOpcode::G_PTRMASK: {
3102 Register MaskReg =
I.getOperand(2).getReg();
3109 I.setDesc(
TII.get(AArch64::ANDXri));
3110 I.getOperand(2).ChangeToImmediate(
3115 case TargetOpcode::G_PTRTOINT:
3116 case TargetOpcode::G_TRUNC: {
3117 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
3118 const LLT SrcTy =
MRI.getType(
I.getOperand(1).getReg());
3120 const Register DstReg =
I.getOperand(0).getReg();
3121 const Register SrcReg =
I.getOperand(1).getReg();
3128 dbgs() <<
"G_TRUNC/G_PTRTOINT input/output on different banks\n");
3132 if (DstRB.
getID() == AArch64::GPRRegBankID) {
3141 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC,
MRI) ||
3142 !RBI.constrainGenericRegister(DstReg, *DstRC,
MRI)) {
3143 LLVM_DEBUG(
dbgs() <<
"Failed to constrain G_TRUNC/G_PTRTOINT\n");
3147 if (DstRC == SrcRC) {
3149 }
else if (Opcode == TargetOpcode::G_TRUNC && DstTy ==
LLT::scalar(32) &&
3153 }
else if (DstRC == &AArch64::GPR32RegClass &&
3154 SrcRC == &AArch64::GPR64RegClass) {
3155 I.getOperand(1).setSubReg(AArch64::sub_32);
3158 dbgs() <<
"Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3162 I.setDesc(
TII.get(TargetOpcode::COPY));
3164 }
else if (DstRB.
getID() == AArch64::FPRRegBankID) {
3167 I.setDesc(
TII.get(AArch64::XTNv4i16));
3177 I.eraseFromParent();
3182 if (Opcode == TargetOpcode::G_PTRTOINT) {
3183 assert(DstTy.
isVector() &&
"Expected an FPR ptrtoint to be a vector");
3184 I.setDesc(
TII.get(TargetOpcode::COPY));
3192 case TargetOpcode::G_ANYEXT: {
3193 if (selectUSMovFromExtend(
I,
MRI))
3196 const Register DstReg =
I.getOperand(0).getReg();
3197 const Register SrcReg =
I.getOperand(1).getReg();
3200 if (RBDst.
getID() != AArch64::GPRRegBankID) {
3202 <<
", expected: GPR\n");
3207 if (RBSrc.
getID() != AArch64::GPRRegBankID) {
3209 <<
", expected: GPR\n");
3213 const unsigned DstSize =
MRI.getType(DstReg).getSizeInBits();
3216 LLVM_DEBUG(
dbgs() <<
"G_ANYEXT operand has no size, not a gvreg?\n");
3220 if (DstSize != 64 && DstSize > 32) {
3222 <<
", expected: 32 or 64\n");
3228 Register ExtSrc =
MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3233 .
addImm(AArch64::sub_32);
3234 I.getOperand(1).setReg(ExtSrc);
3239 case TargetOpcode::G_ZEXT:
3240 case TargetOpcode::G_SEXT_INREG:
3241 case TargetOpcode::G_SEXT: {
3242 if (selectUSMovFromExtend(
I,
MRI))
3245 unsigned Opcode =
I.getOpcode();
3246 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3247 const Register DefReg =
I.getOperand(0).getReg();
3248 Register SrcReg =
I.getOperand(1).getReg();
3249 const LLT DstTy =
MRI.getType(DefReg);
3250 const LLT SrcTy =
MRI.getType(SrcReg);
3256 if (Opcode == TargetOpcode::G_SEXT_INREG)
3257 SrcSize =
I.getOperand(2).getImm();
3263 AArch64::GPRRegBankID &&
3264 "Unexpected ext regbank");
3277 RBI.getRegBank(SrcReg,
MRI,
TRI)->getID() == AArch64::GPRRegBankID;
3278 if (LoadMI && IsGPR) {
3280 unsigned BytesLoaded =
MemOp->getSize().getValue();
3287 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3289 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3290 const Register ZReg = AArch64::WZR;
3291 MIB.
buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
3294 MIB.
buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3297 .
addImm(AArch64::sub_32);
3299 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3301 LLVM_DEBUG(
dbgs() <<
"Failed to constrain G_ZEXT destination\n");
3305 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3311 I.eraseFromParent();
3316 if (DstSize == 64) {
3317 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3319 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3325 SrcReg = MIB.
buildInstr(AArch64::SUBREG_TO_REG,
3326 {&AArch64::GPR64RegClass}, {})
3333 ExtI = MIB.
buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3337 }
else if (DstSize <= 32) {
3338 ExtI = MIB.
buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3347 I.eraseFromParent();
3351 case TargetOpcode::G_SITOFP:
3352 case TargetOpcode::G_UITOFP:
3353 case TargetOpcode::G_FPTOSI:
3354 case TargetOpcode::G_FPTOUI: {
3355 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg()),
3356 SrcTy =
MRI.getType(
I.getOperand(1).getReg());
3358 if (NewOpc == Opcode)
3361 I.setDesc(
TII.get(NewOpc));
3368 case TargetOpcode::G_FREEZE:
3371 case TargetOpcode::G_INTTOPTR:
3376 case TargetOpcode::G_BITCAST:
3384 case TargetOpcode::G_SELECT: {
3385 auto &Sel = cast<GSelect>(
I);
3386 const Register CondReg = Sel.getCondReg();
3387 const Register TReg = Sel.getTrueReg();
3388 const Register FReg = Sel.getFalseReg();
3390 if (tryOptSelect(Sel))
3395 Register DeadVReg =
MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3396 auto TstMI = MIB.
buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3399 if (!emitSelect(Sel.getReg(0), TReg, FReg,
AArch64CC::NE, MIB))
3401 Sel.eraseFromParent();
3404 case TargetOpcode::G_ICMP: {
3406 return selectVectorICmp(
I,
MRI);
3417 emitIntegerCompare(
I.getOperand(2),
I.getOperand(3),
I.getOperand(1), MIB);
3418 emitCSINC(
I.getOperand(0).getReg(), AArch64::WZR,
3419 AArch64::WZR, InvCC, MIB);
3420 I.eraseFromParent();
3424 case TargetOpcode::G_FCMP: {
3427 if (!emitFPCompare(
I.getOperand(2).getReg(),
I.getOperand(3).getReg(), MIB,
3429 !emitCSetForFCmp(
I.getOperand(0).getReg(), Pred, MIB))
3431 I.eraseFromParent();
3434 case TargetOpcode::G_VASTART:
3435 return STI.isTargetDarwin() ? selectVaStartDarwin(
I, MF,
MRI)
3436 : selectVaStartAAPCS(
I, MF,
MRI);
3437 case TargetOpcode::G_INTRINSIC:
3438 return selectIntrinsic(
I,
MRI);
3439 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3440 return selectIntrinsicWithSideEffects(
I,
MRI);
3441 case TargetOpcode::G_IMPLICIT_DEF: {
3442 I.setDesc(
TII.get(TargetOpcode::IMPLICIT_DEF));
3443 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
3444 const Register DstReg =
I.getOperand(0).getReg();
3447 RBI.constrainGenericRegister(DstReg, *DstRC,
MRI);
3450 case TargetOpcode::G_BLOCK_ADDR: {
3452 materializeLargeCMVal(
I,
I.getOperand(1).getBlockAddress(), 0);
3453 I.eraseFromParent();
3456 I.setDesc(
TII.get(AArch64::MOVaddrBA));
3457 auto MovMI =
BuildMI(
MBB,
I,
I.getDebugLoc(),
TII.get(AArch64::MOVaddrBA),
3458 I.getOperand(0).getReg())
3462 I.getOperand(1).getBlockAddress(), 0,
3464 I.eraseFromParent();
3468 case AArch64::G_DUP: {
3474 if (RBI.getRegBank(
I.getOperand(1).getReg(),
MRI,
TRI)->getID() !=
3475 AArch64::GPRRegBankID)
3477 LLT VecTy =
MRI.getType(
I.getOperand(0).getReg());
3479 I.setDesc(
TII.get(AArch64::DUPv8i8gpr));
3481 I.setDesc(
TII.get(AArch64::DUPv16i8gpr));
3483 I.setDesc(
TII.get(AArch64::DUPv4i16gpr));
3485 I.setDesc(
TII.get(AArch64::DUPv8i16gpr));
3490 case TargetOpcode::G_BUILD_VECTOR:
3491 return selectBuildVector(
I,
MRI);
3492 case TargetOpcode::G_MERGE_VALUES:
3494 case TargetOpcode::G_UNMERGE_VALUES:
3496 case TargetOpcode::G_SHUFFLE_VECTOR:
3497 return selectShuffleVector(
I,
MRI);
3498 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3499 return selectExtractElt(
I,
MRI);
3500 case TargetOpcode::G_CONCAT_VECTORS:
3501 return selectConcatVectors(
I,
MRI);
3502 case TargetOpcode::G_JUMP_TABLE:
3503 return selectJumpTable(
I,
MRI);
3504 case TargetOpcode::G_MEMCPY:
3505 case TargetOpcode::G_MEMCPY_INLINE:
3506 case TargetOpcode::G_MEMMOVE:
3507 case TargetOpcode::G_MEMSET:
3508 assert(STI.hasMOPS() &&
"Shouldn't get here without +mops feature");
3509 return selectMOPS(
I,
MRI);
3515bool AArch64InstructionSelector::selectAndRestoreState(
MachineInstr &
I) {
3522bool AArch64InstructionSelector::selectMOPS(
MachineInstr &GI,
3526 case TargetOpcode::G_MEMCPY:
3527 case TargetOpcode::G_MEMCPY_INLINE:
3528 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3530 case TargetOpcode::G_MEMMOVE:
3531 Mopcode = AArch64::MOPSMemoryMovePseudo;
3533 case TargetOpcode::G_MEMSET:
3535 Mopcode = AArch64::MOPSMemorySetPseudo;
3544 const Register DstPtrCopy =
MRI.cloneVirtualRegister(DstPtr.getReg());
3545 const Register SrcValCopy =
MRI.cloneVirtualRegister(SrcOrVal.getReg());
3548 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3549 const auto &SrcValRegClass =
3550 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3553 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass,
MRI);
3554 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass,
MRI);
3555 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass,
MRI);
3565 Register DefDstPtr =
MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3566 Register DefSize =
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3568 MIB.
buildInstr(Mopcode, {DefDstPtr, DefSize},
3569 {DstPtrCopy, SizeCopy, SrcValCopy});
3571 Register DefSrcPtr =
MRI.createVirtualRegister(&SrcValRegClass);
3572 MIB.
buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3573 {DstPtrCopy, SrcValCopy, SizeCopy});
3582 assert(
I.getOpcode() == TargetOpcode::G_BRJT &&
"Expected G_BRJT");
3583 Register JTAddr =
I.getOperand(0).getReg();
3584 unsigned JTI =
I.getOperand(1).getIndex();
3587 Register TargetReg =
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3588 Register ScratchReg =
MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3591 auto JumpTableInst = MIB.
buildInstr(AArch64::JumpTableDest32,
3592 {TargetReg, ScratchReg}, {JTAddr,
Index})
3593 .addJumpTableIndex(JTI);
3595 MIB.
buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},
3596 {
static_cast<int64_t
>(JTI)});
3598 MIB.
buildInstr(AArch64::BR, {}, {TargetReg});
3599 I.eraseFromParent();
3603bool AArch64InstructionSelector::selectJumpTable(
MachineInstr &
I,
3605 assert(
I.getOpcode() == TargetOpcode::G_JUMP_TABLE &&
"Expected jump table");
3606 assert(
I.getOperand(1).isJTI() &&
"Jump table op should have a JTI!");
3608 Register DstReg =
I.getOperand(0).getReg();
3609 unsigned JTI =
I.getOperand(1).getIndex();
3612 MIB.
buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3615 I.eraseFromParent();
3619bool AArch64InstructionSelector::selectTLSGlobalValue(
3621 if (!STI.isTargetMachO())
3626 const auto &GlobalOp =
I.getOperand(1);
3627 assert(GlobalOp.getOffset() == 0 &&
3628 "Shouldn't have an offset on TLS globals!");
3632 MIB.
buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3635 auto Load = MIB.
buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3636 {LoadGOT.getReg(0)})
3649 RBI.constrainGenericRegister(
I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3651 I.eraseFromParent();
3655bool AArch64InstructionSelector::selectVectorICmp(
3657 Register DstReg =
I.getOperand(0).getReg();
3658 LLT DstTy =
MRI.getType(DstReg);
3659 Register SrcReg =
I.getOperand(2).getReg();
3660 Register Src2Reg =
I.getOperand(3).getReg();
3661 LLT SrcTy =
MRI.getType(SrcReg);
3686 unsigned PredIdx = 0;
3687 bool SwapOperands =
false;
3702 SwapOperands =
true;
3706 SwapOperands =
true;
3716 SwapOperands =
true;
3720 SwapOperands =
true;
3730 static const unsigned OpcTable[4][4][9] = {
3738 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3739 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3740 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3741 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3742 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3743 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3749 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3750 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3751 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3752 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3753 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3754 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3760 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3761 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3762 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3763 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3764 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3765 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3774 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3775 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3776 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3788 unsigned EltIdx =
Log2_32(SrcEltSize / 8);
3789 unsigned NumEltsIdx =
Log2_32(NumElts / 2);
3790 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
3798 getRegClassForTypeOnBank(SrcTy, VecRB,
true);
3800 LLVM_DEBUG(
dbgs() <<
"Could not determine source register class.\n");
3804 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3806 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3821 RBI.constrainGenericRegister(DstReg, *SrcRC,
MRI);
3822 I.eraseFromParent();
3826MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3829 auto Undef = MIRBuilder.
buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3831 auto BuildFn = [&](
unsigned SubregIndex) {
3835 .addImm(SubregIndex);
3843 return BuildFn(AArch64::bsub);
3845 return BuildFn(AArch64::hsub);
3847 return BuildFn(AArch64::ssub);
3849 return BuildFn(AArch64::dsub);
3856AArch64InstructionSelector::emitNarrowVector(
Register DstReg,
Register SrcReg,
3859 LLT DstTy =
MRI.getType(DstReg);
3861 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg,
MRI,
TRI));
3862 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3869 if (
SubReg != AArch64::ssub &&
SubReg != AArch64::dsub) {
3875 .addReg(SrcReg, 0,
SubReg);
3876 RBI.constrainGenericRegister(DstReg, *RC,
MRI);
3880bool AArch64InstructionSelector::selectMergeValues(
3882 assert(
I.getOpcode() == TargetOpcode::G_MERGE_VALUES &&
"unexpected opcode");
3883 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
3884 const LLT SrcTy =
MRI.getType(
I.getOperand(1).getReg());
3888 if (
I.getNumOperands() != 3)
3895 Register DstReg =
I.getOperand(0).getReg();
3896 Register Src1Reg =
I.getOperand(1).getReg();
3897 Register Src2Reg =
I.getOperand(2).getReg();
3898 auto Tmp = MIB.
buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3899 MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
3904 Src2Reg, 1, RB, MIB);
3909 I.eraseFromParent();
3913 if (RB.
getID() != AArch64::GPRRegBankID)
3919 auto *DstRC = &AArch64::GPR64RegClass;
3920 Register SubToRegDef =
MRI.createVirtualRegister(DstRC);
3922 TII.get(TargetOpcode::SUBREG_TO_REG))
3925 .
addUse(
I.getOperand(1).getReg())
3926 .
addImm(AArch64::sub_32);
3927 Register SubToRegDef2 =
MRI.createVirtualRegister(DstRC);
3930 TII.get(TargetOpcode::SUBREG_TO_REG))
3933 .
addUse(
I.getOperand(2).getReg())
3934 .
addImm(AArch64::sub_32);
3936 *
BuildMI(*
I.getParent(),
I,
I.getDebugLoc(),
TII.get(AArch64::BFMXri))
3937 .
addDef(
I.getOperand(0).getReg())
3945 I.eraseFromParent();
3950 const unsigned EltSize) {
3955 CopyOpc = AArch64::DUPi8;
3956 ExtractSubReg = AArch64::bsub;
3959 CopyOpc = AArch64::DUPi16;
3960 ExtractSubReg = AArch64::hsub;
3963 CopyOpc = AArch64::DUPi32;
3964 ExtractSubReg = AArch64::ssub;
3967 CopyOpc = AArch64::DUPi64;
3968 ExtractSubReg = AArch64::dsub;
3972 LLVM_DEBUG(
dbgs() <<
"Elt size '" << EltSize <<
"' unsupported.\n");
3978MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3979 std::optional<Register> DstReg,
const RegisterBank &DstRB,
LLT ScalarTy,
3982 unsigned CopyOpc = 0;
3983 unsigned ExtractSubReg = 0;
3986 dbgs() <<
"Couldn't determine lane copy opcode for instruction.\n");
3991 getRegClassForTypeOnBank(ScalarTy, DstRB,
true);
3993 LLVM_DEBUG(
dbgs() <<
"Could not determine destination register class.\n");
3998 const LLT &VecTy =
MRI.getType(VecReg);
4000 getRegClassForTypeOnBank(VecTy, VecRB,
true);
4002 LLVM_DEBUG(
dbgs() <<
"Could not determine source register class.\n");
4009 DstReg =
MRI.createVirtualRegister(DstRC);
4012 auto Copy = MIRBuilder.
buildInstr(TargetOpcode::COPY, {*DstReg}, {})
4013 .addReg(VecReg, 0, ExtractSubReg);
4014 RBI.constrainGenericRegister(*DstReg, *DstRC,
MRI);
4023 VecTy.
getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
4024 if (!ScalarToVector)
4030 MIRBuilder.
buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
4034 RBI.constrainGenericRegister(*DstReg, *DstRC,
MRI);
4038bool AArch64InstructionSelector::selectExtractElt(
4040 assert(
I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
4041 "unexpected opcode!");
4042 Register DstReg =
I.getOperand(0).getReg();
4043 const LLT NarrowTy =
MRI.getType(DstReg);
4044 const Register SrcReg =
I.getOperand(1).getReg();
4045 const LLT WideTy =
MRI.getType(SrcReg);
4048 "source register size too small!");
4049 assert(!NarrowTy.
isVector() &&
"cannot extract vector into vector!");
4053 assert(LaneIdxOp.
isReg() &&
"Lane index operand was not a register?");
4055 if (RBI.getRegBank(DstReg,
MRI,
TRI)->getID() != AArch64::FPRRegBankID) {
4064 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4068 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4073 I.eraseFromParent();
4077bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4079 unsigned NumElts =
I.getNumOperands() - 1;
4080 Register SrcReg =
I.getOperand(NumElts).getReg();
4081 const LLT NarrowTy =
MRI.getType(
I.getOperand(0).getReg());
4082 const LLT SrcTy =
MRI.getType(SrcReg);
4084 assert(NarrowTy.
isVector() &&
"Expected an unmerge into vectors");
4086 LLVM_DEBUG(
dbgs() <<
"Unexpected vector type for vec split unmerge");
4093 *RBI.getRegBank(
I.getOperand(0).getReg(),
MRI,
TRI);
4094 for (
unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4095 Register Dst =
I.getOperand(OpIdx).getReg();
4097 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4101 I.eraseFromParent();
4105bool AArch64InstructionSelector::selectUnmergeValues(
MachineInstr &
I,
4107 assert(
I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4108 "unexpected opcode");
4111 if (RBI.getRegBank(
I.getOperand(0).getReg(),
MRI,
TRI)->getID() !=
4112 AArch64::FPRRegBankID ||
4113 RBI.getRegBank(
I.getOperand(1).getReg(),
MRI,
TRI)->getID() !=
4114 AArch64::FPRRegBankID) {
4115 LLVM_DEBUG(
dbgs() <<
"Unmerging vector-to-gpr and scalar-to-scalar "
4116 "currently unsupported.\n");
4122 unsigned NumElts =
I.getNumOperands() - 1;
4123 Register SrcReg =
I.getOperand(NumElts).getReg();
4124 const LLT NarrowTy =
MRI.getType(
I.getOperand(0).getReg());
4125 const LLT WideTy =
MRI.getType(SrcReg);
4128 "can only unmerge from vector or s128 types!");
4130 "source register size too small!");
4133 return selectSplitVectorUnmerge(
I,
MRI);
4137 unsigned CopyOpc = 0;
4138 unsigned ExtractSubReg = 0;
4149 unsigned NumInsertRegs = NumElts - 1;
4161 *RBI.getRegBank(SrcReg,
MRI,
TRI));
4165 assert(Found &&
"expected to find last operand's subeg idx");
4166 for (
unsigned Idx = 0;
Idx < NumInsertRegs; ++
Idx) {
4167 Register ImpDefReg =
MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4169 *
BuildMI(
MBB,
I,
I.getDebugLoc(),
TII.get(TargetOpcode::IMPLICIT_DEF),
4173 Register InsertReg =
MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4176 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4193 Register CopyTo =
I.getOperand(0).getReg();
4194 auto FirstCopy = MIB.
buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4195 .addReg(InsertRegs[0], 0, ExtractSubReg);
4199 unsigned LaneIdx = 1;
4200 for (
Register InsReg : InsertRegs) {
4201 Register CopyTo =
I.getOperand(LaneIdx).getReg();
4214 MRI.getRegClassOrNull(
I.getOperand(1).getReg());
4220 RBI.constrainGenericRegister(CopyTo, *RC,
MRI);
4221 I.eraseFromParent();
4225bool AArch64InstructionSelector::selectConcatVectors(
4227 assert(
I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4228 "Unexpected opcode");
4229 Register Dst =
I.getOperand(0).getReg();
4230 Register Op1 =
I.getOperand(1).getReg();
4231 Register Op2 =
I.getOperand(2).getReg();
4232 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4235 I.eraseFromParent();
4240AArch64InstructionSelector::emitConstantPoolEntry(
const Constant *CPVal,
4249MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4257 RC = &AArch64::FPR128RegClass;
4258 Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
4261 RC = &AArch64::FPR64RegClass;
4262 Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
4265 RC = &AArch64::FPR32RegClass;
4266 Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
4269 RC = &AArch64::FPR16RegClass;
4270 Opc = AArch64::LDRHui;
4273 LLVM_DEBUG(
dbgs() <<
"Could not load from constant pool of type "
4279 auto &MF = MIRBuilder.
getMF();
4280 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4281 if (IsTiny && (
Size == 16 ||
Size == 8 ||
Size == 4)) {
4283 LoadMI = &*MIRBuilder.
buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
4286 MIRBuilder.
buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4289 LoadMI = &*MIRBuilder.
buildInstr(Opc, {RC}, {Adrp})
4290 .addConstantPoolIndex(
4306static std::pair<unsigned, unsigned>
4308 unsigned Opc, SubregIdx;
4309 if (RB.
getID() == AArch64::GPRRegBankID) {
4311 Opc = AArch64::INSvi8gpr;
4312 SubregIdx = AArch64::bsub;
4313 }
else if (EltSize == 16) {
4314 Opc = AArch64::INSvi16gpr;
4315 SubregIdx = AArch64::ssub;
4316 }
else if (EltSize == 32) {
4317 Opc = AArch64::INSvi32gpr;
4318 SubregIdx = AArch64::ssub;
4319 }
else if (EltSize == 64) {
4320 Opc = AArch64::INSvi64gpr;
4321 SubregIdx = AArch64::dsub;
4327 Opc = AArch64::INSvi8lane;
4328 SubregIdx = AArch64::bsub;
4329 }
else if (EltSize == 16) {
4330 Opc = AArch64::INSvi16lane;
4331 SubregIdx = AArch64::hsub;
4332 }
else if (EltSize == 32) {
4333 Opc = AArch64::INSvi32lane;
4334 SubregIdx = AArch64::ssub;
4335 }
else if (EltSize == 64) {
4336 Opc = AArch64::INSvi64lane;
4337 SubregIdx = AArch64::dsub;
4342 return std::make_pair(Opc, SubregIdx);
4346 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4348 const ComplexRendererFns &RenderFns)
const {
4349 assert(Opcode &&
"Expected an opcode?");
4351 "Function should only be used to produce selected instructions!");
4352 auto MI = MIRBuilder.
buildInstr(Opcode, DstOps, SrcOps);
4354 for (
auto &Fn : *RenderFns)
4361 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4365 assert(
LHS.isReg() &&
RHS.isReg() &&
"Expected register operands?");
4366 auto Ty =
MRI.getType(
LHS.getReg());
4369 assert((
Size == 32 ||
Size == 64) &&
"Expected a 32-bit or 64-bit type only");
4370 bool Is32Bit =
Size == 32;
4373 if (
auto Fns = selectArithImmed(RHS))
4374 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {
LHS},
4378 if (
auto Fns = selectNegArithImmed(RHS))
4379 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {
LHS},
4383 if (
auto Fns = selectArithExtendedRegister(RHS))
4384 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {
LHS},
4388 if (
auto Fns = selectShiftedRegister(RHS))
4389 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {
LHS},
4391 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {
LHS,
RHS},
4399 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4400 {{AArch64::ADDXri, AArch64::ADDWri},
4401 {AArch64::ADDXrs, AArch64::ADDWrs},
4402 {AArch64::ADDXrr, AArch64::ADDWrr},
4403 {AArch64::SUBXri, AArch64::SUBWri},
4404 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4405 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4412 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4413 {{AArch64::ADDSXri, AArch64::ADDSWri},
4414 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4415 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4416 {AArch64::SUBSXri, AArch64::SUBSWri},
4417 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4418 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4425 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4426 {{AArch64::SUBSXri, AArch64::SUBSWri},
4427 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4428 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4429 {AArch64::ADDSXri, AArch64::ADDSWri},
4430 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4431 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4438 assert(
LHS.isReg() &&
RHS.isReg() &&
"Expected register operands?");
4440 bool Is32Bit = (
MRI->getType(
LHS.getReg()).getSizeInBits() == 32);
4441 static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
4442 return emitInstr(OpcTable[Is32Bit], {Dst}, {
LHS,
RHS}, MIRBuilder);
4449 assert(
LHS.isReg() &&
RHS.isReg() &&
"Expected register operands?");
4451 bool Is32Bit = (
MRI->getType(
LHS.getReg()).getSizeInBits() == 32);
4452 static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
4453 return emitInstr(OpcTable[Is32Bit], {Dst}, {
LHS,
RHS}, MIRBuilder);
4460 bool Is32Bit = (
MRI.getType(
LHS.getReg()).getSizeInBits() == 32);
4461 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4462 return emitADDS(
MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4468 assert(
LHS.isReg() &&
RHS.isReg() &&
"Expected register operands?");
4472 bool Is32Bit = (
RegSize == 32);
4473 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4474 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4475 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4479 int64_t
Imm = ValAndVReg->Value.getSExtValue();
4482 auto TstMI = MIRBuilder.
buildInstr(OpcTable[0][Is32Bit], {Ty}, {
LHS});
4489 if (
auto Fns = selectLogicalShiftedRegister(RHS))
4490 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {
LHS}, MIRBuilder, Fns);
4491 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {
LHS,
RHS}, MIRBuilder);
4494MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4497 assert(
LHS.isReg() &&
RHS.isReg() &&
"Expected LHS and RHS to be registers!");
4504 assert((
Size == 32 ||
Size == 64) &&
"Expected a 32-bit or 64-bit LHS/RHS?");
4506 if (
auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4508 auto Dst =
MRI.cloneVirtualRegister(
LHS.getReg());
4509 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4512MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4516 LLT Ty =
MRI.getType(Dst);
4518 "Expected a 32-bit scalar register?");
4520 const Register ZReg = AArch64::WZR;
4525 return emitCSINC(Dst, ZReg, ZReg, InvCC1,
4531 emitCSINC(Def1Reg, ZReg, ZReg, InvCC1, MIRBuilder);
4532 emitCSINC(Def2Reg, ZReg, ZReg, InvCC2, MIRBuilder);
4533 auto OrMI = MIRBuilder.
buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4538MachineInstr *AArch64InstructionSelector::emitFPCompare(
4540 std::optional<CmpInst::Predicate> Pred)
const {
4542 LLT Ty =
MRI.getType(LHS);
4546 assert(OpSize == 16 || OpSize == 32 || OpSize == 64);
4557 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4561 ShouldUseImm =
true;
4565 unsigned CmpOpcTbl[2][3] = {
4566 {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
4567 {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
4569 CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];
4581MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4590 const LLT Op1Ty =
MRI.getType(Op1);
4591 const LLT Op2Ty =
MRI.getType(Op2);
4593 if (Op1Ty != Op2Ty) {
4594 LLVM_DEBUG(
dbgs() <<
"Could not do vector concat of differing vector tys");
4597 assert(Op1Ty.
isVector() &&
"Expected a vector for vector concat");
4600 LLVM_DEBUG(
dbgs() <<
"Vector concat not supported for full size vectors");
4616 emitScalarToVector(ScalarTy.
getSizeInBits(), DstRC, Op1, MIRBuilder);
4618 emitScalarToVector(ScalarTy.
getSizeInBits(), DstRC, Op2, MIRBuilder);
4619 if (!WidenedOp1 || !WidenedOp2) {
4620 LLVM_DEBUG(
dbgs() <<
"Could not emit a vector from scalar value");
4625 unsigned InsertOpc, InsSubRegIdx;
4626 std::tie(InsertOpc, InsSubRegIdx) =
4630 Dst =
MRI.createVirtualRegister(DstRC);
4651 Size =
TRI.getRegSizeInBits(*RC);
4653 Size =
MRI.getType(Dst).getSizeInBits();
4655 assert(
Size <= 64 &&
"Expected 64 bits or less only!");
4656 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4657 unsigned Opc = OpcTable[
Size == 64];
4658 auto CSINC = MIRBuilder.
buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4666 unsigned Opcode =
I.getOpcode();
4670 bool NeedsNegatedCarry =
4671 (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);
4681 if (SrcMI ==
I.getPrevNode()) {
4682 if (
auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
4683 bool ProducesNegatedCarry = CarrySrcMI->isSub();
4684 if (NeedsNegatedCarry == ProducesNegatedCarry &&
4685 CarrySrcMI->isUnsigned() &&
4686 CarrySrcMI->getCarryOutReg() == CarryReg &&
4687 selectAndRestoreState(*SrcMI))
4692 Register DeadReg =
MRI->createVirtualRegister(&AArch64::GPR32RegClass);
4694 if (NeedsNegatedCarry) {
4697 return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
4701 auto Fns = select12BitValueWithLeftShift(1);
4702 return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
4705bool AArch64InstructionSelector::selectOverflowOp(
MachineInstr &
I,
4707 auto &CarryMI = cast<GAddSubCarryOut>(
I);
4709 if (
auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&
I)) {
4711 emitCarryIn(
I, CarryInMI->getCarryInReg());
4715 auto OpAndCC = emitOverflowOp(
I.getOpcode(), CarryMI.getDstReg(),
4716 CarryMI.getLHS(), CarryMI.getRHS(), MIB);
4718 Register CarryOutReg = CarryMI.getCarryOutReg();
4721 if (!
MRI.use_nodbg_empty(CarryOutReg)) {
4727 emitCSINC(CarryOutReg, ZReg, ZReg,
4728 getInvertedCondCode(OpAndCC.second), MIB);
4731 I.eraseFromParent();
// Maps an overflow opcode to the pair (emitted instruction, condition code).
// From the visible cases: the plain add/sub forms go through emitADDS/emitSUBS
// and the carry-consuming *E forms go through emitADCS/emitSBCS. Every signed
// opcode is paired with VS, unsigned adds with HS and unsigned subs with LO.
4735std::pair<MachineInstr *, AArch64CC::CondCode>
4736AArch64InstructionSelector::emitOverflowOp(
unsigned Opcode,
Register Dst,
// Overflow-only forms (no carry-in).
4743 case TargetOpcode::G_SADDO:
4744 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder),
AArch64CC::VS);
4745 case TargetOpcode::G_UADDO:
4746 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder),
AArch64CC::HS);
4747 case TargetOpcode::G_SSUBO:
4748 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder),
AArch64CC::VS);
4749 case TargetOpcode::G_USUBO:
4750 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder),
AArch64CC::LO);
// Carry-in forms; the condition codes mirror the forms above.
4751 case TargetOpcode::G_SADDE:
4752 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder),
AArch64CC::VS);
4753 case TargetOpcode::G_UADDE:
4754 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder),
AArch64CC::HS);
4755 case TargetOpcode::G_SSUBE:
4756 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder),
AArch64CC::VS);
4757 case TargetOpcode::G_USUBE:
4758 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder),
AArch64CC::LO);
4778 unsigned Depth = 0) {
4779 if (!
MRI.hasOneNonDBGUse(Val))
4783 if (isa<GAnyCmp>(ValDef)) {
4785 MustBeFirst =
false;
4791 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4792 bool IsOR = Opcode == TargetOpcode::G_OR;
4804 if (MustBeFirstL && MustBeFirstR)
4810 if (!CanNegateL && !CanNegateR)
4814 CanNegate = WillNegate && CanNegateL && CanNegateR;
4817 MustBeFirst = !CanNegate;
4819 assert(Opcode == TargetOpcode::G_AND &&
"Must be G_AND");
4822 MustBeFirst = MustBeFirstL || MustBeFirstR;
// Emits a conditional-compare instruction. The visible lines pick the opcode
// from the operand type: CCMP{W,X}{i,r} for integers (W when the type is 32
// bits wide, X otherwise) and FCCMP{H,S,D}rr for floating point.
4829MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4835 LLT OpTy =
MRI.getType(LHS);
4837 std::optional<ValueAndVReg>
C;
// The immediate form is only used when a constant is known and it is
// unsigned-less-than 32; otherwise the register form is chosen.
4841 if (
C &&
C->Value.ult(32))
4842 CCmpOpc = OpTy.
getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
4844 CCmpOpc = OpTy.
getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
// The half-precision compare is guarded by an assertion on full FP16 support.
4850 assert(STI.hasFullFP16() &&
"Expected Full FP16 for fp16 comparisons");
4851 CCmpOpc = AArch64::FCCMPHrr;
4854 CCmpOpc = AArch64::FCCMPSrr;
4857 CCmpOpc = AArch64::FCCMPDrr;
// Only the immediate opcodes get the constant appended as an immediate operand.
4867 if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
4868 CCmp.
addImm(
C->Value.getZExtValue());
// Recursive emitter for a tree of G_AND/G_OR over compares. Visible structure:
// a compare leaf is emitted either as an FP compare / SUBS, and an inner node
// asserts that the tree was already validated (single non-debug use, valid
// left and right subtrees) before deciding where negations are applied.
4876MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4883 if (
auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
4889 if (isa<GICmp>(Cmp)) {
4900 ExtraCmp = emitFPCompare(LHS, RHS, MIB,
CC);
// Integer compares are emitted as SUBS into a fresh register cloned from LHS;
// any other compare kind goes through emitFPCompare on operands 2 and 3.
4911 auto Dst =
MRI.cloneVirtualRegister(LHS);
4912 if (isa<GICmp>(Cmp))
4913 return emitSUBS(Dst,
Cmp->getOperand(2),
Cmp->getOperand(3), MIB);
4914 return emitFPCompare(
Cmp->getOperand(2).getReg(),
4915 Cmp->getOperand(3).getReg(), MIB);
4920 assert(
MRI.hasOneNonDBGUse(Val) &&
"Valid conjunction/disjunction tree");
4922 bool IsOR = Opcode == TargetOpcode::G_OR;
4928 assert(ValidL &&
"Valid conjunction/disjunction tree");
4935 assert(ValidR &&
"Valid conjunction/disjunction tree");
4940 assert(!MustBeFirstR &&
"Valid conjunction/disjunction tree");
4949 bool NegateAfterAll;
// G_OR branch: the right side must be negatable; the final result is negated
// afterwards unless the caller already asked for a negated result.
4950 if (Opcode == TargetOpcode::G_OR) {
4953 assert(CanNegateR &&
"at least one side must be negatable");
4954 assert(!MustBeFirstR &&
"invalid conjunction/disjunction tree");
4958 NegateAfterR =
true;
4961 NegateR = CanNegateR;
4962 NegateAfterR = !CanNegateR;
4965 NegateAfterAll = !Negate;
// G_AND branch: no negation is requested or applied after emission.
4967 assert(Opcode == TargetOpcode::G_AND &&
4968 "Valid conjunction/disjunction tree");
4969 assert(!Negate &&
"Valid conjunction/disjunction tree");
4973 NegateAfterR =
false;
4974 NegateAfterAll =
false;
// Entry point for conjunction emission. The two locals below are named as
// throwaway outputs; presumably they receive the CanNegate/MustBeFirst results
// of a legality query whose values the caller does not need -- TODO confirm
// against the full source.
4990MachineInstr *AArch64InstructionSelector::emitConjunction(
4992 bool DummyCanNegate;
4993 bool DummyMustBeFirst;
// Attempts to select a G_SELECT whose condition is a conjunction; called from
// tryOptSelect when the condition is neither G_ICMP nor G_FCMP.
5000bool AArch64InstructionSelector::tryOptSelectConjunction(
GSelect &SelI,
// Tries to fold the compare feeding a G_SELECT into the select itself.
// Visible behavior: a condition with several non-debug users is inspected user
// by user (checking for G_SELECT); a condition that is neither G_ICMP nor
// G_FCMP is offered to tryOptSelectConjunction; on success a select is emitted
// from operands 0, 2 and 3 and the original instruction is erased.
5012bool AArch64InstructionSelector::tryOptSelect(
GSelect &
I) {
5036 if (!
MRI.hasOneNonDBGUse(CondDefReg)) {
5038 for (
const MachineInstr &UI :
MRI.use_nodbg_instructions(CondDefReg)) {
5041 if (UI.getOpcode() != TargetOpcode::G_SELECT)
5047 unsigned CondOpc = CondDef->
getOpcode();
5048 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
5049 if (tryOptSelectConjunction(
I, *CondDef))
5055 if (CondOpc == TargetOpcode::G_ICMP) {
// Operand 0 is the destination; operands 2 and 3 are the two selected values.
5083 emitSelect(
I.getOperand(0).getReg(),
I.getOperand(2).getReg(),
5084 I.getOperand(3).getReg(), CondCode, MIB);
5085 I.eraseFromParent();
// Tries to emit a cheaper instruction for an integer compare. The visible
// lines show two folds: emitting CMN using operand 2 of the RHS definition,
// and a path for an LHS defined by G_AND that requires a known constant equal
// to zero.
5089MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
5093 "Unexpected MachineOperand");
5130 return emitCMN(LHS, RHSDef->
getOperand(2), MIRBuilder);
5141 LHSDef->
getOpcode() == TargetOpcode::G_AND) {
5144 if (!ValAndVReg || ValAndVReg->Value != 0)
// Selects G_SHUFFLE_VECTOR via table-lookup instructions. Visible behavior:
// a byte-level index list is built from the shuffle mask, then either a
// single-register TBL (TBLv16i8One, followed by a dsub sub-register copy
// constrained to FPR64) or a two-register TBL (TBLv16i8Two) is emitted.
5154bool AArch64InstructionSelector::selectShuffleVector(
5156 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
5157 Register Src1Reg =
I.getOperand(1).getReg();
5158 const LLT Src1Ty =
MRI.getType(Src1Reg);
5159 Register Src2Reg =
I.getOperand(2).getReg();
5160 const LLT Src2Ty =
MRI.getType(Src2Reg);
5171 LLVM_DEBUG(
dbgs() <<
"Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
5178 for (
int Val : Mask) {
// Negative mask entries are clamped to element 0.
5181 Val = Val < 0 ? 0 : Val;
// Each element index is expanded into BytesPerElt byte indices.
5182 for (
unsigned Byte = 0;
Byte < BytesPerElt; ++
Byte) {
5200 emitVectorConcat(std::nullopt, Src1Reg, Src2Reg, MIB);
5207 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
5211 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
5217 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
5218 RBI.constrainGenericRegister(
Copy.getReg(0), AArch64::FPR64RegClass,
MRI);
5219 I.eraseFromParent();
5227 auto TBL2 = MIB.
buildInstr(AArch64::TBLv16i8Two, {
I.getOperand(0)},
5230 I.eraseFromParent();
// Emits a lane-insert of EltReg into SrcReg. A destination register of class
// DstRC is created on one visible path. When the element lives on the FPR
// bank it is first widened with emitScalarToVector and that result is used as
// an operand; the other visible path builds the instruction directly.
5234MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5244 DstReg =
MRI.createVirtualRegister(DstRC);
5246 unsigned EltSize =
MRI.getType(EltReg).getSizeInBits();
5249 if (RB.
getID() == AArch64::FPRRegBankID) {
5250 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
5251 InsElt = MIRBuilder.
buildInstr(Opc, {*DstReg}, {SrcReg})
5253 .
addUse(InsSub->getOperand(0).getReg())
5256 InsElt = MIRBuilder.
buildInstr(Opc, {*DstReg}, {SrcReg})
// Selects an extend of a vector lane extract as a single SMOV/UMOV.
// Only G_SEXT, G_ZEXT and G_ANYEXT are considered, and only for 32- or 64-bit
// destinations. G_SEXT picks the SMOV opcodes; the other two pick UMOV.
5265bool AArch64InstructionSelector::selectUSMovFromExtend(
5267 if (
MI.getOpcode() != TargetOpcode::G_SEXT &&
5268 MI.getOpcode() != TargetOpcode::G_ZEXT &&
5269 MI.getOpcode() != TargetOpcode::G_ANYEXT)
5271 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SEXT;
5272 const Register DefReg =
MI.getOperand(0).getReg();
5273 const LLT DstTy =
MRI.getType(DefReg);
5276 if (DstSize != 32 && DstSize != 64)
5280 MI.getOperand(1).getReg(),
MRI);
5286 const LLT &VecTy =
MRI.getType(Src0);
5289 const MachineInstr *ScalarToVector = emitScalarToVector(
5290 VecTy.
getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
5291 assert(ScalarToVector &&
"Didn't expect emitScalarToVector to fail!");
// Opcode table: the *to64 SMOV forms are listed first, then the *to32 forms;
// the unsigned alternative is always the UMOV of the matching lane width.
5297 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5299 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5301 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5303 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5305 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
// Unsigned 64-bit result: the move targets a fresh GPR32 and DefReg is then
// formed with SUBREG_TO_REG on sub_32 and constrained to GPR64.
5314 if (DstSize == 64 && !IsSigned) {
5315 Register NewReg =
MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5316 MIB.
buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
5317 ExtI = MIB.
buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
5320 .
addImm(AArch64::sub_32);
5321 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
MRI);
5323 ExtI = MIB.
buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
5326 MI.eraseFromParent();
// Tries to materialize a constant with a byte-wise MOVI. For a 128-bit
// destination the high and low 64-bit halves of Bits are compared before the
// 16b opcode is chosen; the other visible opcode is the 8b form.
5330MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
5333 if (DstSize == 128) {
5334 if (
Bits.getHiBits(64) !=
Bits.getLoBits(64))
5336 Op = AArch64::MOVIv16b_ns;
5338 Op = AArch64::MOVIv8b_ns;
5345 auto Mov = Builder.
buildInstr(
Op, {Dst}, {}).addImm(Val);
// Tries to materialize a constant with a 16-bit-element MOVI/MVNI. Inv selects
// the MVNI opcode; a 128-bit destination first compares the two 64-bit halves
// of Bits and uses the 8i16 opcodes, otherwise the 4i16 opcodes are used.
5352MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
5357 if (DstSize == 128) {
5358 if (
Bits.getHiBits(64) !=
Bits.getLoBits(64))
5360 Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
5362 Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
// Tries to materialize a constant with a 32-bit-element MOVI/MVNI. Inv selects
// the MVNI opcode; a 128-bit destination first compares the two 64-bit halves
// of Bits and uses the 4i32 opcodes, otherwise the 2i32 opcodes are used.
5382MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
5387 if (DstSize == 128) {
5388 if (
Bits.getHiBits(64) !=
Bits.getLoBits(64))
5390 Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
5392 Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
// Tries to materialize a constant with a 64-bit-element MOVI. A 128-bit
// destination first compares the two 64-bit halves of Bits and uses
// MOVIv2d_ns; the other visible opcode is MOVID.
5418MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
5422 if (DstSize == 128) {
5423 if (
Bits.getHiBits(64) !=
Bits.getLoBits(64))
5425 Op = AArch64::MOVIv2d_ns;
5427 Op = AArch64::MOVID;
5433 auto Mov = Builder.
buildInstr(
Op, {Dst}, {}).addImm(Val);
// Tries to materialize a constant with the "msl" MOVI/MVNI opcodes. Inv
// selects the MVNI opcode; a 128-bit destination first compares the two 64-bit
// halves of Bits and uses the 4s opcodes, otherwise the 2s opcodes are used.
5440MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
5445 if (DstSize == 128) {
5446 if (
Bits.getHiBits(64) !=
Bits.getLoBits(64))
5448 Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
5450 Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
// Tries to materialize a constant with a vector FMOV immediate. The visible
// opcodes are FMOVv4f32_ns, FMOVv2f32_ns and FMOVv2f64_ns; a 128-bit
// destination first compares the two 64-bit halves of Bits. IsWide starts out
// false; where it is set is not visible in this extract.
5470MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
5474 bool IsWide =
false;
5475 if (DstSize == 128) {
5476 if (
Bits.getHiBits(64) !=
Bits.getLoBits(64))
5478 Op = AArch64::FMOVv4f32_ns;
5481 Op = AArch64::FMOVv2f32_ns;
5490 Op = AArch64::FMOVv2f64_ns;
5494 auto Mov = Builder.
buildInstr(
Op, {Dst}, {}).addImm(Val);
// Selects a pre/post-indexed extending load. The opcode is chosen from the
// memory size in bits (8, 16 or 32), from whether the load is pre- or
// post-indexed, and from the extension kind. For the LDRBB/LDRHH/LDRW opcodes
// a 64-bit destination sets InsertIntoXReg, which later wraps the result in a
// SUBREG_TO_REG on sub_32.
5499bool AArch64InstructionSelector::selectIndexedExtLoad(
5501 auto &ExtLd = cast<GIndexedAnyExtLoad>(
MI);
5503 Register WriteBack = ExtLd.getWritebackReg();
5506 LLT Ty =
MRI.getType(Dst);
5508 unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
5509 bool IsPre = ExtLd.isPre();
5510 bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
5511 bool InsertIntoXReg =
false;
// Byte-sized memory access.
5519 if (MemSizeBits == 8) {
5522 Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
5524 Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
5525 NewLdDstTy = IsDst64 ? s64 : s32;
5527 Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
5528 InsertIntoXReg = IsDst64;
5531 }
// Halfword-sized memory access.
else if (MemSizeBits == 16) {
5534 Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
5536 Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
5537 NewLdDstTy = IsDst64 ? s64 : s32;
5539 Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
5540 InsertIntoXReg = IsDst64;
5543 }
// Word-sized memory access.
else if (MemSizeBits == 32) {
5545 Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
5548 Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
5549 InsertIntoXReg = IsDst64;
5556 if (RBI.getRegBank(Dst,
MRI,
TRI)->getID() == AArch64::FPRRegBankID)
5564 .addImm(Cst->getSExtValue());
5569 if (InsertIntoXReg) {
5571 auto SubToReg = MIB.
buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
5574 .
addImm(AArch64::sub_32);
5575 RBI.constrainGenericRegister(SubToReg.getReg(0), AArch64::GPR64RegClass,
5581 MI.eraseFromParent();
// Selects a pre/post-indexed load. Loads whose memory size is smaller than the
// destination are delegated to selectIndexedExtLoad. Otherwise the opcode is
// read from a table indexed by Log2_32 of the memory size in bytes, using the
// FPR table when the destination is on the FPR bank and the GPR table
// otherwise. The instruction defines {WriteBack, Dst} and takes the constant
// offset as a sign-extended immediate.
5586bool AArch64InstructionSelector::selectIndexedLoad(
MachineInstr &
MI,
5588 auto &Ld = cast<GIndexedLoad>(
MI);
5590 Register WriteBack = Ld.getWritebackReg();
5593 assert(
MRI.getType(Dst).getSizeInBits() <= 128 &&
5594 "Unexpected type for indexed load");
5595 unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
5597 if (MemSize <
MRI.getType(Dst).getSizeInBytes())
5598 return selectIndexedExtLoad(
MI,
MRI);
// Pre-indexed opcode tables.
5602 static constexpr unsigned GPROpcodes[] = {
5603 AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
5605 static constexpr unsigned FPROpcodes[] = {
5606 AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
5608 if (RBI.getRegBank(Dst,
MRI,
TRI)->getID() == AArch64::FPRRegBankID)
5609 Opc = FPROpcodes[
Log2_32(MemSize)];
5611 Opc = GPROpcodes[
Log2_32(MemSize)];
// Post-indexed opcode tables.
5613 static constexpr unsigned GPROpcodes[] = {
5614 AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
5616 static constexpr unsigned FPROpcodes[] = {
5617 AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
5618 AArch64::LDRDpost, AArch64::LDRQpost};
5619 if (RBI.getRegBank(Dst,
MRI,
TRI)->getID() == AArch64::FPRRegBankID)
5620 Opc = FPROpcodes[
Log2_32(MemSize)];
5622 Opc = GPROpcodes[
Log2_32(MemSize)];
5628 MIB.
buildInstr(Opc, {WriteBack, Dst}, {
Base}).addImm(Cst->getSExtValue());
5631 MI.eraseFromParent();
// Selects a pre/post-indexed store. Separate pre- and post-indexed opcode
// tables exist for GPR and FPR values; the register bank of Val decides which
// table is used. The store defines Dst, takes {Val, Base} plus a sign-extended
// constant offset, and inherits the memory operands of I.
5635bool AArch64InstructionSelector::selectIndexedStore(
GIndexedStore &
I,
5641 LLT ValTy =
MRI.getType(Val);
// Pre-indexed opcode tables.
5646 static constexpr unsigned GPROpcodes[] = {
5647 AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
5649 static constexpr unsigned FPROpcodes[] = {
5650 AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
5653 if (RBI.getRegBank(Val,
MRI,
TRI)->getID() == AArch64::FPRRegBankID)
// Post-indexed opcode tables.
5658 static constexpr unsigned GPROpcodes[] = {
5659 AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
5661 static constexpr unsigned FPROpcodes[] = {
5662 AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
5663 AArch64::STRDpost, AArch64::STRQpost};
5665 if (RBI.getRegBank(Val,
MRI,
TRI)->getID() == AArch64::FPRRegBankID)
5675 MIB.
buildInstr(Opc, {Dst}, {Val,
Base}).addImm(Cst->getSExtValue());
5676 Str.cloneMemRefs(
I);
5678 I.eraseFromParent();
5686 LLT DstTy =
MRI.getType(Dst);
5689 if (DstSize == 128) {
5691 MIRBuilder.
buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5696 if (DstSize == 64) {
5699 .
buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5702 .addReg(Mov.getReg(0), 0, AArch64::dsub);
5703 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass,
MRI);
5735 if (
auto *NewOp = TryMOVIWithBits(DefBits))
5739 auto TryWithFNeg = [&](
APInt DefBits,
int NumBits,
5743 APInt NegBits(DstSize, 0);
5744 unsigned NumElts = DstSize / NumBits;
5745 for (
unsigned i = 0; i < NumElts; i++)
5746 NegBits |= Neg << (NumBits * i);
5747 NegBits = DefBits ^ NegBits;
5751 if (
auto *NewOp = TryMOVIWithBits(NegBits)) {
5752 Register NewDst =
MRI.createVirtualRegister(&AArch64::FPR128RegClass);
5754 return MIRBuilder.
buildInstr(NegOpc, {Dst}, {NewDst});
5759 if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
5760 (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
5761 (STI.hasFullFP16() &&
5762 (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
5768 LLVM_DEBUG(
dbgs() <<
"Could not generate cp load for constant vector!");
5772 auto Copy = MIRBuilder.
buildCopy(Dst, CPLoad->getOperand(0));
5773 RBI.constrainGenericRegister(
5774 Dst, *
MRI.getRegClass(CPLoad->getOperand(0).getReg()),
MRI);
// Tries to select a G_BUILD_VECTOR whose operands are all constants as a
// single constant vector. Operands 1..N are scanned; the visible lines collect
// the ConstantInt of an integer-constant definition or the ConstantFP of a
// G_FCONSTANT definition. The collected constant CV is then handed to
// emitConstantVector and I is erased.
5778bool AArch64InstructionSelector::tryOptConstantBuildVec(
5780 assert(
I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5782 assert(DstSize <= 128 &&
"Unexpected build_vec type!");
// Operand 0 is the destination, so source operands start at index 1.
5788 for (
unsigned Idx = 1;
Idx <
I.getNumOperands(); ++
Idx) {
5794 const_cast<ConstantInt *
>(OpMI->getOperand(1).getCImm()));
5795 else if ((OpMI =
getOpcodeDef(TargetOpcode::G_FCONSTANT,
5796 I.getOperand(
Idx).getReg(),
MRI)))
5798 const_cast<ConstantFP *
>(OpMI->getOperand(1).getFPImm()));
5803 if (!emitConstantVector(
I.getOperand(0).getReg(), CV, MIB,
MRI))
5805 I.eraseFromParent();
// Tries to select a G_BUILD_VECTOR as a single SUBREG_TO_REG. The visible
// predicate tests operands for not being defined by G_IMPLICIT_DEF; how that
// predicate gates the fold is not visible here. On success the destination is
// defined by SUBREG_TO_REG, I is erased and Dst is constrained to DstRC.
5809bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5815 Register Dst =
I.getOperand(0).getReg();
5816 Register EltReg =
I.getOperand(1).getReg();
5817 LLT EltTy =
MRI.getType(EltReg);
5825 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(), MRI);
5833 getRegClassForTypeOnBank(
MRI.getType(Dst), DstRB);
5838 auto SubregToReg = MIB.
buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5842 I.eraseFromParent();
5844 return RBI.constrainGenericRegister(Dst, *DstRC,
MRI);
// Selects G_BUILD_VECTOR. The two optimized paths (constant vector and
// SUBREG_TO_REG) are tried first. The general path only supports 8/16/32/64
// bit elements: it starts from operand 1 and inserts each remaining operand
// that is not an implicit def into successive lanes. Destinations narrower
// than 128 bits are then produced by a sub-register copy.
5847bool AArch64InstructionSelector::selectBuildVector(
MachineInstr &
I,
5849 assert(
I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5852 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
5853 const LLT EltTy =
MRI.getType(
I.getOperand(1).getReg());
5856 if (tryOptConstantBuildVec(
I, DstTy,
MRI))
5858 if (tryOptBuildVecToSubregToReg(
I,
MRI))
5861 if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
5868 I.getOperand(1).getReg(), MIB);
// Operand i is inserted into lane i - 1; operand 1 was handled above.
5878 for (
unsigned i = 2, e = DstSize / EltSize + 1; i <
e; ++i) {
5881 Register OpReg =
I.getOperand(i).getReg();
// Implicit-def operands are skipped, leaving that lane untouched.
5883 if (!getOpcodeDef<GImplicitDef>(OpReg,
MRI)) {
5884 PrevMI = &*emitLaneInsert(std::nullopt, DstVec, OpReg, i - 1, RB, MIB);
5891 if (DstSize < 128) {
5894 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec,
MRI,
TRI));
5897 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
// Only the ssub and dsub sub-register indices are accepted here.
5905 if (
SubReg != AArch64::ssub &&
SubReg != AArch64::dsub) {
5906 LLVM_DEBUG(
dbgs() <<
"Unsupported destination size! (" << DstSize
5912 Register DstReg =
I.getOperand(0).getReg();
5914 MIB.
buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0,
SubReg);
5917 RBI.constrainGenericRegister(DstReg, *RC,
MRI);
// When no lane insert replaced the initial scalar-to-vector instruction and
// the destination is virtual, the destination is constrained explicitly.
5935 if (PrevMI == ScalarToVec && DstReg.
isVirtual()) {
5937 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec,
MRI,
TRI));
5938 RBI.constrainGenericRegister(DstReg, *RC,
MRI);
5942 I.eraseFromParent();
// Selects a multi-vector load intrinsic (2, 3 or 4 result vectors) using the
// given opcode. The pointer is the last operand of I. The load's result is
// split into the individual destinations by copying consecutive
// sub-registers, starting at dsub0 for 64-bit vectors and qsub0 otherwise.
5946bool AArch64InstructionSelector::selectVectorLoadIntrinsic(
unsigned Opc,
5949 assert(
I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5950 assert(Opc &&
"Expected an opcode?");
5951 assert(NumVecs > 1 && NumVecs < 5 &&
"Only support 2, 3, or 4 vectors");
5953 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
5956 "Destination must be 64 bits or 128 bits?");
5957 unsigned SubReg =
Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
5958 auto Ptr =
I.getOperand(
I.getNumOperands() - 1).getReg();
5959 assert(
MRI.getType(
Ptr).isPointer() &&
"Expected a pointer type?");
5961 Load.cloneMemRefs(
I);
5963 Register SelectedLoadDst =
Load->getOperand(0).getReg();
// Destination Idx receives sub-register SubReg + Idx of the load result.
5964 for (
unsigned Idx = 0;
Idx < NumVecs; ++
Idx) {
5965 auto Vec = MIB.
buildInstr(TargetOpcode::COPY, {
I.getOperand(
Idx)}, {})
5966 .addReg(SelectedLoadDst, 0,
SubReg +
Idx);
// Selects a multi-vector load-lane intrinsic. The NumVecs source vectors are
// read from the operands that follow the NumVecs results and the intrinsic ID.
// Sources can be widened into FPR128 with emitScalarToVector. The lane number
// is added as a zero-extended immediate. Results are copied out of consecutive
// q sub-registers; in the Narrow case the copy targets a fresh FPR128 register
// that is narrowed into the real destination by emitNarrowVector.
5975bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
5977 assert(
I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5978 assert(Opc &&
"Expected an opcode?");
5979 assert(NumVecs > 1 && NumVecs < 5 &&
"Only support 2, 3, or 4 vectors");
5981 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
// Skip the NumVecs defs plus one more operand to reach the first source.
5984 auto FirstSrcRegIt =
I.operands_begin() + NumVecs + 1;
5986 std::transform(FirstSrcRegIt, FirstSrcRegIt + NumVecs, Regs.
begin(),
5987 [](
auto MO) { return MO.getReg(); });
5991 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
6006 .
addImm(LaneNo->getZExtValue())
6008 Load.cloneMemRefs(
I);
6010 Register SelectedLoadDst =
Load->getOperand(0).getReg();
6011 unsigned SubReg = AArch64::qsub0;
6012 for (
unsigned Idx = 0;
Idx < NumVecs; ++
Idx) {
6013 auto Vec = MIB.
buildInstr(TargetOpcode::COPY,
6014 {Narrow ?
DstOp(&AArch64::FPR128RegClass)
6017 .addReg(SelectedLoadDst, 0,
SubReg +
Idx);
6022 !emitNarrowVector(
I.getOperand(
Idx).getReg(), WideReg, MIB,
MRI))
// Selects a multi-vector store intrinsic. The vector type is taken from
// operand 1, and the NumVecs source registers are gathered from operands
// 1 .. NumVecs into Regs.
6028void AArch64InstructionSelector::selectVectorStoreIntrinsic(
MachineInstr &
I,
6032 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6036 std::transform(
I.operands_begin() + 1,
I.operands_begin() + 1 + NumVecs,
6037 Regs.
begin(), [](
auto MO) { return MO.getReg(); });
// Selects a multi-vector store-lane intrinsic. The NumVecs source registers
// are gathered from operands 1 .. NumVecs. Sources can be widened into FPR128
// with emitScalarToVector. The lane number is added as a zero-extended
// immediate.
6046bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
6049 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6053 std::transform(
I.operands_begin() + 1,
I.operands_begin() + 1 + NumVecs,
6054 Regs.
begin(), [](
auto MO) { return MO.getReg(); });
6058 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
6071 .
addImm(LaneNo->getZExtValue())
// Selects intrinsics that have side effects by switching on the intrinsic ID.
// The visible cases cover the exclusive pair loads (ldxp/ldaxp), the NEON
// structured loads and stores (ld1xN, ldN, ldNlane, ldNr, st1xN, stN, stNlane)
// and the MOPS memset-tag intrinsic. For the NEON cases the opcode is chosen
// from the vector type: operand 0 for loads, operand 1 for stores.
6078bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
6081 unsigned IntrinID = cast<GIntrinsic>(
I).getIntrinsicID();
// ldxp selects LDXPX and ldaxp selects LDAXPX; both define operands 0 and 1.
6092 case Intrinsic::aarch64_ldxp:
6093 case Intrinsic::aarch64_ldaxp: {
6095 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
6096 {
I.getOperand(0).
getReg(),
I.getOperand(1).getReg()},
6102 case Intrinsic::aarch64_neon_ld1x2: {
6103 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6106 Opc = AArch64::LD1Twov8b;
6108 Opc = AArch64::LD1Twov16b;
6110 Opc = AArch64::LD1Twov4h;
6112 Opc = AArch64::LD1Twov8h;
6114 Opc = AArch64::LD1Twov2s;
6116 Opc = AArch64::LD1Twov4s;
6118 Opc = AArch64::LD1Twov2d;
6119 else if (Ty ==
S64 || Ty == P0)
6120 Opc = AArch64::LD1Twov1d;
6123 selectVectorLoadIntrinsic(Opc, 2,
I);
6126 case Intrinsic::aarch64_neon_ld1x3: {
6127 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6130 Opc = AArch64::LD1Threev8b;
6132 Opc = AArch64::LD1Threev16b;
6134 Opc = AArch64::LD1Threev4h;
6136 Opc = AArch64::LD1Threev8h;
6138 Opc = AArch64::LD1Threev2s;
6140 Opc = AArch64::LD1Threev4s;
6142 Opc = AArch64::LD1Threev2d;
6143 else if (Ty ==
S64 || Ty == P0)
6144 Opc = AArch64::LD1Threev1d;
6147 selectVectorLoadIntrinsic(Opc, 3,
I);
6150 case Intrinsic::aarch64_neon_ld1x4: {
6151 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6154 Opc = AArch64::LD1Fourv8b;
6156 Opc = AArch64::LD1Fourv16b;
6158 Opc = AArch64::LD1Fourv4h;
6160 Opc = AArch64::LD1Fourv8h;
6162 Opc = AArch64::LD1Fourv2s;
6164 Opc = AArch64::LD1Fourv4s;
6166 Opc = AArch64::LD1Fourv2d;
6167 else if (Ty ==
S64 || Ty == P0)
6168 Opc = AArch64::LD1Fourv1d;
6171 selectVectorLoadIntrinsic(Opc, 4,
I);
// ld2: note that the S64/P0 type selects the two-register LD1 opcode
// (LD1Twov1d) rather than an LD2 opcode.
6174 case Intrinsic::aarch64_neon_ld2: {
6175 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6178 Opc = AArch64::LD2Twov8b;
6180 Opc = AArch64::LD2Twov16b;
6182 Opc = AArch64::LD2Twov4h;
6184 Opc = AArch64::LD2Twov8h;
6186 Opc = AArch64::LD2Twov2s;
6188 Opc = AArch64::LD2Twov4s;
6190 Opc = AArch64::LD2Twov2d;
6191 else if (Ty ==
S64 || Ty == P0)
6192 Opc = AArch64::LD1Twov1d;
6195 selectVectorLoadIntrinsic(Opc, 2,
I);
// Lane loads use the per-element-size LDNi opcodes and check the helper's
// result.
6198 case Intrinsic::aarch64_neon_ld2lane: {
6199 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6202 Opc = AArch64::LD2i8;
6204 Opc = AArch64::LD2i16;
6206 Opc = AArch64::LD2i32;
6209 Opc = AArch64::LD2i64;
6212 if (!selectVectorLoadLaneIntrinsic(Opc, 2,
I))
6216 case Intrinsic::aarch64_neon_ld2r: {
6217 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6220 Opc = AArch64::LD2Rv8b;
6222 Opc = AArch64::LD2Rv16b;
6224 Opc = AArch64::LD2Rv4h;
6226 Opc = AArch64::LD2Rv8h;
6228 Opc = AArch64::LD2Rv2s;
6230 Opc = AArch64::LD2Rv4s;
6232 Opc = AArch64::LD2Rv2d;
6233 else if (Ty ==
S64 || Ty == P0)
6234 Opc = AArch64::LD2Rv1d;
6237 selectVectorLoadIntrinsic(Opc, 2,
I);
// ld3: the S64/P0 type selects the three-register LD1 opcode (LD1Threev1d).
6240 case Intrinsic::aarch64_neon_ld3: {
6241 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6244 Opc = AArch64::LD3Threev8b;
6246 Opc = AArch64::LD3Threev16b;
6248 Opc = AArch64::LD3Threev4h;
6250 Opc = AArch64::LD3Threev8h;
6252 Opc = AArch64::LD3Threev2s;
6254 Opc = AArch64::LD3Threev4s;
6256 Opc = AArch64::LD3Threev2d;
6257 else if (Ty ==
S64 || Ty == P0)
6258 Opc = AArch64::LD1Threev1d;
6261 selectVectorLoadIntrinsic(Opc, 3,
I);
6264 case Intrinsic::aarch64_neon_ld3lane: {
6265 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6268 Opc = AArch64::LD3i8;
6270 Opc = AArch64::LD3i16;
6272 Opc = AArch64::LD3i32;
6275 Opc = AArch64::LD3i64;
6278 if (!selectVectorLoadLaneIntrinsic(Opc, 3,
I))
6282 case Intrinsic::aarch64_neon_ld3r: {
6283 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6286 Opc = AArch64::LD3Rv8b;
6288 Opc = AArch64::LD3Rv16b;
6290 Opc = AArch64::LD3Rv4h;
6292 Opc = AArch64::LD3Rv8h;
6294 Opc = AArch64::LD3Rv2s;
6296 Opc = AArch64::LD3Rv4s;
6298 Opc = AArch64::LD3Rv2d;
6299 else if (Ty ==
S64 || Ty == P0)
6300 Opc = AArch64::LD3Rv1d;
6303 selectVectorLoadIntrinsic(Opc, 3,
I);
// ld4: the S64/P0 type selects the four-register LD1 opcode (LD1Fourv1d).
6306 case Intrinsic::aarch64_neon_ld4: {
6307 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6310 Opc = AArch64::LD4Fourv8b;
6312 Opc = AArch64::LD4Fourv16b;
6314 Opc = AArch64::LD4Fourv4h;
6316 Opc = AArch64::LD4Fourv8h;
6318 Opc = AArch64::LD4Fourv2s;
6320 Opc = AArch64::LD4Fourv4s;
6322 Opc = AArch64::LD4Fourv2d;
6323 else if (Ty ==
S64 || Ty == P0)
6324 Opc = AArch64::LD1Fourv1d;
6327 selectVectorLoadIntrinsic(Opc, 4,
I);
6330 case Intrinsic::aarch64_neon_ld4lane: {
6331 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6334 Opc = AArch64::LD4i8;
6336 Opc = AArch64::LD4i16;
6338 Opc = AArch64::LD4i32;
6341 Opc = AArch64::LD4i64;
6344 if (!selectVectorLoadLaneIntrinsic(Opc, 4,
I))
6348 case Intrinsic::aarch64_neon_ld4r: {
6349 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6352 Opc = AArch64::LD4Rv8b;
6354 Opc = AArch64::LD4Rv16b;
6356 Opc = AArch64::LD4Rv4h;
6358 Opc = AArch64::LD4Rv8h;
6360 Opc = AArch64::LD4Rv2s;
6362 Opc = AArch64::LD4Rv4s;
6364 Opc = AArch64::LD4Rv2d;
6365 else if (Ty ==
S64 || Ty == P0)
6366 Opc = AArch64::LD4Rv1d;
6369 selectVectorLoadIntrinsic(Opc, 4,
I);
// Store cases: the vector type comes from operand 1 (the first stored value).
6372 case Intrinsic::aarch64_neon_st1x2: {
6373 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6376 Opc = AArch64::ST1Twov8b;
6378 Opc = AArch64::ST1Twov16b;
6380 Opc = AArch64::ST1Twov4h;
6382 Opc = AArch64::ST1Twov8h;
6384 Opc = AArch64::ST1Twov2s;
6386 Opc = AArch64::ST1Twov4s;
6388 Opc = AArch64::ST1Twov2d;
6389 else if (Ty ==
S64 || Ty == P0)
6390 Opc = AArch64::ST1Twov1d;
6393 selectVectorStoreIntrinsic(
I, 2, Opc);
6396 case Intrinsic::aarch64_neon_st1x3: {
6397 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6400 Opc = AArch64::ST1Threev8b;
6402 Opc = AArch64::ST1Threev16b;
6404 Opc = AArch64::ST1Threev4h;
6406 Opc = AArch64::ST1Threev8h;
6408 Opc = AArch64::ST1Threev2s;
6410 Opc = AArch64::ST1Threev4s;
6412 Opc = AArch64::ST1Threev2d;
6413 else if (Ty ==
S64 || Ty == P0)
6414 Opc = AArch64::ST1Threev1d;
6417 selectVectorStoreIntrinsic(
I, 3, Opc);
6420 case Intrinsic::aarch64_neon_st1x4: {
6421 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6424 Opc = AArch64::ST1Fourv8b;
6426 Opc = AArch64::ST1Fourv16b;
6428 Opc = AArch64::ST1Fourv4h;
6430 Opc = AArch64::ST1Fourv8h;
6432 Opc = AArch64::ST1Fourv2s;
6434 Opc = AArch64::ST1Fourv4s;
6436 Opc = AArch64::ST1Fourv2d;
6437 else if (Ty ==
S64 || Ty == P0)
6438 Opc = AArch64::ST1Fourv1d;
6441 selectVectorStoreIntrinsic(
I, 4, Opc);
// st2/st3/st4: as with the loads, the S64/P0 type selects the multi-register
// ST1 opcode of the matching register count.
6444 case Intrinsic::aarch64_neon_st2: {
6445 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6448 Opc = AArch64::ST2Twov8b;
6450 Opc = AArch64::ST2Twov16b;
6452 Opc = AArch64::ST2Twov4h;
6454 Opc = AArch64::ST2Twov8h;
6456 Opc = AArch64::ST2Twov2s;
6458 Opc = AArch64::ST2Twov4s;
6460 Opc = AArch64::ST2Twov2d;
6461 else if (Ty ==
S64 || Ty == P0)
6462 Opc = AArch64::ST1Twov1d;
6465 selectVectorStoreIntrinsic(
I, 2, Opc);
6468 case Intrinsic::aarch64_neon_st3: {
6469 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6472 Opc = AArch64::ST3Threev8b;
6474 Opc = AArch64::ST3Threev16b;
6476 Opc = AArch64::ST3Threev4h;
6478 Opc = AArch64::ST3Threev8h;
6480 Opc = AArch64::ST3Threev2s;
6482 Opc = AArch64::ST3Threev4s;
6484 Opc = AArch64::ST3Threev2d;
6485 else if (Ty ==
S64 || Ty == P0)
6486 Opc = AArch64::ST1Threev1d;
6489 selectVectorStoreIntrinsic(
I, 3, Opc);
6492 case Intrinsic::aarch64_neon_st4: {
6493 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6496 Opc = AArch64::ST4Fourv8b;
6498 Opc = AArch64::ST4Fourv16b;
6500 Opc = AArch64::ST4Fourv4h;
6502 Opc = AArch64::ST4Fourv8h;
6504 Opc = AArch64::ST4Fourv2s;
6506 Opc = AArch64::ST4Fourv4s;
6508 Opc = AArch64::ST4Fourv2d;
6509 else if (Ty ==
S64 || Ty == P0)
6510 Opc = AArch64::ST1Fourv1d;
6513 selectVectorStoreIntrinsic(
I, 4, Opc);
// Lane stores use the per-element-size STNi opcodes and check the helper's
// result.
6516 case Intrinsic::aarch64_neon_st2lane: {
6517 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6520 Opc = AArch64::ST2i8;
6522 Opc = AArch64::ST2i16;
6524 Opc = AArch64::ST2i32;
6527 Opc = AArch64::ST2i64;
6530 if (!selectVectorStoreLaneIntrinsic(
I, 2, Opc))
6534 case Intrinsic::aarch64_neon_st3lane: {
6535 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6538 Opc = AArch64::ST3i8;
6540 Opc = AArch64::ST3i16;
6542 Opc = AArch64::ST3i32;
6545 Opc = AArch64::ST3i64;
6548 if (!selectVectorStoreLaneIntrinsic(
I, 3, Opc))
6552 case Intrinsic::aarch64_neon_st4lane: {
6553 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6556 Opc = AArch64::ST4i8;
6558 Opc = AArch64::ST4i16;
6560 Opc = AArch64::ST4i32;
6563 Opc = AArch64::ST4i64;
6566 if (!selectVectorStoreLaneIntrinsic(
I, 4, Opc))
// memset-tag: operand 0 is the destination def; operands 2, 3 and 4 are the
// destination, value and size uses. Note that the pseudo takes its uses in
// the order {DstUse, SizeUse, ValUse}.
6570 case Intrinsic::aarch64_mops_memset_tag: {
6583 Register DstDef =
I.getOperand(0).getReg();
6585 Register DstUse =
I.getOperand(2).getReg();
6586 Register ValUse =
I.getOperand(3).getReg();
6587 Register SizeUse =
I.getOperand(4).getReg();
6594 auto Memset = MIB.
buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
6595 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
6602 I.eraseFromParent();
// Selects side-effect-free intrinsics by switching on the intrinsic ID. The
// visible cases are crypto_sha1h, frameaddress/returnaddress and
// swift_async_context_addr.
6606bool AArch64InstructionSelector::selectIntrinsic(
MachineInstr &
I,
6608 unsigned IntrinID = cast<GIntrinsic>(
I).getIntrinsicID();
// sha1h: both operands must be 32 bits wide. A source or destination that is
// not on the FPR bank is replaced by a fresh FPR32 register, and the original
// register is constrained to GPR32.
6613 case Intrinsic::aarch64_crypto_sha1h: {
6614 Register DstReg =
I.getOperand(0).getReg();
6615 Register SrcReg =
I.getOperand(2).getReg();
6618 if (
MRI.getType(DstReg).getSizeInBits() != 32 ||
6619 MRI.getType(SrcReg).getSizeInBits() != 32)
6624 if (RBI.getRegBank(SrcReg,
MRI,
TRI)->getID() != AArch64::FPRRegBankID) {
6625 SrcReg =
MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6629 RBI.constrainGenericRegister(
I.getOperand(2).getReg(),
6630 AArch64::GPR32RegClass,
MRI);
6633 if (RBI.getRegBank(DstReg,
MRI,
TRI)->getID() != AArch64::FPRRegBankID)
6634 DstReg =
MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6637 auto SHA1Inst = MIB.
buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
// Only taken when the destination was redirected to a temporary above.
6641 if (DstReg !=
I.getOperand(0).getReg()) {
6645 RBI.constrainGenericRegister(
I.getOperand(0).getReg(),
6646 AArch64::GPR32RegClass,
MRI);
6649 I.eraseFromParent();
// frameaddress / returnaddress: operand 2 is the requested depth and the
// result is constrained to GPR64.
6652 case Intrinsic::frameaddress:
6653 case Intrinsic::returnaddress: {
6657 unsigned Depth =
I.getOperand(2).getImm();
6658 Register DstReg =
I.getOperand(0).getReg();
6659 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass,
MRI);
// Depth-0 return address: MFReturnAddr is set up on first use from LR.
6661 if (
Depth == 0 && IntrinID == Intrinsic::returnaddress) {
6662 if (!MFReturnAddr) {
6667 MF,
TII, AArch64::LR, AArch64::GPR64RegClass,
I.getDebugLoc());
// With PAuth available the return address is passed through XPACI.
6670 if (STI.hasPAuth()) {
6671 MIB.
buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
6678 I.eraseFromParent();
// Each step loads the next frame address from offset 0 of the current one.
6685 Register NextFrame =
MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
6687 MIB.
buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
6689 FrameAddr = NextFrame;
6692 if (IntrinID == Intrinsic::frameaddress)
// Return address at depth: loaded with immediate 1 relative to the frame
// address, then passed through XPACI when PAuth is available.
6697 if (STI.hasPAuth()) {
6698 Register TmpReg =
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
6699 MIB.
buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
6700 MIB.
buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
6709 I.eraseFromParent();
6712 case Intrinsic::swift_async_context_addr:
6713 auto Sub = MIB.
buildInstr(AArch64::SUBXri, {
I.getOperand(0).getReg()},
6721 I.eraseFromParent();
// Renderer for the first 32-bit shift immediate. Yields nothing when no
// immediate is available or it exceeds 31; otherwise the encoded value is
// (32 - imm) masked to five bits.
6728AArch64InstructionSelector::selectShiftA_32(
const MachineOperand &Root)
const {
6730 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6731 return std::nullopt;
6732 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
// Renderer for the second 32-bit shift immediate. Yields nothing when no
// immediate is available or it exceeds 31.
6737AArch64InstructionSelector::selectShiftB_32(
const MachineOperand &Root)
const {
6739 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6740 return std::nullopt;
// Renderer for the first 64-bit shift immediate. Yields nothing when no
// immediate is available or it exceeds 63; otherwise the encoded value is
// (64 - imm) masked to six bits.
6746AArch64InstructionSelector::selectShiftA_64(
const MachineOperand &Root)
const {
6748 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6749 return std::nullopt;
6750 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
// Renderer for the second 64-bit shift immediate. Yields nothing when no
// immediate is available or it exceeds 63.
6755AArch64InstructionSelector::selectShiftB_64(
const MachineOperand &Root)
const {
6757 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6758 return std::nullopt;
// Splits an immediate into a 12-bit value plus an optional left shift by 12.
// Accepted inputs are values that fit in 12 bits as-is, or values whose low
// 12 bits are zero and that fit in 24 bits (these are shifted right by 12).
// Anything else yields std::nullopt.
6769AArch64InstructionSelector::select12BitValueWithLeftShift(
6772 if (Immed >> 12 == 0) {
6774 }
else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
6776 Immed = Immed >> 12;
6778 return std::nullopt;
// Renderer for an arithmetic immediate operand: when an immediate is
// available for Root it is forwarded to select12BitValueWithLeftShift,
// otherwise nothing is produced.
6791AArch64InstructionSelector::selectArithImmed(
MachineOperand &Root)
const {
6798 if (MaybeImmed == std::nullopt)
6799 return std::nullopt;
6800 return select12BitValueWithLeftShift(*MaybeImmed);
// Renderer for a negated arithmetic immediate. The immediate is negated in
// two's complement, with a separate path when Root's register type is 32 bits
// wide. The result is rejected if any bit above the low 24 is set, and is
// otherwise forwarded to select12BitValueWithLeftShift.
6806AArch64InstructionSelector::selectNegArithImmed(
MachineOperand &Root)
const {
6810 return std::nullopt;
6812 if (MaybeImmed == std::nullopt)
6813 return std::nullopt;
6820 return std::nullopt;
6825 if (
MRI.getType(Root.
getReg()).getSizeInBits() == 32)
// Two's-complement negation of the 64-bit value.
6828 Immed = ~Immed + 1ULL;
6830 if (Immed & 0xFFFFFFFFFF000000ULL)
6831 return std::nullopt;
6833 Immed &= 0xFFFFFFULL;
6834 return select12BitValueWithLeftShift(Immed);
// Heuristic deciding whether MI should be folded into an addressing mode.
// The first visible test passes when the defined register has exactly one
// non-debug use or the enclosing function is marked for size optimization;
// otherwise a predicate is evaluated over all non-debug users of the register.
6840bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
6844 if (
MRI.hasOneNonDBGUse(DefReg) ||
6845 MI.getParent()->getParent()->getFunction().hasOptSize())
6854 return all_of(
MRI.use_nodbg_instructions(DefReg),
// Tries to fold a shift-left (or an equivalent multiply) of the offset into a
// register-offset addressing mode. The offset must be defined by G_SHL or
// G_MUL, optionally behind a G_ZEXT when WantsExt is set. The shift amount
// must equal log2(SizeInBytes) and must be non-zero.
6870AArch64InstructionSelector::selectExtendedSHL(
6872 unsigned SizeInBytes,
bool WantsExt)
const {
6873 assert(
Base.isReg() &&
"Expected base to be a register operand");
6874 assert(
Offset.isReg() &&
"Expected offset to be a register operand");
6879 unsigned OffsetOpc = OffsetInst->
getOpcode();
6880 bool LookedThroughZExt =
false;
// Not a shift or multiply: only a G_ZEXT may be looked through, and only when
// an extend is wanted; the opcode check is then repeated.
6881 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
6883 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
6884 return std::nullopt;
6888 LookedThroughZExt =
true;
6890 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
6891 return std::nullopt;
// A one-byte access has a legal shift of zero, so there is nothing to fold.
6894 int64_t LegalShiftVal =
Log2_32(SizeInBytes);
6895 if (LegalShiftVal == 0)
6896 return std::nullopt;
6897 if (!isWorthFoldingIntoExtendedReg(*OffsetInst,
MRI))
6898 return std::nullopt;
6909 if (OffsetOpc == TargetOpcode::G_SHL)
6910 return std::nullopt;
6916 return std::nullopt;
6921 int64_t ImmVal = ValAndVReg->Value.getSExtValue();
// A multiply only qualifies when its constant has exactly one bit set.
6925 if (OffsetOpc == TargetOpcode::G_MUL) {
6926 if (!llvm::has_single_bit<uint32_t>(ImmVal))
6927 return std::nullopt;
// The amount must fit in the low three bits and match the legal shift.
6933 if ((ImmVal & 0x7) != ImmVal)
6934 return std::nullopt;
6938 if (ImmVal != LegalShiftVal)
6939 return std::nullopt;
6941 unsigned SignExtend = 0;
6945 if (!LookedThroughZExt) {
6947 auto Ext = getExtendTypeForInst(*ExtInst,
MRI,
true);
6949 return std::nullopt;
6954 return std::nullopt;
6960 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
6970 MIB.addImm(SignExtend);
6984AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
6987 return std::nullopt;
7004 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd,
MRI))
7005 return std::nullopt;
7011 return selectExtendedSHL(Root, PtrAdd->
getOperand(1),
7025AArch64InstructionSelector::selectAddrModeRegisterOffset(
7031 if (Gep->
getOpcode() != TargetOpcode::G_PTR_ADD)
7032 return std::nullopt;
7038 return std::nullopt;
7058AArch64InstructionSelector::selectAddrModeXRO(
MachineOperand &Root,
7059 unsigned SizeInBytes)
const {
7062 return std::nullopt;
7066 return std::nullopt;
7084 unsigned Scale =
Log2_32(SizeInBytes);
7085 int64_t ImmOff = ValAndVReg->Value.getSExtValue();
7089 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
7090 ImmOff < (0x1000 << Scale))
7091 return std::nullopt;
7096 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
7100 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
7106 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
7107 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
7112 return std::nullopt;
7116 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
7122 return selectAddrModeRegisterOffset(Root);
7132AArch64InstructionSelector::selectAddrModeWRO(
MachineOperand &Root,
7133 unsigned SizeInBytes)
const {
7138 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd,
MRI))
7139 return std::nullopt;
7160 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->
getOperand(0),
7169 if (!isWorthFoldingIntoExtendedReg(*OffsetInst,
MRI))
7170 return std::nullopt;
7174 getExtendTypeForInst(*OffsetInst,
MRI,
true);
7176 return std::nullopt;
7181 AArch64::GPR32RegClass, MIB);
7188 MIB.addImm(SignExtend);
7199AArch64InstructionSelector::selectAddrModeUnscaled(
MachineOperand &Root,
7200 unsigned Size)
const {
7205 return std::nullopt;
7207 if (!isBaseWithConstantOffset(Root,
MRI))
7208 return std::nullopt;
7213 if (!OffImm.
isReg())
7214 return std::nullopt;
7216 if (
RHS->getOpcode() != TargetOpcode::G_CONSTANT)
7217 return std::nullopt;
7221 return std::nullopt;
7224 if (RHSC >= -256 && RHSC < 256) {
7231 return std::nullopt;
7235AArch64InstructionSelector::tryFoldAddLowIntoImm(
MachineInstr &RootDef,
7238 if (RootDef.
getOpcode() != AArch64::G_ADD_LOW)
7239 return std::nullopt;
7242 return std::nullopt;
7247 return std::nullopt;
7251 return std::nullopt;
7255 return std::nullopt;
7257 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.
getTarget());
7262 MIB.addGlobalAddress(GV,
Offset,
7272AArch64InstructionSelector::selectAddrModeIndexed(
MachineOperand &Root,
7273 unsigned Size)
const {
7278 return std::nullopt;
7281 if (RootDef->
getOpcode() == TargetOpcode::G_FRAME_INDEX) {
7291 auto OpFns = tryFoldAddLowIntoImm(*RootDef,
Size,
MRI);
7296 if (isBaseWithConstantOffset(Root,
MRI)) {
7304 if ((RHSC & (
Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
7305 if (LHSDef->
getOpcode() == TargetOpcode::G_FRAME_INDEX)
7320 if (selectAddrModeUnscaled(Root,
Size))
7321 return std::nullopt;
7332 switch (
MI.getOpcode()) {
7335 case TargetOpcode::G_SHL:
7337 case TargetOpcode::G_LSHR:
7339 case TargetOpcode::G_ASHR:
7341 case TargetOpcode::G_ROTR:
7349AArch64InstructionSelector::selectShiftedRegister(
MachineOperand &Root,
7350 bool AllowROR)
const {
7352 return std::nullopt;
7361 return std::nullopt;
7363 return std::nullopt;
7364 if (!isWorthFoldingIntoExtendedReg(*ShiftInst,
MRI))
7365 return std::nullopt;
7371 return std::nullopt;
7378 unsigned NumBits =
MRI.getType(ShiftReg).getSizeInBits();
7379 unsigned Val = *Immed & (NumBits - 1);
7388 unsigned Opc =
MI.getOpcode();
7391 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
7393 if (Opc == TargetOpcode::G_SEXT)
7394 Size =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
7396 Size =
MI.getOperand(2).getImm();
7397 assert(
Size != 64 &&
"Extend from 64 bits?");
7410 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
7411 unsigned Size =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
7412 assert(
Size != 64 &&
"Extend from 64 bits?");
7427 if (Opc != TargetOpcode::G_AND)
7446Register AArch64InstructionSelector::moveScalarRegClass(
7449 auto Ty =
MRI.getType(Reg);
7458 return Copy.getReg(0);
7464AArch64InstructionSelector::selectArithExtendedRegister(
7467 return std::nullopt;
7476 return std::nullopt;
7478 if (!isWorthFoldingIntoExtendedReg(*RootDef,
MRI))
7479 return std::nullopt;
7482 if (RootDef->
getOpcode() == TargetOpcode::G_SHL) {
7487 return std::nullopt;
7488 ShiftVal = *MaybeShiftVal;
7490 return std::nullopt;
7495 return std::nullopt;
7496 Ext = getExtendTypeForInst(*ExtDef,
MRI);
7498 return std::nullopt;
7502 Ext = getExtendTypeForInst(*RootDef,
MRI);
7504 return std::nullopt;
7513 if (isDef32(*ExtInst))
7514 return std::nullopt;
7521 ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
7525 MIB.addImm(getArithExtendImm(Ext, ShiftVal));
7530AArch64InstructionSelector::selectExtractHigh(
MachineOperand &Root)
const {
7532 return std::nullopt;
7537 while (Extract && Extract->MI->
getOpcode() == TargetOpcode::G_BITCAST &&
7538 STI.isLittleEndian())
7542 return std::nullopt;
7544 if (Extract->MI->
getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
7550 if (Extract->MI->
getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) {
7555 LaneIdx->Value.getSExtValue() == 1) {
7561 return std::nullopt;
7568 assert(
MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7569 "Expected G_CONSTANT");
7570 std::optional<int64_t> CstVal =
7572 assert(CstVal &&
"Expected constant value");
7576void AArch64InstructionSelector::renderLogicalImm32(
7578 assert(
I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7579 "Expected G_CONSTANT");
7580 uint64_t CstVal =
I.getOperand(1).getCImm()->getZExtValue();
7585void AArch64InstructionSelector::renderLogicalImm64(
7587 assert(
I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7588 "Expected G_CONSTANT");
7589 uint64_t CstVal =
I.getOperand(1).getCImm()->getZExtValue();
7597 assert(
MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == 0 &&
7598 "Expected G_UBSANTRAP");
7599 MIB.
addImm(
MI.getOperand(0).getImm() | (
'U' << 8));
7605 assert(
MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7606 "Expected G_FCONSTANT");
7614 assert(
MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7615 "Expected G_FCONSTANT");
7623 assert(
MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7624 "Expected G_FCONSTANT");
7629void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
7631 assert(
MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7632 "Expected G_FCONSTANT");
7640bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
7642 if (!
MI.mayLoadOrStore())
7645 "Expected load/store to have only one mem op!");
7646 return (*
MI.memoperands_begin())->getSize() == NumBytes;
7649bool AArch64InstructionSelector::isDef32(
const MachineInstr &
MI)
const {
7651 if (
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits() != 32)
7658 switch (
MI.getOpcode()) {
7661 case TargetOpcode::COPY:
7662 case TargetOpcode::G_BITCAST:
7663 case TargetOpcode::G_TRUNC:
7664 case TargetOpcode::G_PHI:
7674 assert(
MI.getOpcode() == TargetOpcode::G_PHI &&
"Expected a G_PHI");
7677 assert(DstRB &&
"Expected PHI dst to have regbank assigned");
7688 auto *OpDef =
MRI.getVRegDef(OpReg);
7689 const LLT &Ty =
MRI.getType(OpReg);
7695 if (InsertPt != OpDefBB.
end() && InsertPt->isPHI())
7699 MRI.setRegBank(Copy.getReg(0), *DstRB);
7700 MO.setReg(Copy.getReg(0));
7709 for (
auto &BB : MF) {
7710 for (
auto &
MI : BB) {
7711 if (
MI.getOpcode() == TargetOpcode::G_PHI)
7716 for (
auto *
MI : Phis) {
7738 bool HasGPROp =
false, HasFPROp =
false;
7742 const LLT &Ty =
MRI.getType(MO.getReg());
7752 if (RB->
getID() == AArch64::GPRRegBankID)
7758 if (HasGPROp && HasFPROp)
7768 return new AArch64InstructionSelector(
TM, Subtarget, RBI);
unsigned const MachineRegisterInfo * MRI
static bool isPreferredADD(int64_t ImmOff)
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue CCOp, AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC, const SDLoc &DL, SelectionDAG &DAG)
can be transformed to: not (and (not (and (setCC (cmp C)) (setCD (cmp D)))) (and (not (setCA (cmp A))...
static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp, AArch64CC::CondCode Predicate)
Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain of CCMP/CFCMP ops.
static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static bool isCMN(SDValue Op, ISD::CondCode CC)
static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC)
Emit expression as a conjunction (a series of CCMP/CFCMP ops).
static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy)
#define GET_GLOBALISEL_PREDICATES_INIT
static std::pair< const TargetRegisterClass *, const TargetRegisterClass * > getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Helper function to get the source and destination register classes for a copy.
#define GET_GLOBALISEL_TEMPORARIES_INIT
static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Convert an IR fp condition code to an AArch64 CC.
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert, MachineRegisterInfo &MRI)
Return a register which can be used as a bit to test in a TB(N)Z.
static unsigned getMinSizeForRegBank(const RegisterBank &RB)
Returns the minimum size the given register bank can hold.
static std::optional< int64_t > getVectorShiftImm(Register Reg, MachineRegisterInfo &MRI)
Returns the element immediate value of a vector shift operand if found.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the G_LOAD or G_STORE operation GenericOpc, appropriate for the (value)...
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P)
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the basic binary operation GenericOpc (such as G_OR or G_SDIV),...
static bool getSubRegForClass(const TargetRegisterClass *RC, const TargetRegisterInfo &TRI, unsigned &SubReg)
Returns the correct subregister to use for a given register class.
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI, Register SrcReg, const TargetRegisterClass *To, unsigned SubReg)
Helper function for selectCopy.
static const TargetRegisterClass * getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits, bool GetAllRegSet=false)
Given a register bank, and size in bits, return the smallest register class that can represent that c...
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI, const AArch64RegisterBankInfo &RBI)
static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI)
static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type)
static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI)
Given a shift instruction, return the correct shift type for that instruction.
static bool unsupportedBinOp(const MachineInstr &I, const AArch64RegisterBankInfo &RBI, const MachineRegisterInfo &MRI, const AArch64RegisterInfo &TRI)
Check whether I is a currently unsupported binary operation:
static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst, bool WillNegate, MachineRegisterInfo &MRI, unsigned Depth=0)
Returns true if Val is a tree of AND/OR/CMP operations that can be expressed as a conjunction.
static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg, const unsigned EltSize)
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static std::optional< uint64_t > getImmedFromMO(const MachineOperand &Root)
static std::pair< unsigned, unsigned > getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize)
Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given size and RB.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
static std::optional< int64_t > getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI)
Matches and returns the shift immediate value for a SHL instruction given a shift operand.
static void changeFPCCToORAArch64CC(CmpInst::Predicate CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
This file declares the targeting of the RegisterBankInfo class for AArch64.
static bool isStore(int Opcode)
static bool selectMergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static bool selectUnmergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file contains constants used for implementing Dwarf debug support.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
const char LLVMTargetMachineRef TM
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static constexpr int Concat[]
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
int getVarArgsStackIndex() const
int getVarArgsGPRIndex() const
unsigned getVarArgsGPRSize() const
This class provides the information for the target register banks.
bool isCallingConvWin64(CallingConv::ID CC) const
APInt bitcastToAPInt() const
Class for arbitrary precision integers.
APInt zext(unsigned width) const
Zero extend to a new width.
uint64_t getZExtValue() const
Get zero extended value.
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ ICMP_ULE
unsigned less or equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
bool isIntPredicate() const
static Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
ConstantFP - Floating Point Values [float, double].
const APFloat & getValueAPF() const
bool isNegative() const
Return true if the sign bit is set.
bool isZero() const
Return true if the value is positive or negative zero.
This is the shared class of boolean and integer constants.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Constant * getSplatValue(bool AllowPoison=false) const
If all elements of the vector constant have the same value, return that value.
const APInt & getUniqueInteger() const
If C is a constant integer then return its value, otherwise C must be a vector of constant integers,...
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
This class represents an Operation in the Expression.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
std::optional< SmallVector< std::function< void(MachineInstrBuilder &)>, 4 > > ComplexRendererFns
Represents indexed stores.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
Register getCondReg() const
Register getFalseReg() const
Register getTrueReg() const
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
bool isEquality() const
Return true if this predicate is either EQ or NE.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
constexpr LLT multiplyElements(int Factor) const
Produce a vector type that is Factor times bigger, preserving the element type.
constexpr bool isPointerVector() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
This is an important class for using LLVM in a threaded context.
TypeSize getValue() const
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setAdjustsStack(bool V)
void setFrameAddressIsTaken(bool T)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
Helper class to build MachineInstr.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineFunction & getMF()
Getter for the function we currently build.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineIRBuilderState & getState()
Getter for the State.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
void setState(const MachineIRBuilderState &NewState)
Setter for the State.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addBlockAddress(const BlockAddress *BA, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
void addMemOperand(MachineFunction &MF, MachineMemOperand *MO)
Add a MachineMemOperand to the machine instruction.
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
@ MOLoad
The memory access reads data.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
const ConstantInt * getCImm() const
bool isCImm() const
isCImm - Test if this is a MO_CImmediate operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
static MachineOperand CreateBA(const BlockAddress *BA, int64_t Offset, unsigned TargetFlags=0)
const ConstantFP * getFPImm() const
unsigned getPredicate() const
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
T get() const
Returns the value of the specified pointer type.
T dyn_cast() const
Returns the current pointer if it is of the specified pointer type, otherwise returns null.
Analysis providing profile information.
Holds all the information related to register banks.
static const TargetRegisterClass * constrainGenericRegister(Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI)
Constrain the (possibly generic) virtual register Reg to RC.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
TypeSize getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
This class represents the LLVM 'select' instruction.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static IntegerType * getInt8Ty(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
void changeFCMPPredToAArch64CC(const CmpInst::Predicate P, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Find the AArch64 condition codes necessary to represent P for a scalar floating point comparison.
std::optional< int64_t > getAArch64VectorSplatScalar(const MachineInstr &MI, const MachineRegisterInfo &MRI)
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint8_t encodeAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType9(uint64_t Imm)
static bool isAdvSIMDModImmType4(uint64_t Imm)
static bool isAdvSIMDModImmType5(uint64_t Imm)
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType10(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType9(uint64_t Imm)
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static bool isAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType5(uint64_t Imm)
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static bool isAdvSIMDModImmType10(uint64_t Imm)
static int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType8(uint64_t Imm)
static bool isAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType11(uint64_t Imm)
static bool isAdvSIMDModImmType11(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType6(uint64_t Imm)
static bool isAdvSIMDModImmType8(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType4(uint64_t Imm)
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
static bool isAdvSIMDModImmType6(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType1(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType1(uint64_t Imm)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(int64_t RequestedValue)
Matches a constant equal to RequestedValue.
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
This is an optimization pass for GlobalISel generic memory operations.
Register getFunctionLiveInPhysReg(MachineFunction &MF, const TargetInstrInfo &TII, MCRegister PhysReg, const TargetRegisterClass &RC, const DebugLoc &DL, LLT RegTy=LLT())
Return a virtual register corresponding to the incoming argument register PhysReg.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
bool isPreISelGenericOpcode(unsigned Opcode)
Check whether the given Opcode is a generic opcode that is not supposed to appear after ISel.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
std::optional< int64_t > getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT whose value fits in int64_t, returns it.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
bool shouldOptForSize(const MachineBasicBlock &MBB, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
Returns true if the given block should be optimized for size.
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
std::optional< ValueAndVReg > getAnyConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true, bool LookThroughAnyExt=false)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT or G_FCONST...
AtomicOrdering
Atomic ordering for LLVM's memory model.
InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &, AArch64Subtarget &, AArch64RegisterBankInfo &)
DWARFExpression::Operation Op
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Class which stores all the state required in a MachineIRBuilder.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.