#include "llvm/IR/IntrinsicsAArch64.h"

#define DEBUG_TYPE "aarch64-isel"

using namespace MIPatternMatch;
using namespace AArch64GISelUtils;

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET
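// AArch64GenGlobalISel.inc is generated by TableGen; each GET_GLOBALISEL_*
// guard below pulls a different slice of the generated selector (the
// predicate bitset here, later the predicate declarations, temporaries, and
// the selectImpl() body) into this translation unit.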
  InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);

  // hasFnAttribute() is expensive to call on every BRCOND selection, so we
  // cache the result here. (RHS reconstructed from upstream LLVM: the flag
  // is tied to the SpeculativeLoadHardening function attribute.)
  ProduceNonFlagSettingCondBr =
      !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
  bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,

  bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
  bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
  void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
  bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,

  unsigned Opc1, unsigned Opc2, bool isExt);

  unsigned emitConstantPoolEntry(const Constant *CPVal,

  std::optional<CmpInst::Predicate> = std::nullopt) const;

  emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
            std::initializer_list<llvm::SrcOp> SrcOps,
            const ComplexRendererFns &RenderFns = std::nullopt) const;
  const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,

  MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,

  std::pair<MachineInstr *, AArch64CC::CondCode>

  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;

  ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
  ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
  unsigned Size) const;
  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }
  ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,

  unsigned Size) const;
  template <int Width>
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }

  bool IsAddrOperand) const;
  unsigned SizeInBytes) const;
  bool WantsExt) const;
  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
  unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
    return selectAddrModeXRO(Root, Width / 8);
  }
  unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
    return selectAddrModeWRO(Root, Width / 8);
  }
  bool AllowROR = false) const;
  ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root);
  }
  ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root, /*AllowROR=*/true);
  }

  bool IsLoadStore = false) const;

  ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;

  int OpIdx = -1) const;
  int OpIdx = -1) const;
  int OpIdx = -1) const;
  int OpIdx = -1) const;
  int OpIdx = -1) const;
  int OpIdx = -1) const;
  int OpIdx = -1) const;

  bool tryOptSelect(GSelect &Sel);
  bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;

  bool ProduceNonFlagSettingCondBr = false;
#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL

#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
AArch64InstructionSelector::AArch64InstructionSelector(
    : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
#include "AArch64GenGlobalISel.inc"
#include "AArch64GenGlobalISel.inc"
                     bool GetAllRegSet = false) {
  if (RB.getID() == AArch64::GPRRegBankID) {
    return GetAllRegSet ? &AArch64::GPR32allRegClass
                        : &AArch64::GPR32RegClass;
    return GetAllRegSet ? &AArch64::GPR64allRegClass
                        : &AArch64::GPR64RegClass;
    return &AArch64::XSeqPairsClassRegClass;
  }

  if (RB.getID() == AArch64::FPRRegBankID) {
    return &AArch64::FPR8RegClass;
    return &AArch64::FPR16RegClass;
    return &AArch64::FPR32RegClass;
    return &AArch64::FPR64RegClass;
    return &AArch64::FPR128RegClass;
  }
                     bool GetAllRegSet = false) {
         "Expected FPR regbank for scalable type size");
  return &AArch64::ZPRRegClass;

  unsigned RegBankID = RB.getID();

  if (RegBankID == AArch64::GPRRegBankID) {
    if (SizeInBits <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (SizeInBits == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    if (SizeInBits == 128)
      return &AArch64::XSeqPairsClassRegClass;
  }

  if (RegBankID == AArch64::FPRRegBankID) {
    switch (SizeInBits) {
      return &AArch64::FPR8RegClass;
      return &AArch64::FPR16RegClass;
      return &AArch64::FPR32RegClass;
      return &AArch64::FPR64RegClass;
      return &AArch64::FPR128RegClass;
    }
  }
  switch (TRI.getRegSizeInBits(*RC)) {
  if (RC != &AArch64::FPR32RegClass)
  LLVM_DEBUG(
      dbgs() << "Couldn't find appropriate subregister for register class.");

  switch (RB.getID()) {
  case AArch64::GPRRegBankID:
  case AArch64::FPRRegBankID:
                          const unsigned RegClassIDs[],
  unsigned NumRegs = Regs.size();

  assert(NumRegs >= 2 && NumRegs <= 4 &&
         "Only support between two and 4 registers in a tuple!");

  auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
  auto RegSequence =
      MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
  for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
    RegSequence.addUse(Regs[I]);
    RegSequence.addImm(SubRegs[I]);
  }
  return RegSequence.getReg(0);
  static const unsigned RegClassIDs[] = {
      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
                                     AArch64::dsub2, AArch64::dsub3};
  return createTuple(Regs, RegClassIDs, SubRegs, MIB);

  static const unsigned RegClassIDs[] = {
      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
                                     AArch64::qsub2, AArch64::qsub3};
  return createTuple(Regs, RegClassIDs, SubRegs, MIB);
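// The two helpers above build D-register (64-bit) and Q-register (128-bit)
// tuples: createTuple glues 2-4 registers into one DD../QQ.. super-register
// with a single REG_SEQUENCE, which is the operand shape the NEON
// multiple-vector load/store instructions expect.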
  auto &MBB = *MI.getParent();
  auto &MRI = MF.getRegInfo();

  else if (Root.isReg()) {
    Immed = ValAndVReg->Value.getSExtValue();
  LLT Ty = MRI.getType(I.getOperand(0).getReg());

  for (auto &MO : I.operands()) {
    LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");

    if (!MO.getReg().isVirtual()) {
      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");

    if (PrevOpBank && OpBank != PrevOpBank) {
      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
  case AArch64::GPRRegBankID:
    if (OpSize == 32) {
      switch (GenericOpc) {
      case TargetOpcode::G_SHL:
        return AArch64::LSLVWr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVWr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVWr;
      }
    } else if (OpSize == 64) {
      switch (GenericOpc) {
      case TargetOpcode::G_PTR_ADD:
        return AArch64::ADDXrr;
      case TargetOpcode::G_SHL:
        return AArch64::LSLVXr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVXr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVXr;
      }
  case AArch64::FPRRegBankID:
    switch (GenericOpc) {
    case TargetOpcode::G_FADD:
      return AArch64::FADDSrr;
    case TargetOpcode::G_FSUB:
      return AArch64::FSUBSrr;
    case TargetOpcode::G_FMUL:
      return AArch64::FMULSrr;
    case TargetOpcode::G_FDIV:
      return AArch64::FDIVSrr;
    }
    switch (GenericOpc) {
    case TargetOpcode::G_FADD:
      return AArch64::FADDDrr;
    case TargetOpcode::G_FSUB:
      return AArch64::FSUBDrr;
    case TargetOpcode::G_FMUL:
      return AArch64::FMULDrr;
    case TargetOpcode::G_FDIV:
      return AArch64::FDIVDrr;
    case TargetOpcode::G_OR:
      return AArch64::ORRv8i8;
    }
  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  case AArch64::GPRRegBankID:
    return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
    return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
    return isStore ? AArch64::STRWui : AArch64::LDRWui;
    return isStore ? AArch64::STRXui : AArch64::LDRXui;
  case AArch64::FPRRegBankID:
    return isStore ? AArch64::STRBui : AArch64::LDRBui;
    return isStore ? AArch64::STRHui : AArch64::LDRHui;
    return isStore ? AArch64::STRSui : AArch64::LDRSui;
    return isStore ? AArch64::STRDui : AArch64::LDRDui;
    return isStore ? AArch64::STRQui : AArch64::LDRQui;
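// The unsigned-offset (*ui) load/store opcode is chosen by register bank and
// access size: the GPR bank uses the integer LDR/STR family (with BB/HH
// sub-word forms), the FPR bank the scalar FP/vector B/H/S/D/Q forms.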
  assert(SrcReg.isValid() && "Expected a valid source register?");
  assert(To && "Destination register class cannot be null");
  RegOp.setReg(SubRegCopy.getReg(0));

  if (!I.getOperand(0).getReg().isPhysical())
static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>

  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();

  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))

  if (Reg.isPhysical())
  LLT Ty = MRI.getType(Reg);
  RC = getRegClassForTypeOnBank(Ty, RB);
  LLVM_DEBUG(
      dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();

  LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");

  const TypeSize SrcSize = TRI.getRegSizeInBits(*SrcRC);
  const TypeSize DstSize = TRI.getRegSizeInBits(*DstRC);

  auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
  } else if (SrcSize > DstSize) {
  } else if (DstSize > SrcSize) {
  Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
          TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
  RegOp.setReg(PromoteReg);

  if (I.getOpcode() == TargetOpcode::G_ZEXT) {
    I.setDesc(TII.get(AArch64::COPY));
    assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
  I.setDesc(TII.get(AArch64::COPY));
  switch (GenericOpc) {
  case TargetOpcode::G_SITOFP:
    return AArch64::SCVTFUWSri;
  case TargetOpcode::G_UITOFP:
    return AArch64::UCVTFUWSri;
  case TargetOpcode::G_FPTOSI:
    return AArch64::FCVTZSUWSr;
  case TargetOpcode::G_FPTOUI:
    return AArch64::FCVTZUUWSr;
  }
  switch (GenericOpc) {
  case TargetOpcode::G_SITOFP:
    return AArch64::SCVTFUXSri;
  case TargetOpcode::G_UITOFP:
    return AArch64::UCVTFUXSri;
  case TargetOpcode::G_FPTOSI:
    return AArch64::FCVTZSUWDr;
  case TargetOpcode::G_FPTOUI:
    return AArch64::FCVTZUUWDr;
  }
  switch (GenericOpc) {
  case TargetOpcode::G_SITOFP:
    return AArch64::SCVTFUWDri;
  case TargetOpcode::G_UITOFP:
    return AArch64::UCVTFUWDri;
  case TargetOpcode::G_FPTOSI:
    return AArch64::FCVTZSUXSr;
  case TargetOpcode::G_FPTOUI:
    return AArch64::FCVTZUUXSr;
  }
  switch (GenericOpc) {
  case TargetOpcode::G_SITOFP:
    return AArch64::SCVTFUXDri;
  case TargetOpcode::G_UITOFP:
    return AArch64::UCVTFUXDri;
  case TargetOpcode::G_FPTOSI:
    return AArch64::FCVTZSUXDr;
  case TargetOpcode::G_FPTOUI:
    return AArch64::FCVTZUUXDr;
  }
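// The four switches above cover the (source, destination) size combinations
// 32/32, 64/32, 32/64 and 64/64. The opcode names encode the widths, e.g.
// SCVTFUWSri is a signed int-to-FP convert from a W register to an S
// register, and FCVTZSUXDr an FP-to-signed-int convert from a D register to
// an X register.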
             RBI.getRegBank(True, MRI, TRI)->getID() &&
         "Expected both select operands to have the same regbank?");
  LLT Ty = MRI.getType(True);
         "Expected 32 bit or 64 bit select only?");
  const bool Is32Bit = Size == 32;
  if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
    unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
    auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);

  unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;

  auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
    Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
    Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
    Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;

  auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
    if (!TrueCst && !FalseCst)
    Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
    if (TrueCst && FalseCst) {
      int64_t T = TrueCst->Value.getSExtValue();
      int64_t F = FalseCst->Value.getSExtValue();
      if (T == 0 && F == 1) {
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
      if (T == 0 && F == -1) {
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
      int64_t T = TrueCst->Value.getSExtValue();
      Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
      Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
      int64_t F = FalseCst->Value.getSExtValue();
      Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
      Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;

  Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert=*/false);
  Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert=*/true);
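  // Roughly, the constant folds above rewrite select(cc, 0, 1) into
  // CSINC zr, zr and select(cc, 0, -1) into CSINV zr, zr (inverting the
  // condition as needed), and single-constant cases into CSINC/CSINV when
  // the other value is t + 1 or ~t, avoiding constant materialization.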
  assert(Reg.isValid() && "Expected valid register!");
  bool HasZext = false;
  unsigned Opc = MI->getOpcode();

  if (!MI->getOperand(0).isReg() ||
      !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))

  if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
      Opc == TargetOpcode::G_TRUNC) {
    if (Opc == TargetOpcode::G_ZEXT)

    Register NextReg = MI->getOperand(1).getReg();
    if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))

  std::optional<uint64_t> C;
  case TargetOpcode::G_AND:
  case TargetOpcode::G_XOR: {
    TestReg = MI->getOperand(1).getReg();
    Register ConstantReg = MI->getOperand(2).getReg();
      C = VRegAndVal->Value.getZExtValue();
      C = VRegAndVal->Value.getSExtValue();
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_SHL: {
    TestReg = MI->getOperand(1).getReg();
      C = VRegAndVal->Value.getSExtValue();

  unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
  case TargetOpcode::G_AND:
    if ((*C >> Bit) & 1)
  case TargetOpcode::G_SHL:
    if (*C <= Bit && (Bit - *C) < TestRegSize) {
  case TargetOpcode::G_ASHR:
    if (Bit >= TestRegSize)
      Bit = TestRegSize - 1;
  case TargetOpcode::G_LSHR:
    if ((Bit + *C) < TestRegSize) {
  case TargetOpcode::G_XOR:
    if ((*C >> Bit) & 1)
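  // Summary of the walk above: starting from the tested register, look
  // through single-use defs and fold them into the bit test where legal.
  // Extends and truncs are looked through directly; AND can be skipped when
  // the mask has the tested bit set; SHL/LSHR move the tested bit down/up by
  // the shift amount; ASHR clamps the bit to the sign bit; XOR with the bit
  // set flips TBZ <-> TBNZ.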
  assert(ProduceNonFlagSettingCondBr &&
         "Cannot emit TB(N)Z with speculation tracking!");
  LLT Ty = MRI.getType(TestReg);
  assert(Bit < 64 && "Bit is too large!");

  bool UseWReg = Bit < 32;
  unsigned NecessarySize = UseWReg ? 32 : 64;
  if (Size != NecessarySize)
    TestReg = moveScalarRegClass(
        TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,

  static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
                                          {AArch64::TBZW, AArch64::TBNZW}};
  unsigned Opc = OpcTable[UseWReg][IsNegative];
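  // OpcTable is indexed [UseWReg][IsNegative]: bits 0-31 can use the
  // W-register forms, bits 32-63 need the X forms, and IsNegative selects
  // TBNZ over TBZ.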
bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
  assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");

  int32_t Bit = MaybeBit->Value.exactLogBase2();
  emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
  assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
  assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
             AArch64::GPRRegBankID &&
         "Expected GPRs only?");
  auto Ty = MRI.getType(CompareReg);
  assert(Width <= 64 && "Expected width to be at most 64?");
  static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
                                          {AArch64::CBNZW, AArch64::CBNZX}};
  unsigned Opc = OpcTable[IsNegative][Width == 64];
  auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
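  // Note this table is indexed the other way round, [IsNegative][Is64Bit]:
  // compare-with-zero branches pick CBZ vs CBNZ first, then the W or X form
  // based on the width of the compared register.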
bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  I.eraseFromParent();

bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  if (!ProduceNonFlagSettingCondBr)

  if (VRegAndVal && !AndInst) {
    int64_t C = VRegAndVal->Value.getSExtValue();
      emitTestBit(LHS, Bit, /*IsNegative=*/false, DestMBB, MIB);
      I.eraseFromParent();
      emitTestBit(LHS, Bit, /*IsNegative=*/true, DestMBB, MIB);
      I.eraseFromParent();
      emitTestBit(LHS, Bit, /*IsNegative=*/false, DestMBB, MIB);
      I.eraseFromParent();

  if (VRegAndVal && VRegAndVal->Value == 0) {
    tryOptAndIntoCompareBranch(
      I.eraseFromParent();

  auto LHSTy = MRI.getType(LHS);
  if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
    I.eraseFromParent();
bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
  I.eraseFromParent();

bool AArch64InstructionSelector::selectCompareBranch(
  Register CondReg = I.getOperand(0).getReg();
  if (CCMIOpc == TargetOpcode::G_FCMP)
    return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
  if (CCMIOpc == TargetOpcode::G_ICMP)
    return selectCompareBranchFedByICmp(I, *CCMI, MIB);

  if (ProduceNonFlagSettingCondBr) {
    emitTestBit(CondReg, /*Bit=*/0, /*IsNegative=*/true,
                I.getOperand(1).getMBB(), MIB);
    I.eraseFromParent();
      .addMBB(I.getOperand(1).getMBB());
  I.eraseFromParent();
  assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
    return std::nullopt;
  int64_t Imm = *ShiftImm;
    return std::nullopt;
    return std::nullopt;
    return std::nullopt;
    return std::nullopt;
    return std::nullopt;
    return std::nullopt;
bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
  assert(I.getOpcode() == TargetOpcode::G_SHL);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();

    Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
    Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
    Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
    Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
    Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
    Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
    Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;

  auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
  I.eraseFromParent();
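// For each vector shape: if the shift amount is a splat constant (ImmVal),
// the immediate SHL ..._shift form is used; otherwise we fall back to USHL,
// which takes the per-lane shift amounts in a second vector register.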
bool AArch64InstructionSelector::selectVectorAshrLshr(
  assert(I.getOpcode() == TargetOpcode::G_ASHR ||
         I.getOpcode() == TargetOpcode::G_LSHR);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();

  bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;

  unsigned NegOpc = 0;
      getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
    Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
    NegOpc = AArch64::NEGv2i64;
    Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
    NegOpc = AArch64::NEGv4i32;
    Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
    NegOpc = AArch64::NEGv2i32;
    Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
    NegOpc = AArch64::NEGv4i16;
    Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
    NegOpc = AArch64::NEGv8i16;
    Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
    NegOpc = AArch64::NEGv16i8;
    Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
    NegOpc = AArch64::NEGv8i8;

  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
  I.eraseFromParent();
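// AArch64 has no vector right-shift-by-register instruction: SSHL/USHL shift
// left for positive amounts and right for negative ones, so the shift amount
// is negated with NEG first and the signedness of G_ASHR vs G_LSHR picks
// SSHL or USHL.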
bool AArch64InstructionSelector::selectVaStartAAPCS(

bool AArch64InstructionSelector::selectVaStartDarwin(
  Register ListReg = I.getOperand(0).getReg();
  Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
  I.eraseFromParent();

void AArch64InstructionSelector::materializeLargeCMVal(
  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
      : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
  if (auto *GV = dyn_cast<GlobalValue>(V)) {
        GV, MovZ->getOperand(1).getOffset(), Flags));
        MovZ->getOperand(1).getOffset(), Flags));
  Register DstReg = BuildMovK(MovZ.getReg(0),
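// Large code-model constants are materialized as a MOVZ of the low 16 bits
// of the symbol address followed by a chain of MOVKs (via BuildMovK) that
// patch in the remaining 16-bit chunks.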
bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
  switch (I.getOpcode()) {
  case TargetOpcode::G_STORE: {
    bool Changed = contractCrossBankCopyIntoStore(I, MRI);
      SrcOp.setReg(NewSrc);
      RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
  case TargetOpcode::G_PTR_ADD:
    return convertPtrAddToAdd(I, MRI);
  case TargetOpcode::G_LOAD: {
    Register DstReg = I.getOperand(0).getReg();
    const LLT DstTy = MRI.getType(DstReg);
  case AArch64::G_DUP: {
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    MRI.setType(I.getOperand(0).getReg(),
    MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
    I.getOperand(1).setReg(NewSrc.getReg(0));
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_SITOFP: {
    Register SrcReg = I.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
      if (I.getOpcode() == TargetOpcode::G_SITOFP)
        I.setDesc(TII.get(AArch64::G_SITOF));
      I.setDesc(TII.get(AArch64::G_UITOF));
bool AArch64InstructionSelector::convertPtrAddToAdd(
  assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
  Register DstReg = I.getOperand(0).getReg();
  Register AddOp1Reg = I.getOperand(1).getReg();
  const LLT PtrTy = MRI.getType(DstReg);

  const LLT CastPtrTy =
    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));

  I.setDesc(TII.get(TargetOpcode::G_ADD));
  MRI.setType(DstReg, CastPtrTy);
  I.getOperand(1).setReg(PtrToInt.getReg(0));
  if (!select(*PtrToInt)) {
    LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");

    I.getOperand(2).setReg(NegatedReg);
    I.setDesc(TII.get(TargetOpcode::G_SUB));
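// The rewrite performed here, roughly:
//   %dst:_(p0) = G_PTR_ADD %base, %off
// becomes
//   %tmp:_(s64) = G_PTRTOINT %base
//   %dst:_(s64) = G_ADD %tmp, %off
// (or G_SUB with the negated offset when %off is a negative constant), which
// lets the imported integer add/sub patterns apply.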
bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
  assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
  const auto &MO = I.getOperand(2);
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());

  auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
  auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
  if (!Imm1Fn || !Imm2Fn)

  MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
  for (auto &RenderFn : *Imm1Fn)
  for (auto &RenderFn : *Imm2Fn)
  I.eraseFromParent();
bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
  assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
  LLT DefDstTy = MRI.getType(DefDstReg);
  Register StoreSrcReg = I.getOperand(0).getReg();
  LLT StoreSrcTy = MRI.getType(StoreSrcReg);

  if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
      RBI.getRegBank(DefDstReg, MRI, TRI))

  I.getOperand(0).setReg(DefDstReg);
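// If the stored value is a copy that only moves a value across register
// banks, store the copy's source directly instead: the store can be issued
// from either bank, which saves a cross-bank register move.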
bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  switch (I.getOpcode()) {
  case AArch64::G_DUP: {
    Register Src = I.getOperand(1).getReg();
    Register Dst = I.getOperand(0).getReg();
        MRI.getType(Dst).getNumElements(),
        ValAndVReg->Value));
    if (!emitConstantVector(Dst, CV, MIB, MRI))
    I.eraseFromParent();
  case TargetOpcode::G_SEXT:
    if (selectUSMovFromExtend(I, MRI))
  case TargetOpcode::G_BR:
  case TargetOpcode::G_SHL:
    return earlySelectSHL(I, MRI);
  case TargetOpcode::G_CONSTANT: {
    bool IsZero = false;
    if (I.getOperand(1).isCImm())
      IsZero = I.getOperand(1).getCImm()->isZero();
    else if (I.getOperand(1).isImm())
      IsZero = I.getOperand(1).getImm() == 0;

    Register DefReg = I.getOperand(0).getReg();
    LLT Ty = MRI.getType(DefReg);
      I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
      RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
      I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
      RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
    I.setDesc(TII.get(TargetOpcode::COPY));
  case TargetOpcode::G_ADD: {
    Register AddDst = I.getOperand(0).getReg();
    Register AddLHS = I.getOperand(1).getReg();
    Register AddRHS = I.getOperand(2).getReg();
    LLT Ty = MRI.getType(AddLHS);
      if (!MRI.hasOneNonDBGUse(Reg))
          MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
      Cmp = MatchCmp(AddRHS);
    auto &PredOp = Cmp->getOperand(1);
    emitIntegerCompare(Cmp->getOperand(2), Cmp->getOperand(3), PredOp, MIB);
    emitCSINC(AddDst, AddLHS, AddLHS, InvCC, MIB);
    I.eraseFromParent();
  case TargetOpcode::G_OR: {
    Register Dst = I.getOperand(0).getReg();
    LLT Ty = MRI.getType(Dst);
    if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))

    int64_t Immr = Size - ShiftImm;
    int64_t Imms = Size - ShiftImm - 1;
    unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
    emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
    I.eraseFromParent();
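  // Roughly, this matches or(and(MaskSrc, (1 << ShiftImm) - 1),
  // shl(ShiftSrc, ShiftImm)) and emits a single BFM (bitfield move): the
  // Immr/Imms immediates encode the rotation and field width so that
  // ShiftSrc is inserted above the low ShiftImm bits of MaskSrc.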
  case TargetOpcode::G_FENCE: {
    if (I.getOperand(1).getImm() == 0)
        .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
    I.eraseFromParent();
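  // The DMB immediate above is 0x9 (ISHLD, load barrier) when the ordering
  // operand is 4 (acquire), and 0xb (ISH, full inner-shareable barrier)
  // otherwise.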
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  if (Subtarget->requiresStrictAlign()) {
    LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");

  unsigned Opcode = I.getOpcode();
  if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
    if (Opcode == TargetOpcode::LOAD_STACK_GUARD)

    if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
      const Register DefReg = I.getOperand(0).getReg();
      const LLT DefTy = MRI.getType(DefReg);
          MRI.getRegClassOrRegBank(DefReg);
        DefRC = getRegClassForTypeOnBank(DefTy, RB);
      I.setDesc(TII.get(TargetOpcode::PHI));
      return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);

  if (I.isDebugInstr())

  if (I.getNumOperands() != I.getNumExplicitOperands()) {
    LLVM_DEBUG(
        dbgs() << "Generic instruction has unexpected implicit operands\n");

  if (preISelLower(I)) {
    Opcode = I.getOpcode();

  if (selectImpl(I, *CoverageInfo))

      I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
  case TargetOpcode::G_SBFX:
  case TargetOpcode::G_UBFX: {
    static const unsigned OpcTable[2][2] = {
        {AArch64::UBFMWri, AArch64::UBFMXri},
        {AArch64::SBFMWri, AArch64::SBFMXri}};
    bool IsSigned = Opcode == TargetOpcode::G_SBFX;
    unsigned Opc = OpcTable[IsSigned][Size == 64];
    assert(Cst1 && "Should have gotten a constant for src 1?");
    assert(Cst2 && "Should have gotten a constant for src 2?");
    auto LSB = Cst1->Value.getZExtValue();
    auto Width = Cst2->Value.getZExtValue();
    MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
        .addImm(LSB + Width - 1);
    I.eraseFromParent();
  case TargetOpcode::G_BRCOND:
    return selectCompareBranch(I, MF, MRI);

  case TargetOpcode::G_BRINDIRECT: {
    I.setDesc(TII.get(AArch64::BR));

  case TargetOpcode::G_BRJT:
    return selectBrJT(I, MRI);

  case AArch64::G_ADD_LOW: {
    if (BaseMI->getOpcode() != AArch64::ADRP) {
      I.setDesc(TII.get(AArch64::ADDXri));
           "Expected small code model");
    auto Op2 = I.getOperand(2);
    auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
                       .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
                                         Op1.getTargetFlags())
                       Op2.getTargetFlags());
    I.eraseFromParent();
  case TargetOpcode::G_FCONSTANT:
  case TargetOpcode::G_CONSTANT: {
    const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;

    const Register DefReg = I.getOperand(0).getReg();
    const LLT DefTy = MRI.getType(DefReg);

      if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
                   << " constant, expected: " << s16 << " or " << s32
                   << " or " << s64 << " or " << s128 << '\n');

      if (RB.getID() != AArch64::FPRRegBankID) {
                   << " constant on bank: " << RB
                   << ", expected: FPR\n");

      if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))

      if (Ty != p0 && Ty != s8 && Ty != s16) {
                   << " constant, expected: " << s32 << ", " << s64
                   << ", or " << p0 << '\n');

      if (RB.getID() != AArch64::GPRRegBankID) {
                   << " constant on bank: " << RB
                   << ", expected: GPR\n");

      if (TLI->isFPImmLegal(I.getOperand(1).getFPImm()->getValueAPF(),

        auto *FPImm = I.getOperand(1).getFPImm();
          LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
        I.eraseFromParent();
        return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);

      assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
      const Register DefGPRReg = MRI.createVirtualRegister(
          DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);

      if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
        LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
    } else if (I.getOperand(1).isCImm()) {
      uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
      I.getOperand(1).ChangeToImmediate(Val);
    } else if (I.getOperand(1).isImm()) {
      uint64_t Val = I.getOperand(1).getImm();
      I.getOperand(1).ChangeToImmediate(Val);

    const unsigned MovOpc =
        DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
    I.setDesc(TII.get(MovOpc));
  case TargetOpcode::G_EXTRACT: {
    Register DstReg = I.getOperand(0).getReg();
    Register SrcReg = I.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    LLT DstTy = MRI.getType(DstReg);
    unsigned Offset = I.getOperand(2).getImm();

      if (SrcRB.getID() == AArch64::GPRRegBankID) {
            MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
                Offset == 0 ? AArch64::sube64 : AArch64::subo64);
                             AArch64::GPR64RegClass, NewI->getOperand(0));
        I.eraseFromParent();

      unsigned LaneIdx = Offset / 64;
          DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
      I.eraseFromParent();

    I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
           "unexpected G_EXTRACT types");
        .addReg(DstReg, 0, AArch64::sub_32);
    RBI.constrainGenericRegister(I.getOperand(0).getReg(),
                                 AArch64::GPR32RegClass, MRI);
    I.getOperand(0).setReg(DstReg);

  case TargetOpcode::G_INSERT: {
    LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
    unsigned LSB = I.getOperand(3).getImm();
    unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
    I.getOperand(3).setImm((DstSize - LSB) % DstSize);
           "unexpected G_INSERT types");
            TII.get(AArch64::SUBREG_TO_REG))
        .addUse(I.getOperand(2).getReg())
        .addImm(AArch64::sub_32);
    RBI.constrainGenericRegister(I.getOperand(2).getReg(),
                                 AArch64::GPR32RegClass, MRI);
    I.getOperand(2).setReg(SrcReg);
  case TargetOpcode::G_FRAME_INDEX: {
    I.setDesc(TII.get(AArch64::ADDXri));

  case TargetOpcode::G_GLOBAL_VALUE: {
    if (I.getOperand(1).isSymbol()) {
      OpFlags = I.getOperand(1).getTargetFlags();
      GV = I.getOperand(1).getGlobal();
        return selectTLSGlobalValue(I, MRI);
      OpFlags = STI.ClassifyGlobalReference(GV, TM);

      I.setDesc(TII.get(AArch64::LOADgot));
      I.getOperand(1).setTargetFlags(OpFlags);
               !TM.isPositionIndependent()) {
      materializeLargeCMVal(I, GV, OpFlags);
      I.eraseFromParent();
      I.setDesc(TII.get(AArch64::ADR));
      I.getOperand(1).setTargetFlags(OpFlags);
      I.setDesc(TII.get(AArch64::MOVaddr));
      MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),

  case TargetOpcode::G_PTRAUTH_GLOBAL_VALUE:
    return selectPtrAuthGlobalValue(I, MRI);
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE: {
    bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;

    if (Order != AtomicOrdering::NotAtomic &&
        Order != AtomicOrdering::Unordered &&
        Order != AtomicOrdering::Monotonic) {
      assert(!isa<GZExtLoad>(LdSt));
      assert(MemSizeInBytes <= 8 &&
             "128-bit atomics should already be custom-legalized");

      if (isa<GLoad>(LdSt)) {
        static constexpr unsigned LDAPROpcodes[] = {
            AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
        static constexpr unsigned LDAROpcodes[] = {
            AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
            STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
        I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
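        // Atomic loads index the opcode arrays by Log2 of the access size
        // (B/H/W/X) and prefer the weaker LDAPR (RCpc) forms over LDAR when
        // the target has RCpc and the ordering is weaker than sequentially
        // consistent.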
      static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
                                             AArch64::STLRW, AArch64::STLRX};
      if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
        Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
        MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
            .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
        I.getOperand(0).setReg(NewVal);
      I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));

           "Load/Store pointer operand isn't a GPR");
    assert(MRI.getType(PtrReg).isPointer() &&
           "Load/Store pointer operand isn't a pointer");

    const LLT ValTy = MRI.getType(ValReg);

    if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
      auto *RC = getRegClassForTypeOnBank(MemTy, RB);
          .addReg(ValReg, 0, SubReg)
      RBI.constrainGenericRegister(Copy, *RC, MRI);
    } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
      if (RB.getID() == AArch64::FPRRegBankID) {
        auto *RC = getRegClassForTypeOnBank(MemTy, RB);
        MRI.setRegBank(NewDst, RB);
        MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
        auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
        RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);

    auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
      bool IsStore = isa<GStore>(I);
      const unsigned NewOpc =
      if (NewOpc == I.getOpcode())

          selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
        I.setDesc(TII.get(NewOpc));

      auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
      Register CurValReg = I.getOperand(0).getReg();
      IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
      NewInst.cloneMemRefs(I);
      for (auto &Fn : *AddrModeFns)
      I.eraseFromParent();
    if (Opcode == TargetOpcode::G_STORE) {
      if (CVal && CVal->Value == 0) {
        case AArch64::STRWui:
        case AArch64::STRHHui:
        case AArch64::STRBBui:
          LoadStore->getOperand(0).setReg(AArch64::WZR);
        case AArch64::STRXui:
          LoadStore->getOperand(0).setReg(AArch64::XZR);

    if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
                       ValTy == LLT::scalar(64) && MemSizeInBits == 32)) {
      if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)

      Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);

      MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
          .addImm(AArch64::sub_32);
      return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
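    // A 32-bit load already zeroes bits 63:32 of the X register, so a
    // zero-extending 32->64-bit load is just the 32-bit load wrapped in a
    // SUBREG_TO_REG of sub_32; no explicit extend instruction is needed.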
  case TargetOpcode::G_INDEXED_ZEXTLOAD:
  case TargetOpcode::G_INDEXED_SEXTLOAD:
    return selectIndexedExtLoad(I, MRI);
  case TargetOpcode::G_INDEXED_LOAD:
    return selectIndexedLoad(I, MRI);
  case TargetOpcode::G_INDEXED_STORE:
    return selectIndexedStore(cast<GIndexedStore>(I), MRI);

  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    if (MRI.getType(I.getOperand(0).getReg()).isVector())
      return selectVectorAshrLshr(I, MRI);
  case TargetOpcode::G_SHL:
    if (Opcode == TargetOpcode::G_SHL &&
        MRI.getType(I.getOperand(0).getReg()).isVector())
      return selectVectorSHL(I, MRI);

    Register SrcReg = I.getOperand(1).getReg();
    Register ShiftReg = I.getOperand(2).getReg();
    const LLT ShiftTy = MRI.getType(ShiftReg);
    const LLT SrcTy = MRI.getType(SrcReg);

      auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
                       .addReg(ShiftReg, 0, AArch64::sub_32);
      MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
      I.getOperand(2).setReg(Trunc.getReg(0));

  case TargetOpcode::G_OR: {
    const Register DefReg = I.getOperand(0).getReg();
    if (NewOpc == I.getOpcode())
    I.setDesc(TII.get(NewOpc));

  case TargetOpcode::G_PTR_ADD: {
    emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
    I.eraseFromParent();

  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_USUBO:
    return selectOverflowOp(I, MRI);

  case TargetOpcode::G_PTRMASK: {
    Register MaskReg = I.getOperand(2).getReg();
    I.setDesc(TII.get(AArch64::ANDXri));
    I.getOperand(2).ChangeToImmediate(
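    // G_PTRMASK is selected as ANDXri: the mask operand must be a constant
    // that is encodable as a 64-bit logical immediate so it can be folded
    // into the AND's immediate field.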
  case TargetOpcode::G_PTRTOINT:
  case TargetOpcode::G_TRUNC: {
    const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());

    const Register DstReg = I.getOperand(0).getReg();
    const Register SrcReg = I.getOperand(1).getReg();
      LLVM_DEBUG(
          dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");

    if (DstRB.getID() == AArch64::GPRRegBankID) {
      if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
          !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
        LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");

      if (DstRC == SrcRC) {
      } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
      } else if (DstRC == &AArch64::GPR32RegClass &&
                 SrcRC == &AArch64::GPR64RegClass) {
        I.getOperand(1).setSubReg(AArch64::sub_32);
        LLVM_DEBUG(
            dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");

      I.setDesc(TII.get(TargetOpcode::COPY));
    } else if (DstRB.getID() == AArch64::FPRRegBankID) {
        I.setDesc(TII.get(AArch64::XTNv4i16));
        I.eraseFromParent();

      if (Opcode == TargetOpcode::G_PTRTOINT) {
        assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
        I.setDesc(TII.get(TargetOpcode::COPY));

  case TargetOpcode::G_ANYEXT: {
    if (selectUSMovFromExtend(I, MRI))

    const Register DstReg = I.getOperand(0).getReg();
    const Register SrcReg = I.getOperand(1).getReg();
    if (RBDst.getID() != AArch64::GPRRegBankID) {
                 << ", expected: GPR\n");

    if (RBSrc.getID() != AArch64::GPRRegBankID) {
                 << ", expected: GPR\n");

    const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
      LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");

    if (DstSize != 64 && DstSize > 32) {
                 << ", expected: 32 or 64\n");

      Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
          .addImm(AArch64::sub_32);
      I.getOperand(1).setReg(ExtSrc);
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_SEXT_INREG:
  case TargetOpcode::G_SEXT: {
    if (selectUSMovFromExtend(I, MRI))

    unsigned Opcode = I.getOpcode();
    const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
    const Register DefReg = I.getOperand(0).getReg();
    Register SrcReg = I.getOperand(1).getReg();
    const LLT DstTy = MRI.getType(DefReg);
    const LLT SrcTy = MRI.getType(SrcReg);

    if (Opcode == TargetOpcode::G_SEXT_INREG)
      SrcSize = I.getOperand(2).getImm();
               AArch64::GPRRegBankID &&
           "Unexpected ext regbank");

        RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
    if (LoadMI && IsGPR) {
      unsigned BytesLoaded = MemOp->getSize().getValue();

    if (IsGPR && SrcSize == 32 && DstSize == 64) {
          MRI.createVirtualRegister(&AArch64::GPR32RegClass);
      const Register ZReg = AArch64::WZR;
      MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
      MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
          .addImm(AArch64::sub_32);

      if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
        LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");

      if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
      I.eraseFromParent();

    if (DstSize == 64) {
      if (Opcode != TargetOpcode::G_SEXT_INREG) {
        if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
        SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
                                {&AArch64::GPR64RegClass}, {})
      ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
    } else if (DstSize <= 32) {
      ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
    I.eraseFromParent();
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
              SrcTy = MRI.getType(I.getOperand(1).getReg());
    if (NewOpc == Opcode)
    I.setDesc(TII.get(NewOpc));

  case TargetOpcode::G_FREEZE:

  case TargetOpcode::G_INTTOPTR:

  case TargetOpcode::G_BITCAST:

  case TargetOpcode::G_SELECT: {
    auto &Sel = cast<GSelect>(I);
    const Register CondReg = Sel.getCondReg();
    const Register TReg = Sel.getTrueReg();
    const Register FReg = Sel.getFalseReg();

    if (tryOptSelect(Sel))

    Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
    auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
    if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
    Sel.eraseFromParent();

  case TargetOpcode::G_ICMP: {
    emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
    emitCSINC(I.getOperand(0).getReg(), AArch64::WZR, AArch64::WZR, InvCC,
              MIB);
    I.eraseFromParent();

  case TargetOpcode::G_FCMP: {
    if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
        !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
    I.eraseFromParent();

  case TargetOpcode::G_VASTART:
    return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
                                : selectVaStartAAPCS(I, MF, MRI);
  case TargetOpcode::G_INTRINSIC:
    return selectIntrinsic(I, MRI);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectIntrinsicWithSideEffects(I, MRI);
  case TargetOpcode::G_IMPLICIT_DEF: {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    const Register DstReg = I.getOperand(0).getReg();
    RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
  case TargetOpcode::G_BLOCK_ADDR: {
      materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
      I.eraseFromParent();
      I.setDesc(TII.get(AArch64::MOVaddrBA));
      auto MovMI =
          BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
                  I.getOperand(0).getReg())
                  I.getOperand(1).getBlockAddress(), 0,
      I.eraseFromParent();

  case AArch64::G_DUP: {
    if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
        AArch64::GPRRegBankID)

    LLT VecTy = MRI.getType(I.getOperand(0).getReg());
      I.setDesc(TII.get(AArch64::DUPv8i8gpr));
      I.setDesc(TII.get(AArch64::DUPv16i8gpr));
      I.setDesc(TII.get(AArch64::DUPv4i16gpr));
      I.setDesc(TII.get(AArch64::DUPv8i16gpr));

  case TargetOpcode::G_BUILD_VECTOR:
    return selectBuildVector(I, MRI);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_UNMERGE_VALUES:
  case TargetOpcode::G_SHUFFLE_VECTOR:
    return selectShuffleVector(I, MRI);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return selectExtractElt(I, MRI);
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectConcatVectors(I, MRI);
  case TargetOpcode::G_JUMP_TABLE:
    return selectJumpTable(I, MRI);
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMCPY_INLINE:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET:
    assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
    return selectMOPS(I, MRI);
bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {

bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMCPY_INLINE:
    Mopcode = AArch64::MOPSMemoryCopyPseudo;
  case TargetOpcode::G_MEMMOVE:
    Mopcode = AArch64::MOPSMemoryMovePseudo;
  case TargetOpcode::G_MEMSET:
    Mopcode = AArch64::MOPSMemorySetPseudo;

  const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
  const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());

  const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
  const auto &SrcValRegClass =
      IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;

  RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
  RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
  RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);

    Register DefDstPtr =
        MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
    Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
                   {DstPtrCopy, SizeCopy, SrcValCopy});
    Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
    MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
                   {DstPtrCopy, SrcValCopy, SizeCopy});
  assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
  Register JTAddr = I.getOperand(0).getReg();
  unsigned JTI = I.getOperand(1).getIndex();

  Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
  Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);

  auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
                                      {TargetReg, ScratchReg}, {JTAddr, Index})
                           .addJumpTableIndex(JTI);
  MIB.buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},
                 {static_cast<int64_t>(JTI)});
  MIB.buildInstr(AArch64::BR, {}, {TargetReg});
  I.eraseFromParent();
bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
  assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
  assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");

  Register DstReg = I.getOperand(0).getReg();
  unsigned JTI = I.getOperand(1).getIndex();
  MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
  I.eraseFromParent();

bool AArch64InstructionSelector::selectTLSGlobalValue(
  if (!STI.isTargetMachO())

  const auto &GlobalOp = I.getOperand(1);
  assert(GlobalOp.getOffset() == 0 &&
         "Shouldn't have an offset on TLS globals!");

  MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
  auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
                             {LoadGOT.getReg(0)})

  assert(Opcode == AArch64::BLR);
  Opcode = AArch64::BLRAAZ;

  RBI.constrainGenericRegister(I.getOperand(0).getReg(),
                               AArch64::GPR64RegClass,
  I.eraseFromParent();
MachineInstr *AArch64InstructionSelector::emitScalarToVector(
  auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});

  auto BuildFn = [&](unsigned SubregIndex) {
        .addImm(SubregIndex);
    return BuildFn(AArch64::bsub);
    return BuildFn(AArch64::hsub);
    return BuildFn(AArch64::ssub);
    return BuildFn(AArch64::dsub);

AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,
  LLT DstTy = MRI.getType(DstReg);
      getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg, MRI, TRI));
  if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {

  if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
      .addReg(SrcReg, 0, SubReg);
  RBI.constrainGenericRegister(DstReg, *RC, MRI);
bool AArch64InstructionSelector::selectMergeValues(
  assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());

  if (I.getNumOperands() != 3)

    Register DstReg = I.getOperand(0).getReg();
    Register Src1Reg = I.getOperand(1).getReg();
    Register Src2Reg = I.getOperand(2).getReg();
    auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
    MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
                                         Src2Reg, 1, RB, MIB);
    I.eraseFromParent();

  if (RB.getID() != AArch64::GPRRegBankID)

  auto *DstRC = &AArch64::GPR64RegClass;
  Register SubToRegDef = MRI.createVirtualRegister(DstRC);
          TII.get(TargetOpcode::SUBREG_TO_REG))
      .addUse(I.getOperand(1).getReg())
      .addImm(AArch64::sub_32);
  Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
          TII.get(TargetOpcode::SUBREG_TO_REG))
      .addUse(I.getOperand(2).getReg())
      .addImm(AArch64::sub_32);
  *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
       .addDef(I.getOperand(0).getReg())
  I.eraseFromParent();
                              const unsigned EltSize) {
    CopyOpc = AArch64::DUPi8;
    ExtractSubReg = AArch64::bsub;
    CopyOpc = AArch64::DUPi16;
    ExtractSubReg = AArch64::hsub;
    CopyOpc = AArch64::DUPi32;
    ExtractSubReg = AArch64::ssub;
    CopyOpc = AArch64::DUPi64;
    ExtractSubReg = AArch64::dsub;
    LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
    std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
  unsigned CopyOpc = 0;
  unsigned ExtractSubReg = 0;
    LLVM_DEBUG(
        dbgs() << "Couldn't determine lane copy opcode for instruction.\n");

      getRegClassForTypeOnBank(ScalarTy, DstRB, /*GetAllRegSet=*/true);
    LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");

  const LLT &VecTy = MRI.getType(VecReg);
      getRegClassForTypeOnBank(VecTy, VecRB, /*GetAllRegSet=*/true);
    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");

    DstReg = MRI.createVirtualRegister(DstRC);

    auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
                    .addReg(VecReg, 0, ExtractSubReg);
    RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);

      VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
  if (!ScalarToVector)

  MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);

  RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
bool AArch64InstructionSelector::selectExtractElt(
  assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
         "unexpected opcode!");
  Register DstReg = I.getOperand(0).getReg();
  const LLT NarrowTy = MRI.getType(DstReg);
  const Register SrcReg = I.getOperand(1).getReg();
  const LLT WideTy = MRI.getType(SrcReg);
         "source register size too small!");
  assert(!NarrowTy.isVector() && "cannot extract vector into vector!");

  assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
  if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {

  unsigned LaneIdx = VRegAndVal->Value.getSExtValue();

  MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
  I.eraseFromParent();
bool AArch64InstructionSelector::selectSplitVectorUnmerge(
  unsigned NumElts = I.getNumOperands() - 1;
  Register SrcReg = I.getOperand(NumElts).getReg();
  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
  const LLT SrcTy = MRI.getType(SrcReg);

  assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
    LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");

      *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
  for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
    Register Dst = I.getOperand(OpIdx).getReg();
        emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
  I.eraseFromParent();
bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
  assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "unexpected opcode");

  if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
          AArch64::FPRRegBankID ||
      RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
          AArch64::FPRRegBankID) {
    LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
                         "currently unsupported.\n");

  unsigned NumElts = I.getNumOperands() - 1;
  Register SrcReg = I.getOperand(NumElts).getReg();
  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
  const LLT WideTy = MRI.getType(SrcReg);
         "can only unmerge from vector or s128 types!");
         "source register size too small!");

    return selectSplitVectorUnmerge(I, MRI);

  unsigned CopyOpc = 0;
  unsigned ExtractSubReg = 0;

  unsigned NumInsertRegs = NumElts - 1;
                             *RBI.getRegBank(SrcReg, MRI, TRI));
  assert(Found && "expected to find last operand's subreg idx");
  for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
    Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
    *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
    Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
            TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)

  Register CopyTo = I.getOperand(0).getReg();
  auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
                       .addReg(InsertRegs[0], 0, ExtractSubReg);

  unsigned LaneIdx = 1;
  for (Register InsReg : InsertRegs) {
    Register CopyTo = I.getOperand(LaneIdx).getReg();
      MRI.getRegClassOrNull(I.getOperand(1).getReg());
  RBI.constrainGenericRegister(CopyTo, *RC, MRI);
  I.eraseFromParent();
bool AArch64InstructionSelector::selectConcatVectors(
  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
         "Unexpected opcode");
  Register Dst = I.getOperand(0).getReg();
  Register Op1 = I.getOperand(1).getReg();
  Register Op2 = I.getOperand(2).getReg();
  MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
  I.eraseFromParent();
AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,

MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
    RC = &AArch64::FPR128RegClass;
    Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
    RC = &AArch64::FPR64RegClass;
    Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
    RC = &AArch64::FPR32RegClass;
    Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
    RC = &AArch64::FPR16RegClass;
    Opc = AArch64::LDRHui;
    LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "

  auto &MF = MIRBuilder.getMF();
  unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
  if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
    LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
        MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
    LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
                   .addConstantPoolIndex(
static std::pair<unsigned, unsigned>
getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
  unsigned Opc, SubregIdx;
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (EltSize == 8) {
      Opc = AArch64::INSvi8gpr;
      SubregIdx = AArch64::bsub;
    } else if (EltSize == 16) {
      Opc = AArch64::INSvi16gpr;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 32) {
      Opc = AArch64::INSvi32gpr;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 64) {
      Opc = AArch64::INSvi64gpr;
      SubregIdx = AArch64::dsub;
    } else {
      llvm_unreachable(/* ... */);
    }
  } else {
    if (EltSize == 8) {
      Opc = AArch64::INSvi8lane;
      SubregIdx = AArch64::bsub;
    } else if (EltSize == 16) {
      Opc = AArch64::INSvi16lane;
      SubregIdx = AArch64::hsub;
    } else if (EltSize == 32) {
      Opc = AArch64::INSvi32lane;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 64) {
      Opc = AArch64::INSvi64lane;
      SubregIdx = AArch64::dsub;
    } else {
      llvm_unreachable(/* ... */);
    }
  }
  return std::make_pair(Opc, SubregIdx);
}
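// A minimal usage sketch (assumed caller shape, not a line from this file):
// the returned pair feeds a lane insert. The INSvi*gpr forms move a GPR into
// a vector lane, the INSvi*lane forms copy a lane from another vector, and
// SubregIdx names the FPR subregister wide enough to hold one element.
//
//   unsigned Opc, SubIdx;
//   std::tie(Opc, SubIdx) = getInsertVecEltOpInfo(RB, EltTy.getSizeInBits());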
MachineInstr *AArch64InstructionSelector::emitInstr(
    unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
    std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
    const ComplexRendererFns &RenderFns) const {
  assert(Opcode && "Expected an opcode?");
  assert(/* ... */ &&
         "Function should only be used to produce selected instructions!");
  auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
  if (RenderFns)
    for (auto &Fn : *RenderFns)
      Fn(MI);
  // ...
}

MachineInstr *AArch64InstructionSelector::emitAddSub(
    const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
    Register Dst, MachineOperand &LHS, MachineOperand &RHS,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  auto Ty = MRI.getType(LHS.getReg());
  // ...
  unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
  bool Is32Bit = Size == 32;

  // INSTRri form with positive arithmetic immediate.
  if (auto Fns = selectArithImmed(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);

  // INSTRri form with negative arithmetic immediate.
  if (auto Fns = selectNegArithImmed(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);

  // INSTRrx form.
  if (auto Fns = selectArithExtendedRegister(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);

  // INSTRrs form.
  if (auto Fns = selectShiftedRegister(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);
  return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
                   MIRBuilder);
}
MachineInstr *AArch64InstructionSelector::emitADD(
    Register DefReg, MachineOperand &LHS, MachineOperand &RHS,
    MachineIRBuilder &MIRBuilder) const {
  const std::array<std::array<unsigned, 2>, 5> OpcTable{
      {{AArch64::ADDXri, AArch64::ADDWri},
       {AArch64::ADDXrs, AArch64::ADDWrs},
       {AArch64::ADDXrr, AArch64::ADDWrr},
       {AArch64::SUBXri, AArch64::SUBWri},
       {AArch64::ADDXrx, AArch64::ADDWrx}}};
  return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
}

MachineInstr *AArch64InstructionSelector::emitADDS(
    Register Dst, MachineOperand &LHS, MachineOperand &RHS,
    MachineIRBuilder &MIRBuilder) const {
  const std::array<std::array<unsigned, 2>, 5> OpcTable{
      {{AArch64::ADDSXri, AArch64::ADDSWri},
       {AArch64::ADDSXrs, AArch64::ADDSWrs},
       {AArch64::ADDSXrr, AArch64::ADDSWrr},
       {AArch64::SUBSXri, AArch64::SUBSWri},
       {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
  return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
}

MachineInstr *AArch64InstructionSelector::emitSUBS(
    Register Dst, MachineOperand &LHS, MachineOperand &RHS,
    MachineIRBuilder &MIRBuilder) const {
  const std::array<std::array<unsigned, 2>, 5> OpcTable{
      {{AArch64::SUBSXri, AArch64::SUBSWri},
       {AArch64::SUBSXrs, AArch64::SUBSWrs},
       {AArch64::SUBSXrr, AArch64::SUBSWrr},
       {AArch64::ADDSXri, AArch64::ADDSWri},
       {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
  return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
}
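// The five rows of each OpcTable mirror the operand forms emitAddSub tries,
// in order of index: [0] positive 12-bit immediate (ri), [1] shifted register
// (rs), [2] plain register-register (rr), [3] the opposite opcode with a
// negated immediate (so ADD falls back to SUBri and vice versa), and [4]
// extended register (rx). Within a row, Is32Bit selects the W-register
// opcode over the X-register one.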
MachineInstr *AArch64InstructionSelector::emitADCS(
    Register Dst, MachineOperand &LHS, MachineOperand &RHS,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  // ...
  bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
  static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
  return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
}

MachineInstr *AArch64InstructionSelector::emitSBCS(
    Register Dst, MachineOperand &LHS, MachineOperand &RHS,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  // ...
  bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
  static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
  return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
}

MachineInstr *AArch64InstructionSelector::emitCMN(
    MachineOperand &LHS, MachineOperand &RHS,
    MachineIRBuilder &MIRBuilder) const {
  // ...
  bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
  auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
  return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
}

MachineInstr *AArch64InstructionSelector::emitTST(
    MachineOperand &LHS, MachineOperand &RHS,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  // ...
  bool Is32Bit = (RegSize == 32);
  const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
                                   {AArch64::ANDSXrs, AArch64::ANDSWrs},
                                   {AArch64::ANDSXrr, AArch64::ANDSWrr}};

  // ANDSri form: fold a constant RHS when it is a valid logical immediate.
  // ... (constant lookup elided)
    int64_t Imm = ValAndVReg->Value.getSExtValue();
    // ...
    auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
    // ...

  // ANDSrs form.
  if (auto Fns = selectLogicalShiftedRegister(RHS))
    return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
  // ANDSrr form.
  return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
}
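// emitTST materializes a flag-setting AND whose integer result is unused:
// ANDS with a scratch destination is the machine form of the TST alias
// (e.g. "tst w0, w1" is "ands wzr, w0, w1"), so callers consume only the
// NZCV flags it sets.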
MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
  // ...
  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
  // Fold the compare into a CMN or TST if possible.
  if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
    return FoldCmp;
  auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
  return emitSUBS(Dst, LHS, RHS, MIRBuilder);
}

MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
    Register Dst, CmpInst::Predicate Pred,
    MachineIRBuilder &MIRBuilder) const {
  // ...
  LLT Ty = MRI.getType(Dst);
  assert(/* ... */ && "Expected a 32-bit scalar register?");
  // ...
  const Register ZReg = AArch64::WZR;
  // Single-condition predicates need one CSINC against the zero register.
  if (/* ... */)
    return emitCSINC(Dst, ZReg, ZReg, InvCC1, MIRBuilder);
  // Two-condition predicates materialize both conditions and OR them.
  emitCSINC(Def1Reg, ZReg, ZReg, InvCC1, MIRBuilder);
  emitCSINC(Def2Reg, ZReg, ZReg, InvCC2, MIRBuilder);
  auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
  // ...
}

MachineInstr *AArch64InstructionSelector::emitFPCompare(
    Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
    std::optional<CmpInst::Predicate> Pred) const {
  // ...
  LLT Ty = MRI.getType(LHS);
  // ...
  assert(OpSize == 16 || OpSize == 32 || OpSize == 64);
  // ...
  // An equality compare against +0.0 can use the FCMP-with-zero form.
  if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
    // ...
    ShouldUseImm = true;
  }
  unsigned CmpOpcTbl[2][3] = {
      {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
      {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
  unsigned CmpOpc =
      CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];
MachineInstr *AArch64InstructionSelector::emitVectorConcat(
    std::optional<Register> Dst, Register Op1, Register Op2,
    MachineIRBuilder &MIRBuilder) const {
  // ...
  const LLT Op1Ty = MRI.getType(Op1);
  const LLT Op2Ty = MRI.getType(Op2);

  if (Op1Ty != Op2Ty) {
    LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
    return nullptr;
  }
  assert(Op1Ty.isVector() && "Expected a vector for vector concat");

  if (/* ... */) {
    LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
    return nullptr;
  }

  // ... (compute ScalarTy and the destination register class DstRC)
  MachineInstr *WidenedOp1 =
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
  MachineInstr *WidenedOp2 =
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
  if (!WidenedOp1 || !WidenedOp2) {
    LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
    return nullptr;
  }

  // Now do the insert of the upper element.
  unsigned InsertOpc, InsSubRegIdx;
  std::tie(InsertOpc, InsSubRegIdx) = getInsertVecEltOpInfo(/* ... */);

  if (!Dst)
    Dst = MRI.createVirtualRegister(DstRC);
  // ...
}

MachineInstr *AArch64InstructionSelector::emitCSINC(
    Register Dst, Register Src1, Register Src2, AArch64CC::CondCode Pred,
    MachineIRBuilder &MIRBuilder) const {
  // ...
  unsigned Size;
  if (auto *RC = MRI.getRegClassOrNull(Dst))
    Size = TRI.getRegSizeInBits(*RC);
  else
    Size = MRI.getType(Dst).getSizeInBits();
  assert(Size <= 64 && "Expected 64 bits or less only!");
  static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
  unsigned Opc = OpcTable[Size == 64];
  auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
  // ...
}
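// For example (a hedged sketch, not a line from this file): materializing
// (a == b) as an i32 first emits SUBS to set the flags, then
//   emitCSINC(Dst, WZR, WZR, AArch64CC::NE, MIB);
// CSINC writes Src1 when the condition holds and Src2 + 1 otherwise, so with
// both sources the zero register it yields 1 exactly when NE fails, i.e.
// when a == b -- which is why callers pass the *inverted* condition code.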
MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
                                                      Register CarryReg) {
  // ...
  unsigned Opcode = I.getOpcode();

  // If the instruction is a SUB, we need to negate the carry, because
  // borrowing is indicated by carry-flag == 0.
  bool NeedsNegatedCarry =
      (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);

  // If the previous instruction already produces the correct carry, avoid
  // emitting anything to set it up again.
  if (SrcMI == I.getPrevNode()) {
    if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
      bool ProducesNegatedCarry = CarrySrcMI->isSub();
      if (NeedsNegatedCarry == ProducesNegatedCarry &&
          CarrySrcMI->isUnsigned() &&
          CarrySrcMI->getCarryOutReg() == CarryReg &&
          selectAndRestoreState(*SrcMI))
        return nullptr;
    }
  }

  Register DeadReg = MRI->createVirtualRegister(&AArch64::GPR32RegClass);

  if (NeedsNegatedCarry) {
    // (0 - Carry) sets the carry flag iff Carry == 0.
    return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
  }

  // (Carry - 1) sets the carry flag iff Carry != 0.
  auto Fns = select12BitValueWithLeftShift(1);
  return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
}

bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
                                                  MachineRegisterInfo &MRI) {
  auto &CarryMI = cast<GAddSubCarryOut>(I);

  if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&I)) {
    // Set NZCV carry according to the carry-in VReg.
    emitCarryIn(I, CarryInMI->getCarryInReg());
  }

  // Emit the operation and get the correct condition code.
  auto OpAndCC = emitOverflowOp(I.getOpcode(), CarryMI.getDstReg(),
                                CarryMI.getLHS(), CarryMI.getRHS(), MIB);

  Register CarryOutReg = CarryMI.getCarryOutReg();

  // Don't convert the carry-out to a VReg if it is never used.
  if (!MRI.use_nodbg_empty(CarryOutReg)) {
    // ...
    emitCSINC(CarryOutReg, ZReg, ZReg,
              getInvertedCondCode(OpAndCC.second), MIB);
  }

  I.eraseFromParent();
  return true;
}
std::pair<MachineInstr *, AArch64CC::CondCode>
AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
                                           MachineOperand &LHS,
                                           MachineOperand &RHS,
                                           MachineIRBuilder &MIRBuilder) const {
  switch (Opcode) {
  default:
    llvm_unreachable(/* ... */);
  case TargetOpcode::G_SADDO:
    return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
  case TargetOpcode::G_UADDO:
    return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
  case TargetOpcode::G_SSUBO:
    return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
  case TargetOpcode::G_USUBO:
    return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
  case TargetOpcode::G_SADDE:
    return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
  case TargetOpcode::G_UADDE:
    return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
  case TargetOpcode::G_SSUBE:
    return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
  case TargetOpcode::G_USUBE:
    return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
  }
}
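// Condition-code choice, for reference: signed overflow is reported in the
// V flag (VS); an unsigned add's carry-out lands in C (HS = carry set); and
// an unsigned subtract borrows when C is clear (LO = carry clear). The pair
// returned here is what selectOverflowOp turns into a 0/1 value with CSINC.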
static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
                               bool WillNegate, MachineRegisterInfo &MRI,
                               unsigned Depth = 0) {
  if (!MRI.hasOneNonDBGUse(Val))
    return false;
  // ...
  if (isa<GAnyCmp>(ValDef)) {
    CanNegate = true;
    MustBeFirst = false;
    return true;
  }
  // ...
  if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
    bool IsOR = Opcode == TargetOpcode::G_OR;
    // ... (recurse into both operands)
    if (MustBeFirstL && MustBeFirstR)
      return false;
    if (IsOR) {
      // For an OR expression we need to be able to naturally negate at least
      // one side or we cannot do the transformation at all.
      if (!CanNegateL && !CanNegateR)
        return false;
      // ...
      CanNegate = WillNegate && CanNegateL && CanNegateR;
      // If we cannot naturally negate the whole expression, emit it first.
      MustBeFirst = !CanNegate;
    } else {
      assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
      CanNegate = false;
      MustBeFirst = MustBeFirstL || MustBeFirstR;
    }
    return true;
  }
  return false;
}
MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
    Register LHS, Register RHS, CmpInst::Predicate CC,
    AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
    MachineIRBuilder &MIB) const {
  // ...
  LLT OpTy = MRI.getType(LHS);
  unsigned CCmpOpc;
  std::optional<ValueAndVReg> C;
  if (/* ... integer predicate ... */) {
    // ...
    if (!C || C->Value.sgt(31) || C->Value.slt(-31))
      CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
    else if (C->Value.ule(31))
      CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
    else
      CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMNWi : AArch64::CCMNXi;
  } else {
    switch (OpTy.getSizeInBits()) {
    case 16:
      assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons");
      CCmpOpc = AArch64::FCCMPHrr;
      break;
    case 32:
      CCmpOpc = AArch64::FCCMPSrr;
      break;
    case 64:
      CCmpOpc = AArch64::FCCMPDrr;
      break;
    default:
      return nullptr;
    }
  }
  // ... (build the CCmp from CCmpOpc and LHS)
  if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
    CCmp.addImm(C->Value.getZExtValue());
  else if (CCmpOpc == AArch64::CCMNWi || CCmpOpc == AArch64::CCMNXi)
    CCmp.addImm(C->Value.abs().getZExtValue());
  else
    CCmp.addReg(RHS);
  // ...
}
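// CCMP encodes only a 5-bit unsigned immediate, hence the [-31, 31] screen
// above: values in 0..31 use the CCMP immediate form, small negatives are
// flipped to CCMN with the absolute value, and anything else falls back to
// the register form.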
MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
    Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
    AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
  // We're at a tree leaf: produce a (conditional) comparison operation.
  if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
    // ...
    if (isa<GICmp>(Cmp)) {
      // ...
    }
    // ...
    ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
    // ...
    // Produce a normal comparison if we are first in the chain.
    if (!CCOp) {
      auto Dst = MRI.cloneVirtualRegister(LHS);
      if (isa<GICmp>(Cmp))
        return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB);
      return emitFPCompare(Cmp->getOperand(2).getReg(),
                           Cmp->getOperand(3).getReg(), MIB);
    }
    // Otherwise, produce a conditional comparison chained onto CCOp.
    // ...
  }

  assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
  // ...
  bool IsOR = Opcode == TargetOpcode::G_OR;

  // ...
  assert(ValidL && "Valid conjunction/disjunction tree");
  // ...
  assert(ValidR && "Valid conjunction/disjunction tree");

  // Swap the sub-tree that must come first to the right side.
  if (MustBeFirstL) {
    assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
    // ...
  }

  bool NegateAfterAll;
  if (Opcode == TargetOpcode::G_OR) {
    // Swap the sub-tree that we can negate naturally to the left.
    if (!CanNegateL) {
      assert(CanNegateR && "at least one side must be negatable");
      assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
      // ... (swap L and R; only the right side may be negated afterwards)
      NegateAfterR = true;
    } else {
      // ...
      NegateR = CanNegateR;
      NegateAfterR = !CanNegateR;
    }
    // ...
    NegateAfterAll = !Negate;
  } else {
    assert(Opcode == TargetOpcode::G_AND &&
           "Valid conjunction/disjunction tree");
    assert(!Negate && "Valid conjunction/disjunction tree");
    // ...
    NegateAfterR = false;
    NegateAfterAll = false;
  }
  // ...
}

MachineInstr *AArch64InstructionSelector::emitConjunction(
    Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
  bool DummyCanNegate;
  bool DummyMustBeFirst;
  if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false,
                          /* ... */))
    return nullptr;
  // ...
}

bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
                                                         MachineInstr &CondMI) {
  // ...
}

bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
  // ...
  // We can't fold a condition with more than one non-debug use, unless every
  // other user is also a G_SELECT we can handle.
  if (!MRI.hasOneNonDBGUse(CondDefReg)) {
    for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
      // ...
      if (UI.getOpcode() != TargetOpcode::G_SELECT)
        return false;
    }
  }
  // ...
  unsigned CondOpc = CondDef->getOpcode();
  if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
    if (tryOptSelectConjunction(I, *CondDef))
      return true;
    return false;
  }
  // ...
  if (CondOpc == TargetOpcode::G_ICMP) {
    // ...
  }
  // ...
  emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
             I.getOperand(3).getReg(), CondCode, MIB);
  I.eraseFromParent();
  return true;
}

MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
    MachineIRBuilder &MIRBuilder) const {
  assert(/* ... */ && "Unexpected MachineOperand");
  // ...
  // cmp x, (neg y) can become cmn x, y when the predicate allows it.
    return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
  // ...
  // A compare of a G_AND against zero can fold into a TST.
  if (/* ... */ LHSDef->getOpcode() == TargetOpcode::G_AND) {
    // ...
    if (!ValAndVReg || ValAndVReg->Value != 0)
      return nullptr;
    // ...
  }
  return nullptr;
}
bool AArch64InstructionSelector::selectShuffleVector(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  Register Src1Reg = I.getOperand(1).getReg();
  const LLT Src1Ty = MRI.getType(Src1Reg);
  Register Src2Reg = I.getOperand(2).getReg();
  const LLT Src2Ty = MRI.getType(Src2Reg);
  // ...
  if (/* ... */) {
    LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
    return false;
  }

  // Build the byte-index constants for a TBL lookup.
  for (int Val : Mask) {
    // For now, any undef indexes we'll just assume to be 0. This should be
    // optimized in future, e.g. to select DUP etc.
    Val = Val < 0 ? 0 : Val;
    for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
      // ...
    }
  }
  // ...
  if (/* 64-bit destination */) {
    // ...
    auto Concat = emitVectorConcat(std::nullopt, Src1Reg, Src2Reg, MIB);
    // ...
    IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass, /* ... */);
    auto TBL1 = MIB.buildInstr(
        AArch64::TBLv16i8One, {&AArch64::FPR128RegClass}, /* ... */);
    // ...
    auto Copy = MIB.buildInstr(TargetOpcode::COPY, /* ... */)
                    .addReg(TBL1.getReg(0), 0, AArch64::dsub);
    RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
    I.eraseFromParent();
    return true;
  }
  // ...
  auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
                             /* ... */);
  // ...
  I.eraseFromParent();
  return true;
}

MachineInstr *AArch64InstructionSelector::emitLaneInsert(
    std::optional<Register> DstReg, Register SrcReg, Register EltReg,
    unsigned LaneIdx, const RegisterBank &RB,
    MachineIRBuilder &MIRBuilder) const {
  MachineInstr *InsElt = nullptr;
  // ...
  if (!DstReg)
    DstReg = MRI.createVirtualRegister(DstRC);

  unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
  // ...
  if (RB.getID() == AArch64::FPRRegBankID) {
    auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
    InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
                 /* ... */
                 .addUse(InsSub->getOperand(0).getReg())
                 /* ... */;
  } else {
    InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
                 /* ... */;
  }
  // ...
}
bool AArch64InstructionSelector::selectUSMovFromExtend(
    MachineInstr &MI, MachineRegisterInfo &MRI) {
  if (MI.getOpcode() != TargetOpcode::G_SEXT &&
      MI.getOpcode() != TargetOpcode::G_ZEXT &&
      MI.getOpcode() != TargetOpcode::G_ANYEXT)
    return false;
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
  const Register DefReg = MI.getOperand(0).getReg();
  const LLT DstTy = MRI.getType(DefReg);
  // ...
  if (DstSize != 32 && DstSize != 64)
    return false;

  // The extend must be fed by a G_EXTRACT_VECTOR_ELT.
  /* ... */ MI.getOperand(1).getReg(), MRI);
  // ...
  const LLT &VecTy = MRI.getType(Src0);

  if (/* source vector narrower than 128 bits */) {
    const MachineInstr *ScalarToVector = emitScalarToVector(
        VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
    assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
    // ...
  }

  // Pick SMOV/UMOV by destination size and element size (dispatch elided):
    Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
  // ...
    Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
  // ...
    Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
  // ...
    Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
  // ...
    Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;

  // UMOV into a 64-bit destination already zeros the high half, so emit the
  // 32-bit form and widen with SUBREG_TO_REG.
  if (DstSize == 64 && !IsSigned) {
    Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
    MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
    ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
               /* ... */
               .addImm(AArch64::sub_32);
    RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
  } else {
    ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
  }
  // ...
  MI.eraseFromParent();
  return true;
}
MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
  unsigned int Op;
  if (DstSize == 128) {
    if (Bits.getHiBits(64) != Bits.getLoBits(64))
      return nullptr;
    Op = AArch64::MOVIv16b_ns;
  } else {
    Op = AArch64::MOVIv8b_ns;
  }
  // ... (check the pattern is encodable and compute the immediate Val)
  auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
  // ...
}

MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
    bool Inv) {
  unsigned int Op;
  if (DstSize == 128) {
    if (Bits.getHiBits(64) != Bits.getLoBits(64))
      return nullptr;
    Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
  } else {
    Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
  }
  // ... (encode and emit as above)
}

MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
    bool Inv) {
  unsigned int Op;
  if (DstSize == 128) {
    if (Bits.getHiBits(64) != Bits.getLoBits(64))
      return nullptr;
    Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
  } else {
    Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
  }
  // ... (encode and emit as above)
}

MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
  unsigned int Op;
  if (DstSize == 128) {
    if (Bits.getHiBits(64) != Bits.getLoBits(64))
      return nullptr;
    Op = AArch64::MOVIv2d_ns;
  } else {
    Op = AArch64::MOVID;
  }
  // ... (check the pattern is encodable and compute the immediate Val)
  auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
  // ...
}

MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
    bool Inv) {
  unsigned int Op;
  if (DstSize == 128) {
    if (Bits.getHiBits(64) != Bits.getLoBits(64))
      return nullptr;
    Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
  } else {
    Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
  }
  // ... (encode and emit as above)
}

MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
  unsigned int Op;
  bool IsWide = false;
  if (DstSize == 128) {
    if (Bits.getHiBits(64) != Bits.getLoBits(64))
      return nullptr;
    Op = AArch64::FMOVv4f32_ns;
    IsWide = true;
  } else {
    Op = AArch64::FMOVv2f32_ns;
  }
  // ... (retry as a wide f64 pattern if needed)
    Op = AArch64::FMOVv2f64_ns;
  // ...
  auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
  // ...
}
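// All of these helpers share one gate: a 128-bit destination is only legal
// when the high and low 64-bit halves of Bits are identical, because the
// MOVI/MVNI/FMOV "modified immediate" forms replicate one 64-bit (or
// narrower) pattern across the whole vector. For example, a v4i32 splat of
// 0x00FF00FF has equal halves and can use MOVI; a constant whose halves
// differ must take the constant-pool path in emitConstantVector instead.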
bool AArch64InstructionSelector::selectIndexedExtLoad(
    MachineInstr &MI, MachineRegisterInfo &MRI) {
  auto &ExtLd = cast<GIndexedAnyExtLoad>(MI);
  // ...
  Register WriteBack = ExtLd.getWritebackReg();
  // ...
  LLT Ty = MRI.getType(Dst);
  // ...
  unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
  bool IsPre = ExtLd.isPre();
  bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
  bool InsertIntoXReg = false;
  // ... (IsDst64, s32/s64, Opc and NewLdDstTy are set up here)

  if (MemSizeBits == 8) {
    if (IsSExt) {
      if (IsDst64)
        Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
      else
        Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
      NewLdDstTy = IsDst64 ? s64 : s32;
    } else {
      Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
      InsertIntoXReg = IsDst64;
      // ...
    }
  } else if (MemSizeBits == 16) {
    if (IsSExt) {
      if (IsDst64)
        Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
      else
        Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
      NewLdDstTy = IsDst64 ? s64 : s32;
    } else {
      Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
      InsertIntoXReg = IsDst64;
      // ...
    }
  } else if (MemSizeBits == 32) {
    if (IsSExt)
      Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
    else {
      Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
      InsertIntoXReg = IsDst64;
    }
    // ...
  }

  // TODO: extending loads to the FPR bank are not handled here.
  if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
    return false;

  // ... (the offset must be a constant)
      .addImm(Cst->getSExtValue());
  // ...
  // If the destination is 64 bits but the load was 32-bit-or-smaller and
  // zero/any-extending, widen the result with SUBREG_TO_REG.
  if (InsertIntoXReg) {
    auto SubToReg = MIB.buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
                        /* ... */
                        .addImm(AArch64::sub_32);
    RBI.constrainGenericRegister(SubToReg.getReg(0), AArch64::GPR64RegClass,
                                 MRI);
  }
  // ...
  MI.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
                                                   MachineRegisterInfo &MRI) {
  auto &Ld = cast<GIndexedLoad>(MI);
  // ...
  Register WriteBack = Ld.getWritebackReg();
  // ...
  assert(MRI.getType(Dst).getSizeInBits() <= 128 &&
         "Unexpected type for indexed load");
  unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();

  if (MemSize < MRI.getType(Dst).getSizeInBytes())
    return selectIndexedExtLoad(MI, MRI);

  unsigned Opc = 0;
  if (Ld.isPre()) {
    static constexpr unsigned GPROpcodes[] = {
        AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
        AArch64::LDRXpre};
    static constexpr unsigned FPROpcodes[] = {
        AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
        AArch64::LDRQpre};
    if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
      Opc = FPROpcodes[Log2_32(MemSize)];
    else
      Opc = GPROpcodes[Log2_32(MemSize)];
  } else {
    static constexpr unsigned GPROpcodes[] = {
        AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
        AArch64::LDRXpost};
    static constexpr unsigned FPROpcodes[] = {
        AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
        AArch64::LDRDpost, AArch64::LDRQpost};
    if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
      Opc = FPROpcodes[Log2_32(MemSize)];
    else
      Opc = GPROpcodes[Log2_32(MemSize)];
  }
  // ... (the offset must be a constant)
  auto LdMI =
      MIB.buildInstr(Opc, {WriteBack, Dst}, {Base}).addImm(Cst->getSExtValue());
  // ...
  MI.eraseFromParent();
  return true;
}
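// Both opcode tables are indexed by Log2_32(MemSize): a 1/2/4/8(/16)-byte
// access picks the B/H/W/X (or B/H/S/D/Q) pre- or post-indexed variant, and
// the selected instruction defines two registers, the written-back base and
// the loaded value.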
bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
                                                    MachineRegisterInfo &MRI) {
  // ... (Dst is the writeback register; Val and Base are the stored value
  // and address)
  LLT ValTy = MRI.getType(Val);
  // ...
  unsigned Opc = 0;
  if (I.isPre()) {
    static constexpr unsigned GPROpcodes[] = {
        AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
        AArch64::STRXpre};
    static constexpr unsigned FPROpcodes[] = {
        AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
        AArch64::STRQpre};
    if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
      Opc = FPROpcodes[/* log2 of the value size */];
    else
      Opc = GPROpcodes[/* log2 of the value size */];
  } else {
    static constexpr unsigned GPROpcodes[] = {
        AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
        AArch64::STRXpost};
    static constexpr unsigned FPROpcodes[] = {
        AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
        AArch64::STRDpost, AArch64::STRQpost};
    if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
      Opc = FPROpcodes[/* log2 of the value size */];
    else
      Opc = GPROpcodes[/* log2 of the value size */];
  }
  // ... (the offset must be a constant)
  auto Str =
      MIB.buildInstr(Opc, {Dst}, {Val, Base}).addImm(Cst->getSExtValue());
  Str.cloneMemRefs(I);
  // ...
  I.eraseFromParent();
  return true;
}

MachineInstr *AArch64InstructionSelector::emitConstantVector(
    Register Dst, Constant *CV, MachineIRBuilder &MIRBuilder,
    MachineRegisterInfo &MRI) {
  LLT DstTy = MRI.getType(Dst);
  unsigned DstSize = DstTy.getSizeInBits();
  // ...
  // All-zeros vectors materialize as MOVI #0.
  if (DstSize == 128) {
    auto Mov =
        MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
    // ...
  }
  if (DstSize == 64) {
    auto Mov =
        MIRBuilder
            .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
            /* ... */;
    auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, /* ... */)
                    .addReg(Mov.getReg(0), 0, AArch64::dsub);
    RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
    // ...
  }

  // ... (try the MOVI/MVNI modified-immediate helpers on the raw bits)
  if (auto *NewOp = TryMOVIWithBits(DefBits))
    return NewOp;

  // Try the negated bit pattern with an FNEG appended.
  auto TryWithFNeg = [&](APInt DefBits, int NumBits,
                         unsigned NegOpc) -> MachineInstr * {
    // Flip the sign bit of each NumBits-wide sub-element.
    APInt NegBits(DstSize, 0);
    unsigned NumElts = DstSize / NumBits;
    for (unsigned i = 0; i < NumElts; i++)
      NegBits |= Neg << (NumBits * i);
    NegBits = DefBits ^ NegBits;

    // If the flipped constant encodes as a MOVI, emit it and FNEG the result.
    if (auto *NewOp = TryMOVIWithBits(NegBits)) {
      Register NewDst = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
      // ...
      return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst});
    }
    return nullptr;
  };
  MachineInstr *R;
  if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
      (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
      (STI.hasFullFP16() &&
       (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
    return R;

  // Fall back to a constant-pool load.
  if (!CPLoad) {
    LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
    return nullptr;
  }
  auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
  RBI.constrainGenericRegister(
      Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
  // ...
}
bool AArch64InstructionSelector::tryOptConstantBuildVec(
    MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  // ...
  assert(DstSize <= 128 && "Unexpected build_vec type!");
  // ...
  // Check if we're building a constant vector, in which case we want to
  // generate a constant pool load instead of a vector insert sequence.
  for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
    // Try to find G_CONSTANT or G_FCONSTANT.
    auto *OpMI =
        getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
    if (OpMI)
      Csts.emplace_back(
          const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
    else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
                                  I.getOperand(Idx).getReg(), MRI)))
      Csts.emplace_back(
          const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
    else
      return false;
  }
  // ...
  if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
    return false;
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  // Given:
  //   %vec = G_BUILD_VECTOR %elt, %undef, %undef, ..., %undef
  // select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
  Register Dst = I.getOperand(0).getReg();
  Register EltReg = I.getOperand(1).getReg();
  LLT EltTy = MRI.getType(EltReg);
  // ...
  // Every operand past the first element must be undef.
  if (/* ... any operand where ... */
      !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(), MRI))
    return false;
  // ...
  const TargetRegisterClass *DstRC =
      getRegClassForTypeOnBank(MRI.getType(Dst), DstRB);
  // ...
  auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
                         /* ... */;
  I.eraseFromParent();
  // ...
  return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
}

bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
                                                   MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  // Until we port more of the optimized selections, for now just use a vector
  // insert sequence.
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
  // ...
  if (tryOptConstantBuildVec(I, DstTy, MRI))
    return true;
  if (tryOptBuildVecToSubregToReg(I, MRI))
    return true;

  if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
    return false; // Don't support all element types yet.
  // ...
  // Emit the first element as a scalar-to-vector, then insert the rest.
  /* ... */ I.getOperand(1).getReg(), MIB);
  // ...
  for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
    // ...
    Register OpReg = I.getOperand(i).getReg();
    // Do not emit inserts for undefs.
    if (!getOpcodeDef<GImplicitDef>(OpReg, MRI)) {
      PrevMI = &*emitLaneInsert(std::nullopt, DstVec, OpReg, i - 1, RB, MIB);
      // ...
    }
  }

  // If DstTy is smaller than 128 bits, extract the result via a subregister
  // copy.
  if (DstSize < 128) {
    const TargetRegisterClass *RC =
        getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
    // ...
    if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
      // ...
    }
    // ...
    if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
      LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
                        /* ... */);
      return false;
    }

    Register DstReg = I.getOperand(0).getReg();
    // ...
    MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);
    // ...
    RBI.constrainGenericRegister(DstReg, *RC, MRI);
  } else {
    // ...
    if (PrevMI == ScalarToVec && DstReg.isVirtual()) {
      const TargetRegisterClass *RC =
          getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
      RBI.constrainGenericRegister(DstReg, *RC, MRI);
    }
  }
  I.eraseFromParent();
  return true;
}
bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
                                                           unsigned NumVecs,
                                                           MachineInstr &I) {
  assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
  assert(Opc && "Expected an opcode?");
  assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
  // ...
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  // ...
  assert(/* ... */ && "Destination must be 64 bits or 128 bits?");
  unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
  auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
  assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
  // ... (build the LDx instruction as Load)
  Load.cloneMemRefs(I);
  // ...
  Register SelectedLoadDst = Load->getOperand(0).getReg();
  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
    auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
                   .addReg(SelectedLoadDst, 0, SubReg + Idx);
    // ...
  }
  return true;
}

bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
    unsigned Opc, unsigned NumVecs, MachineInstr &I) {
  assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
  assert(Opc && "Expected an opcode?");
  assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
  // ...
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  // ... (Narrow is true for 64-bit destinations, which get widened to Q regs)
  auto FirstSrcRegIt = I.operands_begin() + NumVecs + 1;
  std::transform(FirstSrcRegIt, FirstSrcRegIt + NumVecs, Regs.begin(),
                 [](auto MO) { return MO.getReg(); });
  // ... (widen each narrow source)
      return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
          /* ->getOperand(0).getReg() */;
  // ...
  /* ... */ .addImm(LaneNo->getZExtValue())
  /* ... */;
  Load.cloneMemRefs(I);
  // ...
  Register SelectedLoadDst = Load->getOperand(0).getReg();
  unsigned SubReg = AArch64::qsub0;
  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
    auto Vec = MIB.buildInstr(TargetOpcode::COPY,
                              {Narrow ? DstOp(&AArch64::FPR128RegClass)
                                      : /* ... */},
                              {})
                   .addReg(SelectedLoadDst, 0, SubReg + Idx);
    // ...
    if (/* ... */ &&
        !emitNarrowVector(I.getOperand(Idx).getReg(), WideReg, MIB, MRI))
      return false;
  }
  return true;
}

void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr &I,
                                                            unsigned NumVecs,
                                                            unsigned Opc) {
  // ...
  LLT Ty = MRI.getType(I.getOperand(1).getReg());
  // ...
  std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
                 Regs.begin(), [](auto MO) { return MO.getReg(); });
  // ... (build the register tuple and the STx instruction)
}

bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
    MachineInstr &I, unsigned NumVecs, unsigned Opc) {
  // ...
  LLT Ty = MRI.getType(I.getOperand(1).getReg());
  // ...
  std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
                 Regs.begin(), [](auto MO) { return MO.getReg(); });
  // ... (widen narrow sources, as in the lane-load case)
      return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
          /* ->getOperand(0).getReg() */;
  // ...
  /* ... */ .addImm(LaneNo->getZExtValue())
  /* ... */;
  return true;
}
bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  // Find the intrinsic ID.
  unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
  // ... (S64 and P0 below are local shorthands for the 64-bit scalar and
  // pointer LLTs; the per-type dispatch in each case is elided and summarized
  // in a comment.)

  switch (IntrinID) {
  default:
    return false;
  // ...
  case Intrinsic::aarch64_ldxp:
  case Intrinsic::aarch64_ldaxp: {
    auto NewI = MIB.buildInstr(
        IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
        {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
        /* ... */);
    // ...
    break;
  }
  case Intrinsic::aarch64_neon_ld1x2: {
    LLT Ty = MRI.getType(I.getOperand(0).getReg());
    unsigned Opc = 0;
    // 8b LD1Twov8b, 16b LD1Twov16b, 4h LD1Twov4h, 8h LD1Twov8h, 2s LD1Twov2s,
    // 4s LD1Twov4s, 2d LD1Twov2d; Ty == S64 || Ty == P0 uses LD1Twov1d.
    selectVectorLoadIntrinsic(Opc, 2, I);
    break;
  }
  case Intrinsic::aarch64_neon_ld1x3: {
    LLT Ty = MRI.getType(I.getOperand(0).getReg());
    unsigned Opc = 0;
    // 8b LD1Threev8b, 16b LD1Threev16b, 4h LD1Threev4h, 8h LD1Threev8h,
    // 2s LD1Threev2s, 4s LD1Threev4s, 2d LD1Threev2d; S64/P0 use LD1Threev1d.
    selectVectorLoadIntrinsic(Opc, 3, I);
    break;
  }
  case Intrinsic::aarch64_neon_ld1x4: {
    LLT Ty = MRI.getType(I.getOperand(0).getReg());
    unsigned Opc = 0;
    // 8b LD1Fourv8b, 16b LD1Fourv16b, 4h LD1Fourv4h, 8h LD1Fourv8h,
    // 2s LD1Fourv2s, 4s LD1Fourv4s, 2d LD1Fourv2d; S64/P0 use LD1Fourv1d.
    selectVectorLoadIntrinsic(Opc, 4, I);
    break;
  }
  case Intrinsic::aarch64_neon_ld2: {
    LLT Ty = MRI.getType(I.getOperand(0).getReg());
    unsigned Opc = 0;
    // 8b LD2Twov8b, 16b LD2Twov16b, 4h LD2Twov4h, 8h LD2Twov8h, 2s LD2Twov2s,
    // 4s LD2Twov4s, 2d LD2Twov2d; S64/P0 fall back to LD1Twov1d.
    selectVectorLoadIntrinsic(Opc, 2, I);
    break;
  }
  case Intrinsic::aarch64_neon_ld2lane: {
    LLT Ty = MRI.getType(I.getOperand(0).getReg());
    unsigned Opc;
    // By element size: i8 LD2i8, i16 LD2i16, i32 LD2i32, i64/ptr LD2i64.
    if (!selectVectorLoadLaneIntrinsic(Opc, 2, I))
      return false;
    break;
  }
  case Intrinsic::aarch64_neon_ld2r: {
    LLT Ty = MRI.getType(I.getOperand(0).getReg());
    unsigned Opc = 0;
    // 8b LD2Rv8b, 16b LD2Rv16b, 4h LD2Rv4h, 8h LD2Rv8h, 2s LD2Rv2s,
    // 4s LD2Rv4s, 2d LD2Rv2d; S64/P0 use LD2Rv1d.
    selectVectorLoadIntrinsic(Opc, 2, I);
    break;
  }
  case Intrinsic::aarch64_neon_ld3: {
    LLT Ty = MRI.getType(I.getOperand(0).getReg());
    unsigned Opc = 0;
    // 8b LD3Threev8b, 16b LD3Threev16b, 4h LD3Threev4h, 8h LD3Threev8h,
    // 2s LD3Threev2s, 4s LD3Threev4s, 2d LD3Threev2d; S64/P0 fall back to
    // LD1Threev1d.
    selectVectorLoadIntrinsic(Opc, 3, I);
    break;
  }
  case Intrinsic::aarch64_neon_ld3lane: {
    LLT Ty = MRI.getType(I.getOperand(0).getReg());
    unsigned Opc;
    // By element size: i8 LD3i8, i16 LD3i16, i32 LD3i32, i64/ptr LD3i64.
    if (!selectVectorLoadLaneIntrinsic(Opc, 3, I))
      return false;
    break;
  }
  case Intrinsic::aarch64_neon_ld3r: {
    LLT Ty = MRI.getType(I.getOperand(0).getReg());
    unsigned Opc = 0;
    // 8b LD3Rv8b, 16b LD3Rv16b, 4h LD3Rv4h, 8h LD3Rv8h, 2s LD3Rv2s,
    // 4s LD3Rv4s, 2d LD3Rv2d; S64/P0 use LD3Rv1d.
    selectVectorLoadIntrinsic(Opc, 3, I);
    break;
  }
  case Intrinsic::aarch64_neon_ld4: {
    LLT Ty = MRI.getType(I.getOperand(0).getReg());
    unsigned Opc = 0;
    // 8b LD4Fourv8b, 16b LD4Fourv16b, 4h LD4Fourv4h, 8h LD4Fourv8h,
    // 2s LD4Fourv2s, 4s LD4Fourv4s, 2d LD4Fourv2d; S64/P0 fall back to
    // LD1Fourv1d.
    selectVectorLoadIntrinsic(Opc, 4, I);
    break;
  }
  case Intrinsic::aarch64_neon_ld4lane: {
    LLT Ty = MRI.getType(I.getOperand(0).getReg());
    unsigned Opc;
    // By element size: i8 LD4i8, i16 LD4i16, i32 LD4i32, i64/ptr LD4i64.
    if (!selectVectorLoadLaneIntrinsic(Opc, 4, I))
      return false;
    break;
  }
  case Intrinsic::aarch64_neon_ld4r: {
    LLT Ty = MRI.getType(I.getOperand(0).getReg());
    unsigned Opc = 0;
    // 8b LD4Rv8b, 16b LD4Rv16b, 4h LD4Rv4h, 8h LD4Rv8h, 2s LD4Rv2s,
    // 4s LD4Rv4s, 2d LD4Rv2d; S64/P0 use LD4Rv1d.
    selectVectorLoadIntrinsic(Opc, 4, I);
    break;
  }
  case Intrinsic::aarch64_neon_st1x2: {
    LLT Ty = MRI.getType(I.getOperand(1).getReg());
    unsigned Opc;
    // 8b ST1Twov8b, 16b ST1Twov16b, 4h ST1Twov4h, 8h ST1Twov8h, 2s ST1Twov2s,
    // 4s ST1Twov4s, 2d ST1Twov2d; S64/P0 use ST1Twov1d.
    selectVectorStoreIntrinsic(I, 2, Opc);
    break;
  }
  case Intrinsic::aarch64_neon_st1x3: {
    LLT Ty = MRI.getType(I.getOperand(1).getReg());
    unsigned Opc;
    // 8b ST1Threev8b, 16b ST1Threev16b, 4h ST1Threev4h, 8h ST1Threev8h,
    // 2s ST1Threev2s, 4s ST1Threev4s, 2d ST1Threev2d; S64/P0 use ST1Threev1d.
    selectVectorStoreIntrinsic(I, 3, Opc);
    break;
  }
  case Intrinsic::aarch64_neon_st1x4: {
    LLT Ty = MRI.getType(I.getOperand(1).getReg());
    unsigned Opc;
    // 8b ST1Fourv8b, 16b ST1Fourv16b, 4h ST1Fourv4h, 8h ST1Fourv8h,
    // 2s ST1Fourv2s, 4s ST1Fourv4s, 2d ST1Fourv2d; S64/P0 use ST1Fourv1d.
    selectVectorStoreIntrinsic(I, 4, Opc);
    break;
  }
  case Intrinsic::aarch64_neon_st2: {
    LLT Ty = MRI.getType(I.getOperand(1).getReg());
    unsigned Opc;
    // 8b ST2Twov8b, 16b ST2Twov16b, 4h ST2Twov4h, 8h ST2Twov8h, 2s ST2Twov2s,
    // 4s ST2Twov4s, 2d ST2Twov2d; S64/P0 fall back to ST1Twov1d.
    selectVectorStoreIntrinsic(I, 2, Opc);
    break;
  }
  case Intrinsic::aarch64_neon_st3: {
    LLT Ty = MRI.getType(I.getOperand(1).getReg());
    unsigned Opc;
    // 8b ST3Threev8b, 16b ST3Threev16b, 4h ST3Threev4h, 8h ST3Threev8h,
    // 2s ST3Threev2s, 4s ST3Threev4s, 2d ST3Threev2d; S64/P0 fall back to
    // ST1Threev1d.
    selectVectorStoreIntrinsic(I, 3, Opc);
    break;
  }
  case Intrinsic::aarch64_neon_st4: {
    LLT Ty = MRI.getType(I.getOperand(1).getReg());
    unsigned Opc;
    // 8b ST4Fourv8b, 16b ST4Fourv16b, 4h ST4Fourv4h, 8h ST4Fourv8h,
    // 2s ST4Fourv2s, 4s ST4Fourv4s, 2d ST4Fourv2d; S64/P0 fall back to
    // ST1Fourv1d.
    selectVectorStoreIntrinsic(I, 4, Opc);
    break;
  }
  case Intrinsic::aarch64_neon_st2lane: {
    LLT Ty = MRI.getType(I.getOperand(1).getReg());
    unsigned Opc;
    // By element size: i8 ST2i8, i16 ST2i16, i32 ST2i32, i64/ptr ST2i64.
    if (!selectVectorStoreLaneIntrinsic(I, 2, Opc))
      return false;
    break;
  }
  case Intrinsic::aarch64_neon_st3lane: {
    LLT Ty = MRI.getType(I.getOperand(1).getReg());
    unsigned Opc;
    // By element size: i8 ST3i8, i16 ST3i16, i32 ST3i32, i64/ptr ST3i64.
    if (!selectVectorStoreLaneIntrinsic(I, 3, Opc))
      return false;
    break;
  }
  case Intrinsic::aarch64_neon_st4lane: {
    LLT Ty = MRI.getType(I.getOperand(1).getReg());
    unsigned Opc;
    // By element size: i8 ST4i8, i16 ST4i16, i32 ST4i32, i64/ptr ST4i64.
    if (!selectVectorStoreLaneIntrinsic(I, 4, Opc))
      return false;
    break;
  }
  case Intrinsic::aarch64_mops_memset_tag: {
    // Lower the intrinsic to the tagging memset pseudo, which updates the
    // destination pointer and carries the remaining size as a second def.
    Register DstDef = I.getOperand(0).getReg();
    // IN args of the intrinsic.
    Register DstUse = I.getOperand(2).getReg();
    Register ValUse = I.getOperand(3).getReg();
    Register SizeUse = I.getOperand(4).getReg();
    // ... (SizeDef is a fresh GPR64 for the pseudo's extra def)
    auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
                                 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
    // ...
    I.eraseFromParent();
    break;
  }
  }
  return true;
}
bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
                                                 MachineRegisterInfo &MRI) {
  unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
  switch (IntrinID) {
  default:
    break;
  case Intrinsic::aarch64_crypto_sha1h: {
    Register DstReg = I.getOperand(0).getReg();
    Register SrcReg = I.getOperand(2).getReg();

    // FIXME: Should this be an assert?
    if (MRI.getType(DstReg).getSizeInBits() != 32 ||
        MRI.getType(SrcReg).getSizeInBits() != 32)
      return false;

    // The operation has to happen on FPRs. Set up some new FPR registers for
    // the source and destination if they are on GPRs.
    if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
      SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
      // ... (copy the GPR source into SrcReg)
      RBI.constrainGenericRegister(I.getOperand(2).getReg(),
                                   AArch64::GPR32RegClass, MRI);
    }

    if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
      DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);

    // Actually insert the instruction.
    auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
    // ...

    // Did we create a new register for the destination? If so, copy the
    // result back into the original destination.
    if (DstReg != I.getOperand(0).getReg()) {
      // ...
      RBI.constrainGenericRegister(I.getOperand(0).getReg(),
                                   AArch64::GPR32RegClass, MRI);
    }

    I.eraseFromParent();
    return true;
  }
  case Intrinsic::frameaddress:
  case Intrinsic::returnaddress: {
    // ...
    unsigned Depth = I.getOperand(2).getImm();
    Register DstReg = I.getOperand(0).getReg();
    RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);

    if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
      if (!MFReturnAddr) {
        // Insert the copy from LR/X30 into the entry block, before it can be
        // clobbered by anything.
        /* ... */ MF, TII, AArch64::LR, AArch64::GPR64RegClass,
                  I.getDebugLoc());
      }

      if (STI.hasPAuth()) {
        MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
      } else {
        // ...
      }

      I.eraseFromParent();
      return true;
    }

    // Walk up the frame chain Depth times.
    while (Depth--) {
      Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
      MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
      // ...
      FrameAddr = NextFrame;
    }

    if (IntrinID == Intrinsic::frameaddress) {
      // ...
    } else {
      // The return address lives one slot above the frame pointer.
      if (STI.hasPAuth()) {
        Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
        MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
        MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
      } else {
        // ...
      }
    }

    I.eraseFromParent();
    return true;
  }
  case Intrinsic::aarch64_neon_tbl2:
    SelectTable(I, MRI, 2, AArch64::TBLv8i8Two, AArch64::TBLv16i8Two, false);
    return true;
  case Intrinsic::aarch64_neon_tbl3:
    SelectTable(I, MRI, 3, AArch64::TBLv8i8Three, AArch64::TBLv16i8Three,
                false);
    return true;
  case Intrinsic::aarch64_neon_tbl4:
    SelectTable(I, MRI, 4, AArch64::TBLv8i8Four, AArch64::TBLv16i8Four, false);
    return true;
  case Intrinsic::aarch64_neon_tbx2:
    SelectTable(I, MRI, 2, AArch64::TBXv8i8Two, AArch64::TBXv16i8Two, true);
    return true;
  case Intrinsic::aarch64_neon_tbx3:
    SelectTable(I, MRI, 3, AArch64::TBXv8i8Three, AArch64::TBXv16i8Three, true);
    return true;
  case Intrinsic::aarch64_neon_tbx4:
    SelectTable(I, MRI, 4, AArch64::TBXv8i8Four, AArch64::TBXv16i8Four, true);
    return true;
  case Intrinsic::swift_async_context_addr:
    // ... (materialize the async context slot address off the frame pointer)
    I.eraseFromParent();
    return true;
  }
  return false;
}
bool AArch64InstructionSelector::selectPtrAuthGlobalValue(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  Register DefReg = I.getOperand(0).getReg();
  // ...
  Register AddrDisc = I.getOperand(3).getReg();
  uint64_t Disc = I.getOperand(4).getImm();
  // ...
  if (!isUInt<16>(Disc))
    report_fatal_error(
        "constant discriminator in ptrauth global out of range [0, 0xffff]");

  // ...
  if (!STI.isTargetELF() && !STI.isTargetMachO())
    /* ... */;

  // The offset folded into the global, if any, must be a constant.
  if (!MRI.hasOneDef(OffsetReg))
    /* ... */;
  if (OffsetMI.getOpcode() != TargetOpcode::G_CONSTANT)
    /* ... */;
  // ...

  unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
  assert(/* ... */ &&
         "unsupported non-GOT op flags on ptrauth global reference");
  assert(/* ... */ &&
         "unsupported non-GOT reference to weak ptrauth global");

  // ...
  bool HasAddrDisc = !AddrDiscVal || *AddrDiscVal != 0;

  // The pseudo expansion clobbers X16/X17; mark them defined here.
  MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
  MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
  MIB.buildInstr(NeedsGOTLoad ? AArch64::LOADgotPAC : AArch64::MOVaddrPAC)
      /* ... */
      .addReg(HasAddrDisc ? AddrDisc : AArch64::XZR)
      /* ... */;
  RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
  I.eraseFromParent();
  return true;

  // ... (weak-global path)
  assert(/* ... */ &&
         "unsupported non-zero offset in weak ptrauth global reference");
  // ...
  MIB.buildInstr(AArch64::LOADauthptrstatic, {DefReg}, {})
      .addGlobalAddress(GV, Offset)
      /* ... */;
  RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
  I.eraseFromParent();
  return true;
}

void AArch64InstructionSelector::SelectTable(MachineInstr &I,
                                             MachineRegisterInfo &MRI,
                                             unsigned NumVec, unsigned Opc1,
                                             unsigned Opc2, bool isExt) {
  Register DstReg = I.getOperand(0).getReg();
  // ... (choose Opc1 for 64-bit destinations, Opc2 for 128-bit ones)

  // Create the REG_SEQUENCE of table registers.
  for (unsigned i = 0; i < NumVec; i++)
    Regs.push_back(I.getOperand(i + 2 + isExt).getReg());
  // ...
  Register IdxReg = I.getOperand(2 + NumVec + isExt).getReg();
  // ... (TBX additionally takes the fallback vector as a first source)
  I.eraseFromParent();
}
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
    return std::nullopt;
  uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
    return std::nullopt;
  uint64_t Enc = 31 - *MaybeImmed;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
    return std::nullopt;
  uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
    return std::nullopt;
  uint64_t Enc = 63 - *MaybeImmed;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}
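// These renderers feed the UBFM/SBFM-based shift patterns: the "A" value is
// the rotate-style immr field, (bitwidth - shift) mod bitwidth, and the "B"
// value is the imms field, bitwidth - 1 - shift. Together they express
// "shift left by N" as a bitfield move, e.g. lsl x0, x1, #3 encodes as
// ubfm x0, x1, #61, #60.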
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::select12BitValueWithLeftShift(
    uint64_t Immed) const {
  unsigned ShiftAmt;
  if (Immed >> 12 == 0) {
    ShiftAmt = 0;
  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
    ShiftAmt = 12;
    Immed = Immed >> 12;
  } else
    return std::nullopt;
  // ... (render Immed and the LSL #ShiftAmt shifter operand)
}
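// Worked example: 0x123 fits in 12 bits and renders as "#0x123, lsl #0";
// 0x123000 has its low 12 bits clear and fits in 24 bits, so it becomes
// "#0x123, lsl #12"; 0x123456 matches neither form, so the renderer returns
// std::nullopt and the caller falls back to a register form.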
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
  // Even though the ComplexPattern lists [imm] as its interesting opcodes,
  // that list is only consulted for root-level matching, so we still have to
  // check that the operand really is an immediate here.
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == std::nullopt)
    return std::nullopt;
  return select12BitValueWithLeftShift(*MaybeImmed);
}
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
  // We need a register here, because we need to know if we have a 64 or 32
  // bit immediate.
  if (!Root.isReg())
    return std::nullopt;
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == std::nullopt)
    return std::nullopt;
  uint64_t Immed = *MaybeImmed;

  // "cmp wN, #0" and "cmn wN, #0" set the C flag differently, so zero must
  // not match here.
  if (Immed == 0)
    return std::nullopt;

  // Negate with the wraparound width of the root's type.
  if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
    Immed = ~((uint32_t)Immed) + 1;
  else
    Immed = ~Immed + 1ULL;

  if (Immed & 0xFFFFFFFFFF000000ULL)
    return std::nullopt;

  Immed &= 0xFFFFFFULL;
  return select12BitValueWithLeftShift(Immed);
}
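// The trick here: "sub x, #imm" can stand in for "add x, #-imm", so a
// negative immediate is negated (with 32-bit wraparound when the type is 32
// bits) and then screened against the same imm12-with-optional-LSL#12
// encoding; the 0xFFFFFF mask keeps only the 24 bits those two forms cover.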
std::optional<bool> AArch64InstructionSelector::isWorthFoldingIntoAddrMode(
    MachineInstr &MI, const MachineRegisterInfo &MRI) const {
  if (MI.getOpcode() == AArch64::G_SHL) {
    // Address operands with shifts are free, except on subtargets with
    // AddrLSLSlow14.
    if (const auto ValAndVReg = /* ... constant lookup of ... */
            MI.getOperand(2).getReg(), MRI)) {
      const APInt ShiftVal = ValAndVReg->Value;

      // Don't fold if we know this will be slow.
      return !(STI.hasAddrLSLSlow14() && (ShiftVal == 1 || ShiftVal == 4));
    }
  }
  return std::nullopt;
}

bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
    MachineInstr &MI, const MachineRegisterInfo &MRI,
    bool IsAddrOperand) const {
  // Always fold if there is one use, or if we're optimizing for size.
  if (MRI.hasOneNonDBGUse(DefReg) ||
      MI.getParent()->getParent()->getFunction().hasOptSize())
    return true;

  if (IsAddrOperand) {
    // If we already know whether folding is good or bad, return that.
    if (const auto Worth = isWorthFoldingIntoAddrMode(MI, MRI))
      return *Worth;

    // Fold G_PTR_ADD if its offset operand can be folded.
    if (MI.getOpcode() == AArch64::G_PTR_ADD) {
      // ...
      if (const auto Worth = isWorthFoldingIntoAddrMode(*OffsetInst, MRI))
        return *Worth;
    }
  }

  // Otherwise, only fold when every user is a memory operation, where the
  // folded operand comes for free in the addressing mode.
  return all_of(MRI.use_nodbg_instructions(DefReg),
                /* ... every use mayLoadOrStore ... */);
}
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectExtendedSHL(
    MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
    unsigned SizeInBytes, bool WantsExt) const {
  assert(Base.isReg() && "Expected base to be a register operand");
  assert(Offset.isReg() && "Expected offset to be a register operand");
  // ...
  unsigned OffsetOpc = OffsetInst->getOpcode();
  bool LookedThroughZExt = false;
  if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
    // Try to look through a ZEXT.
    if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
      return std::nullopt;
    // ...
    LookedThroughZExt = true;

    if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
      return std::nullopt;
  }
  // Make sure that the memory op is a valid size.
  int64_t LegalShiftVal = Log2_32(SizeInBytes);
  if (LegalShiftVal == 0)
    return std::nullopt;
  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
    return std::nullopt;

  // Now find the constant shift/multiply amount.
  // ...
  if (OffsetOpc == TargetOpcode::G_SHL)
    return std::nullopt;
  // ...
  int64_t ImmVal = ValAndVReg->Value.getSExtValue();

  // Since we're going to pull this into a shift, a G_MUL constant must be a
  // power of 2.
  if (OffsetOpc == TargetOpcode::G_MUL) {
    if (!llvm::has_single_bit<uint32_t>(ImmVal))
      return std::nullopt;
    // ... (shift amount is the log base-2 of the constant)
  }

  if ((ImmVal & 0x7) != ImmVal)
    return std::nullopt;

  // We may only shift by LegalShiftVal; that value is built into the
  // instruction.
  if (ImmVal != LegalShiftVal)
    return std::nullopt;

  unsigned SignExtend = 0;
  if (WantsExt) {
    // Check if the offset is defined by an extend, unless we looked through a
    // G_ZEXT earlier.
    if (!LookedThroughZExt) {
      auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
      if (/* ... invalid extend ... */)
        return std::nullopt;
      // ...
      if (/* ... */)
        return std::nullopt;
      // ...
    }
    // Need a 32-bit wide register here.
    OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
  }
  // ... (renderers add Base and OffsetReg, then both immediates:)
             MIB.addImm(SignExtend);
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
    MachineOperand &Root, unsigned SizeInBytes) const {
  if (!Root.isReg())
    return std::nullopt;
  // ...
  // We want to find something like this:
  //
  //   val = G_CONSTANT LegalShiftVal
  //   shift = G_SHL off_reg val
  //   ptr = G_PTR_ADD base_reg shift
  //   x = G_LOAD ptr
  //
  // and fold it into the addressing mode:
  //
  //   ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
    return std::nullopt;
  // ...
  return selectExtendedSHL(Root, PtrAdd->getOperand(1),
                           /* ... */, SizeInBytes, /*WantsExt=*/false);
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeRegisterOffset(
    MachineOperand &Root) const {
  // ...
  // We need a G_PTR_ADD.
  if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
    return std::nullopt;

  // If this is used more than once, let's not bother folding.
  if (/* ... */)
    return std::nullopt;

  // ... (render the G_PTR_ADD's LHS as base and RHS as offset)
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
                                              unsigned SizeInBytes) const {
  // ...
  if (/* ... no G_PTR_ADD root ... */)
    return std::nullopt;
  // ...
  if (/* ... */)
    return std::nullopt;

  // Check for immediates that cannot be encoded in the [base + imm]
  // addressing mode and can't be encoded in an add/sub. For those, the
  // [base, xreg] form saves an add/sub over materialize-then-index.
  if (ValAndVReg) {
    unsigned Scale = Log2_32(SizeInBytes);
    int64_t ImmOff = ValAndVReg->Value.getSExtValue();

    // Skip immediates that can be selected in the load/store addressing mode.
    if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
        ImmOff < (0x1000 << Scale))
      return std::nullopt;

    // Helper lambda to decide whether it is preferable to emit an add.
    auto isPreferredADD = [](int64_t ImmOff) {
      // Constants in [0x0, 0xfff] can be encoded in an add.
      if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
        return true;

      // Can it be encoded in an add lsl #12?
      if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
        return false;

      // It can be encoded in an add lsl #12, but we may not want to. If it
      // could instead be a single movz, prefer that, since a movz is faster
      // than an add with a shift.
      return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
             (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
    };

    // If the immediate can be encoded in a single add/sub, bail out.
    if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
      return std::nullopt;
  }

  // Try to fold shifts into the addressing mode.
  auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
  if (AddrModeFns)
    return AddrModeFns;

  // If that doesn't work, use the plain register-offset form.
  return selectAddrModeRegisterOffset(Root);
}
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
                                              unsigned SizeInBytes) const {
  // ...
  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
    return std::nullopt;
  // ...
  // Try to fold a shift plus extend into the addressing mode first.
  auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
                                       /* ... */);
  if (ExtendedShl)
    return ExtendedShl;

  // There was no shift; we can only fold an extend.
  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
    return std::nullopt;

  // Check if this is an extend; we get an extend type if it is.
  AArch64_AM::ShiftExtendType Ext =
      getExtendTypeForInst(*OffsetInst, MRI, true);
  if (/* ... invalid extend ... */)
    return std::nullopt;

  // Need a 32-bit wide register.
  /* ... */ = moveScalarRegClass(/* ... */, AArch64::GPR32RegClass, MIB);

  // ... (renderers add the base, the extended register, and then:)
             MIB.addImm(SignExtend);
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
                                                   unsigned Size) const {
  // ...
  if (!Root.isReg())
    return std::nullopt;

  if (!isBaseWithConstantOffset(Root, MRI))
    return std::nullopt;
  // ...
  MachineOperand &OffImm = RootDef->getOperand(2);
  if (!OffImm.isReg())
    return std::nullopt;
  // ...
  if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
    return std::nullopt;
  // ...
  if (/* ... not a plausible 64-bit-or-narrower CImm ... */)
    return std::nullopt;
  // ...
  if (RHSC >= -256 && RHSC < 256) {
    // ... (render the base operand and the raw byte offset RHSC)
  }
  return std::nullopt;
}
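// This matches the LDUR/STUR family: unscaled addressing encodes a signed
// 9-bit byte offset, hence the [-256, 256) window, and it is the fallback
// for offsets the scaled [base + uimm12 * Size] form cannot represent.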
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::tryFoldAddLowIntoImm(
    MachineInstr &RootDef, unsigned Size, MachineRegisterInfo &MRI) const {
  if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
    return std::nullopt;
  // The G_ADD_LOW must hang off an ADRP.
  if (/* ... */)
    return std::nullopt;
  // The folded offset must be a multiple of the access size.
  if (/* ... */)
    return std::nullopt;
  // TLS globals can't be folded this way.
  if (/* ... */)
    return std::nullopt;
  // The global must be sufficiently aligned for the access.
  if (/* ... */)
    return std::nullopt;

  unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
  // ... (renderers add the ADRP result as base, then the page offset:)
             MIB.addGlobalAddress(GV, Offset, /* ... */);
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
                                                  unsigned Size) const {
  // ...
  if (!Root.isReg())
    return std::nullopt;
  // ...
  if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
    // ... (render the frame index with a zero offset)
  }

  // Check if we can fold in the ADD of small code model ADRP + ADD address.
  auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
  if (OpFns)
    return OpFns;

  if (isBaseWithConstantOffset(Root, MRI)) {
    // ... (LHSDef/RHSDef are the G_PTR_ADD's operand definitions)
    if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
      if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
        /* ... */;
      // ... (render LHS as base and RHSC >> Scale as the scaled offset)
    }
  }

  // Before falling back to the general case, check whether the unscaled
  // instructions can handle this; if so, that's preferable.
  if (selectAddrModeUnscaled(Root, Size))
    return std::nullopt;
  // ... (render Root itself with a zero offset)
}
static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case TargetOpcode::G_SHL:
    return AArch64_AM::LSL;
  case TargetOpcode::G_LSHR:
    return AArch64_AM::LSR;
  case TargetOpcode::G_ASHR:
    return AArch64_AM::ASR;
  case TargetOpcode::G_ROTR:
    return AArch64_AM::ROR;
  }
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
                                                  bool AllowROR) const {
  if (!Root.isReg())
    return std::nullopt;
  // ...
  // The operand must be defined by an instruction corresponding to a
  // ShiftExtendType, and ROR only when the caller allows it.
  if (/* ... invalid shift type ... */)
    return std::nullopt;
  if (/* ... ROR && !AllowROR ... */)
    return std::nullopt;
  if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI, false))
    return std::nullopt;

  // Need an immediate on the RHS of the shift.
  if (/* ... */)
    return std::nullopt;

  // Fold the shift's LHS and the encoded shift amount into the instruction.
  unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
  unsigned Val = *Immed & (NumBits - 1);
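// Masking the folded amount with (NumBits - 1) keeps the immediate inside
// the encodable 0..NumBits-1 range of a shifted-register operand; a G_SHL
// by the full register width or more is poison anyway, so any encoding may
// be chosen for it.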
AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
    MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
  unsigned Opc = MI.getOpcode();

  // Handle explicit extend instructions first.
  if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
    unsigned Size;
    if (Opc == TargetOpcode::G_SEXT)
      Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    else
      Size = MI.getOperand(2).getImm();
    assert(Size != 64 && "Extend from 64 bits?");
    // By source size: 8 -> SXTB, 16 -> SXTH, 32 -> SXTW; the byte and
    // halfword forms are invalid for load/store addressing.
    // ...
  }

  if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
    unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    assert(Size != 64 && "Extend from 64 bits?");
    // By source size: 8 -> UXTB, 16 -> UXTH, 32 -> UXTW, with the same
    // load/store restriction as above.
    // ...
  }

  // No explicit extend; try a G_AND with a constant mask on the RHS.
  if (Opc != TargetOpcode::G_AND)
    return AArch64_AM::InvalidShiftExtend;
  // ...
}

Register AArch64InstructionSelector::moveScalarRegClass(
    Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
  // ...
  auto Ty = MRI.getType(Reg);
  // ... (no-op if the register is already the right width; otherwise emit
  // and immediately select a COPY into RC)
  return Copy.getReg(0);
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectArithExtendedRegister(
    MachineOperand &Root) const {
  if (!Root.isReg())
    return std::nullopt;
  // ...
  if (/* ... no def ... */)
    return std::nullopt;

  if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI, false))
    return std::nullopt;

  // Check if we can fold a shift and an extend.
  if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
    // Look for a constant on the RHS of the shift.
    if (/* ... */)
      return std::nullopt;
    ShiftVal = *MaybeShiftVal;
    if (/* ... shift amount > 4 ... */)
      return std::nullopt;
    // Look for a valid extend instruction on the LHS of the shift.
    if (/* ... */)
      return std::nullopt;
    Ext = getExtendTypeForInst(*ExtDef, MRI);
    if (/* ... invalid ... */)
      return std::nullopt;
    // ...
  } else {
    // Didn't get a shift; try just folding an extend.
    Ext = getExtendTypeForInst(*RootDef, MRI);
    if (/* ... invalid ... */)
      return std::nullopt;
    // ...
    // A 32-bit instruction that zeroes the high half gives an implicit
    // zero-extend for free, so UXTW of such a def needs no extend here.
    if (isDef32(*ExtInst))
      return std::nullopt;
  }

  // We require a GPR32 here. Narrow the register if needed.
  ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
  // ... (renderers add ExtReg, then:)
             MIB.addImm(getArithExtendImm(Ext, ShiftVal));
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
  if (!Root.isReg())
    return std::nullopt;
  // ...
  // Look through little-endian bitcasts of the source.
  while (Extract && Extract->MI->getOpcode() == TargetOpcode::G_BITCAST &&
         STI.isLittleEndian())
    /* ... */;
  if (/* ... */)
    return std::nullopt;

  // The high half is the second result of an unmerge...
  if (Extract->MI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
    // ...
  }
  // ...or an extract of lane 1 from a two-element vector.
  if (Extract->MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) {
    if (/* ... */
        LaneIdx->Value.getSExtValue() == 1) {
      // ...
    }
  }
  return std::nullopt;
}
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  std::optional<int64_t> CstVal =
      /* ... */;
  assert(CstVal && "Expected constant value");
  // ...
}

void AArch64InstructionSelector::renderLogicalImm32(
    MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
  // ... (encode CstVal as a 32-bit logical immediate and add it)
}

void AArch64InstructionSelector::renderLogicalImm64(
    MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
  // ... (encode CstVal as a 64-bit logical immediate and add it)
}

// ...
  assert(MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == 0 &&
         "Expected G_UBSANTRAP");
  MIB.addImm(MI.getOperand(0).getImm() | ('U' << 8));
}

// ... (the three FP-immediate renderers share one shape, differing only in
// the half/single/double encoding helper)
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT");
  // ...
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT");
  // ...
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT");
  // ...

void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT");
  // ...
}

bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
    const MachineInstr &MI, unsigned NumBytes) const {
  if (!MI.mayLoadOrStore())
    return false;
  assert(/* ... */ && "Expected load/store to have only one mem op!");
  return (*MI.memoperands_begin())->getSize() == NumBytes;
}
bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
  // ...
  if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
    return false;

  // Only return true if we know the operation will zero out the high half of
  // the 64-bit register. Truncates can be subregister copies, which don't
  // zero the high bits, and copy-like instructions can be fed by them.
  switch (MI.getOpcode()) {
  default:
    return true;
  case TargetOpcode::COPY:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_TRUNC:
  case TargetOpcode::G_PHI:
    return false;
  }
}

static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
                            const AArch64RegisterBankInfo &RBI) {
  assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
  // ...
  assert(DstRB && "Expected PHI dst to have regbank assigned");

  // For each operand whose bank differs from the destination's, insert a
  // cross-bank copy next to the operand's def and remap the operand.
  auto *OpDef = MRI.getVRegDef(OpReg);
  const LLT &Ty = MRI.getType(OpReg);
  // ... (place the copy after OpDef, skipping over any PHIs)
  if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
    /* ... */;
  // ...
  MRI.setRegBank(Copy.getReg(0), *DstRB);
  MO.setReg(Copy.getReg(0));
  // ...
}

void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
  // Collect the G_PHIs first so we don't invalidate iterators while fixing
  // them up.
  for (auto &BB : MF) {
    for (auto &MI : BB) {
      if (MI.getOpcode() == TargetOpcode::G_PHI)
        Phis.emplace_back(&MI);
    }
  }

  for (auto *MI : Phis) {
    // Sub-32-bit scalar PHI operands can end up split across the GPR and FPR
    // banks; if both banks appear, homogenize them with cross-bank copies.
    bool HasGPROp = false, HasFPROp = false;
    // ... (scan the PHI's register operands)
    const LLT &Ty = MRI.getType(MO.getReg());
    // ...
    if (RB->getID() == AArch64::GPRRegBankID)
      HasGPROp = true;
    else
      HasFPROp = true;
    // ...
    if (HasGPROp && HasFPROp)
      fixupPHIOpBanks(*MI, MRI, RBI);
  }
}

InstructionSelector *llvm::createAArch64InstructionSelector(
    const AArch64TargetMachine &TM, const AArch64Subtarget &Subtarget,
    const AArch64RegisterBankInfo &RBI) {
  return new AArch64InstructionSelector(TM, Subtarget, RBI);
}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool isPreferredADD(int64_t ImmOff)
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue CCOp, AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC, const SDLoc &DL, SelectionDAG &DAG)
can be transformed to: not (and (not (and (setCC (cmp C)) (setCD (cmp D)))) (and (not (setCA (cmp A))...
static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp, AArch64CC::CondCode Predicate)
Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain of CCMP/CFCMP ops.
static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static bool isCMN(SDValue Op, ISD::CondCode CC)
static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC)
Emit expression as a conjunction (a series of CCMP/CFCMP ops).
static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy)
#define GET_GLOBALISEL_PREDICATES_INIT
static std::pair< const TargetRegisterClass *, const TargetRegisterClass * > getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Helper function to get the source and destination register classes for a copy.
#define GET_GLOBALISEL_TEMPORARIES_INIT
static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Convert an IR fp condition code to an AArch64 CC.
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert, MachineRegisterInfo &MRI)
Return a register which can be used as a bit to test in a TB(N)Z.
static unsigned getMinSizeForRegBank(const RegisterBank &RB)
Returns the minimum size the given register bank can hold.
static std::optional< int64_t > getVectorShiftImm(Register Reg, MachineRegisterInfo &MRI)
Returns the element immediate value of a vector shift operand if found.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the G_LOAD or G_STORE operation GenericOpc, appropriate for the (value)...
static const TargetRegisterClass * getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits, bool GetAllRegSet=false)
Given a register bank, and size in bits, return the smallest register class that can represent that c...
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P)
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the basic binary operation GenericOpc (such as G_OR or G_SDIV),...
static bool getSubRegForClass(const TargetRegisterClass *RC, const TargetRegisterInfo &TRI, unsigned &SubReg)
Returns the correct subregister to use for a given register class.
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI, Register SrcReg, const TargetRegisterClass *To, unsigned SubReg)
Helper function for selectCopy.
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI, const AArch64RegisterBankInfo &RBI)
static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI)
static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type)
static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI)
Given a shift instruction, return the correct shift type for that instruction.
static bool unsupportedBinOp(const MachineInstr &I, const AArch64RegisterBankInfo &RBI, const MachineRegisterInfo &MRI, const AArch64RegisterInfo &TRI)
Check whether I is a currently unsupported binary operation:
static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst, bool WillNegate, MachineRegisterInfo &MRI, unsigned Depth=0)
Returns true if Val is a tree of AND/OR/CMP operations that can be expressed as a conjunction.
static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg, const unsigned EltSize)
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static std::optional< uint64_t > getImmedFromMO(const MachineOperand &Root)
static std::pair< unsigned, unsigned > getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize)
Return an <Opcode, SubregIndex> pair to do a vector element insert of a given size and RB.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
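Conceptually, the tuple helpers above all emit a single REG_SEQUENCE; a hand-written MIR sketch of a two-register D-tuple (register names are placeholders):

    %t:dd = REG_SEQUENCE %a, %subreg.dsub0, %b, %subreg.dsub1

createDTuple and createQTuple would then differ only in the register-class IDs and the dsub*/qsub* subregister indices they hand to createTuple.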
static std::optional< int64_t > getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI)
Matches and returns the shift immediate value for a SHL instruction given a shift operand.
static void changeFPCCToORAArch64CC(CmpInst::Predicate CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
This file declares the targeting of the RegisterBankInfo class for AArch64.
static bool isStore(int Opcode)
static bool selectMergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static bool selectUnmergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
This file contains constants used for implementing Dwarf debug support.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
const char LLVMTargetMachineRef TM
static StringRef getName(Value *V)
static constexpr int Concat[]
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
int getVarArgsStackIndex() const
int getVarArgsGPRIndex() const
unsigned getVarArgsGPRSize() const
This class provides the information for the target register banks.
bool isCallingConvWin64(CallingConv::ID CC) const
APInt bitcastToAPInt() const
Class for arbitrary precision integers.
APInt zext(unsigned width) const
Zero extend to a new width.
uint64_t getZExtValue() const
Get zero extended value.
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
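A minimal sketch of the APInt helpers listed above (values chosen purely for illustration):

    #include "llvm/ADT/APInt.h"
    using llvm::APInt;

    APInt Byte(8, 0xAB);                          // 8-bit value 0xAB
    APInt Wide = Byte.zext(32);                   // 0x000000AB
    uint64_t Raw = Wide.getZExtValue();           // 0xAB
    APInt Splat = APInt::getSplat(32, Byte);      // 0xABABABAB
    APInt Hi = APInt::getHighBitsSet(32, 8);      // 0xFF000000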
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ ICMP_ULE
unsigned less or equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
Returns the inverse of the predicate; for example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
bool isIntPredicate() const
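A short sketch of the CmpInst predicate queries (a typical use is inverting a condition before swapping branch destinations):

    #include "llvm/IR/InstrTypes.h"
    using namespace llvm;

    CmpInst::Predicate P = CmpInst::ICMP_SLT;
    CmpInst::Predicate Inv = CmpInst::getInversePredicate(P); // ICMP_SGE
    bool IsInt = CmpInst::isIntPredicate(P);                  // true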
static Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
ConstantFP - Floating Point Values [float, double].
const APFloat & getValueAPF() const
bool isNegative() const
Return true if the sign bit is set.
bool isZero() const
Return true if the value is positive or negative zero.
This is the shared class of boolean and integer constants.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Constant * getSplatValue(bool AllowPoison=false) const
If all elements of the vector constant have the same value, return that value.
const APInt & getUniqueInteger() const
If C is a constant integer then return its value, otherwise C must be a vector of constant integers,...
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
This class represents an Operation in the Expression.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
std::optional< SmallVector< std::function< void(MachineInstrBuilder &)>, 4 > > ComplexRendererFns
Represents indexed stores.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
Register getCondReg() const
Register getFalseReg() const
Register getTrueReg() const
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
bool hasExternalWeakLinkage() const
bool isEquality() const
Return true if this predicate is either EQ or NE.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
constexpr LLT multiplyElements(int Factor) const
Produce a vector type that is Factor times bigger, preserving the element type.
constexpr bool isPointerVector() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
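A minimal sketch of constructing the LLT shapes referenced above (the header location varies across LLVM versions):

    #include "llvm/CodeGenTypes/LowLevelType.h" // llvm/CodeGen/LowLevelType.h in older trees
    using namespace llvm;

    LLT S64 = LLT::scalar(64);                  // 64-bit scalar
    LLT V4S32 = LLT::fixed_vector(4, 32);       // <4 x s32>
    LLT P0 = LLT::pointer(0, 64);               // 64-bit pointer in AS 0
    unsigned Elt = V4S32.getScalarSizeInBits(); // 32
    bool Vec = V4S32.isVector();                // true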
This is an important class for using LLVM in a threaded context.
TypeSize getValue() const
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setAdjustsStack(bool V)
void setFrameAddressIsTaken(bool T)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
Helper class to build MachineInstr.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineFunction & getMF()
Getter for the function we currently build.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineIRBuilderState & getState()
Getter for the State.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
void setState(const MachineIRBuilderState &NewState)
Setter for the State.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
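A hedged sketch of typical MachineIRBuilder use during selection (I, DstReg, SrcReg, and the ADD operands are assumed to exist in the surrounding selector code):

    MIB.setInstrAndDebugLoc(I);          // insert before I, reuse its DebugLoc
    MIB.buildCopy(DstReg, SrcReg);       // DstReg = COPY SrcReg
    MIB.buildInstr(AArch64::ADDXrr, {AddDst}, {LHS, RHS}); // AddDst = ADDXrr LHS, RHS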
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addBlockAddress(const BlockAddress *BA, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
bool constrainAllUses(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) const
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
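A sketch of assembling an instruction operand-by-operand with these helpers (opcode, registers, and MMO are placeholders):

    auto MI = BuildMI(MBB, InsertPt, DL, TII.get(AArch64::LDRXui))
                  .addDef(DstReg)        // destination vreg
                  .addUse(BaseReg)       // base address register
                  .addImm(0)             // scaled 12-bit offset
                  .addMemOperand(MMO);   // memory reference info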
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
void addMemOperand(MachineFunction &MF, MachineMemOperand *MO)
Add a MachineMemOperand to the machine instruction.
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
@ MOLoad
The memory access reads data.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
const ConstantInt * getCImm() const
bool isCImm() const
isCImm - Test if this is a MO_CImmediate operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
static MachineOperand CreateBA(const BlockAddress *BA, int64_t Offset, unsigned TargetFlags=0)
const ConstantFP * getFPImm() const
unsigned getPredicate() const
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
T get() const
Returns the value of the specified pointer type.
T dyn_cast() const
Returns the current pointer if it is of the specified pointer type, otherwise returns null.
Analysis providing profile information.
Holds all the information related to register banks.
static const TargetRegisterClass * constrainGenericRegister(Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI)
Constrain the (possibly generic) virtual register Reg to RC.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
TypeSize getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
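A sketch of the usual constraint step once an opcode has been chosen (Reg and MRI assumed from context):

    if (!RegisterBankInfo::constrainGenericRegister(
            Reg, AArch64::GPR64RegClass, MRI))
      return false; // could not constrain; selection must bail out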
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
This class represents the LLVM 'select' instruction.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetLowering * getTargetLowering() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static IntegerType * getInt8Ty(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
void changeFCMPPredToAArch64CC(const CmpInst::Predicate P, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Find the AArch64 condition codes necessary to represent P for a scalar floating point comparison.
std::optional< int64_t > getAArch64VectorSplatScalar(const MachineInstr &MI, const MachineRegisterInfo &MRI)
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint8_t encodeAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType9(uint64_t Imm)
static bool isAdvSIMDModImmType4(uint64_t Imm)
static bool isAdvSIMDModImmType5(uint64_t Imm)
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType10(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType9(uint64_t Imm)
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
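A sketch of the check-then-encode pattern for logical immediates (the constant is an arbitrary valid example: eight ones repeating per 16-bit element):

    uint64_t Imm = 0x00FF00FF00FF00FFULL;
    if (AArch64_AM::isLogicalImmediate(Imm, 64)) {
      uint64_t Enc = AArch64_AM::encodeLogicalImmediate(Imm, 64);
      // Enc is the N:immr:imms field used by ANDXri/ORRXri/EORXri.
    }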
static bool isAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType5(uint64_t Imm)
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static bool isAdvSIMDModImmType10(uint64_t Imm)
static int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType8(uint64_t Imm)
static bool isAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType11(uint64_t Imm)
static bool isAdvSIMDModImmType11(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType6(uint64_t Imm)
static bool isAdvSIMDModImmType8(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType4(uint64_t Imm)
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount. imm: 6-bit shift amount; shifter: 000 ==> lsl, 001 ==...
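For example, encoding the shift operand for an add-immediate that shifts the constant left by 12:

    // Requests "Imm, LSL #12" in e.g. ADDXri's shift operand.
    unsigned Shift = AArch64_AM::getShifterImm(AArch64_AM::LSL, 12);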
static bool isAdvSIMDModImmType6(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType1(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType1(uint64_t Imm)
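A sketch of the common pattern for these AdvSIMD modified-immediate helpers, assuming Type1 covers the per-32-bit-lane low-byte layout (each type guards one MOVI/MVNI/FMOV encoding):

    uint64_t Imm = 0x000000AB000000ABULL; // same low byte in each 32-bit lane
    if (AArch64_AM::isAdvSIMDModImmType1(Imm)) {
      uint8_t Enc = AArch64_AM::encodeAdvSIMDModImmType1(Imm);
      // Enc becomes the imm8 field of the matching MOVI variant.
    }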
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(int64_t RequestedValue)
Matches a constant equal to RequestedValue.
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
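A minimal sketch of these combinators in use (Reg and MRI assumed from the selector's context):

    Register Src;
    APInt Cst;
    if (mi_match(Reg, MRI, m_GAdd(m_Reg(Src), m_ICst(Cst)))) {
      // Reg is defined by a G_ADD of Src and a constant; Cst holds its value.
    }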
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
NodeAddr< InstrNode * > Instr
This is an optimization pass for GlobalISel generic memory operations.
Register getFunctionLiveInPhysReg(MachineFunction &MF, const TargetInstrInfo &TII, MCRegister PhysReg, const TargetRegisterClass &RC, const DebugLoc &DL, LLT RegTy=LLT())
Return a virtual register corresponding to the incoming argument register PhysReg.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
bool isPreISelGenericOpcode(unsigned Opcode)
Check whether the given Opcode is a generic opcode that is not supposed to appear after ISel.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
std::optional< int64_t > getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT that fits in int64_t, returns it.
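A sketch of the constant-lookup utilities in use (VReg and MRI assumed from context):

    if (std::optional<int64_t> Cst = getIConstantVRegSExtVal(VReg, MRI)) {
      int64_t Val = *Cst;   // VReg is a G_CONSTANT with this value
    }
    MachineInstr *Def = getDefIgnoringCopies(VReg, MRI); // skip COPY chains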
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
bool shouldOptForSize(const MachineBasicBlock &MBB, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
Returns true if the given block should be optimized for size.
InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &, const AArch64Subtarget &, const AArch64RegisterBankInfo &)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
std::optional< ValueAndVReg > getAnyConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true, bool LookThroughAnyExt=false)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT or G_FCONST...
AtomicOrdering
Atomic ordering for LLVM's memory model.
DWARFExpression::Operation Op
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Class which stores all the state required in a MachineIRBuilder.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.