44#include "llvm/IR/IntrinsicsAArch64.h"
52#define DEBUG_TYPE "aarch64-isel"
55using namespace MIPatternMatch;
56using namespace AArch64GISelUtils;
65#define GET_GLOBALISEL_PREDICATE_BITSET
66#include "AArch64GenGlobalISel.inc"
67#undef GET_GLOBALISEL_PREDICATE_BITSET
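// AArch64GenGlobalISel.inc is produced by TableGen from the target's GISel
// selection patterns. Each GET_GLOBALISEL_* section is pulled in exactly once:
// the PREDICATE_BITSET section above defines the bitset type that records
// which subtarget-feature predicates are available to the imported patterns,
// while the DECL/TEMPORARIES/IMPL sections included further down declare and
// define the matcher state and the generated selectImpl() match table.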
  InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);

  // Speculative load hardening forces every conditional branch to be
  // flag-setting, so remember whether the cheaper TB(N)Z/CB(N)Z forms are
  // allowed for this function.
  ProduceNonFlagSettingCondBr =
      !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
  bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
                                  MachineBasicBlock *DstMBB,
                                  MachineIRBuilder &MIB);

  bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
                                 MachineInstr &I);
  bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
                                     MachineInstr &I);
  void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
                                  unsigned Opc);
  bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,
                                      unsigned Opc);

  void SelectTable(MachineInstr &I, MachineRegisterInfo &MRI, unsigned NumVecs,
                   unsigned Opc1, unsigned Opc2, bool isExt);
  unsigned emitConstantPoolEntry(const Constant *CPVal,
                                 MachineFunction &MF) const;

  MachineInstr *emitFPCompare(Register LHS, Register RHS,
                              MachineIRBuilder &MIRBuilder,
                              std::optional<CmpInst::Predicate> =
                                  std::nullopt) const;

  MachineInstr *
  emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
            std::initializer_list<llvm::SrcOp> SrcOps,
            MachineIRBuilder &MIRBuilder,
            const ComplexRendererFns &RenderFns = std::nullopt) const;
  MachineInstr *emitAddSub(
      const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
      Register Dst, MachineOperand &LHS, MachineOperand &RHS,
      MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
                                     const RegisterBank &DstRB, LLT ScalarTy,
                                     Register VecReg, unsigned LaneIdx,
                                     MachineIRBuilder &MIRBuilder) const;
  std::pair<MachineInstr *, AArch64CC::CondCode>
  emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
                 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;

  ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
  ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
                                            unsigned Size) const;

  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }

  ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
                                          MachineRegisterInfo &MRI) const;

  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
                                           unsigned Size) const;
  template <int Width>
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }
  bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
                                     const MachineRegisterInfo &MRI,
                                     bool IsAddrOperand) const;
  ComplexRendererFns
  selectAddrModeShiftedExtendXReg(MachineOperand &Root,
                                  unsigned SizeInBytes) const;
  ComplexRendererFns selectExtendedSHL(MachineOperand &Root,
                                       MachineOperand &Base,
                                       MachineOperand &Offset,
                                       unsigned SizeInBytes,
                                       bool WantsExt) const;
  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
    return selectAddrModeXRO(Root, Width / 8);
  }

  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
    return selectAddrModeWRO(Root, Width / 8);
  }

  ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
                                           bool AllowROR = false) const;

  ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root);
  }
  ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root, true);
  }

  AArch64_AM::ShiftExtendType
  getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
                       bool IsLoadStore = false) const;

  ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
                      int OpIdx = -1) const;
  void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
                          int OpIdx = -1) const;
  void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
                          int OpIdx = -1) const;
  void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
                     int OpIdx = -1) const;
  void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
                     int OpIdx = -1) const;
  void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
                     int OpIdx = -1) const;
  void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
                                    const MachineInstr &MI,
                                    int OpIdx = -1) const;
  bool tryOptSelect(GSelect &Sel);

  bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;

  bool ProduceNonFlagSettingCondBr = false;
#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL

#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
AArch64InstructionSelector::AArch64InstructionSelector(
    const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
    const AArch64RegisterBankInfo &RBI)
    : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
      RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}
static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
                         bool GetAllRegSet = false) {
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (Ty.getSizeInBits() <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    if (Ty.getSizeInBits() == 128)
      return &AArch64::XSeqPairsClassRegClass;
    return nullptr;
  }

  if (RB.getID() == AArch64::FPRRegBankID) {
    switch (Ty.getSizeInBits()) {
    case 8:   return &AArch64::FPR8RegClass;
    case 16:  return &AArch64::FPR16RegClass;
    case 32:  return &AArch64::FPR32RegClass;
    case 64:  return &AArch64::FPR64RegClass;
    case 128: return &AArch64::FPR128RegClass;
    }
    return nullptr;
  }

  return nullptr;
}
static const TargetRegisterClass *
getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
                      bool GetAllRegSet = false) {
  if (SizeInBits.isScalable()) {
    assert(RB.getID() == AArch64::FPRRegBankID &&
           "Expected FPR regbank for scalable type size");
    return &AArch64::ZPRRegClass;
  }

  unsigned RegBankID = RB.getID();

  if (RegBankID == AArch64::GPRRegBankID) {
    if (SizeInBits <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (SizeInBits == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    if (SizeInBits == 128)
      return &AArch64::XSeqPairsClassRegClass;
    return nullptr;
  }

  if (RegBankID == AArch64::FPRRegBankID) {
    switch (SizeInBits) {
    case 8:   return &AArch64::FPR8RegClass;
    case 16:  return &AArch64::FPR16RegClass;
    case 32:  return &AArch64::FPR32RegClass;
    case 64:  return &AArch64::FPR64RegClass;
    case 128: return &AArch64::FPR128RegClass;
    }
    return nullptr;
  }

  return nullptr;
}
651 switch (
TRI.getRegSizeInBits(*RC)) {
659 if (RC != &AArch64::FPR32RegClass)
669 dbgs() <<
"Couldn't find appropriate subregister for register class.");
678 switch (RB.
getID()) {
679 case AArch64::GPRRegBankID:
681 case AArch64::FPRRegBankID:
static Register createTuple(ArrayRef<Register> Regs,
                            const unsigned RegClassIDs[],
                            const unsigned SubRegs[], MachineIRBuilder &MIB) {
  unsigned NumRegs = Regs.size();
  assert(NumRegs >= 2 && NumRegs <= 4 &&
         "Only support between two and 4 registers in a tuple!");
  const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
  auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
  auto RegSequence =
      MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
  for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
    RegSequence.addUse(Regs[I]);
    RegSequence.addImm(SubRegs[I]);
  }
  return RegSequence.getReg(0);
}

static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
  static const unsigned RegClassIDs[] = {
      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
                                     AArch64::dsub2, AArch64::dsub3};
  return createTuple(Regs, RegClassIDs, SubRegs, MIB);
}

static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
  static const unsigned RegClassIDs[] = {
      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
                                     AArch64::qsub2, AArch64::qsub3};
  return createTuple(Regs, RegClassIDs, SubRegs, MIB);
}
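// Note: createDTuple/createQTuple build the REG_SEQUENCE register tuples
// (DD..DDDD / QQ..QQQQ) that the NEON structure load/store instructions
// (LD2/LD3/LD4, ST2/ST3/ST4 and their lane variants) expect as a single
// consecutive-register operand; the subregister indices record where each
// input vector sits inside the tuple.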
742 auto &
MBB = *
MI.getParent();
744 auto &
MRI = MF.getRegInfo();
750 else if (Root.
isReg()) {
755 Immed = ValAndVReg->Value.getSExtValue();
771 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
778 for (
auto &MO :
I.operands()) {
781 LLVM_DEBUG(
dbgs() <<
"Generic inst non-reg operands are unsupported\n");
789 if (!MO.getReg().isVirtual()) {
790 LLVM_DEBUG(
dbgs() <<
"Generic inst has physical register operand\n");
800 if (PrevOpBank && OpBank != PrevOpBank) {
801 LLVM_DEBUG(
dbgs() <<
"Generic inst operands have different banks\n");
/// Pick a concrete AArch64 opcode for a generic binary operation, given the
/// register bank and operand size.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
                               unsigned OpSize) {
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    if (OpSize == 32) {
      switch (GenericOpc) {
      case TargetOpcode::G_SHL:  return AArch64::LSLVWr;
      case TargetOpcode::G_LSHR: return AArch64::LSRVWr;
      case TargetOpcode::G_ASHR: return AArch64::ASRVWr;
      default:                   return GenericOpc;
      }
    } else if (OpSize == 64) {
      switch (GenericOpc) {
      case TargetOpcode::G_PTR_ADD: return AArch64::ADDXrr;
      case TargetOpcode::G_SHL:     return AArch64::LSLVXr;
      case TargetOpcode::G_LSHR:    return AArch64::LSRVXr;
      case TargetOpcode::G_ASHR:    return AArch64::ASRVXr;
      default:                      return GenericOpc;
      }
    }
    break;
  case AArch64::FPRRegBankID:
    if (OpSize == 32) {
      switch (GenericOpc) {
      case TargetOpcode::G_FADD: return AArch64::FADDSrr;
      case TargetOpcode::G_FSUB: return AArch64::FSUBSrr;
      case TargetOpcode::G_FMUL: return AArch64::FMULSrr;
      case TargetOpcode::G_FDIV: return AArch64::FDIVSrr;
      default:                   return GenericOpc;
      }
    } else if (OpSize == 64) {
      switch (GenericOpc) {
      case TargetOpcode::G_FADD: return AArch64::FADDDrr;
      case TargetOpcode::G_FSUB: return AArch64::FSUBDrr;
      case TargetOpcode::G_FMUL: return AArch64::FMULDrr;
      case TargetOpcode::G_FDIV: return AArch64::FDIVDrr;
      case TargetOpcode::G_OR:   return AArch64::ORRv8i8;
      default:                   return GenericOpc;
      }
    }
    break;
  }
  return GenericOpc;
}

/// Pick the load/store opcode for the unsigned scaled-immediate ("ui")
/// addressing mode, given the register bank and access size in bits.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
                                    unsigned OpSize) {
  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    switch (OpSize) {
    case 8:  return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
    case 16: return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
    case 32: return isStore ? AArch64::STRWui : AArch64::LDRWui;
    case 64: return isStore ? AArch64::STRXui : AArch64::LDRXui;
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 8:   return isStore ? AArch64::STRBui : AArch64::LDRBui;
    case 16:  return isStore ? AArch64::STRHui : AArch64::LDRHui;
    case 32:  return isStore ? AArch64::STRSui : AArch64::LDRSui;
    case 64:  return isStore ? AArch64::STRDui : AArch64::LDRDui;
    case 128: return isStore ? AArch64::STRQui : AArch64::LDRQui;
    }
    break;
  }
  return GenericOpc;
}
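// The "ui" load/store forms chosen above use the unsigned, scaled 12-bit
// immediate offset addressing mode (e.g. LDRXui encodes imm12 * 8 bytes of
// offset); when an offset does not fit that shape, the unscaled or
// register-offset forms are picked later by the selectAddrMode* complex
// renderers declared in the class above.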
925 assert(SrcReg.
isValid() &&
"Expected a valid source register?");
926 assert(To &&
"Destination register class cannot be null");
933 RegOp.
setReg(SubRegCopy.getReg(0));
937 if (!
I.getOperand(0).getReg().isPhysical())
947static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
951 Register DstReg =
I.getOperand(0).getReg();
952 Register SrcReg =
I.getOperand(1).getReg();
967 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
985 if (Reg.isPhysical())
987 LLT Ty =
MRI.getType(Reg);
993 RC = getRegClassForTypeOnBank(Ty, RB);
996 dbgs() <<
"Warning: DBG_VALUE operand has unexpected size/bank\n");
1009 Register DstReg =
I.getOperand(0).getReg();
1010 Register SrcReg =
I.getOperand(1).getReg();
1029 LLVM_DEBUG(
dbgs() <<
"Couldn't determine source register class\n");
1033 const TypeSize SrcSize =
TRI.getRegSizeInBits(*SrcRC);
1034 const TypeSize DstSize =
TRI.getRegSizeInBits(*DstRC);
1045 auto Copy = MIB.
buildCopy({DstTempRC}, {SrcReg});
1047 }
else if (SrcSize > DstSize) {
1054 }
else if (DstSize > SrcSize) {
1061 Register PromoteReg =
MRI.createVirtualRegister(PromotionRC);
1063 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1068 RegOp.
setReg(PromoteReg);
1087 if (
I.getOpcode() == TargetOpcode::G_ZEXT) {
1088 I.setDesc(
TII.get(AArch64::COPY));
1089 assert(SrcRegBank.
getID() == AArch64::GPRRegBankID);
1093 I.setDesc(
TII.get(AArch64::COPY));
1108 switch (GenericOpc) {
1109 case TargetOpcode::G_SITOFP:
1110 return AArch64::SCVTFUWSri;
1111 case TargetOpcode::G_UITOFP:
1112 return AArch64::UCVTFUWSri;
1113 case TargetOpcode::G_FPTOSI:
1114 return AArch64::FCVTZSUWSr;
1115 case TargetOpcode::G_FPTOUI:
1116 return AArch64::FCVTZUUWSr;
1121 switch (GenericOpc) {
1122 case TargetOpcode::G_SITOFP:
1123 return AArch64::SCVTFUXSri;
1124 case TargetOpcode::G_UITOFP:
1125 return AArch64::UCVTFUXSri;
1126 case TargetOpcode::G_FPTOSI:
1127 return AArch64::FCVTZSUWDr;
1128 case TargetOpcode::G_FPTOUI:
1129 return AArch64::FCVTZUUWDr;
1139 switch (GenericOpc) {
1140 case TargetOpcode::G_SITOFP:
1141 return AArch64::SCVTFUWDri;
1142 case TargetOpcode::G_UITOFP:
1143 return AArch64::UCVTFUWDri;
1144 case TargetOpcode::G_FPTOSI:
1145 return AArch64::FCVTZSUXSr;
1146 case TargetOpcode::G_FPTOUI:
1147 return AArch64::FCVTZUUXSr;
1152 switch (GenericOpc) {
1153 case TargetOpcode::G_SITOFP:
1154 return AArch64::SCVTFUXDri;
1155 case TargetOpcode::G_UITOFP:
1156 return AArch64::UCVTFUXDri;
1157 case TargetOpcode::G_FPTOSI:
1158 return AArch64::FCVTZSUXDr;
1159 case TargetOpcode::G_FPTOUI:
1160 return AArch64::FCVTZUUXDr;
1179 RBI.getRegBank(True,
MRI,
TRI)->getID() &&
1180 "Expected both select operands to have the same regbank?");
1181 LLT Ty =
MRI.getType(True);
1186 "Expected 32 bit or 64 bit select only?");
1187 const bool Is32Bit =
Size == 32;
1188 if (RBI.getRegBank(True,
MRI,
TRI)->getID() != AArch64::GPRRegBankID) {
1189 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1190 auto FCSel = MIB.
buildInstr(Opc, {Dst}, {True, False}).addImm(
CC);
1196 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1198 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &
CC, &
MRI,
1213 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1230 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1249 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1265 auto TryOptSelectCst = [&Opc, &True, &False, &
CC, Is32Bit, &
MRI,
1271 if (!TrueCst && !FalseCst)
1274 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1275 if (TrueCst && FalseCst) {
1276 int64_t
T = TrueCst->Value.getSExtValue();
1277 int64_t
F = FalseCst->Value.getSExtValue();
1279 if (
T == 0 &&
F == 1) {
1281 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1287 if (
T == 0 &&
F == -1) {
1289 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1297 int64_t
T = TrueCst->Value.getSExtValue();
1300 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1309 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1318 int64_t
F = FalseCst->Value.getSExtValue();
1321 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1328 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1336 Optimized |= TryFoldBinOpIntoSelect(False, True,
false);
1337 Optimized |= TryFoldBinOpIntoSelect(True, False,
true);
1458 assert(Reg.isValid() &&
"Expected valid register!");
1459 bool HasZext =
false;
1461 unsigned Opc =
MI->getOpcode();
1463 if (!
MI->getOperand(0).isReg() ||
1464 !
MRI.hasOneNonDBGUse(
MI->getOperand(0).getReg()))
1471 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1472 Opc == TargetOpcode::G_TRUNC) {
1473 if (Opc == TargetOpcode::G_ZEXT)
1476 Register NextReg =
MI->getOperand(1).getReg();
1478 if (!NextReg.
isValid() || !
MRI.hasOneNonDBGUse(NextReg))
1487 std::optional<uint64_t>
C;
1492 case TargetOpcode::G_AND:
1493 case TargetOpcode::G_XOR: {
1494 TestReg =
MI->getOperand(1).getReg();
1495 Register ConstantReg =
MI->getOperand(2).getReg();
1506 C = VRegAndVal->Value.getZExtValue();
1508 C = VRegAndVal->Value.getSExtValue();
1512 case TargetOpcode::G_ASHR:
1513 case TargetOpcode::G_LSHR:
1514 case TargetOpcode::G_SHL: {
1515 TestReg =
MI->getOperand(1).getReg();
1519 C = VRegAndVal->Value.getSExtValue();
1531 unsigned TestRegSize =
MRI.getType(TestReg).getSizeInBits();
1535 case TargetOpcode::G_AND:
1537 if ((*
C >> Bit) & 1)
1540 case TargetOpcode::G_SHL:
1543 if (*
C <= Bit && (Bit - *
C) < TestRegSize) {
1548 case TargetOpcode::G_ASHR:
1553 if (Bit >= TestRegSize)
1554 Bit = TestRegSize - 1;
1556 case TargetOpcode::G_LSHR:
1558 if ((Bit + *
C) < TestRegSize) {
1563 case TargetOpcode::G_XOR:
1572 if ((*
C >> Bit) & 1)
MachineInstr *AArch64InstructionSelector::emitTestBit(
    Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
    MachineIRBuilder &MIB) const {
  assert(ProduceNonFlagSettingCondBr &&
         "Cannot emit TB(N)Z with speculation tracking!");
  MachineRegisterInfo &MRI = *MIB.getMRI();
  LLT Ty = MRI.getType(TestReg);
  unsigned Size = Ty.getSizeInBits();
  assert(Bit < 64 && "Bit is too large!");

  // TB(N)ZW only has access to the low 32 bits, so pick the W form when the
  // tested bit lies in them and move the register to the matching class.
  bool UseWReg = Bit < 32;
  unsigned NecessarySize = UseWReg ? 32 : 64;
  if (Size != NecessarySize)
    TestReg = moveScalarRegClass(
        TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
        MIB);

  static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
                                          {AArch64::TBZW, AArch64::TBNZW}};
  unsigned Opc = OpcTable[UseWReg][IsNegative];
  auto TestBitMI =
      MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
  constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
  return &*TestBitMI;
}
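// TBZ/TBNZ encode the tested bit number directly in the instruction (0-31 for
// the W form, 0-63 for the X form), so no separate AND or compare is needed;
// that is what makes the getTestBitReg/tryOptAndIntoCompareBranch folds around
// this helper profitable.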
bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
    MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
    MachineIRBuilder &MIB) {
  assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");

  int32_t Bit = MaybeBit->Value.exactLogBase2();

  emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);

MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
                                                  bool IsNegative,
                                                  MachineBasicBlock *DestMBB,
                                                  MachineIRBuilder &MIB) const {
  assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
  MachineRegisterInfo &MRI = *MIB.getMRI();
  assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
             AArch64::GPRRegBankID &&
         "Expected GPRs only?");
  auto Ty = MRI.getType(CompareReg);
  unsigned Width = Ty.getSizeInBits();
  assert(Width <= 64 && "Expected width to be at most 64?");
  static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
                                          {AArch64::CBNZW, AArch64::CBNZX}};
  unsigned Opc = OpcTable[IsNegative][Width == 64];
  auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
  constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
  return &*BranchMI;
}
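// CBZ/CBNZ compare the whole register against zero and branch, so unlike
// TB(N)Z no bit position is encoded; the [IsNegative][Width == 64] indexing
// above picks between the W/X and Z/NZ variants with a single table lookup.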
1682bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1685 assert(
I.getOpcode() == TargetOpcode::G_BRCOND);
1697 I.eraseFromParent();
1701bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1704 assert(
I.getOpcode() == TargetOpcode::G_BRCOND);
1710 if (!ProduceNonFlagSettingCondBr)
1729 if (VRegAndVal && !AndInst) {
1730 int64_t
C = VRegAndVal->Value.getSExtValue();
1736 emitTestBit(LHS, Bit,
false, DestMBB, MIB);
1737 I.eraseFromParent();
1745 emitTestBit(LHS, Bit,
true, DestMBB, MIB);
1746 I.eraseFromParent();
1754 emitTestBit(LHS, Bit,
false, DestMBB, MIB);
1755 I.eraseFromParent();
1769 if (VRegAndVal && VRegAndVal->Value == 0) {
1777 tryOptAndIntoCompareBranch(
1779 I.eraseFromParent();
1784 auto LHSTy =
MRI.getType(LHS);
1785 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1787 I.eraseFromParent();
1796bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1799 assert(
I.getOpcode() == TargetOpcode::G_BRCOND);
1800 if (tryOptCompareBranchFedByICmp(
I, ICmp, MIB))
1810 I.eraseFromParent();
1814bool AArch64InstructionSelector::selectCompareBranch(
1816 Register CondReg =
I.getOperand(0).getReg();
1821 if (CCMIOpc == TargetOpcode::G_FCMP)
1822 return selectCompareBranchFedByFCmp(
I, *CCMI, MIB);
1823 if (CCMIOpc == TargetOpcode::G_ICMP)
1824 return selectCompareBranchFedByICmp(
I, *CCMI, MIB);
1829 if (ProduceNonFlagSettingCondBr) {
1830 emitTestBit(CondReg, 0,
true,
1831 I.getOperand(1).getMBB(), MIB);
1832 I.eraseFromParent();
1842 .
addMBB(
I.getOperand(1).getMBB());
1843 I.eraseFromParent();
1851 assert(
MRI.getType(Reg).isVector() &&
"Expected a *vector* shift operand");
1862 return std::nullopt;
1864 int64_t Imm = *ShiftImm;
1866 return std::nullopt;
1870 return std::nullopt;
1873 return std::nullopt;
1877 return std::nullopt;
1881 return std::nullopt;
1885 return std::nullopt;
bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
                                                 MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_SHL);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  // A constant-splat shift amount can use the immediate SHL form; otherwise
  // fall back to the register form, USHL.
  std::optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);

  unsigned Opc = 0;
  if (Ty == LLT::fixed_vector(2, 64))
    Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
  else if (Ty == LLT::fixed_vector(4, 32))
    Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
  else if (Ty == LLT::fixed_vector(2, 32))
    Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
  else if (Ty == LLT::fixed_vector(4, 16))
    Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
  else if (Ty == LLT::fixed_vector(8, 16))
    Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
  else if (Ty == LLT::fixed_vector(16, 8))
    Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
  else if (Ty == LLT::fixed_vector(8, 8))
    Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
  else
    return false;

  auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
  if (ImmVal)
    Shl.addImm(*ImmVal);
  else
    Shl.addUse(Src2Reg);
  constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}
1936bool AArch64InstructionSelector::selectVectorAshrLshr(
1938 assert(
I.getOpcode() == TargetOpcode::G_ASHR ||
1939 I.getOpcode() == TargetOpcode::G_LSHR);
1940 Register DstReg =
I.getOperand(0).getReg();
1941 const LLT Ty =
MRI.getType(DstReg);
1942 Register Src1Reg =
I.getOperand(1).getReg();
1943 Register Src2Reg =
I.getOperand(2).getReg();
1948 bool IsASHR =
I.getOpcode() == TargetOpcode::G_ASHR;
1958 unsigned NegOpc = 0;
1960 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
1962 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1963 NegOpc = AArch64::NEGv2i64;
1965 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1966 NegOpc = AArch64::NEGv4i32;
1968 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1969 NegOpc = AArch64::NEGv2i32;
1971 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1972 NegOpc = AArch64::NEGv4i16;
1974 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1975 NegOpc = AArch64::NEGv8i16;
1977 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1978 NegOpc = AArch64::NEGv16i8;
1980 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1981 NegOpc = AArch64::NEGv8i8;
1987 auto Neg = MIB.
buildInstr(NegOpc, {RC}, {Src2Reg});
1989 auto SShl = MIB.
buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1991 I.eraseFromParent();
1995bool AArch64InstructionSelector::selectVaStartAAPCS(
2000bool AArch64InstructionSelector::selectVaStartDarwin(
2003 Register ListReg =
I.getOperand(0).getReg();
2005 Register ArgsAddrReg =
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2016 BuildMI(*
I.getParent(),
I,
I.getDebugLoc(),
TII.get(AArch64::ADDXri))
2024 MIB =
BuildMI(*
I.getParent(),
I,
I.getDebugLoc(),
TII.get(AArch64::STRXui))
2031 I.eraseFromParent();
2035void AArch64InstructionSelector::materializeLargeCMVal(
2041 auto MovZ = MIB.
buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
2052 :
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2054 if (
auto *GV = dyn_cast<GlobalValue>(V)) {
2056 GV, MovZ->getOperand(1).getOffset(), Flags));
2060 MovZ->getOperand(1).getOffset(), Flags));
2066 Register DstReg = BuildMovK(MovZ.getReg(0),
2072bool AArch64InstructionSelector::preISelLower(
MachineInstr &
I) {
2077 switch (
I.getOpcode()) {
2078 case TargetOpcode::G_STORE: {
2079 bool Changed = contractCrossBankCopyIntoStore(
I,
MRI);
2087 SrcOp.setReg(NewSrc);
2088 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass,
MRI);
2093 case TargetOpcode::G_PTR_ADD:
2094 return convertPtrAddToAdd(
I,
MRI);
2095 case TargetOpcode::G_LOAD: {
2100 Register DstReg =
I.getOperand(0).getReg();
2101 const LLT DstTy =
MRI.getType(DstReg);
2107 case AArch64::G_DUP: {
2109 LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
2113 MRI.setType(
I.getOperand(0).getReg(),
2115 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2116 I.getOperand(1).setReg(NewSrc.getReg(0));
2119 case TargetOpcode::G_UITOFP:
2120 case TargetOpcode::G_SITOFP: {
2125 Register SrcReg =
I.getOperand(1).getReg();
2126 LLT SrcTy =
MRI.getType(SrcReg);
2127 LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
2131 if (RBI.getRegBank(SrcReg,
MRI,
TRI)->getID() == AArch64::FPRRegBankID) {
2132 if (
I.getOpcode() == TargetOpcode::G_SITOFP)
2133 I.setDesc(
TII.get(AArch64::G_SITOF));
2135 I.setDesc(
TII.get(AArch64::G_UITOF));
bool AArch64InstructionSelector::convertPtrAddToAdd(MachineInstr &I,
                                                    MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
  Register DstReg = I.getOperand(0).getReg();
  Register AddOp1Reg = I.getOperand(1).getReg();
  const LLT PtrTy = MRI.getType(DstReg);

  const LLT CastPtrTy =
      PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
  auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
  // Vector pointers live on the FPR bank, scalar pointers on the GPR bank.
  if (PtrTy.isVector())
    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
  else
    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));

  // Rewrite the G_PTR_ADD into a plain integer G_ADD on the casted base.
  I.setDesc(TII.get(TargetOpcode::G_ADD));
  MRI.setType(DstReg, CastPtrTy);
  I.getOperand(1).setReg(PtrToInt.getReg(0));
  if (!select(*PtrToInt)) {
    LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
    return false;
  }

  // If the offset is a negation, fold it away by turning the add into a sub.
  Register NegatedReg;
  if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
    return true;
  I.getOperand(2).setReg(NegatedReg);
  I.setDesc(TII.get(TargetOpcode::G_SUB));
  return true;
}
2191bool AArch64InstructionSelector::earlySelectSHL(
MachineInstr &
I,
2196 assert(
I.getOpcode() == TargetOpcode::G_SHL &&
"unexpected op");
2197 const auto &MO =
I.getOperand(2);
2202 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
2206 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2207 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2209 if (!Imm1Fn || !Imm2Fn)
2213 MIB.
buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2216 for (
auto &RenderFn : *Imm1Fn)
2218 for (
auto &RenderFn : *Imm2Fn)
2221 I.eraseFromParent();
2225bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2227 assert(
I.getOpcode() == TargetOpcode::G_STORE &&
"Expected G_STORE");
2245 LLT DefDstTy =
MRI.getType(DefDstReg);
2246 Register StoreSrcReg =
I.getOperand(0).getReg();
2247 LLT StoreSrcTy =
MRI.getType(StoreSrcReg);
2258 if (RBI.getRegBank(StoreSrcReg,
MRI,
TRI) ==
2259 RBI.getRegBank(DefDstReg,
MRI,
TRI))
2263 I.getOperand(0).setReg(DefDstReg);
2267bool AArch64InstructionSelector::earlySelect(
MachineInstr &
I) {
2268 assert(
I.getParent() &&
"Instruction should be in a basic block!");
2269 assert(
I.getParent()->getParent() &&
"Instruction should be in a function!");
2275 switch (
I.getOpcode()) {
2276 case AArch64::G_DUP: {
2279 Register Src =
I.getOperand(1).getReg();
2284 Register Dst =
I.getOperand(0).getReg();
2286 MRI.getType(Dst).getNumElements(),
2289 ValAndVReg->Value.trunc(
MRI.getType(Dst).getScalarSizeInBits())));
2290 if (!emitConstantVector(Dst, CV, MIB,
MRI))
2292 I.eraseFromParent();
2295 case TargetOpcode::G_SEXT:
2298 if (selectUSMovFromExtend(
I,
MRI))
2301 case TargetOpcode::G_BR:
2303 case TargetOpcode::G_SHL:
2304 return earlySelectSHL(
I,
MRI);
2305 case TargetOpcode::G_CONSTANT: {
2306 bool IsZero =
false;
2307 if (
I.getOperand(1).isCImm())
2308 IsZero =
I.getOperand(1).getCImm()->isZero();
2309 else if (
I.getOperand(1).isImm())
2310 IsZero =
I.getOperand(1).getImm() == 0;
2315 Register DefReg =
I.getOperand(0).getReg();
2316 LLT Ty =
MRI.getType(DefReg);
2318 I.getOperand(1).ChangeToRegister(AArch64::XZR,
false);
2319 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
MRI);
2321 I.getOperand(1).ChangeToRegister(AArch64::WZR,
false);
2322 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass,
MRI);
2326 I.setDesc(
TII.get(TargetOpcode::COPY));
2330 case TargetOpcode::G_ADD: {
2339 Register AddDst =
I.getOperand(0).getReg();
2340 Register AddLHS =
I.getOperand(1).getReg();
2341 Register AddRHS =
I.getOperand(2).getReg();
2343 LLT Ty =
MRI.getType(AddLHS);
2352 if (!
MRI.hasOneNonDBGUse(Reg))
2366 MRI.getType(
Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2376 Cmp = MatchCmp(AddRHS);
2380 auto &PredOp =
Cmp->getOperand(1);
2385 emitIntegerCompare(
Cmp->getOperand(2),
2386 Cmp->getOperand(3), PredOp, MIB);
2387 emitCSINC(AddDst, AddLHS, AddLHS, InvCC, MIB);
2388 I.eraseFromParent();
2391 case TargetOpcode::G_OR: {
2395 Register Dst =
I.getOperand(0).getReg();
2396 LLT Ty =
MRI.getType(Dst);
2415 if (ShiftImm >
Size || ((1ULL << ShiftImm) - 1ULL) !=
uint64_t(MaskImm))
2418 int64_t Immr =
Size - ShiftImm;
2419 int64_t Imms =
Size - ShiftImm - 1;
2420 unsigned Opc =
Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2421 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2422 I.eraseFromParent();
2425 case TargetOpcode::G_FENCE: {
2426 if (
I.getOperand(1).getImm() == 0)
2430 .
addImm(
I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
2431 I.eraseFromParent();
2440 assert(
I.getParent() &&
"Instruction should be in a basic block!");
2441 assert(
I.getParent()->getParent() &&
"Instruction should be in a function!");
2448 if (Subtarget->requiresStrictAlign()) {
2450 LLVM_DEBUG(
dbgs() <<
"AArch64 GISel does not support strict-align yet\n");
2456 unsigned Opcode =
I.getOpcode();
2458 if (!
I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2461 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2464 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2465 const Register DefReg =
I.getOperand(0).getReg();
2466 const LLT DefTy =
MRI.getType(DefReg);
2469 MRI.getRegClassOrRegBank(DefReg);
2479 DefRC = getRegClassForTypeOnBank(DefTy, RB);
2486 I.setDesc(
TII.get(TargetOpcode::PHI));
2488 return RBI.constrainGenericRegister(DefReg, *DefRC,
MRI);
2494 if (
I.isDebugInstr())
2501 if (
I.getNumOperands() !=
I.getNumExplicitOperands()) {
2503 dbgs() <<
"Generic instruction has unexpected implicit operands\n");
2510 if (preISelLower(
I)) {
2511 Opcode =
I.getOpcode();
2522 if (selectImpl(
I, *CoverageInfo))
2526 I.getOperand(0).isReg() ?
MRI.getType(
I.getOperand(0).getReg()) :
LLT{};
  case TargetOpcode::G_SBFX:
  case TargetOpcode::G_UBFX: {
    static const unsigned OpcTable[2][2] = {
        {AArch64::UBFMWri, AArch64::UBFMXri},
        {AArch64::SBFMWri, AArch64::SBFMXri}};
    bool IsSigned = Opcode == TargetOpcode::G_SBFX;
    unsigned Size = Ty.getSizeInBits();
    unsigned Opc = OpcTable[IsSigned][Size == 64];
    auto Cst1 =
        getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
    assert(Cst1 && "Should have gotten a constant for src 1?");
    auto Cst2 =
        getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
    assert(Cst2 && "Should have gotten a constant for src 2?");
    auto LSB = Cst1->Value.getZExtValue();
    auto Width = Cst2->Value.getZExtValue();
    auto BitfieldInst =
        MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
            .addImm(LSB)
            .addImm(LSB + Width - 1);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
  }
2552 case TargetOpcode::G_BRCOND:
2553 return selectCompareBranch(
I, MF,
MRI);
2555 case TargetOpcode::G_BRINDIRECT: {
2557 if (std::optional<uint16_t> BADisc =
2558 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(Fn)) {
2562 MI.addReg(AArch64::XZR);
2563 I.eraseFromParent();
2566 I.setDesc(
TII.get(AArch64::BR));
2570 case TargetOpcode::G_BRJT:
2571 return selectBrJT(
I,
MRI);
2573 case AArch64::G_ADD_LOW: {
2579 if (BaseMI->
getOpcode() != AArch64::ADRP) {
2580 I.setDesc(
TII.get(AArch64::ADDXri));
2585 "Expected small code model");
2587 auto Op2 =
I.getOperand(2);
2588 auto MovAddr = MIB.
buildInstr(AArch64::MOVaddr, {
I.getOperand(0)}, {})
2589 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2590 Op1.getTargetFlags())
2592 Op2.getTargetFlags());
2593 I.eraseFromParent();
2597 case TargetOpcode::G_FCONSTANT:
2598 case TargetOpcode::G_CONSTANT: {
2599 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2608 const Register DefReg =
I.getOperand(0).getReg();
2609 const LLT DefTy =
MRI.getType(DefReg);
2615 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2617 <<
" constant, expected: " << s16 <<
" or " << s32
2618 <<
" or " << s64 <<
" or " << s128 <<
'\n');
2622 if (RB.
getID() != AArch64::FPRRegBankID) {
2624 <<
" constant on bank: " << RB
2625 <<
", expected: FPR\n");
2633 if (DefSize != 128 &&
I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2637 if (Ty != p0 && Ty != s8 && Ty != s16) {
2639 <<
" constant, expected: " << s32 <<
", " << s64
2640 <<
", or " << p0 <<
'\n');
2644 if (RB.
getID() != AArch64::GPRRegBankID) {
2646 <<
" constant on bank: " << RB
2647 <<
", expected: GPR\n");
2664 if (TLI->isFPImmLegal(
I.getOperand(1).getFPImm()->getValueAPF(),
2671 auto *FPImm =
I.getOperand(1).getFPImm();
2674 LLVM_DEBUG(
dbgs() <<
"Failed to load double constant pool entry\n");
2678 I.eraseFromParent();
2679 return RBI.constrainGenericRegister(DefReg, FPRRC,
MRI);
2683 assert((DefSize == 32 || DefSize == 64) &&
"Unexpected const def size");
2685 const Register DefGPRReg =
MRI.createVirtualRegister(
2686 DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2692 if (!RBI.constrainGenericRegister(DefReg, FPRRC,
MRI)) {
2693 LLVM_DEBUG(
dbgs() <<
"Failed to constrain G_FCONSTANT def operand\n");
2701 }
else if (
I.getOperand(1).isCImm()) {
2702 uint64_t Val =
I.getOperand(1).getCImm()->getZExtValue();
2703 I.getOperand(1).ChangeToImmediate(Val);
2704 }
else if (
I.getOperand(1).isImm()) {
2705 uint64_t Val =
I.getOperand(1).getImm();
2706 I.getOperand(1).ChangeToImmediate(Val);
2709 const unsigned MovOpc =
2710 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2711 I.setDesc(
TII.get(MovOpc));
2715 case TargetOpcode::G_EXTRACT: {
2716 Register DstReg =
I.getOperand(0).getReg();
2717 Register SrcReg =
I.getOperand(1).getReg();
2718 LLT SrcTy =
MRI.getType(SrcReg);
2719 LLT DstTy =
MRI.getType(DstReg);
2731 unsigned Offset =
I.getOperand(2).getImm();
2740 if (SrcRB.
getID() == AArch64::GPRRegBankID) {
2742 MIB.
buildInstr(TargetOpcode::COPY, {DstReg}, {})
2744 Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2746 AArch64::GPR64RegClass, NewI->getOperand(0));
2747 I.eraseFromParent();
2753 unsigned LaneIdx =
Offset / 64;
2755 DstReg, DstRB,
LLT::scalar(64), SrcReg, LaneIdx, MIB);
2758 I.eraseFromParent();
2762 I.setDesc(
TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2768 "unexpected G_EXTRACT types");
2775 .addReg(DstReg, 0, AArch64::sub_32);
2776 RBI.constrainGenericRegister(
I.getOperand(0).getReg(),
2777 AArch64::GPR32RegClass,
MRI);
2778 I.getOperand(0).setReg(DstReg);
2783 case TargetOpcode::G_INSERT: {
2784 LLT SrcTy =
MRI.getType(
I.getOperand(2).getReg());
2785 LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
2792 I.setDesc(
TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2793 unsigned LSB =
I.getOperand(3).getImm();
2794 unsigned Width =
MRI.getType(
I.getOperand(2).getReg()).getSizeInBits();
2795 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2800 "unexpected G_INSERT types");
2806 TII.get(AArch64::SUBREG_TO_REG))
2809 .
addUse(
I.getOperand(2).getReg())
2810 .
addImm(AArch64::sub_32);
2811 RBI.constrainGenericRegister(
I.getOperand(2).getReg(),
2812 AArch64::GPR32RegClass,
MRI);
2813 I.getOperand(2).setReg(SrcReg);
2817 case TargetOpcode::G_FRAME_INDEX: {
2824 I.setDesc(
TII.get(AArch64::ADDXri));
2833 case TargetOpcode::G_GLOBAL_VALUE: {
2836 if (
I.getOperand(1).isSymbol()) {
2837 OpFlags =
I.getOperand(1).getTargetFlags();
2841 GV =
I.getOperand(1).getGlobal();
2843 return selectTLSGlobalValue(
I,
MRI);
2844 OpFlags = STI.ClassifyGlobalReference(GV, TM);
2848 I.setDesc(
TII.get(AArch64::LOADgot));
2849 I.getOperand(1).setTargetFlags(OpFlags);
2851 !
TM.isPositionIndependent()) {
2853 materializeLargeCMVal(
I, GV, OpFlags);
2854 I.eraseFromParent();
2857 I.setDesc(
TII.get(AArch64::ADR));
2858 I.getOperand(1).setTargetFlags(OpFlags);
2860 I.setDesc(
TII.get(AArch64::MOVaddr));
2863 MIB.addGlobalAddress(GV,
I.getOperand(1).getOffset(),
2869 case TargetOpcode::G_PTRAUTH_GLOBAL_VALUE:
2870 return selectPtrAuthGlobalValue(
I,
MRI);
2872 case TargetOpcode::G_ZEXTLOAD:
2873 case TargetOpcode::G_LOAD:
2874 case TargetOpcode::G_STORE: {
2876 bool IsZExtLoad =
I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
    if (Order != AtomicOrdering::NotAtomic &&
        Order != AtomicOrdering::Unordered &&
        Order != AtomicOrdering::Monotonic) {
      assert(!isa<GZExtLoad>(LdSt));
      assert(MemSizeInBytes <= 8 &&
             "128-bit atomics should already be custom-legalized");

      if (isa<GLoad>(LdSt)) {
        static constexpr unsigned LDAPROpcodes[] = {
            AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
        static constexpr unsigned LDAROpcodes[] = {
            AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
        // Prefer LDAPR (RCpc) when available: it still gives acquire
        // semantics but with weaker ordering than LDAR, except for seq_cst.
        ArrayRef<unsigned> Opcodes =
            STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
                ? LDAPROpcodes
                : LDAROpcodes;
        I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
      } else {
        static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
                                               AArch64::STLRW, AArch64::STLRX};
        if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
          // The store value is wider than the memory access: copy out the low
          // 32 bits so the release store sees a W register.
          Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
          MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
              .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
          I.getOperand(0).setReg(NewVal);
        }
        I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
      }
2929 "Load/Store pointer operand isn't a GPR");
2930 assert(
MRI.getType(PtrReg).isPointer() &&
2931 "Load/Store pointer operand isn't a pointer");
2935 const LLT ValTy =
MRI.getType(ValReg);
2940 if (isa<GStore>(LdSt) && ValTy.
getSizeInBits() > MemSizeInBits) {
2943 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2949 .addReg(ValReg, 0,
SubReg)
2951 RBI.constrainGenericRegister(Copy, *RC,
MRI);
2953 }
else if (isa<GLoad>(LdSt) && ValTy.
getSizeInBits() > MemSizeInBits) {
2956 if (RB.
getID() == AArch64::FPRRegBankID) {
2959 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2966 MRI.setRegBank(NewDst, RB);
2969 MIB.
buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
2973 auto SubRegRC = getRegClassForTypeOnBank(
MRI.getType(OldDst), RB);
2974 RBI.constrainGenericRegister(OldDst, *SubRegRC,
MRI);
2981 auto SelectLoadStoreAddressingMode = [&]() ->
MachineInstr * {
2982 bool IsStore = isa<GStore>(
I);
2983 const unsigned NewOpc =
2985 if (NewOpc ==
I.getOpcode())
2989 selectAddrModeIndexed(
I.getOperand(1), MemSizeInBytes);
2992 I.setDesc(
TII.get(NewOpc));
2998 auto NewInst = MIB.
buildInstr(NewOpc, {}, {},
I.getFlags());
2999 Register CurValReg =
I.getOperand(0).getReg();
3000 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
3001 NewInst.cloneMemRefs(
I);
3002 for (
auto &Fn : *AddrModeFns)
3004 I.eraseFromParent();
3013 if (Opcode == TargetOpcode::G_STORE) {
3016 if (CVal && CVal->Value == 0) {
3018 case AArch64::STRWui:
3019 case AArch64::STRHHui:
3020 case AArch64::STRBBui:
3021 LoadStore->getOperand(0).setReg(AArch64::WZR);
3023 case AArch64::STRXui:
3024 LoadStore->getOperand(0).setReg(AArch64::XZR);
3030 if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
3031 ValTy ==
LLT::scalar(64) && MemSizeInBits == 32)) {
3034 if (
MRI.getType(
LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
3038 Register LdReg =
MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3043 MIB.
buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
3046 .
addImm(AArch64::sub_32);
3048 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
3054 case TargetOpcode::G_INDEXED_ZEXTLOAD:
3055 case TargetOpcode::G_INDEXED_SEXTLOAD:
3056 return selectIndexedExtLoad(
I,
MRI);
3057 case TargetOpcode::G_INDEXED_LOAD:
3058 return selectIndexedLoad(
I,
MRI);
3059 case TargetOpcode::G_INDEXED_STORE:
3060 return selectIndexedStore(cast<GIndexedStore>(
I),
MRI);
3062 case TargetOpcode::G_LSHR:
3063 case TargetOpcode::G_ASHR:
3064 if (
MRI.getType(
I.getOperand(0).getReg()).isVector())
3065 return selectVectorAshrLshr(
I,
MRI);
3067 case TargetOpcode::G_SHL:
3068 if (Opcode == TargetOpcode::G_SHL &&
3069 MRI.getType(
I.getOperand(0).getReg()).isVector())
3070 return selectVectorSHL(
I,
MRI);
3077 Register SrcReg =
I.getOperand(1).getReg();
3078 Register ShiftReg =
I.getOperand(2).getReg();
3079 const LLT ShiftTy =
MRI.getType(ShiftReg);
3080 const LLT SrcTy =
MRI.getType(SrcReg);
3085 auto Trunc = MIB.
buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3086 .addReg(ShiftReg, 0, AArch64::sub_32);
3087 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3088 I.getOperand(2).setReg(Trunc.getReg(0));
3092 case TargetOpcode::G_OR: {
3099 const Register DefReg =
I.getOperand(0).getReg();
3103 if (NewOpc ==
I.getOpcode())
3106 I.setDesc(
TII.get(NewOpc));
3114 case TargetOpcode::G_PTR_ADD: {
3115 emitADD(
I.getOperand(0).getReg(),
I.getOperand(1),
I.getOperand(2), MIB);
3116 I.eraseFromParent();
3120 case TargetOpcode::G_SADDE:
3121 case TargetOpcode::G_UADDE:
3122 case TargetOpcode::G_SSUBE:
3123 case TargetOpcode::G_USUBE:
3124 case TargetOpcode::G_SADDO:
3125 case TargetOpcode::G_UADDO:
3126 case TargetOpcode::G_SSUBO:
3127 case TargetOpcode::G_USUBO:
3128 return selectOverflowOp(
I,
MRI);
3130 case TargetOpcode::G_PTRMASK: {
3131 Register MaskReg =
I.getOperand(2).getReg();
3138 I.setDesc(
TII.get(AArch64::ANDXri));
3139 I.getOperand(2).ChangeToImmediate(
3144 case TargetOpcode::G_PTRTOINT:
3145 case TargetOpcode::G_TRUNC: {
3146 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
3147 const LLT SrcTy =
MRI.getType(
I.getOperand(1).getReg());
3149 const Register DstReg =
I.getOperand(0).getReg();
3150 const Register SrcReg =
I.getOperand(1).getReg();
3157 dbgs() <<
"G_TRUNC/G_PTRTOINT input/output on different banks\n");
3161 if (DstRB.
getID() == AArch64::GPRRegBankID) {
3170 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC,
MRI) ||
3171 !RBI.constrainGenericRegister(DstReg, *DstRC,
MRI)) {
3172 LLVM_DEBUG(
dbgs() <<
"Failed to constrain G_TRUNC/G_PTRTOINT\n");
3176 if (DstRC == SrcRC) {
3178 }
else if (Opcode == TargetOpcode::G_TRUNC && DstTy ==
LLT::scalar(32) &&
3182 }
else if (DstRC == &AArch64::GPR32RegClass &&
3183 SrcRC == &AArch64::GPR64RegClass) {
3184 I.getOperand(1).setSubReg(AArch64::sub_32);
3187 dbgs() <<
"Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3191 I.setDesc(
TII.get(TargetOpcode::COPY));
3193 }
else if (DstRB.
getID() == AArch64::FPRRegBankID) {
3196 I.setDesc(
TII.get(AArch64::XTNv4i16));
3206 I.eraseFromParent();
3211 if (Opcode == TargetOpcode::G_PTRTOINT) {
3212 assert(DstTy.
isVector() &&
"Expected an FPR ptrtoint to be a vector");
3213 I.setDesc(
TII.get(TargetOpcode::COPY));
3221 case TargetOpcode::G_ANYEXT: {
3222 if (selectUSMovFromExtend(
I,
MRI))
3225 const Register DstReg =
I.getOperand(0).getReg();
3226 const Register SrcReg =
I.getOperand(1).getReg();
3229 if (RBDst.
getID() != AArch64::GPRRegBankID) {
3231 <<
", expected: GPR\n");
3236 if (RBSrc.
getID() != AArch64::GPRRegBankID) {
3238 <<
", expected: GPR\n");
3242 const unsigned DstSize =
MRI.getType(DstReg).getSizeInBits();
3245 LLVM_DEBUG(
dbgs() <<
"G_ANYEXT operand has no size, not a gvreg?\n");
3249 if (DstSize != 64 && DstSize > 32) {
3251 <<
", expected: 32 or 64\n");
3257 Register ExtSrc =
MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3262 .
addImm(AArch64::sub_32);
3263 I.getOperand(1).setReg(ExtSrc);
3268 case TargetOpcode::G_ZEXT:
3269 case TargetOpcode::G_SEXT_INREG:
3270 case TargetOpcode::G_SEXT: {
3271 if (selectUSMovFromExtend(
I,
MRI))
3274 unsigned Opcode =
I.getOpcode();
3275 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3276 const Register DefReg =
I.getOperand(0).getReg();
3277 Register SrcReg =
I.getOperand(1).getReg();
3278 const LLT DstTy =
MRI.getType(DefReg);
3279 const LLT SrcTy =
MRI.getType(SrcReg);
3285 if (Opcode == TargetOpcode::G_SEXT_INREG)
3286 SrcSize =
I.getOperand(2).getImm();
3292 AArch64::GPRRegBankID &&
3293 "Unexpected ext regbank");
3306 RBI.getRegBank(SrcReg,
MRI,
TRI)->getID() == AArch64::GPRRegBankID;
3307 if (LoadMI && IsGPR) {
3309 unsigned BytesLoaded =
MemOp->getSize().getValue();
3316 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3318 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3319 const Register ZReg = AArch64::WZR;
3320 MIB.
buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
3323 MIB.
buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3326 .
addImm(AArch64::sub_32);
3328 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3330 LLVM_DEBUG(
dbgs() <<
"Failed to constrain G_ZEXT destination\n");
3334 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3340 I.eraseFromParent();
3345 if (DstSize == 64) {
3346 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3348 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3354 SrcReg = MIB.
buildInstr(AArch64::SUBREG_TO_REG,
3355 {&AArch64::GPR64RegClass}, {})
3362 ExtI = MIB.
buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3366 }
else if (DstSize <= 32) {
3367 ExtI = MIB.
buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3376 I.eraseFromParent();
3380 case TargetOpcode::G_SITOFP:
3381 case TargetOpcode::G_UITOFP:
3382 case TargetOpcode::G_FPTOSI:
3383 case TargetOpcode::G_FPTOUI: {
3384 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg()),
3385 SrcTy =
MRI.getType(
I.getOperand(1).getReg());
3387 if (NewOpc == Opcode)
3390 I.setDesc(
TII.get(NewOpc));
3397 case TargetOpcode::G_FREEZE:
3400 case TargetOpcode::G_INTTOPTR:
3405 case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_SELECT: {
    auto &Sel = cast<GSelect>(I);
    const Register CondReg = Sel.getCondReg();
    const Register TReg = Sel.getTrueReg();
    const Register FReg = Sel.getFalseReg();

    if (tryOptSelect(Sel))
      return true;

    // Use a fresh vreg for the ANDS result instead of WZR so later peepholes
    // can still optimize the flag-setting test.
    Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
    auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
                     .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
    constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
    if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
      return false;
    Sel.eraseFromParent();
    return true;
  }
3433 case TargetOpcode::G_ICMP: {
3446 emitIntegerCompare(
I.getOperand(2),
I.getOperand(3),
I.getOperand(1), MIB);
3447 emitCSINC(
I.getOperand(0).getReg(), AArch64::WZR,
3448 AArch64::WZR, InvCC, MIB);
3449 I.eraseFromParent();
3453 case TargetOpcode::G_FCMP: {
3456 if (!emitFPCompare(
I.getOperand(2).getReg(),
I.getOperand(3).getReg(), MIB,
3458 !emitCSetForFCmp(
I.getOperand(0).getReg(), Pred, MIB))
3460 I.eraseFromParent();
3463 case TargetOpcode::G_VASTART:
3464 return STI.isTargetDarwin() ? selectVaStartDarwin(
I, MF,
MRI)
3465 : selectVaStartAAPCS(
I, MF,
MRI);
3466 case TargetOpcode::G_INTRINSIC:
3467 return selectIntrinsic(
I,
MRI);
3468 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3469 return selectIntrinsicWithSideEffects(
I,
MRI);
3470 case TargetOpcode::G_IMPLICIT_DEF: {
3471 I.setDesc(
TII.get(TargetOpcode::IMPLICIT_DEF));
3472 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
3473 const Register DstReg =
I.getOperand(0).getReg();
3476 RBI.constrainGenericRegister(DstReg, *DstRC,
MRI);
3479 case TargetOpcode::G_BLOCK_ADDR: {
3480 Function *BAFn =
I.getOperand(1).getBlockAddress()->getFunction();
3481 if (std::optional<uint16_t> BADisc =
3482 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(*BAFn)) {
3483 MIB.
buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
3484 MIB.
buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
3492 RBI.constrainGenericRegister(
I.getOperand(0).getReg(),
3493 AArch64::GPR64RegClass,
MRI);
3494 I.eraseFromParent();
3498 materializeLargeCMVal(
I,
I.getOperand(1).getBlockAddress(), 0);
3499 I.eraseFromParent();
3502 I.setDesc(
TII.get(AArch64::MOVaddrBA));
3503 auto MovMI =
BuildMI(
MBB,
I,
I.getDebugLoc(),
TII.get(AArch64::MOVaddrBA),
3504 I.getOperand(0).getReg())
3508 I.getOperand(1).getBlockAddress(), 0,
3510 I.eraseFromParent();
3514 case AArch64::G_DUP: {
3520 if (RBI.getRegBank(
I.getOperand(1).getReg(),
MRI,
TRI)->getID() !=
3521 AArch64::GPRRegBankID)
3523 LLT VecTy =
MRI.getType(
I.getOperand(0).getReg());
3525 I.setDesc(
TII.get(AArch64::DUPv8i8gpr));
3527 I.setDesc(
TII.get(AArch64::DUPv16i8gpr));
3529 I.setDesc(
TII.get(AArch64::DUPv4i16gpr));
3531 I.setDesc(
TII.get(AArch64::DUPv8i16gpr));
3536 case TargetOpcode::G_BUILD_VECTOR:
3537 return selectBuildVector(
I,
MRI);
3538 case TargetOpcode::G_MERGE_VALUES:
3540 case TargetOpcode::G_UNMERGE_VALUES:
3542 case TargetOpcode::G_SHUFFLE_VECTOR:
3543 return selectShuffleVector(
I,
MRI);
3544 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3545 return selectExtractElt(
I,
MRI);
3546 case TargetOpcode::G_CONCAT_VECTORS:
3547 return selectConcatVectors(
I,
MRI);
3548 case TargetOpcode::G_JUMP_TABLE:
3549 return selectJumpTable(
I,
MRI);
3550 case TargetOpcode::G_MEMCPY:
3551 case TargetOpcode::G_MEMCPY_INLINE:
3552 case TargetOpcode::G_MEMMOVE:
3553 case TargetOpcode::G_MEMSET:
3554 assert(STI.hasMOPS() &&
"Shouldn't get here without +mops feature");
3555 return selectMOPS(
I,
MRI);
3561bool AArch64InstructionSelector::selectAndRestoreState(
MachineInstr &
I) {
3568bool AArch64InstructionSelector::selectMOPS(
MachineInstr &GI,
3572 case TargetOpcode::G_MEMCPY:
3573 case TargetOpcode::G_MEMCPY_INLINE:
3574 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3576 case TargetOpcode::G_MEMMOVE:
3577 Mopcode = AArch64::MOPSMemoryMovePseudo;
3579 case TargetOpcode::G_MEMSET:
3581 Mopcode = AArch64::MOPSMemorySetPseudo;
3590 const Register DstPtrCopy =
MRI.cloneVirtualRegister(DstPtr.getReg());
3591 const Register SrcValCopy =
MRI.cloneVirtualRegister(SrcOrVal.getReg());
3594 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3595 const auto &SrcValRegClass =
3596 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3599 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass,
MRI);
3600 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass,
MRI);
3601 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass,
MRI);
3611 Register DefDstPtr =
MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3612 Register DefSize =
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3614 MIB.
buildInstr(Mopcode, {DefDstPtr, DefSize},
3615 {DstPtrCopy, SizeCopy, SrcValCopy});
3617 Register DefSrcPtr =
MRI.createVirtualRegister(&SrcValRegClass);
3618 MIB.
buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3619 {DstPtrCopy, SrcValCopy, SizeCopy});
3628 assert(
I.getOpcode() == TargetOpcode::G_BRJT &&
"Expected G_BRJT");
3629 Register JTAddr =
I.getOperand(0).getReg();
3630 unsigned JTI =
I.getOperand(1).getIndex();
3639 if (STI.isTargetMachO()) {
3644 assert(STI.isTargetELF() &&
3645 "jump table hardening only supported on MachO/ELF");
3653 I.eraseFromParent();
3657 Register TargetReg =
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3658 Register ScratchReg =
MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3660 auto JumpTableInst = MIB.
buildInstr(AArch64::JumpTableDest32,
3661 {TargetReg, ScratchReg}, {JTAddr,
Index})
3662 .addJumpTableIndex(JTI);
3664 MIB.
buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},
3665 {
static_cast<int64_t
>(JTI)});
3667 MIB.
buildInstr(AArch64::BR, {}, {TargetReg});
3668 I.eraseFromParent();
3672bool AArch64InstructionSelector::selectJumpTable(
MachineInstr &
I,
3674 assert(
I.getOpcode() == TargetOpcode::G_JUMP_TABLE &&
"Expected jump table");
3675 assert(
I.getOperand(1).isJTI() &&
"Jump table op should have a JTI!");
3677 Register DstReg =
I.getOperand(0).getReg();
3678 unsigned JTI =
I.getOperand(1).getIndex();
3681 MIB.
buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3684 I.eraseFromParent();
3688bool AArch64InstructionSelector::selectTLSGlobalValue(
3690 if (!STI.isTargetMachO())
3695 const auto &GlobalOp =
I.getOperand(1);
3696 assert(GlobalOp.getOffset() == 0 &&
3697 "Shouldn't have an offset on TLS globals!");
3701 MIB.
buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3704 auto Load = MIB.
buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3705 {LoadGOT.getReg(0)})
3716 assert(Opcode == AArch64::BLR);
3717 Opcode = AArch64::BLRAAZ;
3726 RBI.constrainGenericRegister(
I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3728 I.eraseFromParent();
3732MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3735 auto Undef = MIRBuilder.
buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3737 auto BuildFn = [&](
unsigned SubregIndex) {
3741 .addImm(SubregIndex);
3749 return BuildFn(AArch64::bsub);
3751 return BuildFn(AArch64::hsub);
3753 return BuildFn(AArch64::ssub);
3755 return BuildFn(AArch64::dsub);
3762AArch64InstructionSelector::emitNarrowVector(
Register DstReg,
Register SrcReg,
3765 LLT DstTy =
MRI.getType(DstReg);
3767 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg,
MRI,
TRI));
3768 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3775 if (
SubReg != AArch64::ssub &&
SubReg != AArch64::dsub) {
3781 .addReg(SrcReg, 0,
SubReg);
3782 RBI.constrainGenericRegister(DstReg, *RC,
MRI);
3786bool AArch64InstructionSelector::selectMergeValues(
3788 assert(
I.getOpcode() == TargetOpcode::G_MERGE_VALUES &&
"unexpected opcode");
3789 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
3790 const LLT SrcTy =
MRI.getType(
I.getOperand(1).getReg());
3794 if (
I.getNumOperands() != 3)
3801 Register DstReg =
I.getOperand(0).getReg();
3802 Register Src1Reg =
I.getOperand(1).getReg();
3803 Register Src2Reg =
I.getOperand(2).getReg();
3804 auto Tmp = MIB.
buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3805 MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
3810 Src2Reg, 1, RB, MIB);
3815 I.eraseFromParent();
3819 if (RB.
getID() != AArch64::GPRRegBankID)
3825 auto *DstRC = &AArch64::GPR64RegClass;
3826 Register SubToRegDef =
MRI.createVirtualRegister(DstRC);
3828 TII.get(TargetOpcode::SUBREG_TO_REG))
3831 .
addUse(
I.getOperand(1).getReg())
3832 .
addImm(AArch64::sub_32);
3833 Register SubToRegDef2 =
MRI.createVirtualRegister(DstRC);
3836 TII.get(TargetOpcode::SUBREG_TO_REG))
3839 .
addUse(
I.getOperand(2).getReg())
3840 .
addImm(AArch64::sub_32);
3842 *
BuildMI(*
I.getParent(),
I,
I.getDebugLoc(),
TII.get(AArch64::BFMXri))
3843 .
addDef(
I.getOperand(0).getReg())
3851 I.eraseFromParent();
static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
                              const unsigned EltSize) {
  // Choose a lane copy opcode and subregister index based on the element size.
  switch (EltSize) {
  case 8:  CopyOpc = AArch64::DUPi8;  ExtractSubReg = AArch64::bsub; break;
  case 16: CopyOpc = AArch64::DUPi16; ExtractSubReg = AArch64::hsub; break;
  case 32: CopyOpc = AArch64::DUPi32; ExtractSubReg = AArch64::ssub; break;
  case 64: CopyOpc = AArch64::DUPi64; ExtractSubReg = AArch64::dsub; break;
  default:
    LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
    return false;
  }
  return true;
}
3884MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3885 std::optional<Register> DstReg,
const RegisterBank &DstRB,
LLT ScalarTy,
3888 unsigned CopyOpc = 0;
3889 unsigned ExtractSubReg = 0;
3892 dbgs() <<
"Couldn't determine lane copy opcode for instruction.\n");
3897 getRegClassForTypeOnBank(ScalarTy, DstRB,
true);
3899 LLVM_DEBUG(
dbgs() <<
"Could not determine destination register class.\n");
3904 const LLT &VecTy =
MRI.getType(VecReg);
3906 getRegClassForTypeOnBank(VecTy, VecRB,
true);
3908 LLVM_DEBUG(
dbgs() <<
"Could not determine source register class.\n");
3915 DstReg =
MRI.createVirtualRegister(DstRC);
3918 auto Copy = MIRBuilder.
buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3919 .addReg(VecReg, 0, ExtractSubReg);
3920 RBI.constrainGenericRegister(*DstReg, *DstRC,
MRI);
3929 VecTy.
getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3930 if (!ScalarToVector)
3936 MIRBuilder.
buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3940 RBI.constrainGenericRegister(*DstReg, *DstRC,
MRI);
3944bool AArch64InstructionSelector::selectExtractElt(
3946 assert(
I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
3947 "unexpected opcode!");
3948 Register DstReg =
I.getOperand(0).getReg();
3949 const LLT NarrowTy =
MRI.getType(DstReg);
3950 const Register SrcReg =
I.getOperand(1).getReg();
3951 const LLT WideTy =
MRI.getType(SrcReg);
3954 "source register size too small!");
3955 assert(!NarrowTy.
isVector() &&
"cannot extract vector into vector!");
3959 assert(LaneIdxOp.
isReg() &&
"Lane index operand was not a register?");
3961 if (RBI.getRegBank(DstReg,
MRI,
TRI)->getID() != AArch64::FPRRegBankID) {
3970 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
3974 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
3979 I.eraseFromParent();
3983bool AArch64InstructionSelector::selectSplitVectorUnmerge(
3985 unsigned NumElts =
I.getNumOperands() - 1;
3986 Register SrcReg =
I.getOperand(NumElts).getReg();
3987 const LLT NarrowTy =
MRI.getType(
I.getOperand(0).getReg());
3988 const LLT SrcTy =
MRI.getType(SrcReg);
3990 assert(NarrowTy.
isVector() &&
"Expected an unmerge into vectors");
3992 LLVM_DEBUG(
dbgs() <<
"Unexpected vector type for vec split unmerge");
3999 *RBI.getRegBank(
I.getOperand(0).getReg(),
MRI,
TRI);
4000 for (
unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4001 Register Dst =
I.getOperand(OpIdx).getReg();
4003 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4007 I.eraseFromParent();
4011bool AArch64InstructionSelector::selectUnmergeValues(
MachineInstr &
I,
4013 assert(
I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4014 "unexpected opcode");
4017 if (RBI.getRegBank(
I.getOperand(0).getReg(),
MRI,
TRI)->getID() !=
4018 AArch64::FPRRegBankID ||
4019 RBI.getRegBank(
I.getOperand(1).getReg(),
MRI,
TRI)->getID() !=
4020 AArch64::FPRRegBankID) {
4021 LLVM_DEBUG(
dbgs() <<
"Unmerging vector-to-gpr and scalar-to-scalar "
4022 "currently unsupported.\n");
4028 unsigned NumElts =
I.getNumOperands() - 1;
4029 Register SrcReg =
I.getOperand(NumElts).getReg();
4030 const LLT NarrowTy =
MRI.getType(
I.getOperand(0).getReg());
4031 const LLT WideTy =
MRI.getType(SrcReg);
4034 "can only unmerge from vector or s128 types!");
4036 "source register size too small!");
4039 return selectSplitVectorUnmerge(
I,
MRI);
4043 unsigned CopyOpc = 0;
4044 unsigned ExtractSubReg = 0;
4055 unsigned NumInsertRegs = NumElts - 1;
4067 *RBI.getRegBank(SrcReg,
MRI,
TRI));
4071 assert(Found &&
"expected to find last operand's subeg idx");
4072 for (
unsigned Idx = 0;
Idx < NumInsertRegs; ++
Idx) {
4073 Register ImpDefReg =
MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4075 *
BuildMI(
MBB,
I,
I.getDebugLoc(),
TII.get(TargetOpcode::IMPLICIT_DEF),
4079 Register InsertReg =
MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4082 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4099 Register CopyTo =
I.getOperand(0).getReg();
4100 auto FirstCopy = MIB.
buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4101 .addReg(InsertRegs[0], 0, ExtractSubReg);
4105 unsigned LaneIdx = 1;
4106 for (
Register InsReg : InsertRegs) {
4107 Register CopyTo =
I.getOperand(LaneIdx).getReg();
4120 MRI.getRegClassOrNull(
I.getOperand(1).getReg());
4126 RBI.constrainGenericRegister(CopyTo, *RC,
MRI);
4127 I.eraseFromParent();
4131bool AArch64InstructionSelector::selectConcatVectors(
4133 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4134 "Unexpected opcode");
4135 Register Dst = I.getOperand(0).getReg();
4136 Register Op1 = I.getOperand(1).getReg();
4137 Register Op2 = I.getOperand(2).getReg();
4138 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4141 I.eraseFromParent();
4146AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4155MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4163 RC = &AArch64::FPR128RegClass;
4164 Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
4167 RC = &AArch64::FPR64RegClass;
4168 Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
4171 RC = &AArch64::FPR32RegClass;
4172 Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
4175 RC = &AArch64::FPR16RegClass;
4176 Opc = AArch64::LDRHui;
4179 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4185 auto &MF = MIRBuilder.getMF();
4186 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4187 if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
4189 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
4192 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4195 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
4196 .addConstantPoolIndex(
4212static std::pair<unsigned, unsigned>
4214 unsigned Opc, SubregIdx;
4215 if (RB.getID() == AArch64::GPRRegBankID) {
4217 Opc = AArch64::INSvi8gpr;
4218 SubregIdx = AArch64::bsub;
4219 } else if (EltSize == 16) {
4220 Opc = AArch64::INSvi16gpr;
4221 SubregIdx = AArch64::ssub;
4222 } else if (EltSize == 32) {
4223 Opc = AArch64::INSvi32gpr;
4224 SubregIdx = AArch64::ssub;
4225 } else if (EltSize == 64) {
4226 Opc = AArch64::INSvi64gpr;
4227 SubregIdx = AArch64::dsub;
4233 Opc = AArch64::INSvi8lane;
4234 SubregIdx = AArch64::bsub;
4235 } else if (EltSize == 16) {
4236 Opc = AArch64::INSvi16lane;
4237 SubregIdx = AArch64::hsub;
4238 } else if (EltSize == 32) {
4239 Opc = AArch64::INSvi32lane;
4240 SubregIdx = AArch64::ssub;
4241 } else if (EltSize == 64) {
4242 Opc = AArch64::INSvi64lane;
4243 SubregIdx = AArch64::dsub;
4248 return std::make_pair(Opc, SubregIdx);
4252 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4254 const ComplexRendererFns &RenderFns) const {
4255 assert(Opcode && "Expected an opcode?");
4257 "Function should only be used to produce selected instructions!");
4258 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4260 for (auto &Fn : *RenderFns)
4267 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4271 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4272 auto Ty = MRI.getType(LHS.getReg());
4275 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4276 bool Is32Bit = Size == 32;
4279 if (auto Fns = selectArithImmed(RHS))
4280 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4284 if (auto Fns = selectNegArithImmed(RHS))
4285 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4289 if (auto Fns = selectArithExtendedRegister(RHS))
4290 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4294 if (auto Fns = selectShiftedRegister(RHS))
4295 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4297 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4305 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4306 {{AArch64::ADDXri, AArch64::ADDWri},
4307 {AArch64::ADDXrs, AArch64::ADDWrs},
4308 {AArch64::ADDXrr, AArch64::ADDWrr},
4309 {AArch64::SUBXri, AArch64::SUBWri},
4310 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4311 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4318 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4319 {{AArch64::ADDSXri, AArch64::ADDSWri},
4320 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4321 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4322 {AArch64::SUBSXri, AArch64::SUBSWri},
4323 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4324 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4331 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4332 {{AArch64::SUBSXri, AArch64::SUBSWri},
4333 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4334 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4335 {AArch64::ADDSXri, AArch64::ADDSWri},
4336 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4337 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
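// Editorial note (not part of this file): each OpcTable above is indexed as
// [form][Is32Bit] by emitAddSub, with the rows meaning
//   [0] reg + 12-bit immediate      [1] reg + shifted register
//   [2] reg + register              [3] reg + negated 12-bit immediate
//   [4] reg + extended register
// so a constant that only becomes encodable after negation flips ADD(S) into
// SUB(S), and vice versa.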
4344 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4346 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4347 static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
4348 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4355 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4357 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4358 static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
4359 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4366 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4367 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4368 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4374 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4378 bool Is32Bit = (RegSize == 32);
4379 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4380 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4381 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4385 int64_t Imm = ValAndVReg->Value.getSExtValue();
4388 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4395 if (auto Fns = selectLogicalShiftedRegister(RHS))
4396 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4397 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4400MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4403 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4410 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4412 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4414 auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4415 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4418MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4422 LLT Ty = MRI.getType(Dst);
4424 "Expected a 32-bit scalar register?");
4426 const Register ZReg = AArch64::WZR;
4431 return emitCSINC(Dst, ZReg, ZReg, InvCC1,
4437 emitCSINC(Def1Reg, ZReg, ZReg, InvCC1, MIRBuilder);
4438 emitCSINC(Def2Reg, ZReg, ZReg, InvCC2, MIRBuilder);
4439 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4444MachineInstr *AArch64InstructionSelector::emitFPCompare(
4446 std::optional<CmpInst::Predicate> Pred) const {
4448 LLT Ty = MRI.getType(LHS);
4452 assert(OpSize == 16 || OpSize == 32 || OpSize == 64);
4463 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4467 ShouldUseImm = true;
4471 unsigned CmpOpcTbl[2][3] = {
4472 {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
4473 {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
4475 CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];
4487MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4496 const LLT Op1Ty = MRI.getType(Op1);
4497 const LLT Op2Ty = MRI.getType(Op2);
4499 if (Op1Ty != Op2Ty) {
4500 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4503 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4506 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4522 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4524 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4525 if (!WidenedOp1 || !WidenedOp2) {
4526 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4531 unsigned InsertOpc, InsSubRegIdx;
4532 std::tie(InsertOpc, InsSubRegIdx) =
4536 Dst = MRI.createVirtualRegister(DstRC);
4557 Size = TRI.getRegSizeInBits(*RC);
4559 Size = MRI.getType(Dst).getSizeInBits();
4561 assert(Size <= 64 && "Expected 64 bits or less only!");
4562 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4563 unsigned Opc = OpcTable[Size == 64];
4564 auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
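// Editorial sketch (not part of this file): CSINC Wd, WZR, WZR, cond computes
// (cond ? 0 : 1), so passing the *inverted* condition materializes 1 exactly
// when the original condition holds; FP predicates that need two AArch64
// conditions are handled above by ORR-ing two such CSINCs together.
static unsigned csetViaCSINC(bool InvertedCondHolds) {
  return InvertedCondHolds ? 0u : 1u; // models CSINC Wd, WZR, WZR, cond
}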
4572 unsigned Opcode = I.getOpcode();
4576 bool NeedsNegatedCarry =
4577 (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);
4587 if (SrcMI == I.getPrevNode()) {
4588 if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
4589 bool ProducesNegatedCarry = CarrySrcMI->isSub();
4590 if (NeedsNegatedCarry == ProducesNegatedCarry &&
4591 CarrySrcMI->isUnsigned() &&
4592 CarrySrcMI->getCarryOutReg() == CarryReg &&
4593 selectAndRestoreState(*SrcMI))
4598 Register DeadReg = MRI->createVirtualRegister(&AArch64::GPR32RegClass);
4600 if (NeedsNegatedCarry) {
4603 return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
4607 auto Fns = select12BitValueWithLeftShift(1);
4608 return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
4611bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
4613 auto &CarryMI = cast<GAddSubCarryOut>(I);
4615 if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&I)) {
4617 emitCarryIn(I, CarryInMI->getCarryInReg());
4621 auto OpAndCC = emitOverflowOp(I.getOpcode(), CarryMI.getDstReg(),
4622 CarryMI.getLHS(), CarryMI.getRHS(), MIB);
4624 Register CarryOutReg = CarryMI.getCarryOutReg();
4627 if (!MRI.use_nodbg_empty(CarryOutReg)) {
4633 emitCSINC(CarryOutReg, ZReg, ZReg,
4634 getInvertedCondCode(OpAndCC.second), MIB);
4637 I.eraseFromParent();
4641std::pair<MachineInstr *, AArch64CC::CondCode>
4642AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4649 case TargetOpcode::G_SADDO:
4650 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4651 case TargetOpcode::G_UADDO:
4652 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4653 case TargetOpcode::G_SSUBO:
4654 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4655 case TargetOpcode::G_USUBO:
4656 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4657 case TargetOpcode::G_SADDE:
4658 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4659 case TargetOpcode::G_UADDE:
4660 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4661 case TargetOpcode::G_SSUBE:
4662 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4663 case TargetOpcode::G_USUBE:
4664 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
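// Editorial sketch (not part of this file): the condition codes chosen above
// can be checked against plain C++ arithmetic. Unsigned addition overflow
// corresponds to "HS" (carry set), an unsigned subtraction borrow to "LO"
// (carry clear), and signed overflow in either direction to "VS".
#include <cstdint>
static bool unsignedAddOverflows(uint32_t A, uint32_t B) {
  return A + B < A;                 // carry out of ADDS -> HS
}
static bool unsignedSubBorrows(uint32_t A, uint32_t B) {
  return A < B;                     // borrow in SUBS -> LO
}
static bool signedAddOverflows(int32_t A, int32_t B) {
  int64_t Wide = int64_t(A) + int64_t(B);
  return Wide != int32_t(Wide);     // V flag after ADDS -> VS
}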
4684 unsigned Depth = 0) {
4685 if (!MRI.hasOneNonDBGUse(Val))
4689 if (isa<GAnyCmp>(ValDef)) {
4691 MustBeFirst = false;
4697 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4698 bool IsOR = Opcode == TargetOpcode::G_OR;
4710 if (MustBeFirstL && MustBeFirstR)
4716 if (!CanNegateL && !CanNegateR)
4720 CanNegate = WillNegate && CanNegateL && CanNegateR;
4723 MustBeFirst = !CanNegate;
4725 assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4728 MustBeFirst = MustBeFirstL || MustBeFirstR;
4735MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4740 LLT OpTy = MRI.getType(LHS);
4742 std::optional<ValueAndVReg> C;
4746 if (!C || C->Value.sgt(31) || C->Value.slt(-31))
4747 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4748 else if (C->Value.ule(31))
4749 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
4751 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMNWi : AArch64::CCMNXi;
4757 assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons");
4758 CCmpOpc = AArch64::FCCMPHrr;
4761 CCmpOpc = AArch64::FCCMPSrr;
4764 CCmpOpc = AArch64::FCCMPDrr;
4774 if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
4775 CCmp.addImm(C->Value.getZExtValue());
4776 else if (CCmpOpc == AArch64::CCMNWi || CCmpOpc == AArch64::CCMNXi)
4777 CCmp.addImm(C->Value.abs().getZExtValue());
4785MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4792 if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
4798 if (isa<GICmp>(Cmp)) {
4809 ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
4820 auto Dst = MRI.cloneVirtualRegister(LHS);
4821 if (isa<GICmp>(Cmp))
4822 return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB);
4823 return emitFPCompare(Cmp->getOperand(2).getReg(),
4824 Cmp->getOperand(3).getReg(), MIB);
4829 assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
4831 bool IsOR = Opcode == TargetOpcode::G_OR;
4837 assert(ValidL && "Valid conjunction/disjunction tree");
4844 assert(ValidR && "Valid conjunction/disjunction tree");
4849 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
4858 bool NegateAfterAll;
4859 if (Opcode == TargetOpcode::G_OR) {
4862 assert(CanNegateR && "at least one side must be negatable");
4863 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
4867 NegateAfterR = true;
4870 NegateR = CanNegateR;
4871 NegateAfterR = !CanNegateR;
4874 NegateAfterAll = !Negate;
4876 assert(Opcode == TargetOpcode::G_AND &&
4877 "Valid conjunction/disjunction tree");
4878 assert(!Negate && "Valid conjunction/disjunction tree");
4882 NegateAfterR = false;
4883 NegateAfterAll = false;
4899MachineInstr *AArch64InstructionSelector::emitConjunction(
4901 bool DummyCanNegate;
4902 bool DummyMustBeFirst;
4909bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
4921bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
4945 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
4947 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
4950 if (UI.getOpcode() != TargetOpcode::G_SELECT)
4956 unsigned CondOpc = CondDef->getOpcode();
4957 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
4958 if (tryOptSelectConjunction(I, *CondDef))
4964 if (CondOpc == TargetOpcode::G_ICMP) {
4992 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
4993 I.getOperand(3).getReg(), CondCode, MIB);
4994 I.eraseFromParent();
4998MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
5002 "Unexpected MachineOperand");
5039 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
5050 LHSDef->getOpcode() == TargetOpcode::G_AND) {
5053 if (!ValAndVReg || ValAndVReg->Value != 0)
5063bool AArch64InstructionSelector::selectShuffleVector(
5065 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5066 Register Src1Reg = I.getOperand(1).getReg();
5067 const LLT Src1Ty = MRI.getType(Src1Reg);
5068 Register Src2Reg = I.getOperand(2).getReg();
5069 const LLT Src2Ty = MRI.getType(Src2Reg);
5080 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
5087 for (int Val : Mask) {
5090 Val = Val < 0 ? 0 : Val;
5091 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
5109 emitVectorConcat(std::nullopt, Src1Reg, Src2Reg, MIB);
5116 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
5120 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
5126 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
5127 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
5128 I.eraseFromParent();
5136 auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
5139 I.eraseFromParent();
5143MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5153 DstReg =
MRI.createVirtualRegister(DstRC);
5155 unsigned EltSize =
MRI.getType(EltReg).getSizeInBits();
5158 if (RB.
getID() == AArch64::FPRRegBankID) {
5159 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
5160 InsElt = MIRBuilder.
buildInstr(Opc, {*DstReg}, {SrcReg})
5162 .
addUse(InsSub->getOperand(0).getReg())
5165 InsElt = MIRBuilder.
buildInstr(Opc, {*DstReg}, {SrcReg})
5174bool AArch64InstructionSelector::selectUSMovFromExtend(
5176 if (
MI.getOpcode() != TargetOpcode::G_SEXT &&
5177 MI.getOpcode() != TargetOpcode::G_ZEXT &&
5178 MI.getOpcode() != TargetOpcode::G_ANYEXT)
5180 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SEXT;
5181 const Register DefReg =
MI.getOperand(0).getReg();
5182 const LLT DstTy =
MRI.getType(DefReg);
5185 if (DstSize != 32 && DstSize != 64)
5189 MI.getOperand(1).getReg(),
MRI);
5195 const LLT &VecTy =
MRI.getType(Src0);
5198 const MachineInstr *ScalarToVector = emitScalarToVector(
5199 VecTy.
getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
5200 assert(ScalarToVector &&
"Didn't expect emitScalarToVector to fail!");
5206 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5208 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5210 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5212 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5214 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5223 if (DstSize == 64 && !IsSigned) {
5224 Register NewReg =
MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5225 MIB.
buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
5226 ExtI = MIB.
buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
5229 .
addImm(AArch64::sub_32);
5230 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
MRI);
5232 ExtI = MIB.
buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
5235 MI.eraseFromParent();
5239MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
5242 if (DstSize == 128) {
5243 if (
Bits.getHiBits(64) !=
Bits.getLoBits(64))
5245 Op = AArch64::MOVIv16b_ns;
5247 Op = AArch64::MOVIv8b_ns;
5254 auto Mov = Builder.
buildInstr(
Op, {Dst}, {}).addImm(Val);
5261MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
5266 if (DstSize == 128) {
5267 if (
Bits.getHiBits(64) !=
Bits.getLoBits(64))
5269 Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
5271 Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
5291MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
5296 if (DstSize == 128) {
5297 if (
Bits.getHiBits(64) !=
Bits.getLoBits(64))
5299 Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
5301 Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
5327MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
5331 if (DstSize == 128) {
5332 if (
Bits.getHiBits(64) !=
Bits.getLoBits(64))
5334 Op = AArch64::MOVIv2d_ns;
5336 Op = AArch64::MOVID;
5342 auto Mov = Builder.
buildInstr(
Op, {Dst}, {}).addImm(Val);
5349MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
5354 if (DstSize == 128) {
5355 if (
Bits.getHiBits(64) !=
Bits.getLoBits(64))
5357 Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
5359 Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
5379MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
5383 bool IsWide =
false;
5384 if (DstSize == 128) {
5385 if (
Bits.getHiBits(64) !=
Bits.getLoBits(64))
5387 Op = AArch64::FMOVv4f32_ns;
5390 Op = AArch64::FMOVv2f32_ns;
5399 Op = AArch64::FMOVv2f64_ns;
5403 auto Mov = Builder.
buildInstr(
Op, {Dst}, {}).addImm(Val);
5408bool AArch64InstructionSelector::selectIndexedExtLoad(
5410 auto &ExtLd = cast<GIndexedAnyExtLoad>(
MI);
5412 Register WriteBack = ExtLd.getWritebackReg();
5415 LLT Ty =
MRI.getType(Dst);
5417 unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
5418 bool IsPre = ExtLd.isPre();
5419 bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
5420 bool InsertIntoXReg =
false;
5428 if (MemSizeBits == 8) {
5431 Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
5433 Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
5434 NewLdDstTy = IsDst64 ? s64 : s32;
5436 Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
5437 InsertIntoXReg = IsDst64;
5440 }
else if (MemSizeBits == 16) {
5443 Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
5445 Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
5446 NewLdDstTy = IsDst64 ? s64 : s32;
5448 Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
5449 InsertIntoXReg = IsDst64;
5452 }
else if (MemSizeBits == 32) {
5454 Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
5457 Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
5458 InsertIntoXReg = IsDst64;
5465 if (RBI.getRegBank(Dst,
MRI,
TRI)->getID() == AArch64::FPRRegBankID)
5473 .addImm(Cst->getSExtValue());
5478 if (InsertIntoXReg) {
5480 auto SubToReg = MIB.
buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
5483 .
addImm(AArch64::sub_32);
5484 RBI.constrainGenericRegister(SubToReg.getReg(0), AArch64::GPR64RegClass,
5490 MI.eraseFromParent();
5495bool AArch64InstructionSelector::selectIndexedLoad(
MachineInstr &
MI,
5497 auto &Ld = cast<GIndexedLoad>(
MI);
5499 Register WriteBack = Ld.getWritebackReg();
5502 assert(
MRI.getType(Dst).getSizeInBits() <= 128 &&
5503 "Unexpected type for indexed load");
5504 unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
5506 if (MemSize <
MRI.getType(Dst).getSizeInBytes())
5507 return selectIndexedExtLoad(
MI,
MRI);
5511 static constexpr unsigned GPROpcodes[] = {
5512 AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
5514 static constexpr unsigned FPROpcodes[] = {
5515 AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
5517 if (RBI.getRegBank(Dst,
MRI,
TRI)->getID() == AArch64::FPRRegBankID)
5518 Opc = FPROpcodes[
Log2_32(MemSize)];
5520 Opc = GPROpcodes[
Log2_32(MemSize)];
5522 static constexpr unsigned GPROpcodes[] = {
5523 AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
5525 static constexpr unsigned FPROpcodes[] = {
5526 AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
5527 AArch64::LDRDpost, AArch64::LDRQpost};
5528 if (RBI.getRegBank(Dst,
MRI,
TRI)->getID() == AArch64::FPRRegBankID)
5529 Opc = FPROpcodes[
Log2_32(MemSize)];
5531 Opc = GPROpcodes[
Log2_32(MemSize)];
5537 MIB.
buildInstr(Opc, {WriteBack, Dst}, {
Base}).addImm(Cst->getSExtValue());
5540 MI.eraseFromParent();
5544bool AArch64InstructionSelector::selectIndexedStore(
GIndexedStore &
I,
5550 LLT ValTy =
MRI.getType(Val);
5555 static constexpr unsigned GPROpcodes[] = {
5556 AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
5558 static constexpr unsigned FPROpcodes[] = {
5559 AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
5562 if (RBI.getRegBank(Val,
MRI,
TRI)->getID() == AArch64::FPRRegBankID)
5567 static constexpr unsigned GPROpcodes[] = {
5568 AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
5570 static constexpr unsigned FPROpcodes[] = {
5571 AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
5572 AArch64::STRDpost, AArch64::STRQpost};
5574 if (RBI.getRegBank(Val,
MRI,
TRI)->getID() == AArch64::FPRRegBankID)
5584 MIB.
buildInstr(Opc, {Dst}, {Val,
Base}).addImm(Cst->getSExtValue());
5585 Str.cloneMemRefs(
I);
5587 I.eraseFromParent();
5595 LLT DstTy =
MRI.getType(Dst);
5598 if (DstSize == 128) {
5600 MIRBuilder.
buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5605 if (DstSize == 64) {
5608 .
buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5611 .addReg(Mov.getReg(0), 0, AArch64::dsub);
5612 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass,
MRI);
5645 if (
auto *NewOp = TryMOVIWithBits(DefBits))
5649 auto TryWithFNeg = [&](
APInt DefBits,
int NumBits,
5653 APInt NegBits(DstSize, 0);
5654 unsigned NumElts = DstSize / NumBits;
5655 for (
unsigned i = 0; i < NumElts; i++)
5656 NegBits |= Neg << (NumBits * i);
5657 NegBits = DefBits ^ NegBits;
5661 if (
auto *NewOp = TryMOVIWithBits(NegBits)) {
5662 Register NewDst =
MRI.createVirtualRegister(&AArch64::FPR128RegClass);
5664 return MIRBuilder.
buildInstr(NegOpc, {Dst}, {NewDst});
5669 if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
5670 (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
5671 (STI.hasFullFP16() &&
5672 (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
5678 LLVM_DEBUG(
dbgs() <<
"Could not generate cp load for constant vector!");
5682 auto Copy = MIRBuilder.
buildCopy(Dst, CPLoad->getOperand(0));
5683 RBI.constrainGenericRegister(
5684 Dst, *
MRI.getRegClass(CPLoad->getOperand(0).getReg()),
MRI);
5688bool AArch64InstructionSelector::tryOptConstantBuildVec(
5690 assert(
I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5692 assert(DstSize <= 128 &&
"Unexpected build_vec type!");
5698 for (
unsigned Idx = 1;
Idx <
I.getNumOperands(); ++
Idx) {
5704 const_cast<ConstantInt *
>(OpMI->getOperand(1).getCImm()));
5705 else if ((OpMI =
getOpcodeDef(TargetOpcode::G_FCONSTANT,
5706 I.getOperand(
Idx).getReg(),
MRI)))
5708 const_cast<ConstantFP *
>(OpMI->getOperand(1).getFPImm()));
5713 if (!emitConstantVector(
I.getOperand(0).getReg(), CV, MIB,
MRI))
5715 I.eraseFromParent();
5719bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5725 Register Dst =
I.getOperand(0).getReg();
5726 Register EltReg =
I.getOperand(1).getReg();
5727 LLT EltTy =
MRI.getType(EltReg);
5735 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(), MRI);
5743 getRegClassForTypeOnBank(
MRI.getType(Dst), DstRB);
5748 auto SubregToReg = MIB.
buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5752 I.eraseFromParent();
5754 return RBI.constrainGenericRegister(Dst, *DstRC,
MRI);
5757bool AArch64InstructionSelector::selectBuildVector(
MachineInstr &
I,
5759 assert(
I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5762 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
5763 const LLT EltTy =
MRI.getType(
I.getOperand(1).getReg());
5766 if (tryOptConstantBuildVec(
I, DstTy,
MRI))
5768 if (tryOptBuildVecToSubregToReg(
I,
MRI))
5771 if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
5778 I.getOperand(1).getReg(), MIB);
5788 for (
unsigned i = 2, e = DstSize / EltSize + 1; i <
e; ++i) {
5791 Register OpReg =
I.getOperand(i).getReg();
5793 if (!getOpcodeDef<GImplicitDef>(OpReg,
MRI)) {
5794 PrevMI = &*emitLaneInsert(std::nullopt, DstVec, OpReg, i - 1, RB, MIB);
5801 if (DstSize < 128) {
5804 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec,
MRI,
TRI));
5807 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5815 if (
SubReg != AArch64::ssub &&
SubReg != AArch64::dsub) {
5816 LLVM_DEBUG(
dbgs() <<
"Unsupported destination size! (" << DstSize
5822 Register DstReg =
I.getOperand(0).getReg();
5824 MIB.
buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0,
SubReg);
5827 RBI.constrainGenericRegister(DstReg, *RC,
MRI);
5845 if (PrevMI == ScalarToVec && DstReg.
isVirtual()) {
5847 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec,
MRI,
TRI));
5848 RBI.constrainGenericRegister(DstReg, *RC,
MRI);
5852 I.eraseFromParent();
5856bool AArch64InstructionSelector::selectVectorLoadIntrinsic(
unsigned Opc,
5859 assert(
I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5860 assert(Opc &&
"Expected an opcode?");
5861 assert(NumVecs > 1 && NumVecs < 5 &&
"Only support 2, 3, or 4 vectors");
5863 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
5866 "Destination must be 64 bits or 128 bits?");
5867 unsigned SubReg =
Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
5868 auto Ptr =
I.getOperand(
I.getNumOperands() - 1).getReg();
5869 assert(
MRI.getType(
Ptr).isPointer() &&
"Expected a pointer type?");
5871 Load.cloneMemRefs(
I);
5873 Register SelectedLoadDst =
Load->getOperand(0).getReg();
5874 for (
unsigned Idx = 0;
Idx < NumVecs; ++
Idx) {
5875 auto Vec = MIB.
buildInstr(TargetOpcode::COPY, {
I.getOperand(
Idx)}, {})
5876 .addReg(SelectedLoadDst, 0,
SubReg +
Idx);
5885bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
5887 assert(
I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5888 assert(Opc &&
"Expected an opcode?");
5889 assert(NumVecs > 1 && NumVecs < 5 &&
"Only support 2, 3, or 4 vectors");
5891 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
5894 auto FirstSrcRegIt =
I.operands_begin() + NumVecs + 1;
5896 std::transform(FirstSrcRegIt, FirstSrcRegIt + NumVecs, Regs.
begin(),
5897 [](
auto MO) { return MO.getReg(); });
5901 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
5916 .
addImm(LaneNo->getZExtValue())
5918 Load.cloneMemRefs(
I);
5920 Register SelectedLoadDst =
Load->getOperand(0).getReg();
5921 unsigned SubReg = AArch64::qsub0;
5922 for (
unsigned Idx = 0;
Idx < NumVecs; ++
Idx) {
5923 auto Vec = MIB.
buildInstr(TargetOpcode::COPY,
5924 {Narrow ?
DstOp(&AArch64::FPR128RegClass)
5927 .addReg(SelectedLoadDst, 0,
SubReg +
Idx);
5932 !emitNarrowVector(
I.getOperand(
Idx).getReg(), WideReg, MIB,
MRI))
5938void AArch64InstructionSelector::selectVectorStoreIntrinsic(
MachineInstr &
I,
5942 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
5946 std::transform(
I.operands_begin() + 1,
I.operands_begin() + 1 + NumVecs,
5947 Regs.
begin(), [](
auto MO) { return MO.getReg(); });
5956bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
5959 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
5963 std::transform(
I.operands_begin() + 1,
I.operands_begin() + 1 + NumVecs,
5964 Regs.
begin(), [](
auto MO) { return MO.getReg(); });
5968 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
5981 .
addImm(LaneNo->getZExtValue())
5988bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
5991 unsigned IntrinID = cast<GIntrinsic>(
I).getIntrinsicID();
6002 case Intrinsic::aarch64_ldxp:
6003 case Intrinsic::aarch64_ldaxp: {
6005 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
6006 {
I.getOperand(0).
getReg(),
I.getOperand(1).getReg()},
6012 case Intrinsic::aarch64_neon_ld1x2: {
6013 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6016 Opc = AArch64::LD1Twov8b;
6018 Opc = AArch64::LD1Twov16b;
6020 Opc = AArch64::LD1Twov4h;
6022 Opc = AArch64::LD1Twov8h;
6024 Opc = AArch64::LD1Twov2s;
6026 Opc = AArch64::LD1Twov4s;
6028 Opc = AArch64::LD1Twov2d;
6029 else if (Ty ==
S64 || Ty == P0)
6030 Opc = AArch64::LD1Twov1d;
6033 selectVectorLoadIntrinsic(Opc, 2,
I);
6036 case Intrinsic::aarch64_neon_ld1x3: {
6037 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6040 Opc = AArch64::LD1Threev8b;
6042 Opc = AArch64::LD1Threev16b;
6044 Opc = AArch64::LD1Threev4h;
6046 Opc = AArch64::LD1Threev8h;
6048 Opc = AArch64::LD1Threev2s;
6050 Opc = AArch64::LD1Threev4s;
6052 Opc = AArch64::LD1Threev2d;
6053 else if (Ty ==
S64 || Ty == P0)
6054 Opc = AArch64::LD1Threev1d;
6057 selectVectorLoadIntrinsic(Opc, 3,
I);
6060 case Intrinsic::aarch64_neon_ld1x4: {
6061 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6064 Opc = AArch64::LD1Fourv8b;
6066 Opc = AArch64::LD1Fourv16b;
6068 Opc = AArch64::LD1Fourv4h;
6070 Opc = AArch64::LD1Fourv8h;
6072 Opc = AArch64::LD1Fourv2s;
6074 Opc = AArch64::LD1Fourv4s;
6076 Opc = AArch64::LD1Fourv2d;
6077 else if (Ty ==
S64 || Ty == P0)
6078 Opc = AArch64::LD1Fourv1d;
6081 selectVectorLoadIntrinsic(Opc, 4,
I);
6084 case Intrinsic::aarch64_neon_ld2: {
6085 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6088 Opc = AArch64::LD2Twov8b;
6090 Opc = AArch64::LD2Twov16b;
6092 Opc = AArch64::LD2Twov4h;
6094 Opc = AArch64::LD2Twov8h;
6096 Opc = AArch64::LD2Twov2s;
6098 Opc = AArch64::LD2Twov4s;
6100 Opc = AArch64::LD2Twov2d;
6101 else if (Ty ==
S64 || Ty == P0)
6102 Opc = AArch64::LD1Twov1d;
6105 selectVectorLoadIntrinsic(Opc, 2,
I);
6108 case Intrinsic::aarch64_neon_ld2lane: {
6109 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6112 Opc = AArch64::LD2i8;
6114 Opc = AArch64::LD2i16;
6116 Opc = AArch64::LD2i32;
6119 Opc = AArch64::LD2i64;
6122 if (!selectVectorLoadLaneIntrinsic(Opc, 2,
I))
6126 case Intrinsic::aarch64_neon_ld2r: {
6127 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6130 Opc = AArch64::LD2Rv8b;
6132 Opc = AArch64::LD2Rv16b;
6134 Opc = AArch64::LD2Rv4h;
6136 Opc = AArch64::LD2Rv8h;
6138 Opc = AArch64::LD2Rv2s;
6140 Opc = AArch64::LD2Rv4s;
6142 Opc = AArch64::LD2Rv2d;
6143 else if (Ty ==
S64 || Ty == P0)
6144 Opc = AArch64::LD2Rv1d;
6147 selectVectorLoadIntrinsic(Opc, 2,
I);
6150 case Intrinsic::aarch64_neon_ld3: {
6151 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6154 Opc = AArch64::LD3Threev8b;
6156 Opc = AArch64::LD3Threev16b;
6158 Opc = AArch64::LD3Threev4h;
6160 Opc = AArch64::LD3Threev8h;
6162 Opc = AArch64::LD3Threev2s;
6164 Opc = AArch64::LD3Threev4s;
6166 Opc = AArch64::LD3Threev2d;
6167 else if (Ty ==
S64 || Ty == P0)
6168 Opc = AArch64::LD1Threev1d;
6171 selectVectorLoadIntrinsic(Opc, 3,
I);
6174 case Intrinsic::aarch64_neon_ld3lane: {
6175 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6178 Opc = AArch64::LD3i8;
6180 Opc = AArch64::LD3i16;
6182 Opc = AArch64::LD3i32;
6185 Opc = AArch64::LD3i64;
6188 if (!selectVectorLoadLaneIntrinsic(Opc, 3,
I))
6192 case Intrinsic::aarch64_neon_ld3r: {
6193 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6196 Opc = AArch64::LD3Rv8b;
6198 Opc = AArch64::LD3Rv16b;
6200 Opc = AArch64::LD3Rv4h;
6202 Opc = AArch64::LD3Rv8h;
6204 Opc = AArch64::LD3Rv2s;
6206 Opc = AArch64::LD3Rv4s;
6208 Opc = AArch64::LD3Rv2d;
6209 else if (Ty ==
S64 || Ty == P0)
6210 Opc = AArch64::LD3Rv1d;
6213 selectVectorLoadIntrinsic(Opc, 3,
I);
6216 case Intrinsic::aarch64_neon_ld4: {
6217 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6220 Opc = AArch64::LD4Fourv8b;
6222 Opc = AArch64::LD4Fourv16b;
6224 Opc = AArch64::LD4Fourv4h;
6226 Opc = AArch64::LD4Fourv8h;
6228 Opc = AArch64::LD4Fourv2s;
6230 Opc = AArch64::LD4Fourv4s;
6232 Opc = AArch64::LD4Fourv2d;
6233 else if (Ty ==
S64 || Ty == P0)
6234 Opc = AArch64::LD1Fourv1d;
6237 selectVectorLoadIntrinsic(Opc, 4,
I);
6240 case Intrinsic::aarch64_neon_ld4lane: {
6241 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6244 Opc = AArch64::LD4i8;
6246 Opc = AArch64::LD4i16;
6248 Opc = AArch64::LD4i32;
6251 Opc = AArch64::LD4i64;
6254 if (!selectVectorLoadLaneIntrinsic(Opc, 4,
I))
6258 case Intrinsic::aarch64_neon_ld4r: {
6259 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6262 Opc = AArch64::LD4Rv8b;
6264 Opc = AArch64::LD4Rv16b;
6266 Opc = AArch64::LD4Rv4h;
6268 Opc = AArch64::LD4Rv8h;
6270 Opc = AArch64::LD4Rv2s;
6272 Opc = AArch64::LD4Rv4s;
6274 Opc = AArch64::LD4Rv2d;
6275 else if (Ty ==
S64 || Ty == P0)
6276 Opc = AArch64::LD4Rv1d;
6279 selectVectorLoadIntrinsic(Opc, 4,
I);
6282 case Intrinsic::aarch64_neon_st1x2: {
6283 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6286 Opc = AArch64::ST1Twov8b;
6288 Opc = AArch64::ST1Twov16b;
6290 Opc = AArch64::ST1Twov4h;
6292 Opc = AArch64::ST1Twov8h;
6294 Opc = AArch64::ST1Twov2s;
6296 Opc = AArch64::ST1Twov4s;
6298 Opc = AArch64::ST1Twov2d;
6299 else if (Ty ==
S64 || Ty == P0)
6300 Opc = AArch64::ST1Twov1d;
6303 selectVectorStoreIntrinsic(
I, 2, Opc);
6306 case Intrinsic::aarch64_neon_st1x3: {
6307 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6310 Opc = AArch64::ST1Threev8b;
6312 Opc = AArch64::ST1Threev16b;
6314 Opc = AArch64::ST1Threev4h;
6316 Opc = AArch64::ST1Threev8h;
6318 Opc = AArch64::ST1Threev2s;
6320 Opc = AArch64::ST1Threev4s;
6322 Opc = AArch64::ST1Threev2d;
6323 else if (Ty ==
S64 || Ty == P0)
6324 Opc = AArch64::ST1Threev1d;
6327 selectVectorStoreIntrinsic(
I, 3, Opc);
6330 case Intrinsic::aarch64_neon_st1x4: {
6331 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6334 Opc = AArch64::ST1Fourv8b;
6336 Opc = AArch64::ST1Fourv16b;
6338 Opc = AArch64::ST1Fourv4h;
6340 Opc = AArch64::ST1Fourv8h;
6342 Opc = AArch64::ST1Fourv2s;
6344 Opc = AArch64::ST1Fourv4s;
6346 Opc = AArch64::ST1Fourv2d;
6347 else if (Ty ==
S64 || Ty == P0)
6348 Opc = AArch64::ST1Fourv1d;
6351 selectVectorStoreIntrinsic(
I, 4, Opc);
6354 case Intrinsic::aarch64_neon_st2: {
6355 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6358 Opc = AArch64::ST2Twov8b;
6360 Opc = AArch64::ST2Twov16b;
6362 Opc = AArch64::ST2Twov4h;
6364 Opc = AArch64::ST2Twov8h;
6366 Opc = AArch64::ST2Twov2s;
6368 Opc = AArch64::ST2Twov4s;
6370 Opc = AArch64::ST2Twov2d;
6371 else if (Ty ==
S64 || Ty == P0)
6372 Opc = AArch64::ST1Twov1d;
6375 selectVectorStoreIntrinsic(
I, 2, Opc);
6378 case Intrinsic::aarch64_neon_st3: {
6379 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6382 Opc = AArch64::ST3Threev8b;
6384 Opc = AArch64::ST3Threev16b;
6386 Opc = AArch64::ST3Threev4h;
6388 Opc = AArch64::ST3Threev8h;
6390 Opc = AArch64::ST3Threev2s;
6392 Opc = AArch64::ST3Threev4s;
6394 Opc = AArch64::ST3Threev2d;
6395 else if (Ty ==
S64 || Ty == P0)
6396 Opc = AArch64::ST1Threev1d;
6399 selectVectorStoreIntrinsic(
I, 3, Opc);
6402 case Intrinsic::aarch64_neon_st4: {
6403 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6406 Opc = AArch64::ST4Fourv8b;
6408 Opc = AArch64::ST4Fourv16b;
6410 Opc = AArch64::ST4Fourv4h;
6412 Opc = AArch64::ST4Fourv8h;
6414 Opc = AArch64::ST4Fourv2s;
6416 Opc = AArch64::ST4Fourv4s;
6418 Opc = AArch64::ST4Fourv2d;
6419 else if (Ty ==
S64 || Ty == P0)
6420 Opc = AArch64::ST1Fourv1d;
6423 selectVectorStoreIntrinsic(
I, 4, Opc);
6426 case Intrinsic::aarch64_neon_st2lane: {
6427 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6430 Opc = AArch64::ST2i8;
6432 Opc = AArch64::ST2i16;
6434 Opc = AArch64::ST2i32;
6437 Opc = AArch64::ST2i64;
6440 if (!selectVectorStoreLaneIntrinsic(
I, 2, Opc))
6444 case Intrinsic::aarch64_neon_st3lane: {
6445 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6448 Opc = AArch64::ST3i8;
6450 Opc = AArch64::ST3i16;
6452 Opc = AArch64::ST3i32;
6455 Opc = AArch64::ST3i64;
6458 if (!selectVectorStoreLaneIntrinsic(
I, 3, Opc))
6462 case Intrinsic::aarch64_neon_st4lane: {
6463 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6466 Opc = AArch64::ST4i8;
6468 Opc = AArch64::ST4i16;
6470 Opc = AArch64::ST4i32;
6473 Opc = AArch64::ST4i64;
6476 if (!selectVectorStoreLaneIntrinsic(
I, 4, Opc))
6480 case Intrinsic::aarch64_mops_memset_tag: {
6493 Register DstDef =
I.getOperand(0).getReg();
6495 Register DstUse =
I.getOperand(2).getReg();
6496 Register ValUse =
I.getOperand(3).getReg();
6497 Register SizeUse =
I.getOperand(4).getReg();
6504 auto Memset = MIB.
buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
6505 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
6512 I.eraseFromParent();
6516bool AArch64InstructionSelector::selectIntrinsic(
MachineInstr &
I,
6518 unsigned IntrinID = cast<GIntrinsic>(
I).getIntrinsicID();
6523 case Intrinsic::aarch64_crypto_sha1h: {
6524 Register DstReg =
I.getOperand(0).getReg();
6525 Register SrcReg =
I.getOperand(2).getReg();
6528 if (
MRI.getType(DstReg).getSizeInBits() != 32 ||
6529 MRI.getType(SrcReg).getSizeInBits() != 32)
6534 if (RBI.getRegBank(SrcReg,
MRI,
TRI)->getID() != AArch64::FPRRegBankID) {
6535 SrcReg =
MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6539 RBI.constrainGenericRegister(
I.getOperand(2).getReg(),
6540 AArch64::GPR32RegClass,
MRI);
6543 if (RBI.getRegBank(DstReg,
MRI,
TRI)->getID() != AArch64::FPRRegBankID)
6544 DstReg =
MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6547 auto SHA1Inst = MIB.
buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
6551 if (DstReg !=
I.getOperand(0).getReg()) {
6555 RBI.constrainGenericRegister(
I.getOperand(0).getReg(),
6556 AArch64::GPR32RegClass,
MRI);
6559 I.eraseFromParent();
6562 case Intrinsic::ptrauth_resign: {
6563 Register DstReg =
I.getOperand(0).getReg();
6564 Register ValReg =
I.getOperand(2).getReg();
6565 uint64_t AUTKey =
I.getOperand(3).getImm();
6566 Register AUTDisc =
I.getOperand(4).getReg();
6567 uint64_t PACKey =
I.getOperand(5).getImm();
6568 Register PACDisc =
I.getOperand(6).getReg();
6572 std::tie(AUTConstDiscC, AUTAddrDisc) =
6577 std::tie(PACConstDiscC, PACAddrDisc) =
6580 MIB.
buildCopy({AArch64::X16}, {ValReg});
6581 MIB.
buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6592 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass,
MRI);
6593 I.eraseFromParent();
6596 case Intrinsic::ptrauth_auth: {
6597 Register DstReg =
I.getOperand(0).getReg();
6598 Register ValReg =
I.getOperand(2).getReg();
6599 uint64_t AUTKey =
I.getOperand(3).getImm();
6600 Register AUTDisc =
I.getOperand(4).getReg();
6604 std::tie(AUTConstDiscC, AUTAddrDisc) =
6607 MIB.
buildCopy({AArch64::X16}, {ValReg});
6608 MIB.
buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6616 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass,
MRI);
6617 I.eraseFromParent();
6620 case Intrinsic::frameaddress:
6621 case Intrinsic::returnaddress: {
6625 unsigned Depth =
I.getOperand(2).getImm();
6626 Register DstReg =
I.getOperand(0).getReg();
6627 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass,
MRI);
6629 if (
Depth == 0 && IntrinID == Intrinsic::returnaddress) {
6630 if (!MFReturnAddr) {
6635 MF,
TII, AArch64::LR, AArch64::GPR64RegClass,
I.getDebugLoc());
6638 if (STI.hasPAuth()) {
6639 MIB.
buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
6646 I.eraseFromParent();
6653 Register NextFrame =
MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
6655 MIB.
buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
6657 FrameAddr = NextFrame;
6660 if (IntrinID == Intrinsic::frameaddress)
6665 if (STI.hasPAuth()) {
6666 Register TmpReg =
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
6667 MIB.
buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
6668 MIB.
buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
6677 I.eraseFromParent();
6680 case Intrinsic::aarch64_neon_tbl2:
6681 SelectTable(
I,
MRI, 2, AArch64::TBLv8i8Two, AArch64::TBLv16i8Two,
false);
6683 case Intrinsic::aarch64_neon_tbl3:
6684 SelectTable(
I,
MRI, 3, AArch64::TBLv8i8Three, AArch64::TBLv16i8Three,
6687 case Intrinsic::aarch64_neon_tbl4:
6688 SelectTable(
I,
MRI, 4, AArch64::TBLv8i8Four, AArch64::TBLv16i8Four,
false);
6690 case Intrinsic::aarch64_neon_tbx2:
6691 SelectTable(
I,
MRI, 2, AArch64::TBXv8i8Two, AArch64::TBXv16i8Two,
true);
6693 case Intrinsic::aarch64_neon_tbx3:
6694 SelectTable(
I,
MRI, 3, AArch64::TBXv8i8Three, AArch64::TBXv16i8Three,
true);
6696 case Intrinsic::aarch64_neon_tbx4:
6697 SelectTable(
I,
MRI, 4, AArch64::TBXv8i8Four, AArch64::TBXv16i8Four,
true);
6699 case Intrinsic::swift_async_context_addr:
6708 I.eraseFromParent();
6743bool AArch64InstructionSelector::selectPtrAuthGlobalValue(
6745 Register DefReg =
I.getOperand(0).getReg();
6748 Register AddrDisc =
I.getOperand(3).getReg();
6749 uint64_t Disc =
I.getOperand(4).getImm();
6757 if (!isUInt<16>(Disc))
6759 "constant discriminator in ptrauth global out of range [0, 0xffff]");
6762 if (!STI.isTargetELF() && !STI.isTargetMachO())
6772 if (!
MRI.hasOneDef(OffsetReg))
6775 if (OffsetMI.
getOpcode() != TargetOpcode::G_CONSTANT)
6801 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
6804 "unsupported non-GOT op flags on ptrauth global reference");
6806 "unsupported non-GOT reference to weak ptrauth global");
6809 bool HasAddrDisc = !AddrDiscVal || *AddrDiscVal != 0;
6816 MIB.
buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
6817 MIB.
buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6818 MIB.
buildInstr(NeedsGOTLoad ? AArch64::LOADgotPAC : AArch64::MOVaddrPAC)
6821 .
addReg(HasAddrDisc ? AddrDisc : AArch64::XZR)
6825 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
MRI);
6826 I.eraseFromParent();
6838 "unsupported non-zero offset in weak ptrauth global reference");
6843 MIB.
buildInstr(AArch64::LOADauthptrstatic, {DefReg}, {})
6844 .addGlobalAddress(GV,
Offset)
6847 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
MRI);
6849 I.eraseFromParent();
6853void AArch64InstructionSelector::SelectTable(
MachineInstr &
I,
6855 unsigned NumVec,
unsigned Opc1,
6856 unsigned Opc2,
bool isExt) {
6857 Register DstReg =
I.getOperand(0).getReg();
6862 for (
unsigned i = 0; i < NumVec; i++)
6863 Regs.
push_back(
I.getOperand(i + 2 + isExt).getReg());
6866 Register IdxReg =
I.getOperand(2 + NumVec + isExt).getReg();
6874 I.eraseFromParent();
6878AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
6880 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6881 return std::nullopt;
6882 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
6887AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
6889 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6890 return std::nullopt;
6896AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
6898 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6899 return std::nullopt;
6900 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
6905AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
6907 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6908 return std::nullopt;
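// Editorial sketch (not part of this file): the "A" renderers above emit the
// modular complement of the shift amount, (BitWidth - Imm) & (BitWidth - 1),
// while the "B" renderers pass the amount through unchanged; which one a
// pattern uses is decided by the imported TableGen patterns.
#include <cstdint>
static uint64_t encodeShiftA(uint64_t Imm, unsigned BitWidth) {
  return (BitWidth - Imm) & (BitWidth - 1); // e.g. encodeShiftA(3, 32) == 29
}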
6919AArch64InstructionSelector::select12BitValueWithLeftShift(
6922 if (Immed >> 12 == 0) {
6924 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
6926 Immed = Immed >> 12;
6928 return std::nullopt;
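// Editorial sketch (not part of this file), restating the accepted range: an
// arithmetic immediate is encodable when it fits in 12 bits, or when its low
// 12 bits are zero and it fits in 24 bits (rendered as imm12 with LSL #12).
#include <cstdint>
#include <optional>
#include <utility>
static std::optional<std::pair<uint64_t, unsigned>> encodeArithImm(uint64_t V) {
  if ((V >> 12) == 0)
    return std::make_pair(V, 0u);           // ADD/SUB #imm12
  if ((V & 0xfffULL) == 0 && (V >> 24) == 0)
    return std::make_pair(V >> 12, 12u);    // ADD/SUB #imm12, LSL #12
  return std::nullopt;
}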
6941AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
6948 if (MaybeImmed == std::nullopt)
6949 return std::nullopt;
6950 return select12BitValueWithLeftShift(*MaybeImmed);
6956AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
6960 return std::nullopt;
6962 if (MaybeImmed == std::nullopt)
6963 return std::nullopt;
6970 return std::nullopt;
6975 if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
6978 Immed = ~Immed + 1ULL;
6980 if (Immed & 0xFFFFFFFFFF000000ULL)
6981 return std::nullopt;
6983 Immed &= 0xFFFFFFULL;
6984 return select12BitValueWithLeftShift(Immed);
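// Editorial sketch (not part of this file): the negated-immediate path lets
// "x + (-C)" be selected as SUB x, #C (and the reverse for subtraction). A
// 32-bit operand is negated within 32 bits, a 64-bit operand within 64 bits,
// and the result must still fit the 12-bit (optionally LSL #12) form.
#include <cstdint>
static bool negArithImmEncodable(uint64_t Immed, bool Is32Bit) {
  if (Is32Bit)
    Immed = uint32_t(~uint32_t(Immed) + 1u);   // negate as a 32-bit value
  else
    Immed = ~Immed + 1ULL;                     // negate as a 64-bit value
  if (Immed & 0xFFFFFFFFFF000000ULL)
    return false;                              // must fit in 24 bits
  Immed &= 0xFFFFFFULL;
  return (Immed >> 12) == 0 || (Immed & 0xfff) == 0;
}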
7001std::optional<bool> AArch64InstructionSelector::isWorthFoldingIntoAddrMode(
7003 if (
MI.getOpcode() == AArch64::G_SHL) {
7007 MI.getOperand(2).getReg(),
MRI)) {
7008 const APInt ShiftVal = ValAndVeg->Value;
7011 return !(STI.hasAddrLSLSlow14() && (ShiftVal == 1 || ShiftVal == 4));
7014 return std::nullopt;
7022bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
7024 bool IsAddrOperand)
const {
7028 if (
MRI.hasOneNonDBGUse(DefReg) ||
7029 MI.getParent()->getParent()->getFunction().hasOptSize())
7032 if (IsAddrOperand) {
7034 if (
const auto Worth = isWorthFoldingIntoAddrMode(
MI,
MRI))
7038 if (
MI.getOpcode() == AArch64::G_PTR_ADD) {
7045 if (
const auto Worth = isWorthFoldingIntoAddrMode(*OffsetInst,
MRI))
7055 return all_of(
MRI.use_nodbg_instructions(DefReg),
7071AArch64InstructionSelector::selectExtendedSHL(
7073 unsigned SizeInBytes,
bool WantsExt)
const {
7074 assert(
Base.isReg() &&
"Expected base to be a register operand");
7075 assert(
Offset.isReg() &&
"Expected offset to be a register operand");
7080 unsigned OffsetOpc = OffsetInst->
getOpcode();
7081 bool LookedThroughZExt =
false;
7082 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
7084 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
7085 return std::nullopt;
7089 LookedThroughZExt =
true;
7091 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
7092 return std::nullopt;
7095 int64_t LegalShiftVal =
Log2_32(SizeInBytes);
7096 if (LegalShiftVal == 0)
7097 return std::nullopt;
7098 if (!isWorthFoldingIntoExtendedReg(*OffsetInst,
MRI,
true))
7099 return std::nullopt;
7110 if (OffsetOpc == TargetOpcode::G_SHL)
7111 return std::nullopt;
7117 return std::nullopt;
7122 int64_t ImmVal = ValAndVReg->Value.getSExtValue();
7126 if (OffsetOpc == TargetOpcode::G_MUL) {
7127 if (!llvm::has_single_bit<uint32_t>(ImmVal))
7128 return std::nullopt;
7134 if ((ImmVal & 0x7) != ImmVal)
7135 return std::nullopt;
7139 if (ImmVal != LegalShiftVal)
7140 return std::nullopt;
7142 unsigned SignExtend = 0;
7146 if (!LookedThroughZExt) {
7148 auto Ext = getExtendTypeForInst(*ExtInst,
MRI,
true);
7150 return std::nullopt;
7155 return std::nullopt;
7161 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
7171 MIB.addImm(SignExtend);
7185AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
7188 return std::nullopt;
7205 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd,
MRI,
true))
7206 return std::nullopt;
7212 return selectExtendedSHL(Root, PtrAdd->
getOperand(1),
7226AArch64InstructionSelector::selectAddrModeRegisterOffset(
7232 if (Gep->
getOpcode() != TargetOpcode::G_PTR_ADD)
7233 return std::nullopt;
7239 return std::nullopt;
7259AArch64InstructionSelector::selectAddrModeXRO(
MachineOperand &Root,
7260 unsigned SizeInBytes)
const {
7263 return std::nullopt;
7267 return std::nullopt;
7285 unsigned Scale =
Log2_32(SizeInBytes);
7286 int64_t ImmOff = ValAndVReg->Value.getSExtValue();
7290 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
7291 ImmOff < (0x1000 << Scale))
7292 return std::nullopt;
7297 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
7301 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
7307 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
7308 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
7313 return std::nullopt;
7317 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
7323 return selectAddrModeRegisterOffset(Root);
7333AArch64InstructionSelector::selectAddrModeWRO(
MachineOperand &Root,
7334 unsigned SizeInBytes)
const {
7339 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd,
MRI,
true))
7340 return std::nullopt;
7361 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->
getOperand(0),
7370 if (!isWorthFoldingIntoExtendedReg(*OffsetInst,
MRI,
true))
7371 return std::nullopt;
7375 getExtendTypeForInst(*OffsetInst,
MRI,
true);
7377 return std::nullopt;
7382 AArch64::GPR32RegClass, MIB);
7389 MIB.addImm(SignExtend);
7400AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
7401 unsigned Size) const {
7406 return std::nullopt;
7408 if (!isBaseWithConstantOffset(Root, MRI))
7409 return std::nullopt;
7414 if (!OffImm.isReg())
7415 return std::nullopt;
7417 if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
7418 return std::nullopt;
7422 return std::nullopt;
7425 if (RHSC >= -256 && RHSC < 256) {
7432 return std::nullopt;
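// Editorial sketch (not part of this file): the unscaled addressing forms
// (the LDUR/STUR family) take a signed 9-bit byte offset, hence the
// [-256, 255] window checked above, independent of the access size.
#include <cstdint>
static bool fitsUnscaledOffset(int64_t Off) { return Off >= -256 && Off < 256; }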
7436AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
7439 if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
7440 return std::nullopt;
7443 return std::nullopt;
7448 return std::nullopt;
7452 return std::nullopt;
7456 return std::nullopt;
7458 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
7463 MIB.addGlobalAddress(GV, Offset,
7473AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
7474 unsigned Size) const {
7479 return std::nullopt;
7482 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
7492 auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
7497 if (isBaseWithConstantOffset(Root, MRI)) {
7505 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
7506 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
7521 if (selectAddrModeUnscaled(Root, Size))
7522 return std::nullopt;
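// Editorial sketch (not part of this file): the scaled register+immediate
// mode requires the byte offset to be non-negative, a multiple of the access
// size, and representable in a 12-bit field after dividing by that size.
#include <cstdint>
static bool fitsScaledOffset(int64_t Off, unsigned SizeInBytes) {
  // SizeInBytes is assumed to be a power of two (1, 2, 4, 8 or 16).
  unsigned Scale = 0;
  for (unsigned S = SizeInBytes; S > 1; S >>= 1)
    ++Scale;
  return Off >= 0 && (Off & (SizeInBytes - 1)) == 0 &&
         Off < (int64_t(0x1000) << Scale);
}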
7533 switch (
MI.getOpcode()) {
7536 case TargetOpcode::G_SHL:
7538 case TargetOpcode::G_LSHR:
7540 case TargetOpcode::G_ASHR:
7542 case TargetOpcode::G_ROTR:
7550AArch64InstructionSelector::selectShiftedRegister(
MachineOperand &Root,
7551 bool AllowROR)
const {
7553 return std::nullopt;
7562 return std::nullopt;
7564 return std::nullopt;
7565 if (!isWorthFoldingIntoExtendedReg(*ShiftInst,
MRI,
false))
7566 return std::nullopt;
7572 return std::nullopt;
7579 unsigned NumBits =
MRI.getType(ShiftReg).getSizeInBits();
7580 unsigned Val = *Immed & (NumBits - 1);
7589 unsigned Opc =
MI.getOpcode();
7592 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
7594 if (Opc == TargetOpcode::G_SEXT)
7595 Size =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
7597 Size =
MI.getOperand(2).getImm();
7598 assert(
Size != 64 &&
"Extend from 64 bits?");
7611 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
7612 unsigned Size =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
7613 assert(
Size != 64 &&
"Extend from 64 bits?");
7628 if (Opc != TargetOpcode::G_AND)
7647Register AArch64InstructionSelector::moveScalarRegClass(
7650 auto Ty =
MRI.getType(Reg);
7659 return Copy.getReg(0);
7665AArch64InstructionSelector::selectArithExtendedRegister(
7668 return std::nullopt;
7677 return std::nullopt;
7679 if (!isWorthFoldingIntoExtendedReg(*RootDef,
MRI,
false))
7680 return std::nullopt;
7683 if (RootDef->
getOpcode() == TargetOpcode::G_SHL) {
7688 return std::nullopt;
7689 ShiftVal = *MaybeShiftVal;
7691 return std::nullopt;
7696 return std::nullopt;
7697 Ext = getExtendTypeForInst(*ExtDef,
MRI);
7699 return std::nullopt;
7703 Ext = getExtendTypeForInst(*RootDef,
MRI);
7705 return std::nullopt;
7714 if (isDef32(*ExtInst))
7715 return std::nullopt;
7722 ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
7726 MIB.addImm(getArithExtendImm(Ext, ShiftVal));
7731AArch64InstructionSelector::selectExtractHigh(
MachineOperand &Root)
const {
7733 return std::nullopt;
7738 while (Extract && Extract->MI->
getOpcode() == TargetOpcode::G_BITCAST &&
7739 STI.isLittleEndian())
7743 return std::nullopt;
7745 if (Extract->MI->
getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
7751 if (Extract->MI->
getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) {
7756 LaneIdx->Value.getSExtValue() == 1) {
7762 return std::nullopt;
7769 assert(
MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7770 "Expected G_CONSTANT");
7771 std::optional<int64_t> CstVal =
7773 assert(CstVal &&
"Expected constant value");
7777void AArch64InstructionSelector::renderLogicalImm32(
7779 assert(
I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7780 "Expected G_CONSTANT");
7781 uint64_t CstVal =
I.getOperand(1).getCImm()->getZExtValue();
7786void AArch64InstructionSelector::renderLogicalImm64(
7788 assert(
I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7789 "Expected G_CONSTANT");
7790 uint64_t CstVal =
I.getOperand(1).getCImm()->getZExtValue();
7798 assert(MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == 0 &&
7799 "Expected G_UBSANTRAP");
7800 MIB.addImm(MI.getOperand(0).getImm() | ('U' << 8));
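// Editorial sketch (not part of this file): the rendered trap immediate keeps
// the UBSan check kind in the low byte and tags the high byte with ASCII 'U'
// (0x55), so the emitted BRK immediate reads as 0x55NN.
#include <cstdint>
static uint16_t ubsanTrapImm(uint8_t CheckKind) {
  return uint16_t(CheckKind) | (uint16_t('U') << 8);
}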
7806 assert(
MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7807 "Expected G_FCONSTANT");
7815 assert(
MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7816 "Expected G_FCONSTANT");
7824 assert(
MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7825 "Expected G_FCONSTANT");
7830void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
7832 assert(
MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7833 "Expected G_FCONSTANT");
7841bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
7843 if (!
MI.mayLoadOrStore())
7846 "Expected load/store to have only one mem op!");
7847 return (*
MI.memoperands_begin())->getSize() == NumBytes;
7850bool AArch64InstructionSelector::isDef32(
const MachineInstr &
MI)
const {
7852 if (
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits() != 32)
7859 switch (
MI.getOpcode()) {
7862 case TargetOpcode::COPY:
7863 case TargetOpcode::G_BITCAST:
7864 case TargetOpcode::G_TRUNC:
7865 case TargetOpcode::G_PHI:
7875 assert(
MI.getOpcode() == TargetOpcode::G_PHI &&
"Expected a G_PHI");
7878 assert(DstRB &&
"Expected PHI dst to have regbank assigned");
7889 auto *OpDef =
MRI.getVRegDef(OpReg);
7890 const LLT &Ty =
MRI.getType(OpReg);
7896 if (InsertPt != OpDefBB.
end() && InsertPt->isPHI())
7900 MRI.setRegBank(Copy.getReg(0), *DstRB);
7901 MO.setReg(Copy.getReg(0));
7910 for (
auto &BB : MF) {
7911 for (
auto &
MI : BB) {
7912 if (
MI.getOpcode() == TargetOpcode::G_PHI)
7917 for (
auto *
MI : Phis) {
7939 bool HasGPROp =
false, HasFPROp =
false;
7943 const LLT &Ty =
MRI.getType(MO.getReg());
7953 if (RB->
getID() == AArch64::GPRRegBankID)
7959 if (HasGPROp && HasFPROp)
7969 return new AArch64InstructionSelector(TM, Subtarget, RBI);
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static bool isPreferredADD(int64_t ImmOff)
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue CCOp, AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC, const SDLoc &DL, SelectionDAG &DAG)
can be transformed to: not (and (not (and (setCC (cmp C)) (setCD (cmp D)))) (and (not (setCA (cmp A))...
static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static bool isCMN(SDValue Op, ISD::CondCode CC, SelectionDAG &DAG)
static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp, AArch64CC::CondCode Predicate)
Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain of CCMP/CFCMP ops.
static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC)
Emit expression as a conjunction (a series of CCMP/CFCMP ops).
static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy)
#define GET_GLOBALISEL_PREDICATES_INIT
static std::pair< const TargetRegisterClass *, const TargetRegisterClass * > getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Helper function to get the source and destination register classes for a copy.
#define GET_GLOBALISEL_TEMPORARIES_INIT
static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Convert an IR fp condition code to an AArch64 CC.
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert, MachineRegisterInfo &MRI)
Return a register which can be used as a bit to test in a TB(N)Z.
static unsigned getMinSizeForRegBank(const RegisterBank &RB)
Returns the minimum size the given register bank can hold.
static std::optional< int64_t > getVectorShiftImm(Register Reg, MachineRegisterInfo &MRI)
Returns the element immediate value of a vector shift operand if found.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the G_LOAD or G_STORE operation GenericOpc, appropriate for the (value)...
static const TargetRegisterClass * getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits, bool GetAllRegSet=false)
Given a register bank, and size in bits, return the smallest register class that can represent that c...
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P)
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the basic binary operation GenericOpc (such as G_OR or G_SDIV),...
static bool getSubRegForClass(const TargetRegisterClass *RC, const TargetRegisterInfo &TRI, unsigned &SubReg)
Returns the correct subregister to use for a given register class.
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI, Register SrcReg, const TargetRegisterClass *To, unsigned SubReg)
Helper function for selectCopy.
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI, const AArch64RegisterBankInfo &RBI)
static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI)
static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type)
static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI)
Given a shift instruction, return the correct shift type for that instruction.
static bool unsupportedBinOp(const MachineInstr &I, const AArch64RegisterBankInfo &RBI, const MachineRegisterInfo &MRI, const AArch64RegisterInfo &TRI)
Check whether I is a currently unsupported binary operation:
static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst, bool WillNegate, MachineRegisterInfo &MRI, unsigned Depth=0)
Returns true if Val is a tree of AND/OR/CMP operations that can be expressed as a conjunction.
static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg, const unsigned EltSize)
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static std::optional< uint64_t > getImmedFromMO(const MachineOperand &Root)
static std::pair< unsigned, unsigned > getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize)
Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given size and RB.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
static std::optional< int64_t > getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI)
Matches and returns the shift immediate value for a SHL instruction given a shift operand.
static void changeFPCCToORAArch64CC(CmpInst::Predicate CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
This file declares the targeting of the RegisterBankInfo class for AArch64.
static bool isStore(int Opcode)
static bool selectMergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static bool selectUnmergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file contains constants used for implementing Dwarf debug support.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static constexpr int Concat[]
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
int getVarArgsStackIndex() const
int getVarArgsGPRIndex() const
unsigned getVarArgsGPRSize() const
This class provides the information for the target register banks.
bool isCallingConvWin64(CallingConv::ID CC, bool IsVarArg) const
APInt bitcastToAPInt() const
Class for arbitrary precision integers.
APInt zext(unsigned width) const
Zero extend to a new width.
uint64_t getZExtValue() const
Get zero extended value.
APInt trunc(unsigned width) const
Truncate to new width.
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ ICMP_ULE
unsigned less or equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
bool isIntPredicate() const
static Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
ConstantFP - Floating Point Values [float, double].
const APFloat & getValueAPF() const
bool isNegative() const
Return true if the sign bit is set.
bool isZero() const
Return true if the value is positive or negative zero.
This is the shared class of boolean and integer constants.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Constant * getSplatValue(bool AllowPoison=false) const
If all elements of the vector constant have the same value, return that value.
const APInt & getUniqueInteger() const
If C is a constant integer then return its value, otherwise C must be a vector of constant integers,...
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
This class represents an Operation in the Expression.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
std::optional< SmallVector< std::function< void(MachineInstrBuilder &)>, 4 > > ComplexRendererFns
Represents indexed stores.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
Register getCondReg() const
Register getFalseReg() const
Register getTrueReg() const
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
bool hasExternalWeakLinkage() const
bool isEquality() const
Return true if this predicate is either EQ or NE.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
constexpr LLT multiplyElements(int Factor) const
Produce a vector type that is Factor times bigger, preserving the element type.
constexpr bool isPointerVector() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
This is an important class for using LLVM in a threaded context.
TypeSize getValue() const
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setAdjustsStack(bool V)
void setFrameAddressIsTaken(bool T)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
Helper class to build MachineInstr.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineFunction & getMF()
Getter for the function we currently build.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineIRBuilderState & getState()
Getter for the State.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
void setState(const MachineIRBuilderState &NewState)
Setter for the State.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addBlockAddress(const BlockAddress *BA, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
bool constrainAllUses(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) const
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
void addMemOperand(MachineFunction &MF, MachineMemOperand *MO)
Add a MachineMemOperand to the machine instruction.
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
@ MOLoad
The memory access reads data.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
const ConstantInt * getCImm() const
bool isCImm() const
isCImm - Test if this is a MO_CImmediate operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
static MachineOperand CreateBA(const BlockAddress *BA, int64_t Offset, unsigned TargetFlags=0)
const ConstantFP * getFPImm() const
unsigned getPredicate() const
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
T get() const
Returns the value of the specified pointer type.
T dyn_cast() const
Returns the current pointer if it is of the specified pointer type, otherwise returns null.
Analysis providing profile information.
Holds all the information related to register banks.
static const TargetRegisterClass * constrainGenericRegister(Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI)
Constrain the (possibly generic) virtual register Reg to RC.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
TypeSize getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
This class represents the LLVM 'select' instruction.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetLowering * getTargetLowering() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static IntegerType * getInt8Ty(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
void changeFCMPPredToAArch64CC(const CmpInst::Predicate P, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Find the AArch64 condition codes necessary to represent P for a scalar floating point comparison.
std::optional< int64_t > getAArch64VectorSplatScalar(const MachineInstr &MI, const MachineRegisterInfo &MRI)
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint8_t encodeAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType9(uint64_t Imm)
static bool isAdvSIMDModImmType4(uint64_t Imm)
static bool isAdvSIMDModImmType5(uint64_t Imm)
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType10(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType9(uint64_t Imm)
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static bool isAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType5(uint64_t Imm)
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static bool isAdvSIMDModImmType10(uint64_t Imm)
static int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType8(uint64_t Imm)
static bool isAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType11(uint64_t Imm)
static bool isAdvSIMDModImmType11(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType6(uint64_t Imm)
static bool isAdvSIMDModImmType8(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType4(uint64_t Imm)
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
static bool isAdvSIMDModImmType6(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType1(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType1(uint64_t Imm)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(int64_t RequestedValue)
Matches a constant equal to RequestedValue.
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
NodeAddr< InstrNode * > Instr
This is an optimization pass for GlobalISel generic memory operations.
Register getFunctionLiveInPhysReg(MachineFunction &MF, const TargetInstrInfo &TII, MCRegister PhysReg, const TargetRegisterClass &RC, const DebugLoc &DL, LLT RegTy=LLT())
Return a virtual register corresponding to the incoming argument register PhysReg.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
bool isPreISelGenericOpcode(unsigned Opcode)
Check whether the given Opcode is a generic opcode that is not supposed to appear after ISel.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
std::optional< int64_t > getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT fits in int64_t returns it.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
bool shouldOptForSize(const MachineBasicBlock &MBB, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
Returns true if the given block should be optimized for size.
InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &, const AArch64Subtarget &, const AArch64RegisterBankInfo &)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
std::optional< ValueAndVReg > getAnyConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true, bool LookThroughAnyExt=false)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT or G_FCONST...
AtomicOrdering
Atomic ordering for LLVM's memory model.
DWARFExpression::Operation Op
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Class which stores all the state required in a MachineIRBuilder.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.