#define DEBUG_TYPE "aarch64-mi-peephole-opt"

  using OpcodePair = std::pair<unsigned, unsigned>;

  template <typename T>
  using SplitAndOpcFunc =
      std::function<std::optional<OpcodePair>(T, unsigned, T &, T &)>;
  using BuildMIFunc =
      std::function<void(MachineInstr &, OpcodePair, unsigned, unsigned,
                         Register, Register, Register)>;

  template <typename T>
  bool splitTwoPartImm(MachineInstr &MI,
                       SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr);

  template <typename T>
  bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI);
  template <typename T>
  bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI);

  enum class SplitStrategy {
    Intersect,
    Disjoint,
  };

  template <typename T>
  bool trySplitLogicalImm(unsigned Opc, MachineInstr &MI,
                          SplitStrategy Strategy, unsigned OtherOpc = 0);
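  // A rough, self-contained illustration of the two strategies in plain C++
  // (the masks below are just one possible split, not necessarily the one
  // splitBitmaskImm or splitDisjointBitmaskImm would pick): 0x0007000f is not
  // a valid AArch64 logical immediate, but it can be recovered either as the
  // intersection of two encodable masks (AND + AND) or as the combination of
  // two disjoint encodable masks (ORR + ORR, EOR + EOR).
  static_assert((0x0007FFFFu & 0xFFFF000Fu) == 0x0007000Fu,
                "Intersect: and #0x7ffff, then and #0xffff000f");
  static_assert((0x00070000u | 0x0000000Fu) == 0x0007000Fu &&
                    (0x00070000u & 0x0000000Fu) == 0u,
                "Disjoint: orr/eor #0x70000, then orr/eor #0xf");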
    return "AArch64 MI Peephole Optimization pass";

char AArch64MIPeepholeOpt::ID = 0;

                "AArch64 MI Peephole Optimization", false, false)
  T UImm = static_cast<T>(Imm);
  assert(UImm && (UImm != ~static_cast<T>(0)) && "Invalid immediate!");

  assert(Imm && (Imm != ~static_cast<T>(0)) && "Invalid immediate!");

  unsigned LowestGapBitUnset =

  assert(LowestGapBitUnset < sizeof(T) * CHAR_BIT && "Undefined behaviour!");
  T NewImm1 = (static_cast<T>(1) << LowestGapBitUnset) -
                                              SplitStrategy Strategy,

  return splitTwoPartImm<T>(
      [Opc, Strategy, OtherOpc](T Imm, unsigned RegSize, T &Imm0,
                                T &Imm1) -> std::optional<OpcodePair> {
        if (Insn.size() == 1)

        bool SplitSucc = false;
        case SplitStrategy::Intersect:
          SplitSucc = splitBitmaskImm(Imm, RegSize, Imm0, Imm1);
        case SplitStrategy::Disjoint:

        return std::make_pair(Opc, !OtherOpc ? Opc : OtherOpc);
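        // When OtherOpc is non-zero (the ANDS cases dispatched from
        // runOnMachineFunction below), it is used only as the second opcode of
        // the pair, so the flag-setting instruction ends up last and NZCV is
        // produced by the final AND of the split.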
      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,

        MachineBasicBlock *MBB = MI.getParent();
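// visitORR handles the "ORRWrs $wzr, $src, 0" form produced for the 32-bit mov
// alias: the checks below reject any shifted or non-WZR variant, and the
// surviving case forwards the source register to all users so the redundant
// move can be erased.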
bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) {
  if (MI.getOperand(3).getImm() != 0)
  if (MI.getOperand(1).getReg() != AArch64::WZR)

  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());

  if (SrcMI->getOpcode() == TargetOpcode::COPY &&
    const TargetRegisterClass *RC =

    if (RC != &AArch64::FPR32RegClass &&
        ((RC != &AArch64::FPR64RegClass && RC != &AArch64::FPR128RegClass &&
          RC != &AArch64::ZPRRegClass) ||

      CpySrc = MRI->createVirtualRegister(&AArch64::FPR32RegClass);
              TII->get(TargetOpcode::COPY), CpySrc)
  else if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)

  MRI->replaceRegWith(DefReg, SrcReg);
  MRI->clearKillFlags(SrcReg);
  MI.eraseFromParent();
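// visitCSEL rewrites a CSEL whose true and false operands are the same
// register: the result is that register no matter how the condition evaluates,
// so the select is replaced by a plain move, built as ORRWrs/ORRXrs from
// WZR/XZR.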
bool AArch64MIPeepholeOpt::visitCSEL(MachineInstr &MI) {
  if (MI.getOperand(1).getReg() != MI.getOperand(2).getReg())

      MI.getOpcode() == AArch64::CSELXr ? AArch64::XZR : AArch64::WZR;
      MI.getOpcode() == AArch64::CSELXr ? AArch64::ORRXrs : AArch64::ORRWrs;

          .addReg(MI.getOperand(0).getReg(), RegState::Define)
  MI.eraseFromParent();
bool AArch64MIPeepholeOpt::visitINSERT(MachineInstr &MI) {
  if (!MI.isRegTiedToDefOperand(1))

  const TargetRegisterClass *RC = MRI->getRegClass(DstReg);
  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());

  if ((SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) ||
      !AArch64::GPR64allRegClass.hasSubClassEq(RC))

  MachineInstr *SubregMI =
              TII->get(TargetOpcode::SUBREG_TO_REG), DstReg)
          .add(MI.getOperand(2))
          .add(MI.getOperand(3));
  MI.eraseFromParent();
  if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 ||
      (Imm & ~static_cast<T>(0xffffff)) != 0)

  if (Insn.size() == 1)

  Imm0 = (Imm >> 12) & 0xfff;
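// A small self-contained check of the arithmetic splitAddSubImm relies on
// (plain C++, not part of the pass): an immediate that fits in 24 bits and has
// two non-zero 12-bit halves can always be rebuilt from one "LSL #12" ADD/SUB
// immediate plus one plain ADD/SUB immediate, e.g. #0x123456 becomes #0x123
// (shifted by 12) followed by #0x456.
static_assert((((0x123456u >> 12) & 0xfffu) << 12) + (0x123456u & 0xfffu) ==
                  0x123456u,
              "24-bit immediate == (high 12 bits << 12) + low 12 bits");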
bool AArch64MIPeepholeOpt::visitADDSUB(
    unsigned PosOpc, unsigned NegOpc, MachineInstr &MI) {
  if (MI.getOperand(1).getReg() == AArch64::XZR ||
      MI.getOperand(1).getReg() == AArch64::WZR)

  return splitTwoPartImm<T>(
      [PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0,
                       T &Imm1) -> std::optional<OpcodePair> {
          return std::make_pair(PosOpc, PosOpc);
          return std::make_pair(NegOpc, NegOpc);
      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,

        MachineBasicBlock *MBB = MI.getParent();
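// visitADDSSUBS is the flag-setting counterpart of visitADDSUB. Splitting an
// ADDS/SUBS into two instructions changes how carry and overflow are computed,
// so the split lambda below bails out unless the NZCV users reported by
// examineCFlagsUse are insensitive to the C and V flags.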
bool AArch64MIPeepholeOpt::visitADDSSUBS(
    OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI) {
  if (MI.getOperand(1).getReg() == AArch64::XZR ||
      MI.getOperand(1).getReg() == AArch64::WZR)

  return splitTwoPartImm<T>(
                          T &Imm1) -> std::optional<OpcodePair> {

        MachineInstr &SrcMI = *MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
        if (!NZCVUsed || NZCVUsed->C || NZCVUsed->V)
      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,

        MachineBasicBlock *MBB = MI.getParent();
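// checkMovImmInstr is the shared guard for the immediate-splitting rewrites:
// it refuses to touch an instruction that sits in a loop without being loop
// invariant, then requires the immediate operand to come from a unique
// MOVi32imm/MOVi64imm def, looking through one SUBREG_TO_REG if present.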
bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI,
                                            MachineInstr *&MovMI,
                                            MachineInstr *&SubregToRegMI) {
  MachineBasicBlock *MBB = MI.getParent();
  if (L && !L->isLoopInvariant(MI))

  MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());

  SubregToRegMI = nullptr;
  if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
    SubregToRegMI = MovMI;

  if (MovMI->getOpcode() != AArch64::MOVi32imm &&
      MovMI->getOpcode() != AArch64::MOVi64imm)
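// splitTwoPartImm is the common driver: once checkMovImmInstr accepts the MOV
// feeding the instruction, the SplitAndOpc callback decides whether the
// immediate splits and which opcode pair to use, register classes for both
// halves are constrained to match those opcodes, and the BuildMIFunc callback
// emits the two replacement instructions before the originals are erased.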
bool AArch64MIPeepholeOpt::splitTwoPartImm(
    MachineInstr &MI,
    SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr) {

         "Invalid RegSize for legal immediate peephole optimization");

  MachineInstr *MovMI, *SubregToRegMI;
  if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))

  if (auto R = SplitAndOpc(Imm, RegSize, Imm0, Imm1))

  const TargetRegisterClass *FirstInstrDstRC =
      TII->getRegClass(TII->get(Opcode.first), 0);
  const TargetRegisterClass *FirstInstrOperandRC =
      TII->getRegClass(TII->get(Opcode.first), 1);
  const TargetRegisterClass *SecondInstrDstRC =
      (Opcode.first == Opcode.second)
          ? FirstInstrDstRC
          : TII->getRegClass(TII->get(Opcode.second), 0);
  const TargetRegisterClass *SecondInstrOperandRC =
      (Opcode.first == Opcode.second)
          ? FirstInstrOperandRC
          : TII->getRegClass(TII->get(Opcode.second), 1);

  Register NewTmpReg = MRI->createVirtualRegister(FirstInstrDstRC);
                             ? MRI->createVirtualRegister(SecondInstrDstRC)

  MRI->constrainRegClass(SrcReg, FirstInstrOperandRC);
  MRI->constrainRegClass(NewTmpReg, SecondInstrOperandRC);
  if (DstReg != NewDstReg)
    MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));

  BuildInstr(MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg);

  if (DstReg != NewDstReg) {
    MRI->replaceRegWith(DstReg, NewDstReg);
    MI.getOperand(0).setReg(DstReg);

  MI.eraseFromParent();
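// visitINSviGPR targets INSvi*gpr instructions whose GPR operand is itself a
// COPY out of an FPR128 value: instead of bouncing the element through a
// general-purpose register, the insert is rebuilt as the corresponding
// INSvi*lane opcode passed in from runOnMachineFunction, taking the element
// straight from the source vector.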
bool AArch64MIPeepholeOpt::visitINSviGPR(MachineInstr &MI, unsigned Opc) {
  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(3).getReg());
  if (!SrcMI || SrcMI->getOpcode() != TargetOpcode::COPY)

      &AArch64::FPR128RegClass) {

  MachineInstr *INSvilaneMI =
          .add(MI.getOperand(1))
          .add(MI.getOperand(2))
  MI.eraseFromParent();
  if (!MI->getOperand(0).isReg() || !MI->getOperand(0).isDef())
  if (RC != &AArch64::FPR64RegClass)
  return MI->getOpcode() > TargetOpcode::GENERIC_OP_END;
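// The helper above (is64bitDefwithZeroHigh64bit) only accepts already-selected
// (non-generic) instructions that define an FPR64: AdvSIMD and floating-point
// writes to a D register zero the upper 64 bits of the corresponding Q
// register, which is the property visitINSvi64lane below leans on when it
// proves a 64-bit lane insert redundant and forwards the existing value.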
bool AArch64MIPeepholeOpt::visitINSvi64lane(MachineInstr &MI) {
  MachineInstr *Low64MI = MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
  if (Low64MI->getOpcode() != AArch64::INSERT_SUBREG)

  MachineInstr *High64MI = MRI->getUniqueVRegDef(MI.getOperand(3).getReg());
  if (!High64MI || High64MI->getOpcode() != AArch64::INSERT_SUBREG)
  if (High64MI && High64MI->getOpcode() == TargetOpcode::COPY)
  if (!High64MI || (High64MI->getOpcode() != AArch64::MOVID &&
                    High64MI->getOpcode() != AArch64::MOVIv2d_ns))

  MRI->constrainRegClass(NewDef, MRI->getRegClass(OldDef));
  MRI->replaceRegWith(OldDef, NewDef);
  MI.eraseFromParent();
bool AArch64MIPeepholeOpt::visitFMOVDr(MachineInstr &MI) {
  MachineInstr *Low64MI = MRI->getUniqueVRegDef(MI.getOperand(1).getReg());

  MRI->clearKillFlags(OldDef);
  MRI->clearKillFlags(NewDef);
  MRI->constrainRegClass(NewDef, MRI->getRegClass(OldDef));
  MRI->replaceRegWith(OldDef, NewDef);
  MI.eraseFromParent();
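// visitUBFMXri narrows a 64-bit UBFM that can only produce a 32-bit result
// into its W-register form feeding a SUBREG_TO_REG. With Imms == 31 and
// Immr <= 31 the X-form reads just bits [31:Immr] of the source and
// zero-extends, i.e. a 32-bit LSR: "ubfm x0, x1, #8, #31" equals
// "lsr w0, w1, #8". With Immr == Imms + 33 it places the low Imms+1 source
// bits at bit 64 - Immr, a 32-bit LSL whose result still fits in 32 bits:
// "ubfm x0, x1, #56, #23" equals "lsl w0, w1, #8".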
bool AArch64MIPeepholeOpt::visitUBFMXri(MachineInstr &MI) {
  int64_t Immr = MI.getOperand(2).getImm();
  int64_t Imms = MI.getOperand(3).getImm();

  bool IsLSR = Imms == 31 && Immr <= Imms;
  bool IsLSL = Immr == Imms + 33;
  if (!IsLSR && !IsLSL)

  const TargetRegisterClass *DstRC64 =
      TII->getRegClass(TII->get(MI.getOpcode()), 0);
  const TargetRegisterClass *DstRC32 =
      TRI->getSubRegisterClass(DstRC64, AArch64::sub_32);
  assert(DstRC32 && "Destination register class of UBFMXri doesn't have a "
                    "sub_32 subregister class");

  const TargetRegisterClass *SrcRC64 =
      TII->getRegClass(TII->get(MI.getOpcode()), 1);
  const TargetRegisterClass *SrcRC32 =
      TRI->getSubRegisterClass(SrcRC64, AArch64::sub_32);
  assert(SrcRC32 && "Source register class of UBFMXri doesn't have a sub_32 "
                    "subregister class");

  Register DstReg64 = MI.getOperand(0).getReg();
  Register DstReg32 = MRI->createVirtualRegister(DstRC32);
  Register SrcReg64 = MI.getOperand(1).getReg();
  Register SrcReg32 = MRI->createVirtualRegister(SrcRC32);

          .addReg(SrcReg64, {}, AArch64::sub_32);
          TII->get(AArch64::SUBREG_TO_REG), DstReg64)
  MI.eraseFromParent();
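// visitCopy looks at a COPY that reads only the low 32 bits (sub_32) of its
// input. If that input comes from a sign-extend-to-64 (SBFMXri, the sxtw
// pattern) or from a zero-extend built out of SUBREG_TO_REG/ORRWrr/COPY (the
// uxtw pattern), the low 32 bits are simply the original 32-bit value, so the
// COPY is redirected to read it directly and the now-dead extension
// instructions are deleted.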
bool AArch64MIPeepholeOpt::visitCopy(MachineInstr &MI) {
  Register InputReg = MI.getOperand(1).getReg();
  if (MI.getOperand(1).getSubReg() != AArch64::sub_32 ||
      !MRI->hasOneNonDBGUse(InputReg))

  MachineInstr *SrcMI = MRI->getUniqueVRegDef(InputReg);
  SmallPtrSet<MachineInstr *, 4> DeadInstrs;

  auto getSXTWSrcReg = [](MachineInstr *SrcMI) -> Register {
    if (SrcMI->getOpcode() != AArch64::SBFMXri ||
      return AArch64::NoRegister;

  auto getUXTWSrcReg = [&](MachineInstr *SrcMI) -> Register {
    if (SrcMI->getOpcode() != AArch64::SUBREG_TO_REG ||
      return AArch64::NoRegister;
    if (!Orr || Orr->getOpcode() != AArch64::ORRWrr ||
      return AArch64::NoRegister;
    if (!Cpy || Cpy->getOpcode() != AArch64::COPY ||
      return AArch64::NoRegister;

  Register SrcReg = getSXTWSrcReg(SrcMI);
    SrcReg = getUXTWSrcReg(SrcMI);

  MRI->constrainRegClass(SrcReg, MRI->getRegClass(InputReg));
  MI.getOperand(1).setReg(SrcReg);

  for (auto *DeadMI : DeadInstrs) {
    DeadMI->eraseFromParent();
bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  TRI = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();

  assert(MRI->isSSA() && "Expected to be run on SSA form!");

  for (MachineBasicBlock &MBB : MF) {
      switch (MI.getOpcode()) {
      case AArch64::INSERT_SUBREG:
      case AArch64::ANDWrr:
        Changed |= trySplitLogicalImm<uint32_t>(AArch64::ANDWri, MI,
                                                SplitStrategy::Intersect);
      case AArch64::ANDXrr:
        Changed |= trySplitLogicalImm<uint64_t>(AArch64::ANDXri, MI,
                                                SplitStrategy::Intersect);
      case AArch64::ANDSWrr:
        Changed |= trySplitLogicalImm<uint32_t>(
            AArch64::ANDWri, MI, SplitStrategy::Intersect, AArch64::ANDSWri);
      case AArch64::ANDSXrr:
        Changed |= trySplitLogicalImm<uint64_t>(
            AArch64::ANDXri, MI, SplitStrategy::Intersect, AArch64::ANDSXri);
      case AArch64::EORWrr:
        Changed |= trySplitLogicalImm<uint32_t>(AArch64::EORWri, MI,
                                                SplitStrategy::Disjoint);
      case AArch64::EORXrr:
        Changed |= trySplitLogicalImm<uint64_t>(AArch64::EORXri, MI,
                                                SplitStrategy::Disjoint);
      case AArch64::ORRWrr:
        Changed |= trySplitLogicalImm<uint32_t>(AArch64::ORRWri, MI,
                                                SplitStrategy::Disjoint);
      case AArch64::ORRXrr:
        Changed |= trySplitLogicalImm<uint64_t>(AArch64::ORRXri, MI,
                                                SplitStrategy::Disjoint);
      case AArch64::ORRWrs:
      case AArch64::ADDWrr:
        Changed |= visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri, MI);
      case AArch64::SUBWrr:
        Changed |= visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri, MI);
      case AArch64::ADDXrr:
        Changed |= visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri, MI);
      case AArch64::SUBXrr:
        Changed |= visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI);
      case AArch64::ADDSWrr:
        Changed |=
            visitADDSSUBS<uint32_t>({AArch64::ADDWri, AArch64::ADDSWri},
                                    {AArch64::SUBWri, AArch64::SUBSWri}, MI);
      case AArch64::SUBSWrr:
        Changed |=
            visitADDSSUBS<uint32_t>({AArch64::SUBWri, AArch64::SUBSWri},
                                    {AArch64::ADDWri, AArch64::ADDSWri}, MI);
      case AArch64::ADDSXrr:
        Changed |=
            visitADDSSUBS<uint64_t>({AArch64::ADDXri, AArch64::ADDSXri},
                                    {AArch64::SUBXri, AArch64::SUBSXri}, MI);
      case AArch64::SUBSXrr:
        Changed |=
            visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri},
                                    {AArch64::ADDXri, AArch64::ADDSXri}, MI);
      case AArch64::CSELWr:
      case AArch64::CSELXr:
      case AArch64::INSvi64gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi64lane);
      case AArch64::INSvi32gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi32lane);
      case AArch64::INSvi16gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi16lane);
      case AArch64::INSvi8gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi8lane);
      case AArch64::INSvi64lane:
      case AArch64::FMOVDr:
      case AArch64::UBFMXri:

  return new AArch64MIPeepholeOpt();