#define DEBUG_TYPE "aarch64-mi-peephole-opt"
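// MIR-level peephole optimizations for AArch64. Broadly (inferred from the
// opcode dispatch in runOnMachineFunction below): split MOVi32imm/MOVi64imm
// immediates feeding AND/ADD/SUB/ORR/EOR into pairs of immediate-form
// instructions, fold redundant ORR/CSEL copies and INSERT_SUBREG
// zero-extends, rewrite INSvi*gpr fed through FPR copies, and narrow 64-bit
// UBFMXri shifts that only touch the low 32 bits.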
  using OpcodePair = std::pair<unsigned, unsigned>;
  template <typename T>
  using SplitAndOpcFunc =
      std::function<std::optional<OpcodePair>(T, unsigned, T &, T &)>;
  using BuildMIFunc =
      std::function<void(MachineInstr &, OpcodePair, unsigned, unsigned,
                         Register, Register, Register)>;
  template <typename T>
  bool splitTwoPartImm(MachineInstr &MI, SplitAndOpcFunc<T> SplitAndOpc,
                       BuildMIFunc BuildInstr);
  template <typename T>
  bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI);
  template <typename T>
  bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI);
  // Strategy used to split a logical immediate in two.
  enum class SplitStrategy {
    Intersect,
    Disjoint,
  };
  template <typename T>
  bool trySplitLogicalImm(unsigned Opc, MachineInstr &MI,
                          SplitStrategy Strategy, unsigned OtherOpc = 0);
  StringRef getPassName() const override {
    return "AArch64 MI Peephole Optimization pass";
  }
char AArch64MIPeepholeOpt::ID = 0;
162 "AArch64 MI Peephole Optimization",
false,
false)
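// Split a logical immediate that is not directly encodable into two bitmask
// immediates whose AND (intersection of set bits) reproduces the original
// value, so one AND with a hard immediate can become two ANDri instructions.
// Used by the Intersect strategy below.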
template <typename T>
static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
  T UImm = static_cast<T>(Imm);
  assert(UImm && (UImm != ~static_cast<T>(0)) && "Invalid immediate!");
  // ...
}
template <typename T>
static bool splitDisjointBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc,
                                    T &Imm2Enc) {
  assert(Imm && (Imm != ~static_cast<T>(0)) && "Invalid immediate!");
  // ...
  unsigned LowestBitSet = llvm::countr_zero(Imm);
  unsigned LowestGapBitUnset =
      LowestBitSet + llvm::countr_one(Imm >> LowestBitSet);
  // Create a mask covering the least-significant run of set bits.
  assert(LowestGapBitUnset < sizeof(T) * CHAR_BIT && "Undefined behaviour!");
  T NewImm1 = (static_cast<T>(1) << LowestGapBitUnset) -
              (static_cast<T>(1) << LowestBitSet);
  // ...
}
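// Driver for both strategies: try to replace `MOVimm + <logical op>rr` with
// two immediate-form instructions. OtherOpc, when nonzero, supplies a
// flag-setting opcode (e.g. ANDSWri) for the second instruction only, so
// NZCV is set by the final result.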
template <typename T>
bool AArch64MIPeepholeOpt::trySplitLogicalImm(unsigned Opc, MachineInstr &MI,
                                              SplitStrategy Strategy,
                                              unsigned OtherOpc) {
  // ...
  return splitTwoPartImm<T>(
      MI,
      [Opc, Strategy, OtherOpc](T Imm, unsigned RegSize, T &Imm0,
                                T &Imm1) -> std::optional<OpcodePair> {
        // If this immediate can be handled by one instruction, don't split.
        SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
        AArch64_IMM::expandMOVImm(Imm, RegSize, Insn);
        if (Insn.size() == 1)
          return std::nullopt;

        bool SplitSucc = false;
        switch (Strategy) {
        case SplitStrategy::Intersect:
          SplitSucc = splitBitmaskImm(Imm, RegSize, Imm0, Imm1);
          break;
        case SplitStrategy::Disjoint:
          SplitSucc = splitDisjointBitmaskImm(Imm, RegSize, Imm0, Imm1);
          break;
        }
        if (SplitSucc)
          return std::make_pair(Opc, !OtherOpc ? Opc : OtherOpc);
        return std::nullopt;
      },
      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
                   Register NewDstReg) {
        DebugLoc DL = MI.getDebugLoc();
        MachineBasicBlock *MBB = MI.getParent();
        // ...
      });
}
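// An ORRWrs of WZR with shift 0 is just a copy emitted for zero-extension.
// If the source is already defined by a real 32-bit AArch64 instruction
// (which zeroes the upper 32 bits of the X register), the ORR is redundant
// and can be removed; COPYs from FPR register classes get special handling
// so the zero-extension property is known to hold.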
bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) {
  if (MI.getOperand(3).getImm() != 0)
    return false;

  if (MI.getOperand(1).getReg() != AArch64::WZR)
    return false;

  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!SrcMI)
    return false;
  // ...
  if (SrcMI->getOpcode() == TargetOpcode::COPY &&
      SrcMI->getOperand(1).getReg().isVirtual()) {
    const TargetRegisterClass *RC =
        MRI->getRegClass(SrcMI->getOperand(1).getReg());
    // ...
    if (RC != &AArch64::FPR32RegClass &&
        ((RC != &AArch64::FPR64RegClass && RC != &AArch64::FPR128RegClass &&
          RC != &AArch64::ZPRRegClass) ||
         SrcMI->getOperand(1).getSubReg() != AArch64::ssub))
      return false;
    Register CpySrc = SrcMI->getOperand(1).getReg();
    if (SrcMI->getOperand(1).getSubReg() == AArch64::ssub) {
      CpySrc = MRI->createVirtualRegister(&AArch64::FPR32RegClass);
      BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
              TII->get(TargetOpcode::COPY), CpySrc)
          .add(SrcMI->getOperand(1));
    }
    // ...
  } else if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
    return false;

  Register DefReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(2).getReg();
  MRI->replaceRegWith(DefReg, SrcReg);
  MRI->clearKillFlags(SrcReg);
  // ...
  MI.eraseFromParent();
  return true;
}
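// A CSEL whose true and false operands are the same register selects that
// register unconditionally, so it can be replaced by a plain move (encoded
// as ORR with the zero register and zero shift).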
bool AArch64MIPeepholeOpt::visitCSEL(MachineInstr &MI) {
  if (MI.getOperand(1).getReg() != MI.getOperand(2).getReg())
    return false;

  auto ZeroReg =
      MI.getOpcode() == AArch64::CSELXr ? AArch64::XZR : AArch64::WZR;
  auto OrOpcode =
      MI.getOpcode() == AArch64::CSELXr ? AArch64::ORRXrs : AArch64::ORRWrs;

  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(OrOpcode))
      .addReg(MI.getOperand(0).getReg(), RegState::Define)
      .addReg(ZeroReg)
      .addReg(MI.getOperand(1).getReg())
      .addImm(0);
  MI.eraseFromParent();
  return true;
}
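// Rewrite an INSERT_SUBREG that only exists to zero-extend:
//   From %0 = INSERT_SUBREG %1(tied-def 0), %2, subreg
//   To   %0 = SUBREG_TO_REG 0, %2, subreg
// legal when %2 is defined by a real (non-generic) instruction, whose 32-bit
// write already zeroes the high bits, making the merge with %1 irrelevant.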
bool AArch64MIPeepholeOpt::visitINSERT(MachineInstr &MI) {
  if (!MI.isRegTiedToDefOperand(1))
    return false;

  Register DstReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI->getRegClass(DstReg);
  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!SrcMI)
    return false;
  // ...
  if ((SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) ||
      !AArch64::GPR64allRegClass.hasSubClassEq(RC))
    return false;

  MachineInstr *SubregMI =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
              TII->get(TargetOpcode::SUBREG_TO_REG), DstReg)
          .addImm(0)
          .add(MI.getOperand(2))
          .add(MI.getOperand(3));
  (void)SubregMI;
  MI.eraseFromParent();
  return true;
}
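// An ADD/SUB immediate encodes 12 bits, optionally shifted left by 12. A
// 24-bit immediate of the form (Imm0 << 12) + Imm1 with both halves nonzero
// therefore needs two instructions; this helper computes the two halves.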
template <typename T>
static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
  // Imm must be of the form (Imm0 << 12) + Imm1, both halves non-zero 12-bit.
  if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 ||
      (Imm & ~static_cast<T>(0xffffff)) != 0)
    return false;
  // Reject immediates that a single MOV can already materialize.
  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(Imm, RegSize, Insn);
  if (Insn.size() == 1)
    return false;
  Imm0 = (Imm >> 12) & 0xfff;
  Imm1 = Imm & 0xfff;
  return true;
}
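// Replace `MOVimm + ADD/SUBrr` with two ADDri/SUBri instructions when the
// immediate splits into two 12-bit pieces. If the immediate itself does not
// split, its negation is tried with the opposite opcode, since
// x + imm == x - (-imm).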
template <typename T>
bool AArch64MIPeepholeOpt::visitADDSUB(unsigned PosOpc, unsigned NegOpc,
                                       MachineInstr &MI) {
  // ...
  if (MI.getOperand(1).getReg() == AArch64::XZR ||
      MI.getOperand(1).getReg() == AArch64::WZR)
    return false;

  return splitTwoPartImm<T>(
      MI,
      [PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0,
                       T &Imm1) -> std::optional<OpcodePair> {
        if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
          return std::make_pair(PosOpc, PosOpc);
        if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
          return std::make_pair(NegOpc, NegOpc);
        return std::nullopt;
      },
      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
                   Register NewDstReg) {
        DebugLoc DL = MI.getDebugLoc();
        MachineBasicBlock *MBB = MI.getParent();
        // ...
      });
}
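// Same transformation for the flag-setting ADDS/SUBS forms. Only the second
// instruction of the pair sets NZCV, so the rewrite is restricted to NZCV
// uses that read N or Z alone (C and V would differ after the split).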
template <typename T>
bool AArch64MIPeepholeOpt::visitADDSSUBS(OpcodePair PosOpcs,
                                         OpcodePair NegOpcs,
                                         MachineInstr &MI) {
  // ...
  if (MI.getOperand(1).getReg() == AArch64::XZR ||
      MI.getOperand(1).getReg() == AArch64::WZR)
    return false;

  return splitTwoPartImm<T>(
      MI,
      [PosOpcs, NegOpcs, &MI, &TRI = TRI,
       &MRI = MRI](T Imm, unsigned RegSize, T &Imm0,
                   T &Imm1) -> std::optional<OpcodePair> {
        // ...
        MachineInstr &SrcMI = *MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
        std::optional<UsedNZCV> NZCVUsed = examineCFlagsUse(SrcMI, MI, *TRI);
        if (!NZCVUsed || NZCVUsed->C || NZCVUsed->V)
          return std::nullopt;
        // ...
      },
      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
                   Register NewDstReg) {
        DebugLoc DL = MI.getDebugLoc();
        MachineBasicBlock *MBB = MI.getParent();
        // ...
      });
}
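// Common legality check for the immediate-splitting peepholes: only
// transform inside a loop when the whole instruction is loop-invariant (a
// hoisted MOV of the immediate is otherwise free per iteration), and
// operand 2 must be defined by a MOVi32imm/MOVi64imm, possibly wrapped in a
// SUBREG_TO_REG.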
bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI,
                                            MachineInstr *&MovMI,
                                            MachineInstr *&SubregToRegMI) {
  MachineBasicBlock *MBB = MI.getParent();
  MachineLoop *L = MLI->getLoopFor(MBB);
  if (L && !L->isLoopInvariant(MI))
    return false;

  MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!MovMI)
    return false;

  // Look through a SUBREG_TO_REG wrapping the MOV.
  SubregToRegMI = nullptr;
  if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
    SubregToRegMI = MovMI;
    MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
    if (!MovMI)
      return false;
  }

  if (MovMI->getOpcode() != AArch64::MOVi32imm &&
      MovMI->getOpcode() != AArch64::MOVi64imm)
    return false;
  // ...
  return true;
}
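// Shared driver for all two-instruction immediate splits. SplitAndOpc
// decides whether and how to split the materialized immediate and which
// opcode pair to use; BuildInstr emits the replacement sequence
//   NewTmpReg = Opcode.first  SrcReg    #Imm0
//   NewDstReg = Opcode.second NewTmpReg #Imm1
// after the register classes have been constrained to fit the new opcodes.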
template <typename T>
bool AArch64MIPeepholeOpt::splitTwoPartImm(
    MachineInstr &MI, SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr) {
  unsigned RegSize = sizeof(T) * 8;
  assert((RegSize == 32 || RegSize == 64) &&
         "Invalid RegSize for legal immediate peephole optimization");

  // Perform the essential checks against the current MI.
  MachineInstr *MovMI, *SubregToRegMI;
  if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
    return false;

  // Split the immediate into Imm0 and Imm1 and pick the opcode pair.
  T Imm = static_cast<T>(MovMI->getOperand(1).getImm()), Imm0, Imm1;
  // ...
  OpcodePair Opcode;
  if (auto R = SplitAndOpc(Imm, RegSize, Imm0, Imm1))
    Opcode = *R;
  else
    return false;

  // Determine the register classes of the destinations and operands.
  MachineFunction *MF = MI.getMF();
  const TargetRegisterClass *FirstInstrDstRC =
      TII->getRegClass(TII->get(Opcode.first), 0, TRI, *MF);
  const TargetRegisterClass *FirstInstrOperandRC =
      TII->getRegClass(TII->get(Opcode.first), 1, TRI, *MF);
  const TargetRegisterClass *SecondInstrDstRC =
      (Opcode.first == Opcode.second)
          ? FirstInstrDstRC
          : TII->getRegClass(TII->get(Opcode.second), 0, TRI, *MF);
  const TargetRegisterClass *SecondInstrOperandRC =
      (Opcode.first == Opcode.second)
          ? FirstInstrOperandRC
          : TII->getRegClass(TII->get(Opcode.second), 1, TRI, *MF);

  // Get the old destination and create the new registers.
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  Register NewTmpReg = MRI->createVirtualRegister(FirstInstrDstRC);
  Register NewDstReg = DstReg.isVirtual()
                           ? MRI->createVirtualRegister(SecondInstrDstRC)
                           : DstReg;

  // Constrain the registers to their new uses.
  MRI->constrainRegClass(SrcReg, FirstInstrOperandRC);
  MRI->constrainRegClass(NewTmpReg, SecondInstrOperandRC);
  if (DstReg != NewDstReg)
    MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));

  // Delegate building of the two replacement instructions.
  BuildInstr(MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg);

  if (DstReg != NewDstReg) {
    MRI->replaceRegWith(DstReg, NewDstReg);
    MI.getOperand(0).setReg(DstReg);
  }

  MI.eraseFromParent();
  if (SubregToRegMI)
    SubregToRegMI->eraseFromParent();
  MovMI->eraseFromParent();
  return true;
}
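// An INSvi*gpr whose GPR operand is really a COPY out of an FPR128 can use
// INSvi*lane directly on the original vector register, saving the round
// trip through a general-purpose register.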
bool AArch64MIPeepholeOpt::visitINSviGPR(MachineInstr &MI, unsigned Opc) {
  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(3).getReg());

  // Walk the COPY chain back to an FPR128 source.
  while (true) {
    if (!SrcMI || SrcMI->getOpcode() != TargetOpcode::COPY)
      return false;
    // ...
    if (MRI->getRegClass(SrcMI->getOperand(1).getReg()) ==
        &AArch64::FPR128RegClass) {
      break;
    }
    SrcMI = MRI->getUniqueVRegDef(SrcMI->getOperand(1).getReg());
  }

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = SrcMI->getOperand(1).getReg();
  MachineInstr *INSvilaneMI =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opc), DstReg)
          .add(MI.getOperand(1))
          .add(MI.getOperand(2))
          .addUse(SrcReg, getRegState(SrcMI->getOperand(1)))
          .addImm(0);
  (void)INSvilaneMI;
  MI.eraseFromParent();
  return true;
}
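// True if MI is a real (non-generic) AArch64 instruction writing an FPR64
// result; such writes implicitly zero the high 64 bits of the containing Q
// register.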
static bool is64bitDefwithZeroHigh64bit(MachineInstr *MI,
                                        MachineRegisterInfo *MRI) {
  if (!MI->getOperand(0).isReg() || !MI->getOperand(0).isDef())
    return false;
  const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg());
  if (RC != &AArch64::FPR64RegClass)
    return false;
  return MI->getOpcode() > TargetOpcode::GENERIC_OP_END;
}
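// Remove an INSvi64lane whose result equals its first vector operand, as
// far as the checks below can establish: the low 64 bits come from a def
// that implicitly zeroes the high half, and the inserted high lane is known
// zero (a MOVID/MOVIv2d_ns of 0), so the insert changes nothing.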
bool AArch64MIPeepholeOpt::visitINSvi64lane(MachineInstr &MI) {
  // Check that the def of the low 64 bits zeroes the high 64 bits implicitly.
  MachineInstr *Low64MI = MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
  if (Low64MI->getOpcode() != AArch64::INSERT_SUBREG)
    return false;
  Low64MI = MRI->getUniqueVRegDef(Low64MI->getOperand(2).getReg());
  if (!Low64MI || !is64bitDefwithZeroHigh64bit(Low64MI, MRI))
    return false;

  // Check that the high 64 bits come from a `mov 0`.
  MachineInstr *High64MI = MRI->getUniqueVRegDef(MI.getOperand(3).getReg());
  if (!High64MI || High64MI->getOpcode() != AArch64::INSERT_SUBREG)
    return false;
  High64MI = MRI->getUniqueVRegDef(High64MI->getOperand(2).getReg());
  if (High64MI && High64MI->getOpcode() == TargetOpcode::COPY)
    High64MI = MRI->getUniqueVRegDef(High64MI->getOperand(1).getReg());
  if (!High64MI || (High64MI->getOpcode() != AArch64::MOVID &&
                    High64MI->getOpcode() != AArch64::MOVIv2d_ns))
    return false;
  if (High64MI->getOperand(1).getImm() != 0)
    return false;

  // The insert is redundant; forward the low-64 register.
  Register OldDef = MI.getOperand(0).getReg();
  Register NewDef = MI.getOperand(1).getReg();
  MRI->constrainRegClass(NewDef, MRI->getRegClass(OldDef));
  MRI->replaceRegWith(OldDef, NewDef);
  MI.eraseFromParent();
  return true;
}
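// An FMOVDr (D-to-D register move) only exists to zero the high 64 bits; if
// the source def already guarantees zeroed high bits, the move is a no-op
// copy and the source register can be forwarded.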
bool AArch64MIPeepholeOpt::visitFMOVDr(MachineInstr &MI) {
  MachineInstr *Low64MI = MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
  if (!Low64MI || !is64bitDefwithZeroHigh64bit(Low64MI, MRI))
    return false;

  Register OldDef = MI.getOperand(0).getReg();
  Register NewDef = MI.getOperand(1).getReg();
  MRI->clearKillFlags(OldDef);
  MRI->clearKillFlags(NewDef);
  MRI->constrainRegClass(NewDef, MRI->getRegClass(OldDef));
  MRI->replaceRegWith(OldDef, NewDef);
  MI.eraseFromParent();
  return true;
}
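// A UBFMXri that encodes a 32-bit LSR (Imms == 31, Immr <= 31) or a 32-bit
// LSL (Immr == Imms + 33) only touches the low word, so it can be narrowed
// to a UBFMWri on the sub_32 registers with the result zero-extended back
// via SUBREG_TO_REG.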
bool AArch64MIPeepholeOpt::visitUBFMXri(MachineInstr &MI) {
  int64_t Immr = MI.getOperand(2).getImm();
  int64_t Imms = MI.getOperand(3).getImm();

  bool IsLSR = Imms == 31 && Immr <= Imms;
  bool IsLSL = Immr == Imms + 33;
  if (!IsLSR && !IsLSL)
    return false;

  // Adjust the shift amount for the 32-bit encoding of LSL.
  if (IsLSL)
    Immr -= 32;

  const TargetRegisterClass *DstRC64 =
      TII->getRegClass(TII->get(MI.getOpcode()), 0, TRI, *MI.getMF());
  const TargetRegisterClass *DstRC32 =
      TRI->getSubRegisterClass(DstRC64, AArch64::sub_32);
  assert(DstRC32 && "Destination register class of UBFMXri doesn't have a "
                    "sub_32 subregister class");

  const TargetRegisterClass *SrcRC64 =
      TII->getRegClass(TII->get(MI.getOpcode()), 1, TRI, *MI.getMF());
  const TargetRegisterClass *SrcRC32 =
      TRI->getSubRegisterClass(SrcRC64, AArch64::sub_32);
  assert(SrcRC32 && "Source register class of UBFMXri doesn't have a sub_32 "
                    "subregister class");

  Register DstReg64 = MI.getOperand(0).getReg();
  Register DstReg32 = MRI->createVirtualRegister(DstRC32);
  Register SrcReg64 = MI.getOperand(1).getReg();
  Register SrcReg32 = MRI->createVirtualRegister(SrcRC32);

  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY),
          SrcReg32)
      .addReg(SrcReg64, 0, AArch64::sub_32);
  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::UBFMWri),
          DstReg32)
      .addReg(SrcReg32)
      .addImm(Immr)
      .addImm(Imms);
  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
          TII->get(AArch64::SUBREG_TO_REG), DstReg64)
      .addImm(0)
      .addReg(DstReg32)
      .addImm(AArch64::sub_32);
  MI.eraseFromParent();
  return true;
}
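// A COPY that reads the sub_32 piece of a 64-bit register does not care how
// the upper bits were produced. If the 64-bit value is itself a sign-extend
// (SBFMXri 0, 31) or zero-extend (SUBREG_TO_REG of ORRWrr WZR) of a 32-bit
// value, read the original 32-bit register directly and delete the now-dead
// extension instructions.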
bool AArch64MIPeepholeOpt::visitCopy(MachineInstr &MI) {
  Register InputReg = MI.getOperand(1).getReg();
  if (MI.getOperand(1).getSubReg() != AArch64::sub_32 ||
      !MRI->hasOneNonDBGUse(InputReg))
    return false;

  MachineInstr *SrcMI = MRI->getUniqueVRegDef(InputReg);
  SmallPtrSet<MachineInstr *, 4> DeadInstrs;
  DeadInstrs.insert(SrcMI);
  // Look through a chain of single-use full COPYs feeding this one.
  // ...

  // Match a sign-extend: SBFMXri with immr = 0, imms = 31 (SXTW).
  auto getSXTWSrcReg = [](MachineInstr *SrcMI) -> Register {
    if (SrcMI->getOpcode() != AArch64::SBFMXri ||
        SrcMI->getOperand(2).getImm() != 0 ||
        SrcMI->getOperand(3).getImm() != 31)
      return AArch64::NoRegister;
    return SrcMI->getOperand(1).getReg();
  };
  // Match a zero-extend: SUBREG_TO_REG of (ORRWrr WZR, COPY(x.sub_32)).
  auto getUXTWSrcReg = [&](MachineInstr *SrcMI) -> Register {
    if (SrcMI->getOpcode() != AArch64::SUBREG_TO_REG ||
        SrcMI->getOperand(3).getImm() != AArch64::sub_32 ||
        !MRI->hasOneNonDBGUse(SrcMI->getOperand(2).getReg()))
      return AArch64::NoRegister;
    MachineInstr *Orr = MRI->getUniqueVRegDef(SrcMI->getOperand(2).getReg());
    if (!Orr || Orr->getOpcode() != AArch64::ORRWrr ||
        Orr->getOperand(1).getReg() != AArch64::WZR ||
        !MRI->hasOneNonDBGUse(Orr->getOperand(2).getReg()))
      return AArch64::NoRegister;
    MachineInstr *Cpy = MRI->getUniqueVRegDef(Orr->getOperand(2).getReg());
    if (!Cpy || Cpy->getOpcode() != AArch64::COPY ||
        Cpy->getOperand(1).getSubReg() != AArch64::sub_32)
      return AArch64::NoRegister;
    DeadInstrs.insert(Orr);
    return Cpy->getOperand(1).getReg();
  };

  Register SrcReg = getSXTWSrcReg(SrcMI);
  if (!SrcReg)
    SrcReg = getUXTWSrcReg(SrcMI);
  if (!SrcReg)
    return false;

  MRI->constrainRegClass(SrcReg, MRI->getRegClass(InputReg));
  MI.getOperand(1).setReg(SrcReg);
  for (auto *DeadMI : DeadInstrs) {
    DeadMI->eraseFromParent();
  }
  return true;
}
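// Top-level driver: fetch the target hooks and loop info, then walk every
// instruction (early-inc, since visitors may erase instructions) and
// dispatch on the opcode.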
bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  TRI = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
  MRI = &MF.getRegInfo();

  assert(MRI->isSSA() && "Expected to be run on SSA form!");

  bool Changed = false;

  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : make_early_inc_range(MBB)) {
      switch (MI.getOpcode()) {
      default:
        break;
      case AArch64::INSERT_SUBREG:
        Changed |= visitINSERT(MI);
        break;
      case AArch64::ANDWrr:
        Changed |= trySplitLogicalImm<uint32_t>(AArch64::ANDWri, MI,
                                                SplitStrategy::Intersect);
        break;
      case AArch64::ANDXrr:
        Changed |= trySplitLogicalImm<uint64_t>(AArch64::ANDXri, MI,
                                                SplitStrategy::Intersect);
        break;
      case AArch64::ANDSWrr:
        Changed |= trySplitLogicalImm<uint32_t>(
            AArch64::ANDWri, MI, SplitStrategy::Intersect, AArch64::ANDSWri);
        break;
      case AArch64::ANDSXrr:
        Changed |= trySplitLogicalImm<uint64_t>(
            AArch64::ANDXri, MI, SplitStrategy::Intersect, AArch64::ANDSXri);
        break;
      case AArch64::EORWrr:
        Changed |= trySplitLogicalImm<uint32_t>(AArch64::EORWri, MI,
                                                SplitStrategy::Disjoint);
        break;
      case AArch64::EORXrr:
        Changed |= trySplitLogicalImm<uint64_t>(AArch64::EORXri, MI,
                                                SplitStrategy::Disjoint);
        break;
      case AArch64::ORRWrr:
        Changed |= trySplitLogicalImm<uint32_t>(AArch64::ORRWri, MI,
                                                SplitStrategy::Disjoint);
        break;
      case AArch64::ORRXrr:
        Changed |= trySplitLogicalImm<uint64_t>(AArch64::ORRXri, MI,
                                                SplitStrategy::Disjoint);
        break;
      case AArch64::ORRWrs:
        Changed |= visitORR(MI);
        break;
      case AArch64::ADDWrr:
        Changed |= visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri, MI);
        break;
      case AArch64::SUBWrr:
        Changed |= visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri, MI);
        break;
      case AArch64::ADDXrr:
        Changed |= visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri, MI);
        break;
      case AArch64::SUBXrr:
        Changed |= visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI);
        break;
      case AArch64::ADDSWrr:
        Changed |=
            visitADDSSUBS<uint32_t>({AArch64::ADDWri, AArch64::ADDSWri},
                                    {AArch64::SUBWri, AArch64::SUBSWri}, MI);
        break;
      case AArch64::SUBSWrr:
        Changed |=
            visitADDSSUBS<uint32_t>({AArch64::SUBWri, AArch64::SUBSWri},
                                    {AArch64::ADDWri, AArch64::ADDSWri}, MI);
        break;
      case AArch64::ADDSXrr:
        Changed |=
            visitADDSSUBS<uint64_t>({AArch64::ADDXri, AArch64::ADDSXri},
                                    {AArch64::SUBXri, AArch64::SUBSXri}, MI);
        break;
      case AArch64::SUBSXrr:
        Changed |=
            visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri},
                                    {AArch64::ADDXri, AArch64::ADDSXri}, MI);
        break;
      case AArch64::CSELWr:
      case AArch64::CSELXr:
        Changed |= visitCSEL(MI);
        break;
      case AArch64::INSvi64gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi64lane);
        break;
      case AArch64::INSvi32gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi32lane);
        break;
      case AArch64::INSvi16gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi16lane);
        break;
      case AArch64::INSvi8gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi8lane);
        break;
      case AArch64::INSvi64lane:
        Changed |= visitINSvi64lane(MI);
        break;
      case AArch64::FMOVDr:
        Changed |= visitFMOVDr(MI);
        break;
      case AArch64::UBFMXri:
        Changed |= visitUBFMXri(MI);
        break;
      case AArch64::COPY:
        Changed |= visitCopy(MI);
        break;
      }
    }
  }

  return Changed;
}

FunctionPass *llvm::createAArch64MIPeepholeOptPass() {
  return new AArch64MIPeepholeOpt();
}