74#define DEBUG_TYPE "aarch64-mi-peephole-opt"
90 using OpcodePair = std::pair<unsigned, unsigned>;
92 using SplitAndOpcFunc =
93 std::function<std::optional<OpcodePair>(
T,
unsigned,
T &,
T &)>;
95 std::function<void(
MachineInstr &, OpcodePair,
unsigned,
unsigned,
112 template <
typename T>
114 SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr);
119 template <
typename T>
120 bool visitADDSUB(
unsigned PosOpc,
unsigned NegOpc,
MachineInstr &
MI);
121 template <
typename T>
122 bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs,
MachineInstr &
MI);
124 template <
typename T>
134 return "AArch64 MI Peephole Optimization pass";
144char AArch64MIPeepholeOpt::ID = 0;
149 "AArch64 MI Peephole Optimization",
false,
false)
153 T UImm =
static_cast<T>(Imm);
190bool AArch64MIPeepholeOpt::visitAND(
202 return splitTwoPartImm<T>(
205 T &Imm1) -> std::optional<OpcodePair> {
206 if (splitBitmaskImm(
Imm,
RegSize, Imm0, Imm1))
207 return std::make_pair(Opc, Opc);
229 if (
MI.getOperand(3).getImm() != 0)
232 if (
MI.getOperand(1).getReg() != AArch64::WZR)
249 if (SrcMI->
getOpcode() == TargetOpcode::COPY &&
256 if (RC != &AArch64::FPR32RegClass &&
257 ((RC != &AArch64::FPR64RegClass && RC != &AArch64::FPR128RegClass) ||
262 CpySrc =
MRI->createVirtualRegister(&AArch64::FPR32RegClass);
264 TII->get(TargetOpcode::COPY), CpySrc)
272 else if (SrcMI->
getOpcode() <= TargetOpcode::GENERIC_OP_END)
277 MRI->replaceRegWith(DefReg, SrcReg);
278 MRI->clearKillFlags(SrcReg);
280 MI.eraseFromParent();
293 if (!
MI.isRegTiedToDefOperand(1))
312 if ((SrcMI->
getOpcode() <= TargetOpcode::GENERIC_OP_END) ||
313 !AArch64::GPR64allRegClass.hasSubClassEq(RC))
319 TII->get(TargetOpcode::SUBREG_TO_REG), DstReg)
321 .
add(
MI.getOperand(2))
322 .
add(
MI.getOperand(3));
325 MI.eraseFromParent();
334 if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 ||
335 (Imm & ~
static_cast<T>(0xffffff)) != 0)
341 if (
Insn.size() == 1)
345 Imm0 = (Imm >> 12) & 0xfff;
351bool AArch64MIPeepholeOpt::visitADDSUB(
369 if (
MI.getOperand(1).getReg() == AArch64::XZR ||
370 MI.getOperand(1).getReg() == AArch64::WZR)
373 return splitTwoPartImm<T>(
375 [PosOpc, NegOpc](
T Imm,
unsigned RegSize,
T &Imm0,
376 T &Imm1) -> std::optional<OpcodePair> {
378 return std::make_pair(PosOpc, PosOpc);
380 return std::make_pair(NegOpc, NegOpc);
400bool AArch64MIPeepholeOpt::visitADDSSUBS(
405 if (
MI.getOperand(1).getReg() == AArch64::XZR ||
406 MI.getOperand(1).getReg() == AArch64::WZR)
409 return splitTwoPartImm<T>(
413 T &Imm1) -> std::optional<OpcodePair> {
425 if (!NZCVUsed || NZCVUsed->C || NZCVUsed->V)
453 if (L && !
L->isLoopInvariant(
MI))
457 MovMI =
MRI->getUniqueVRegDef(
MI.getOperand(2).getReg());
462 SubregToRegMI =
nullptr;
463 if (MovMI->
getOpcode() == TargetOpcode::SUBREG_TO_REG) {
464 SubregToRegMI = MovMI;
470 if (MovMI->
getOpcode() != AArch64::MOVi32imm &&
471 MovMI->
getOpcode() != AArch64::MOVi64imm)
486bool AArch64MIPeepholeOpt::splitTwoPartImm(
488 SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr) {
491 "Invalid RegSize for legal immediate peephole optimization");
495 if (!checkMovImmInstr(
MI, MovMI, SubregToRegMI))
507 if (
auto R = SplitAndOpc(Imm,
RegSize, Imm0, Imm1))
520 TII->getRegClass(
TII->get(Opcode.first), 0,
TRI, *MF);
522 TII->getRegClass(
TII->get(Opcode.first), 1,
TRI, *MF);
524 (Opcode.first == Opcode.second)
526 :
TII->getRegClass(
TII->get(Opcode.second), 0,
TRI, *MF);
528 (Opcode.first == Opcode.second)
529 ? FirstInstrOperandRC
530 :
TII->getRegClass(
TII->get(Opcode.second), 1,
TRI, *MF);
535 Register NewTmpReg =
MRI->createVirtualRegister(FirstInstrDstRC);
539 ?
MRI->createVirtualRegister(SecondInstrDstRC)
543 MRI->constrainRegClass(SrcReg, FirstInstrOperandRC);
544 MRI->constrainRegClass(NewTmpReg, SecondInstrOperandRC);
545 if (DstReg != NewDstReg)
546 MRI->constrainRegClass(NewDstReg,
MRI->getRegClass(DstReg));
549 BuildInstr(
MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg);
553 if (DstReg != NewDstReg) {
554 MRI->replaceRegWith(DstReg, NewDstReg);
555 MI.getOperand(0).setReg(DstReg);
559 MI.eraseFromParent();
567bool AArch64MIPeepholeOpt::visitINSviGPR(
MachineInstr &
MI,
unsigned Opc) {
584 if (!SrcMI || SrcMI->
getOpcode() != TargetOpcode::COPY)
591 &AArch64::FPR128RegClass) {
601 .
add(
MI.getOperand(1))
602 .
add(
MI.getOperand(2))
608 MI.eraseFromParent();
616 if (!
MI->getOperand(0).isReg() || !
MI->getOperand(0).isDef())
619 if (RC != &AArch64::FPR64RegClass)
621 return MI->getOpcode() > TargetOpcode::GENERIC_OP_END;
633 if (Low64MI->
getOpcode() != AArch64::INSERT_SUBREG)
653 if (!High64MI || High64MI->
getOpcode() != AArch64::INSERT_SUBREG)
656 if (High64MI && High64MI->
getOpcode() == TargetOpcode::COPY)
658 if (!High64MI || (High64MI->
getOpcode() != AArch64::MOVID &&
659 High64MI->
getOpcode() != AArch64::MOVIv2d_ns))
667 MRI->constrainRegClass(NewDef,
MRI->getRegClass(OldDef));
668 MRI->replaceRegWith(OldDef, NewDef);
669 MI.eraseFromParent();
684 MRI->clearKillFlags(OldDef);
685 MRI->clearKillFlags(NewDef);
686 MRI->constrainRegClass(NewDef,
MRI->getRegClass(OldDef));
687 MRI->replaceRegWith(OldDef, NewDef);
688 MI.eraseFromParent();
700 MLI = &getAnalysis<MachineLoopInfo>();
703 assert(
MRI->isSSA() &&
"Expected to be run on SSA form!");
705 bool Changed =
false;
709 switch (
MI.getOpcode()) {
712 case AArch64::INSERT_SUBREG:
713 Changed |= visitINSERT(
MI);
715 case AArch64::ANDWrr:
716 Changed |= visitAND<uint32_t>(AArch64::ANDWri,
MI);
718 case AArch64::ANDXrr:
719 Changed |= visitAND<uint64_t>(AArch64::ANDXri,
MI);
721 case AArch64::ORRWrs:
722 Changed |= visitORR(
MI);
724 case AArch64::ADDWrr:
725 Changed |= visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri,
MI);
727 case AArch64::SUBWrr:
728 Changed |= visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri,
MI);
730 case AArch64::ADDXrr:
731 Changed |= visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri,
MI);
733 case AArch64::SUBXrr:
734 Changed |= visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri,
MI);
736 case AArch64::ADDSWrr:
738 visitADDSSUBS<uint32_t>({AArch64::ADDWri, AArch64::ADDSWri},
739 {AArch64::SUBWri, AArch64::SUBSWri},
MI);
741 case AArch64::SUBSWrr:
743 visitADDSSUBS<uint32_t>({AArch64::SUBWri, AArch64::SUBSWri},
744 {AArch64::ADDWri, AArch64::ADDSWri},
MI);
746 case AArch64::ADDSXrr:
748 visitADDSSUBS<uint64_t>({AArch64::ADDXri, AArch64::ADDSXri},
749 {AArch64::SUBXri, AArch64::SUBSXri},
MI);
751 case AArch64::SUBSXrr:
753 visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri},
754 {AArch64::ADDXri, AArch64::ADDSXri},
MI);
756 case AArch64::INSvi64gpr:
757 Changed |= visitINSviGPR(
MI, AArch64::INSvi64lane);
759 case AArch64::INSvi32gpr:
760 Changed |= visitINSviGPR(
MI, AArch64::INSvi32lane);
762 case AArch64::INSvi16gpr:
763 Changed |= visitINSviGPR(
MI, AArch64::INSvi16lane);
765 case AArch64::INSvi8gpr:
766 Changed |= visitINSviGPR(
MI, AArch64::INSvi8lane);
768 case AArch64::INSvi64lane:
769 Changed |= visitINSvi64lane(
MI);
771 case AArch64::FMOVDr:
772 Changed |= visitFMOVDr(
MI);
782 return new AArch64MIPeepholeOpt();
unsigned const MachineRegisterInfo * MRI
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
static bool is64bitDefwithZeroHigh64bit(MachineInstr *MI, MachineRegisterInfo *MRI)
static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
const HexagonInstrInfo * TII
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
FunctionPass class - This class is used to implement most global optimizations.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
unsigned getSubReg() const
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Wrapper class representing virtual and physical registers.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createAArch64MIPeepholeOptPass()
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
std::optional< UsedNZCV > examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > *CCUseInstrs=nullptr)
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
void initializeAArch64MIPeepholeOptPass(PassRegistry &)