#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <limits>

using namespace llvm;

#define DEBUG_TYPE "gcn-dpp-combine"

STATISTIC(NumDPPMovsCombined,
          "Number of DPP moves combined.");
namespace {

using RegSubRegPair = TargetInstrInfo::RegSubRegPair;

class GCNDPPCombine : public MachineFunctionPass {
  MachineRegisterInfo *MRI;
  const SIInstrInfo *TII;
  const GCNSubtarget *ST;

  MachineInstr *createDPPInst(MachineInstr &OrigMI, MachineInstr &MovMI,
                              RegSubRegPair CombOldVGPR,
                              MachineOperand *OldOpndValue, bool CombBCZ,
                              bool IsShrinkable) const;

  MachineInstr *createDPPInst(MachineInstr &OrigMI, MachineInstr &MovMI,
                              RegSubRegPair CombOldVGPR, bool CombBCZ,
                              bool IsShrinkable) const;

  bool hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName, int64_t Value,
                       int64_t Mask = -1) const;

  bool combineDPPMov(MachineInstr &MovMI) const;
  MachineOperand *getOldOpndValue(MachineOperand &OldOpnd) const;
public:
  static char ID;

  GCNDPPCombine() : MachineFunctionPass(ID) {
    initializeGCNDPPCombinePass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "GCN DPP Combine"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::IsSSA);
  }

private:
  int getDPPOp(unsigned Op, bool IsShrinkable) const;
  bool isShrinkable(MachineInstr &MI) const;
};

} // end anonymous namespace
char GCNDPPCombine::ID = 0;

INITIALIZE_PASS(GCNDPPCombine, DEBUG_TYPE, "GCN DPP Combine", false, false)

FunctionPass *llvm::createGCNDPPCombinePass() {
  return new GCNDPPCombine();
}
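// Usage note (an assumption, not shown in this listing): the AMDGPU codegen
// pipeline is expected to create this pass via createGCNDPPCombinePass() and
// run it pre-RA, while the MIR is still in SSA form as required by
// getRequiredProperties() above.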
bool GCNDPPCombine::isShrinkable(MachineInstr &MI) const {
  unsigned Op = MI.getOpcode();
  if (!TII->isVOP3(Op)) {
    return false;
  }
  if (!TII->hasVALU32BitEncoding(Op)) {
    LLVM_DEBUG(dbgs() << "  Inst hasn't e32 equivalent\n");
    return false;
  }
  // Do not shrink True16 instructions pre-RA.
  if (AMDGPU::isTrue16Inst(Op))
    return false;
  if (const auto *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst)) {
    // Give up if the carry-out operand is live: the shrunken encoding would
    // write it to vcc instead of a virtual register.
    if (!MRI->use_nodbg_empty(SDst->getReg()))
      return false;
  }
  // Check that no modifiers other than abs|neg are set (opsel, for example).
  const int64_t Mask = ~(SISrcMods::ABS | SISrcMods::NEG);
  if (!hasNoImmOrEqual(MI, AMDGPU::OpName::src0_modifiers, 0, Mask) ||
      !hasNoImmOrEqual(MI, AMDGPU::OpName::src1_modifiers, 0, Mask) ||
      !hasNoImmOrEqual(MI, AMDGPU::OpName::clamp, 0) ||
      !hasNoImmOrEqual(MI, AMDGPU::OpName::omod, 0)) {
    return false;
  }
  return true;
}
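// Hedged example of what "shrinkable" means here: a VOP3-encoded
// V_ADD_U32_e64 with no clamp/omod and only abs/neg source modifiers can
// drop to V_ADD_U32_e32, and it is that e32 form which has a DPP variant.
// An instruction whose sdst (carry-out) is still used must keep the VOP3
// encoding, because the e32 form would write the carry to vcc instead.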
int GCNDPPCombine::getDPPOp(unsigned Op, bool IsShrinkable) const {
  int DPP32 = AMDGPU::getDPPOp32(Op);
  if (IsShrinkable) {
    assert(DPP32 == -1);
    int E32 = AMDGPU::getVOPe32(Op);
    DPP32 = (E32 == -1) ? -1 : AMDGPU::getDPPOp32(E32);
  }
  if (DPP32 != -1 && TII->pseudoToMCOpcode(DPP32) != -1)
    return DPP32;
  int DPP64 = -1;
  if (ST->hasVOP3DPP())
    DPP64 = AMDGPU::getDPPOp64(Op);
  if (DPP64 != -1 && TII->pseudoToMCOpcode(DPP64) != -1)
    return DPP64;
  return -1;
}
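// Hedged illustration of the opcode mapping (the tables are generated, so
// treat the exact names as assumptions): getDPPOp32 takes, e.g.,
// V_ADD_U32_e32 to V_ADD_U32_dpp; for a shrinkable VOP3 the lookup goes
// through its e32 twin first, and on subtargets with VOP3+DPP support
// getDPPOp64 can still supply a 64-bit DPP encoding when no 32-bit one
// exists.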
MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const {
  auto *Def = getVRegSubRegDef(getRegSubRegPair(OldOpnd), *MRI);
  if (!Def)
    return nullptr;

  switch (Def->getOpcode()) {
  default:
    break;
  case AMDGPU::IMPLICIT_DEF:
    return nullptr; // undef
  case AMDGPU::COPY:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64: {
    auto &Op1 = Def->getOperand(1);
    if (Op1.isImm())
      return &Op1;
    break;
  }
  }
  return &OldOpnd;
}

static unsigned getOperandSize(MachineInstr &MI, unsigned Idx,
                               MachineRegisterInfo &MRI) {
  int16_t RegClass = MI.getDesc().operands()[Idx].RegClass;
  if (RegClass == -1)
    return 0;

  const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
  return TRI->getRegSizeInBits(*TRI->getRegClass(RegClass));
}
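// Hedged example: for a VGPR_32 operand getOperandSize() returns 32 and for
// a VReg_64 operand 64; createDPPInst() below uses it to require that src1
// match src0 in size when src1 must obey src0's encoding constraints.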
MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
                                           MachineInstr &MovMI,
                                           RegSubRegPair CombOldVGPR,
                                           bool CombBCZ,
                                           bool IsShrinkable) const {
  assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp ||
         MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp ||
         MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);

  bool HasVOP3DPP = ST->hasVOP3DPP();
  auto OrigOp = OrigMI.getOpcode();
  auto DPPOp = getDPPOp(OrigOp, IsShrinkable);
  if (DPPOp == -1) {
    LLVM_DEBUG(dbgs() << "  failed: no DPP opcode\n");
    return nullptr;
  }
  int OrigOpE32 = AMDGPU::getVOPe32(OrigOp);
  auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
  assert(RowMaskOpnd && RowMaskOpnd->isImm());
  auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
  assert(BankMaskOpnd && BankMaskOpnd->isImm());
  const bool MaskAllLanes =
      RowMaskOpnd->getImm() == 0xF && BankMaskOpnd->getImm() == 0xF;
  (void)MaskAllLanes; // only used in the assert below
  assert((MaskAllLanes ||
          !(TII->isVOPC(DPPOp) || (TII->isVOP3(DPPOp) && OrigOpE32 != -1 &&
                                   TII->isVOPC(OrigOpE32)))) &&
         "VOPC cannot form DPP unless mask is full");
  auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI, OrigMI.getDebugLoc(),
                         TII->get(DPPOp))
                     .setMIFlags(OrigMI.getFlags());

  bool Fail = false;
  do {
    int NumOperands = 0;
    if (auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst)) {
      DPPInst.add(*Dst);
      ++NumOperands;
    }
    if (auto *SDst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::sdst)) {
      if (TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, SDst)) {
        DPPInst.add(*SDst);
        ++NumOperands;
      }
      // If the instruction got shrunk to 32 bits, the sdst is simply dropped.
    }

    const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old);
    if (OldIdx != -1) {
      assert(OldIdx == NumOperands);
      assert(isOfRegClass(
          CombOldVGPR,
          *MRI->getRegClass(
              TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst)->getReg()),
          *MRI));
      auto *Def = getVRegSubRegDef(CombOldVGPR, *MRI);
      DPPInst.addReg(CombOldVGPR.Reg, Def ? 0 : RegState::Undef,
                     CombOldVGPR.SubReg);
      ++NumOperands;
    } else if (TII->isVOPC(DPPOp) || (TII->isVOP3(DPPOp) && OrigOpE32 != -1 &&
                                      TII->isVOPC(OrigOpE32))) {
      // VOPC (and VOPC promoted to VOP3) DPP writes vcc, not vdst, so there
      // is no old operand to fill in.
      assert(OldIdx == -1);
    } else {
      LLVM_DEBUG(dbgs() << "  failed: no old operand in DPP instruction,"
                           " TBD\n");
      Fail = true;
      break;
    }
    if (auto *Mod0 = TII->getNamedOperand(OrigMI,
                                          AMDGPU::OpName::src0_modifiers)) {
      assert(NumOperands == AMDGPU::getNamedOperandIdx(
                                DPPOp, AMDGPU::OpName::src0_modifiers));
      assert(HasVOP3DPP ||
             (0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))));
      DPPInst.addImm(Mod0->getImm());
      ++NumOperands;
    } else if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src0_modifiers)) {
      DPPInst.addImm(0);
      ++NumOperands;
    }

    auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
    assert(Src0);
    int Src0Idx = NumOperands;
    if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) {
      LLVM_DEBUG(dbgs() << "  failed: src0 is illegal\n");
      Fail = true;
      break;
    }
    DPPInst.add(*Src0);
    DPPInst->getOperand(NumOperands).setIsKill(false);
    ++NumOperands;
    if (auto *Mod1 = TII->getNamedOperand(OrigMI,
                                          AMDGPU::OpName::src1_modifiers)) {
      assert(NumOperands == AMDGPU::getNamedOperandIdx(
                                DPPOp, AMDGPU::OpName::src1_modifiers));
      assert(HasVOP3DPP ||
             (0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))));
      DPPInst.addImm(Mod1->getImm());
      ++NumOperands;
    } else if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src1_modifiers)) {
      DPPInst.addImm(0);
      ++NumOperands;
    }

    auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
    if (Src1) {
      int OpNum = NumOperands;
      // If the subtarget does not allow an SGPR for src1, src1 must satisfy
      // the same constraints as src0.
      if (!ST->hasDPPSrc1SGPR()) {
        assert(getOperandSize(*DPPInst, Src0Idx, *MRI) ==
                   getOperandSize(*DPPInst, NumOperands, *MRI) &&
               "Src0 and Src1 operands should have the same size");
        OpNum = Src0Idx;
      }
      if (!TII->isOperandLegal(*DPPInst.getInstr(), OpNum, Src1)) {
        LLVM_DEBUG(dbgs() << "  failed: src1 is illegal\n");
        Fail = true;
        break;
      }
      DPPInst.add(*Src1);
      ++NumOperands;
    }
    if (auto *Mod2 =
            TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2_modifiers)) {
      assert(NumOperands == AMDGPU::getNamedOperandIdx(
                                DPPOp, AMDGPU::OpName::src2_modifiers));
      DPPInst.addImm(Mod2->getImm());
      ++NumOperands;
    }
    auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2);
    if (Src2) {
      if (!TII->getNamedOperand(*DPPInst.getInstr(), AMDGPU::OpName::src2) ||
          !TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
        LLVM_DEBUG(dbgs() << "  failed: src2 is illegal\n");
        Fail = true;
        break;
      }
      DPPInst.add(*Src2);
      ++NumOperands;
    }
    if (HasVOP3DPP) {
      auto *ClampOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::clamp);
      if (ClampOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::clamp)) {
        DPPInst.addImm(ClampOpr->getImm());
      }
      auto *VdstInOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst_in);
      if (VdstInOpr &&
          AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::vdst_in)) {
        DPPInst.add(*VdstInOpr);
      }
      auto *OmodOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::omod);
      if (OmodOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::omod)) {
        DPPInst.addImm(OmodOpr->getImm());
      }
      // op_sel must be all zero and op_sel_hi all one for the combine.
      if (auto *OpSelOpr =
              TII->getNamedOperand(OrigMI, AMDGPU::OpName::op_sel)) {
        auto OpSel = OpSelOpr->getImm();
        if (OpSel != 0) {
          LLVM_DEBUG(dbgs() << "  failed: op_sel must be zero\n");
          Fail = true;
          break;
        }
        if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::op_sel))
          DPPInst.addImm(OpSel);
      }
      if (auto *OpSelHiOpr =
              TII->getNamedOperand(OrigMI, AMDGPU::OpName::op_sel_hi)) {
        auto OpSelHi = OpSelHiOpr->getImm();
        // Only vop3p has op_sel_hi, and all vop3p have exactly 3 operands.
        assert(Src2 && "Expected vop3p with 3 operands");
        if (OpSelHi != 7) {
          LLVM_DEBUG(dbgs() << "  failed: op_sel_hi must be all set to one\n");
          Fail = true;
          break;
        }
        if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::op_sel_hi))
          DPPInst.addImm(OpSelHi);
      }
      auto *NegOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::neg_lo);
      if (NegOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::neg_lo)) {
        DPPInst.addImm(NegOpr->getImm());
      }
      auto *NegHiOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::neg_hi);
      if (NegHiOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::neg_hi)) {
        DPPInst.addImm(NegHiOpr->getImm());
      }
    }
    DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl));
    DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
    DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
    DPPInst.addImm(CombBCZ ? 1 : 0);
  } while (false);

  if (Fail) {
    DPPInst.getInstr()->eraseFromParent();
    return nullptr;
  }
  return DPPInst.getInstr();
}
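// Hedged sketch of the operand order assembled above for a plain VOP2 DPP
// result (register names hypothetical):
//
//   %res = V_ADD_U32_dpp %old, %src0_from_mov, %src1_from_add,
//          dpp_ctrl, row_mask, bank_mask, bound_ctrl
//
// vdst/sdst and the extra sources come from the arithmetic instruction,
// while old, src0 and all DPP control fields are taken from the mov.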
static bool isIdentityValue(unsigned OrigMIOp, MachineOperand *OldOpnd) {
  switch (OrigMIOp) {
  default:
    break;
  case AMDGPU::V_ADD_U32_e32:
  case AMDGPU::V_ADD_U32_e64:
  case AMDGPU::V_ADD_CO_U32_e32:
  case AMDGPU::V_ADD_CO_U32_e64:
  case AMDGPU::V_OR_B32_e32:
  case AMDGPU::V_OR_B32_e64:
  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_MAX_U32_e32:
  case AMDGPU::V_MAX_U32_e64:
  case AMDGPU::V_XOR_B32_e32:
  case AMDGPU::V_XOR_B32_e64:
    if (OldOpnd->getImm() == 0)
      return true;
    break;
  case AMDGPU::V_AND_B32_e32:
  case AMDGPU::V_AND_B32_e64:
  case AMDGPU::V_MIN_U32_e32:
  case AMDGPU::V_MIN_U32_e64:
    if (static_cast<uint32_t>(OldOpnd->getImm()) ==
        std::numeric_limits<uint32_t>::max())
      return true;
    break;
  case AMDGPU::V_MIN_I32_e32:
  case AMDGPU::V_MIN_I32_e64:
    if (static_cast<int32_t>(OldOpnd->getImm()) ==
        std::numeric_limits<int32_t>::max())
      return true;
    break;
  case AMDGPU::V_MAX_I32_e32:
  case AMDGPU::V_MAX_I32_e64:
    if (static_cast<int32_t>(OldOpnd->getImm()) ==
        std::numeric_limits<int32_t>::min())
      return true;
    break;
  case AMDGPU::V_MUL_I32_I24_e32:
  case AMDGPU::V_MUL_I32_I24_e64:
  case AMDGPU::V_MUL_U32_U24_e32:
  case AMDGPU::V_MUL_U32_U24_e64:
    if (OldOpnd->getImm() == 1)
      return true;
    break;
  }
  return false;
}
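// Summary of the identities above: 0 for add/sub/or/xor/unsigned max, all
// ones for and/unsigned min, INT32_MAX for signed min, INT32_MIN for signed
// max, and 1 for the 24-bit multiplies. When the mov's old value is such an
// identity, lanes masked off by row_mask/bank_mask still compute the correct
// result, so the combine below can substitute src1 for the old register.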
MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
                                           MachineInstr &MovMI,
                                           RegSubRegPair CombOldVGPR,
                                           MachineOperand *OldOpndValue,
                                           bool CombBCZ,
                                           bool IsShrinkable) const {
  assert(CombOldVGPR.Reg);
  if (!CombBCZ && OldOpndValue && OldOpndValue->isImm()) {
    auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
    if (!Src1 || !Src1->isReg()) {
      LLVM_DEBUG(dbgs() << "  failed: no src1 or it isn't a register\n");
      return nullptr;
    }
    if (!isIdentityValue(OrigMI.getOpcode(), OldOpndValue)) {
      LLVM_DEBUG(dbgs() << "  failed: old immediate isn't an identity\n");
      return nullptr;
    }
    CombOldVGPR = getRegSubRegPair(*Src1);
    auto MovDst = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
    const TargetRegisterClass *RC = MRI->getRegClass(MovDst->getReg());
    if (!isOfRegClass(CombOldVGPR, *RC, *MRI)) {
      LLVM_DEBUG(dbgs() << "  failed: src1 has wrong register class\n");
      return nullptr;
    }
  }
  return createDPPInst(OrigMI, MovMI, CombOldVGPR, CombBCZ, IsShrinkable);
}
// Returns true if the operand is absent or its masked immediate equals Value.
bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName,
                                    int64_t Value, int64_t Mask) const {
  auto *Imm = TII->getNamedOperand(MI, OpndName);
  if (!Imm)
    return true;

  assert(Imm->isImm());
  return (Imm->getImm() & Mask) == Value;
}
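// Hedged usage example: hasNoImmOrEqual(MI, AMDGPU::OpName::clamp, 0) is
// true when MI either has no clamp operand at all or its immediate is 0,
// and the Mask form lets isShrinkable() ignore the abs/neg bits of the
// source modifiers while requiring every other bit to be clear.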
bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
  assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp ||
         MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp ||
         MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
  LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI);

  auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
  assert(DstOpnd && DstOpnd->isReg());
  auto DPPMovReg = DstOpnd->getReg();
  if (DPPMovReg.isPhysical()) {
    LLVM_DEBUG(dbgs() << "  failed: dpp move writes physreg\n");
    return false;
  }

  if (MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO ||
      MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp) {
    auto *DppCtrl = TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl);
    assert(DppCtrl && DppCtrl->isImm());
    if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl->getImm())) {
      LLVM_DEBUG(dbgs() << "  failed: 64 bit dpp move uses unsupported"
                           " control value\n");
      return false;
    }
  }
  auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
  assert(RowMaskOpnd && RowMaskOpnd->isImm());
  auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
  assert(BankMaskOpnd && BankMaskOpnd->isImm());
  const bool MaskAllLanes = RowMaskOpnd->getImm() == 0xF &&
                            BankMaskOpnd->getImm() == 0xF;

  auto *BCZOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bound_ctrl);
  assert(BCZOpnd && BCZOpnd->isImm());
  bool BoundCtrlZero = BCZOpnd->getImm();

  auto *OldOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::old);
  auto *SrcOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
  assert(OldOpnd && OldOpnd->isReg());
  assert(SrcOpnd && SrcOpnd->isReg());

  auto *const OldOpndValue = getOldOpndValue(*OldOpnd);
  // OldOpndValue is either undef (IMPLICIT_DEF), an immediate, or the old
  // operand itself; the last case distinguishes undef from a non-immediate
  // def so the IMPLICIT_DEF can be reused later.
  assert(!OldOpndValue || OldOpndValue->isImm() || OldOpndValue == OldOpnd);

  bool CombBCZ = false;

  if (MaskAllLanes && BoundCtrlZero) { // [1]
    CombBCZ = true;
  } else {
    if (!OldOpndValue || !OldOpndValue->isImm()) {
      LLVM_DEBUG(dbgs() << "  failed: the DPP mov isn't combinable\n");
      return false;
    }
    if (OldOpndValue->getImm() == 0) {
      if (MaskAllLanes) {
        assert(!BoundCtrlZero); // by check [1]
        CombBCZ = true;
      }
    } else if (BoundCtrlZero) {
      assert(!MaskAllLanes); // by check [1]
      LLVM_DEBUG(dbgs() <<
        "  failed: old!=0 and bctrl:0 and not all lanes isn't combinable\n");
      return false;
    }
  }

  LLVM_DEBUG(dbgs() << "  old=";
    if (!OldOpndValue)
      dbgs() << "undef";
    else
      dbgs() << *OldOpndValue;
    dbgs() << ", bound_ctrl=" << CombBCZ << '\n');
  SmallVector<MachineInstr *, 4> OrigMIs, DPPMIs;
  DenseMap<MachineInstr *, SmallVector<unsigned, 4>> RegSeqWithOpNos;
  auto CombOldVGPR = getRegSubRegPair(*OldOpnd);
  // Try to reuse the old register if it is undefined (IMPLICIT_DEF).
  if (CombBCZ && OldOpndValue) { // CombOldVGPR should be undef
    const TargetRegisterClass *RC = MRI->getRegClass(DPPMovReg);
    CombOldVGPR = RegSubRegPair(MRI->createVirtualRegister(RC));
    auto UndefInst = BuildMI(*MovMI.getParent(), MovMI, MovMI.getDebugLoc(),
                             TII->get(AMDGPU::IMPLICIT_DEF), CombOldVGPR.Reg);
    DPPMIs.push_back(UndefInst.getInstr());
  }

  OrigMIs.push_back(&MovMI);
  bool Rollback = true;
  SmallVector<MachineOperand *, 4> Uses;
  for (auto &Use : MRI->use_nodbg_operands(DPPMovReg)) {
    Uses.push_back(&Use);
  }

  while (!Uses.empty()) {
    MachineOperand *Use = Uses.pop_back_val();
    Rollback = true;

    auto &OrigMI = *Use->getParent();
    auto OrigOp = OrigMI.getOpcode();
    assert((TII->get(OrigOp).Size != 4 || !AMDGPU::isTrue16Inst(OrigOp)) &&
           "There should not be e32 True16 instructions pre-RA");

    if (OrigOp == AMDGPU::REG_SEQUENCE) {
      Register FwdReg = OrigMI.getOperand(0).getReg();
      unsigned FwdSubReg = 0;
      // Find the subregister the DPP mov feeds into the REG_SEQUENCE.
      unsigned OpNo, E = OrigMI.getNumOperands();
      for (OpNo = 1; OpNo < E; OpNo += 2) {
        if (OrigMI.getOperand(OpNo).getReg() == DPPMovReg) {
          FwdSubReg = OrigMI.getOperand(OpNo + 1).getImm();
          break;
        }
      }
      if (!FwdSubReg)
        break;
      // Forward the combine to the uses of the matching subregister.
      for (auto &Op : MRI->use_nodbg_operands(FwdReg)) {
        if (Op.getSubReg() == FwdSubReg)
          Uses.push_back(&Op);
      }
      RegSeqWithOpNos[&OrigMI].push_back(OpNo);
      continue;
    }
    bool IsShrinkable = isShrinkable(OrigMI);
    if (!(IsShrinkable ||
          ((TII->isVOP3P(OrigOp) || TII->isVOPC(OrigOp) ||
            TII->isVOP3(OrigOp)) &&
           ST->hasVOP3DPP()) ||
          TII->isVOP1(OrigOp) || TII->isVOP2(OrigOp))) {
      LLVM_DEBUG(dbgs() << "  failed: not VOP1/2/3/3P/C\n");
      break;
    }
    auto *Src0 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0);
    auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
    if (Use != Src0 && !(Use == Src1 && OrigMI.isCommutable())) { // [2]
      LLVM_DEBUG(dbgs() << "  failed: no suitable operands\n");
      break;
    }

    auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2);
    assert(Src0 && "Src1 without Src0?");
    if ((Use == Src0 && ((Src1 && Src1->isIdenticalTo(*Src0)) ||
                         (Src2 && Src2->isIdenticalTo(*Src0)))) ||
        (Use == Src1 && (Src1->isIdenticalTo(*Src0) ||
                         (Src2 && Src2->isIdenticalTo(*Src1))))) {
      LLVM_DEBUG(
          dbgs()
          << "  failed: DPP register is used more than once per instruction\n");
      break;
    }
    if (Use == Src0) {
      if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR,
                                        OldOpndValue, CombBCZ, IsShrinkable)) {
        DPPMIs.push_back(DPPInst);
        Rollback = false;
      }
    } else {
      assert(Use == Src1 && OrigMI.isCommutable()); // by check [2]
      auto *BB = OrigMI.getParent();
      auto *NewMI = BB->getParent()->CloneMachineInstr(&OrigMI);
      BB->insert(OrigMI, NewMI);
      if (TII->commuteInstruction(*NewMI)) {
        if (auto *DPPInst =
                createDPPInst(*NewMI, MovMI, CombOldVGPR, OldOpndValue, CombBCZ,
                              IsShrinkable)) {
          DPPMIs.push_back(DPPInst);
          Rollback = false;
        }
      } else
        LLVM_DEBUG(dbgs() << "  failed: cannot be commuted\n");
      NewMI->eraseFromParent();
    }

    if (Rollback)
      break;
    OrigMIs.push_back(&OrigMI);
  }

  Rollback |= !Uses.empty();
  for (auto *MI : *(Rollback ? &DPPMIs : &OrigMIs))
    MI->eraseFromParent();

  if (!Rollback) {
    for (auto &S : RegSeqWithOpNos) {
      if (MRI->use_nodbg_empty(S.first->getOperand(0).getReg())) {
        S.first->eraseFromParent();
        continue;
      }
      while (!S.second.empty())
        S.first->getOperand(S.second.pop_back_val()).setIsUndef();
    }
  }

  return !Rollback;
}
bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
  ST = &MF.getSubtarget<GCNSubtarget>();
  if (!ST->hasDPP() || skipFunction(MF.getFunction()))
    return false;

  MRI = &MF.getRegInfo();
  TII = ST->getInstrInfo();

  bool Changed = false;
  for (auto &MBB : MF) {
    for (auto &MI : llvm::make_early_inc_range(llvm::reverse(MBB))) {
      if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) {
        Changed = true;
        ++NumDPPMovsCombined;
      } else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO ||
                 MI.getOpcode() == AMDGPU::V_MOV_B64_dpp) {
        if (ST->hasDPALU_DPP() && combineDPPMov(MI)) {
          Changed = true;
          ++NumDPPMovsCombined;
        } else {
          auto Split = TII->expandMovDPP64(MI);
          for (auto *M : {Split.first, Split.second}) {
            if (M && combineDPPMov(*M))
              ++NumDPPMovsCombined;
          }
          Changed = true;
        }
      }
    }
  }
  return Changed;
}
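// Hedged end-to-end example (MIR with hypothetical virtual registers):
// before the pass,
//
//   %2:vgpr_32 = V_MOV_B32_dpp undef %1, %0, 273, 15, 15, 0, implicit $exec
//   %4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
//
// and after a successful combine the mov is gone:
//
//   %4:vgpr_32 = V_ADD_U32_dpp undef %1, %0, %3, 273, 15, 15, 0,
//                implicit $exec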