Go to the documentation of this file.
76 #define DEBUG_TYPE "si-fix-sgpr-copies"
79 "amdgpu-enable-merge-m0",
80 cl::desc(
"Merge and hoist M0 initializations"),
101 StringRef getPassName()
const override {
return "SI Fix SGPR copies"; }
114 "SI Fix SGPR copies",
false,
false)
119 char SIFixSGPRCopies::
ID = 0;
124 return new SIFixSGPRCopies();
131 if (!MO.isReg() || !MO.getReg().isVirtual())
140 static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
144 Register DstReg = Copy.getOperand(0).getReg();
145 Register SrcReg = Copy.getOperand(1).getReg();
149 :
TRI.getPhysRegClass(SrcReg);
156 :
TRI.getPhysRegClass(DstReg);
158 return std::make_pair(SrcRC, DstRC);
164 return SrcRC != &AMDGPU::VReg_1RegClass &&
TRI.isSGPRClass(DstRC) &&
165 TRI.hasVectorRegisters(SrcRC);
171 return DstRC != &AMDGPU::VReg_1RegClass &&
TRI.isSGPRClass(SrcRC) &&
172 TRI.hasVectorRegisters(DstRC);
179 auto &Src =
MI.getOperand(1);
186 const auto *
UseMI = MO.getParent();
195 !
TII->isOperandLegal(*
UseMI, OpIdx, &Src))
248 if (
SubReg != AMDGPU::NoSubRegister)
262 bool IsAGPR =
TRI->isAGPRClass(DstRC);
264 for (
unsigned I = 1,
N =
MI.getNumOperands();
I !=
N;
I += 2) {
266 unsigned SrcSubReg =
MI.getOperand(
I).getSubReg();
270 "Expected SGPR REG_SEQUENCE to only have SGPR inputs");
272 SrcRC =
TRI->getSubRegClass(SrcRC, SrcSubReg);
284 unsigned Opc = NewSrcRC == &AMDGPU::AGPR_32RegClass ?
285 AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::COPY;
292 MI.getOperand(
I).setReg(TmpReg);
304 if (Copy->getOpcode() != AMDGPU::COPY)
307 if (!
MoveImm->isMoveImmediate())
311 TII->getNamedOperand(*
MoveImm, AMDGPU::OpName::src0);
316 if (Copy->getOperand(0).getSubReg())
319 switch (
MoveImm->getOpcode()) {
322 case AMDGPU::V_MOV_B32_e32:
323 SMovOp = AMDGPU::S_MOV_B32;
325 case AMDGPU::V_MOV_B64_PSEUDO:
326 SMovOp = AMDGPU::S_MOV_B64;
333 template <
class UnaryPredicate>
343 while (!Worklist.empty()) {
383 while (
I !=
MBB->
end() &&
TII->isBasicBlockPrologue(*
I))
399 using InitListMap = std::map<unsigned, std::list<MachineInstr *>>;
406 bool Changed =
false;
410 for (
auto &MO :
MI.operands()) {
411 if ((MO.isReg() && ((MO.isDef() && MO.getReg() !=
Reg) || !MO.isDef())) ||
412 (!MO.isImm() && !MO.isReg()) || (MO.isImm() && Imm)) {
415 }
else if (MO.isImm())
419 Inits[Imm->
getImm()].push_front(&
MI);
421 Clobbers.push_back(&
MI);
424 for (
auto &
Init : Inits) {
425 auto &Defs =
Init.second;
427 for (
auto I1 = Defs.begin(),
E = Defs.end();
I1 !=
E; ) {
430 for (
auto I2 = std::next(
I1); I2 !=
E; ) {
443 bool MayClobberTo =
isReachable(Clobber, &*To, MBBTo, MDT);
444 if (!MayClobberFrom && !MayClobberTo)
446 if ((MayClobberFrom && !MayClobberTo) ||
447 (!MayClobberFrom && MayClobberTo))
453 return !((MBBFrom == MBBTo &&
461 return C.first !=
Init.first &&
467 if (!interferes(MI2, MI1)) {
477 if (!interferes(MI1, MI2)) {
495 if (!interferes(MI1,
I) && !interferes(MI2,
I)) {
499 <<
"and moving from "
516 for (
auto &
Init : Inits) {
517 auto &Defs =
Init.second;
518 auto I = Defs.begin();
519 while (
I != Defs.end()) {
520 if (MergedInstrs.
count(*
I)) {
521 (*I)->eraseFromParent();
529 for (
auto &
Init : Inits) {
530 auto &Defs =
Init.second;
531 for (
auto MI : Defs) {
532 auto MBB =
MI->getParent();
537 if (!
TII->isBasicBlockPrologue(*
B))
540 auto R = std::next(
MI->getReverseIterator());
541 const unsigned Threshold = 50;
543 for (
unsigned I = 0; R !=
B &&
I < Threshold; ++R, ++
I)
544 if (R->readsRegister(
Reg,
TRI) || R->definesRegister(
Reg,
TRI) ||
568 TRI =
ST.getRegisterInfo();
569 TII =
ST.getInstrInfo();
570 MDT = &getAnalysis<MachineDominatorTree>();
579 switch (
MI.getOpcode()) {
584 case AMDGPU::STRICT_WQM:
585 case AMDGPU::SOFT_WQM:
586 case AMDGPU::STRICT_WWM: {
599 TII->get(
ST.isWave32() ? AMDGPU::S_CSELECT_B32
600 : AMDGPU::S_CSELECT_B64),
604 I =
BuildMI(*
MI.getParent(), std::next(
I),
I->getDebugLoc(),
605 TII->get(AMDGPU::COPY), DstReg)
607 MI.eraseFromParent();
611 ST.isWave64() ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
616 MI.getDebugLoc(),
TII->get(Opcode))
620 MI.eraseFromParent();
630 if (DstReg ==
AMDGPU::M0 &&
TRI->hasVectorRegisters(SrcRC)) {
635 TII->get(AMDGPU::V_READFIRSTLANE_B32), TmpReg)
636 .
add(
MI.getOperand(1));
637 MI.getOperand(1).setReg(TmpReg);
647 if (NewBB && NewBB !=
MBB) {
653 assert((!NewBB || NewBB ==
I->getParent()) &&
654 "moveToVALU did not return the right basic block");
664 MI.getOperand(1).ChangeToImmediate(Imm);
665 MI.addImplicitDefUseOperands(MF);
666 MI.setDesc(
TII->get(SMovOp));
670 if (NewBB && NewBB !=
MBB) {
676 assert((!NewBB || NewBB ==
I->getParent()) &&
677 "moveToVALU did not return the right basic block");
686 if (NewBB && NewBB !=
MBB) {
692 assert((!NewBB || NewBB ==
I->getParent()) &&
693 "moveToVALU did not return the right basic block");
696 case AMDGPU::REG_SEQUENCE: {
697 if (
TRI->hasVectorRegisters(
TII->getOpRegClass(
MI, 0)) ||
706 if (NewBB && NewBB !=
MBB) {
712 assert((!NewBB || NewBB ==
I->getParent()) &&
713 "moveToVALU did not return the right basic block");
716 case AMDGPU::INSERT_SUBREG: {
721 if (
TRI->isSGPRClass(DstRC) &&
722 (
TRI->hasVectorRegisters(Src0RC) ||
723 TRI->hasVectorRegisters(Src1RC))) {
726 if (NewBB && NewBB !=
MBB) {
732 assert((!NewBB || NewBB ==
I->getParent()) &&
733 "moveToVALU did not return the right basic block");
737 case AMDGPU::V_WRITELANE_B32: {
740 if (
ST.getConstantBusLimit(
MI.getOpcode()) != 1)
768 if (MO->getReg().isVirtual()) {
773 MO->getReg() ==
Def.getReg() &&
774 MO->getSubReg() ==
Def.getSubReg()) {
776 if (Copied.
isImm() &&
809 unsigned numVGPRUses = 0;
810 bool AllAGPRUses =
true;
817 while (!worklist.
empty()) {
839 if (!
TRI->isSGPRReg(*
MRI,
Use.getReg()) &&
840 UseRC != &AMDGPU::VReg_1RegClass)
847 if (!
TRI->isSGPRClass(OpRC) && OpRC != &AMDGPU::VS_32RegClass &&
848 OpRC != &AMDGPU::VS_64RegClass) {
856 if (AllAGPRUses && numVGPRUses && !
TRI->isAGPRClass(RC0)) {
859 for (
unsigned I = 1,
N =
MI.getNumOperands();
I !=
N;
I += 2) {
866 bool hasVGPRInput =
false;
867 for (
unsigned i = 1;
i <
MI.getNumOperands();
i += 2) {
870 if (
TRI->isVectorRegister(*
MRI, InputReg)) {
874 TRI->getRegClassForReg(*
MRI, SrcReg);
875 if (
TRI->isSGPRClass(RC))
881 else if (
Def->isCopy() &&
882 TRI->isVectorRegister(*
MRI,
Def->getOperand(1).getReg())) {
895 Def->getOperand(1).ChangeToImmediate(Imm);
896 Def->addImplicitDefUseOperands(*MF);
897 Def->setDesc(
TII->get(SMovOp));
902 if ((!
TRI->isVectorRegister(*
MRI, PHIRes) &&
903 RC0 != &AMDGPU::VReg_1RegClass) &&
904 (hasVGPRInput || numVGPRUses > 1)) {
906 CreatedBB =
TII->moveToVALU(
MI);
910 TII->legalizeOperands(
MI, MDT);
914 while (!PHIOperands.
empty()) {
MachineBasicBlock * findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B)
findNearestCommonDominator - Find the nearest common dominator basic block for basic blocks A and B.
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
Test if the given instruction should be considered a scheduling boundary.
bool hasProperty(Property P) const
pred_iterator pred_begin()
unsigned getOperandNo(const_mop_iterator I) const
Returns the number of the operand iterator I points to.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
static bool isReachable(const MachineInstr *From, const MachineInstr *To, const MachineBasicBlock *CutOff, MachineDominatorTree &MDT)
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder & UseMI
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
FunctionPass * createSIFixSGPRCopiesPass()
bool properlyDominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const MachineInstrBuilder & add(const MachineOperand &MO) const
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Reg
All possible values of the reg field in the ModR/M byte.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
iterator_range< use_iterator > use_operands(Register Reg) const
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
LLVM_NODISCARD T pop_back_val()
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
std::pair< iterator, bool > insert(const ValueT &V)
unsigned const TargetRegisterInfo * TRI
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
BasicBlockListType::iterator iterator
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
unsigned getDefRegState(bool B)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
TargetInstrInfo - Interface to description of machine instruction set.
void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
(vector float) vec_cmpeq(*A, *B) C
const MachineOperand & getOperand(unsigned i) const
Represent the analysis usage information of a pass.
const MachineFunctionProperties & getProperties() const
Get the function properties.
iterator_range< reg_nodbg_iterator > reg_nodbg_operands(Register Reg) const
const HexagonInstrInfo * TII
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
MachineOperand class - Representation of each machine instruction operand.
static MachineBasicBlock::iterator getFirstNonPrologue(MachineBasicBlock *MBB, const TargetInstrInfo *TII)
unsigned M0(unsigned Val)
INITIALIZE_PASS_BEGIN(SIFixSGPRCopies, DEBUG_TYPE, "SI Fix SGPR copies", false, false) INITIALIZE_PASS_END(SIFixSGPRCopies
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
bool empty() const
Determine if the SetVector is empty or not.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
void clearKillFlags(Register Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the Mac...
Implements a dense probed hash-table based set.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
const TargetRegisterClass * getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Representation of each machine instruction.
use_instr_iterator use_instr_begin(Register RegNo) const
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
iterator_range< def_instr_iterator > def_instructions(Register Reg) const
initializer< Ty > init(const Ty &Val)
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
StandardInstrumentations SI(Debug, VerifyEach)
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Register getReg() const
getReg - Returns the register number.
iterator_range< pred_iterator > predecessors()
Class for arbitrary precision integers.
bool insert(const value_type &X)
Insert a new element into the SetVector.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
StringRef - Represent a constant reference to a string, i.e.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC, const TargetRegisterClass *DstRC, const SIRegisterInfo &TRI)
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
const MachineBasicBlock * getParent() const
unsigned const MachineRegisterInfo * MRI
Wrapper class representing virtual and physical registers.
unsigned getSubReg() const
static bool hoistAndMergeSGPRInits(unsigned Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo *TRI, MachineDominatorTree &MDT, const TargetInstrInfo *TII)
static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI, const SIRegisterInfo *TRI, const SIInstrInfo *TII, MachineRegisterInfo &MRI)
bool isRegSequence() const
static bool hasVectorOperands(const MachineInstr &MI, const SIRegisterInfo *TRI)
static bool tryChangeVGPRtoSGPRinCopy(MachineInstr &MI, const SIRegisterInfo *TRI, const SIInstrInfo *TII)
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
Iterator for intrusive lists based on ilist_node.
static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy, const MachineInstr *MoveImm, const SIInstrInfo *TII, unsigned &SMovOp, int64_t &Imm)
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC, const TargetRegisterClass *DstRC, const SIRegisterInfo &TRI)
bool hasOneUse(Register RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
MachineInstrBuilder MachineInstrBuilder & DefMI
static std::pair< const TargetRegisterClass *, const TargetRegisterClass * > getCopyRegClasses(const MachineInstr &Copy, const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI)
FunctionPass class - This class is used to implement most global optimizations.
@ Resolved
Queried, materialization begun.
AnalysisUsage & addRequired()
BlockVerifier::State From
bool searchPredecessors(const MachineBasicBlock *MBB, const MachineBasicBlock *CutOff, UnaryPredicate Predicate)
@ Kill
The last use of a register.
static cl::opt< bool > EnableM0Merge("amdgpu-enable-merge-m0", cl::desc("Merge and hoist M0 initializations"), cl::init(true))
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
A vector that has set insertion semantics.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_NODISCARD T pop_back_val()
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
A Use represents the edge between a Value definition and its users.