25#define DEBUG_TYPE "amdgpu-global-isel-divergence-lowering"
44 return "AMDGPU GlobalISel divergence lowering";
83DivergenceLoweringHelper::DivergenceLoweringHelper(
89void DivergenceLoweringHelper::markAsLaneMask(
Register DstReg)
const {
92 if (
MRI->getRegClassOrNull(DstReg)) {
93 if (
MRI->constrainRegClass(DstReg, ST->getBoolRC()))
98 MRI->setRegClass(DstReg, ST->getBoolRC());
101void DivergenceLoweringHelper::getCandidatesForLowering(
115void DivergenceLoweringHelper::collectIncomingValuesFromPhi(
117 for (
unsigned i = 1; i <
MI->getNumOperands(); i += 2) {
123void DivergenceLoweringHelper::replaceDstReg(
Register NewReg,
Register OldReg,
136 B.buildCopy(LaneMask, Reg);
163void DivergenceLoweringHelper::buildMergeLaneMasks(
169 Register PrevRegCopy = buildRegCopyToLaneMask(PrevReg);
170 Register CurRegCopy = buildRegCopyToLaneMask(CurReg);
174 B.setInsertPt(
MBB,
I);
175 B.buildInstr(AndN2Op, {PrevMaskedReg}, {PrevRegCopy, ExecReg});
176 B.buildInstr(AndOp, {CurMaskedReg}, {ExecReg, CurRegCopy});
177 B.buildInstr(OrOp, {DstReg}, {PrevMaskedReg, CurMaskedReg});
183void DivergenceLoweringHelper::constrainAsLaneMask(
Incoming &In) {
184 B.setInsertPt(*In.Block, In.Block->getFirstTerminator());
187 MRI->setRegClass(Copy.getReg(0), ST->getBoolRC());
188 In.Reg = Copy.getReg(0);
194 "AMDGPU GlobalISel divergence lowering",
false,
false)
201char AMDGPUGlobalISelDivergenceLowering::
ID = 0;
204 AMDGPUGlobalISelDivergenceLowering::
ID;
207 return new AMDGPUGlobalISelDivergenceLowering();
210bool AMDGPUGlobalISelDivergenceLowering::runOnMachineFunction(
213 getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
215 getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
217 getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();
219 DivergenceLoweringHelper Helper(&MF, &DT, &PDT, &MUI);
221 return Helper.lowerPhis();
unsigned const MachineRegisterInfo * MRI
AMDGPU GlobalISel divergence lowering
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const HexagonInstrInfo * TII
This file declares the MachineIRBuilder class.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Interface definition of the PhiLoweringHelper class that implements lane mask merging algorithm for d...
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
FunctionPass class - This class is used to implement most global optimizations.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
iterator SkipPHIsAndLabels(iterator I)
Return the first instruction in MBB after I that is not a PHI or a label.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
Helper class to build MachineInstr.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
MachinePostDominatorTree - an analysis pass wrapper for DominatorTree used to compute the post-domina...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
virtual void getCandidatesForLowering(SmallVectorImpl< MachineInstr * > &Vreg1Phis) const =0
virtual void constrainAsLaneMask(Incoming &In)=0
virtual void collectIncomingValuesFromPhi(const MachineInstr *MI, SmallVectorImpl< Incoming > &Incomings) const =0
virtual void markAsLaneMask(Register DstReg) const =0
virtual void buildMergeLaneMasks(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, Register PrevReg, Register CurReg)=0
virtual void replaceDstReg(Register NewReg, Register OldReg, MachineBasicBlock *MBB)=0
Wrapper class representing virtual and physical registers.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
StringRef - Represent a constant reference to a string, i.e.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &)
char & AMDGPUGlobalISelDivergenceLoweringID
Register createLaneMaskReg(MachineRegisterInfo *MRI, MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs)
FunctionPass * createAMDGPUGlobalISelDivergenceLoweringPass()
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...