26#define DEBUG_TYPE "amdgpu-global-isel-divergence-lowering"
42 return "AMDGPU GlobalISel divergence lowering";
66 void markAsLaneMask(
Register DstReg)
const override;
67 void getCandidatesForLowering(
69 void collectIncomingValuesFromPhi(
78 void constrainAsLaneMask(
Incoming &In)
override;
80 bool lowerTemporalDivergence();
81 bool lowerTemporalDivergenceI1();
84DivergenceLoweringHelper::DivergenceLoweringHelper(
90void DivergenceLoweringHelper::markAsLaneMask(
Register DstReg)
const {
93 if (
MRI->getRegClassOrNull(DstReg)) {
94 if (
MRI->constrainRegClass(DstReg, ST->getBoolRC()))
99 MRI->setRegClass(DstReg, ST->getBoolRC());
102void DivergenceLoweringHelper::getCandidatesForLowering(
111 if (
MI.getOpcode() != TargetOpcode::G_PHI)
120void DivergenceLoweringHelper::collectIncomingValuesFromPhi(
122 for (
unsigned i = 1; i <
MI->getNumOperands(); i += 2) {
128void DivergenceLoweringHelper::replaceDstReg(
Register NewReg,
Register OldReg,
140 B.setInsertPt(*
MBB,
MBB->SkipPHIsAndLabels(std::next(Instr->getIterator())));
141 B.buildCopy(LaneMask,
Reg);
168void DivergenceLoweringHelper::buildMergeLaneMasks(
174 Register PrevRegCopy = buildRegCopyToLaneMask(PrevReg);
175 Register CurRegCopy = buildRegCopyToLaneMask(CurReg);
179 B.setInsertPt(
MBB,
I);
180 B.buildInstr(AndN2Op, {PrevMaskedReg}, {PrevRegCopy, ExecReg});
181 B.buildInstr(AndOp, {CurMaskedReg}, {ExecReg, CurRegCopy});
182 B.buildInstr(OrOp, {DstReg}, {PrevMaskedReg, CurMaskedReg});
188void DivergenceLoweringHelper::constrainAsLaneMask(
Incoming &In) {
189 B.setInsertPt(*In.Block, In.Block->getFirstTerminator());
192 MRI->setRegClass(Copy.getReg(0), ST->getBoolRC());
193 In.Reg = Copy.getReg(0);
199 if (
Op.isReg() &&
Op.getReg() ==
Reg)
204bool DivergenceLoweringHelper::lowerTemporalDivergence() {
215 replaceUsesOfRegInInstWith(
Reg, UseInst, CachedTDCopy);
224 B.buildInstr(AMDGPU::COPY, {VgprReg}, {
Reg})
227 replaceUsesOfRegInInstWith(
Reg, UseInst, VgprReg);
228 TDCache[
Reg] = VgprReg;
233bool DivergenceLoweringHelper::lowerTemporalDivergenceI1() {
235 initializeLaneMaskRegisterAttributes(BoolS1);
246 auto &CycleMergedMask = LRCCacheIter->getSecond();
248 if (RegNotCached || LRC->contains(CachedLRC)) {
253 for (
auto &LRCCacheEntry : LRCCache) {
255 auto &CycleMergedMask = LRCCacheEntry.getSecond();
258 Register MergedMask =
MRI->createVirtualRegister(BoolS1);
264 for (
auto Entry :
Cycle->getEntries()) {
266 if (!
Cycle->contains(Pred)) {
267 B.setInsertPt(*Pred, Pred->getFirstTerminator());
268 auto ImplDef =
B.buildInstr(AMDGPU::IMPLICIT_DEF, {BoolS1}, {});
274 buildMergeLaneMasks(*
MBB,
MBB->getFirstTerminator(), {}, MergedMask,
277 CycleMergedMask.second = MergedMask;
284 replaceUsesOfRegInInstWith(
Reg, UseInst, LRCCache.
lookup(
Reg).second);
293 "AMDGPU GlobalISel divergence lowering",
false,
false)
300char AMDGPUGlobalISelDivergenceLowering::
ID = 0;
303 AMDGPUGlobalISelDivergenceLowering::
ID;
306 return new AMDGPUGlobalISelDivergenceLowering();
309bool AMDGPUGlobalISelDivergenceLowering::runOnMachineFunction(
312 getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
314 getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
316 getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();
318 DivergenceLoweringHelper Helper(&MF, &DT, &PDT, &MUI);
328 Changed |= Helper.lowerTemporalDivergence();
331 Changed |= Helper.lowerTemporalDivergenceI1();
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const HexagonInstrInfo * TII
This file declares the MachineIRBuilder class.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Interface definition of the PhiLoweringHelper class that implements lane mask merging algorithm for d...
bool isS32S64LaneMask(Register Reg) const
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
FunctionPass class - This class is used to implement most global optimizations.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
iterator_range< pred_iterator > predecessors()
MachineInstrBundleIterator< MachineInstr > iterator
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Helper class to build MachineInstr.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
MachineOperand class - Representation of each machine instruction operand.
MachinePostDominatorTree - an analysis pass wrapper for DominatorTree used to compute the post-domina...
MachineSSAUpdater - This class updates SSA form for a set of virtual registers defined in multiple bl...
Wrapper class representing virtual and physical registers.
Helper class for SSA formation on a set of values defined in multiple blocks.
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
Value * GetValueInMiddleOfBlock(BasicBlock *BB)
Construct SSA form, materializing a value that is live in the middle of the specified block.
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
StringRef - Represent a constant reference to a string, i.e.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
GenericUniformityInfo< MachineSSAContext > MachineUniformityInfo
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
char & AMDGPUGlobalISelDivergenceLoweringID
Register createLaneMaskReg(MachineRegisterInfo *MRI, MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs)
DWARFExpression::Operation Op
FunctionPass * createAMDGPUGlobalISelDivergenceLoweringPass()
MachineCycleInfo::CycleT MachineCycle
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
All attributes(register class or bank and low-level type) a virtual register can have.