30#define DEBUG_TYPE "si-i1-copies"
83 bool lowerCopiesFromI1();
84 bool lowerCopiesToI1();
85 bool cleanConstrainRegs(
bool Changed);
87 return Reg.isVirtual() &&
MRI->getRegClass(Reg) == &AMDGPU::VReg_1RegClass;
96bool Vreg1LoweringHelper::cleanConstrainRegs(
bool Changed) {
99 MRI->constrainRegClass(Reg, &AMDGPU::SReg_1_XEXECRegClass);
100 ConstrainRegs.clear();
127class PhiIncomingAnalysis {
145 return ReachableMap.
find(&
MBB)->second;
152 ReachableMap.
clear();
153 ReachableOrdered.
clear();
154 Predecessors.
clear();
163 if (
MBB == &DefBlock) {
164 ReachableMap[&DefBlock] =
true;
177 while (!
Stack.empty()) {
187 bool HaveReachablePred =
false;
189 if (ReachableMap.
count(Pred)) {
190 HaveReachablePred =
true;
192 Stack.push_back(Pred);
195 if (!HaveReachablePred)
196 ReachableMap[
MBB] =
true;
197 if (HaveReachablePred) {
259 unsigned FoundLoopLevel = ~0
u;
267 : DT(DT), PDT(PDT) {}
271 CommonDominators.
clear();
274 VisitedPostDom =
nullptr;
275 FoundLoopLevel = ~0
u;
291 while (PDNode->
getBlock() != PostDom) {
292 if (PDNode->
getBlock() == VisitedPostDom)
296 if (FoundLoopLevel == Level)
316 if (!inLoopLevel(*Dom, LoopLevel, Incomings)) {
323 if (!inLoopLevel(*Pred, LoopLevel, Incomings))
333 auto DomIt = Visited.
find(&
MBB);
334 if (DomIt != Visited.
end() && DomIt->second <= LoopLevel)
344 void advanceLevel() {
347 if (!VisitedPostDom) {
348 VisitedPostDom = DefBlock;
349 VisitedDom = DefBlock;
350 Stack.push_back(DefBlock);
352 VisitedPostDom = PDT.
getNode(VisitedPostDom)->getIDom()->getBlock();
353 VisitedDom = CommonDominators.
back();
355 for (
unsigned i = 0; i < NextLevel.
size();) {
356 if (PDT.
dominates(VisitedPostDom, NextLevel[i])) {
357 Stack.push_back(NextLevel[i]);
359 NextLevel[i] = NextLevel.
back();
367 unsigned Level = CommonDominators.
size();
368 while (!
Stack.empty()) {
373 Visited[
MBB] = Level;
377 if (Succ == DefBlock) {
378 if (
MBB == VisitedPostDom)
379 FoundLoopLevel = std::min(FoundLoopLevel, Level + 1);
381 FoundLoopLevel = std::min(FoundLoopLevel, Level);
386 if (
MBB == VisitedPostDom)
389 Stack.push_back(Succ);
407char SILowerI1Copies::
ID = 0;
412 return new SILowerI1Copies();
418 return MRI->createVirtualRegister(LaneMaskRegAttrs);
445 MachineFunctionProperties::Property::Selected))
448 Vreg1LoweringHelper Helper(
449 &TheMF, &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(),
450 &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree());
452 bool Changed =
false;
453 Changed |= Helper.lowerCopiesFromI1();
454 Changed |= Helper.lowerPhis();
455 Changed |= Helper.lowerCopiesToI1();
456 return Helper.cleanConstrainRegs(Changed);
463 unsigned Size =
TRI.getRegSizeInBits(Reg,
MRI);
468bool Vreg1LoweringHelper::lowerCopiesFromI1() {
469 bool Changed =
false;
474 if (
MI.getOpcode() != AMDGPU::COPY)
479 if (!isVreg1(SrcReg))
482 if (isLaneMaskReg(DstReg) || isVreg1(DstReg))
492 assert(!
MI.getOperand(0).getSubReg());
494 ConstrainRegs.insert(SrcReg);
505 MI->eraseFromParent();
514 : MF(MF), DT(DT), PDT(PDT) {
523 MovOp = AMDGPU::S_MOV_B32;
524 AndOp = AMDGPU::S_AND_B32;
525 OrOp = AMDGPU::S_OR_B32;
526 XorOp = AMDGPU::S_XOR_B32;
528 OrN2Op = AMDGPU::S_ORN2_B32;
531 MovOp = AMDGPU::S_MOV_B64;
532 AndOp = AMDGPU::S_AND_B64;
533 OrOp = AMDGPU::S_OR_B64;
534 XorOp = AMDGPU::S_XOR_B64;
536 OrN2Op = AMDGPU::S_ORN2_B64;
542 LoopFinder LF(*
DT, *
PDT);
543 PhiIncomingAnalysis PIA(*
PDT,
TII);
548 if (Vreg1Phis.
empty())
555 if (&
MBB != PrevMBB) {
583 std::vector<MachineBasicBlock *> DomBlocks = {&
MBB};
594 unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
598 if (FoundLoopLevel) {
616 PIA.analyze(
MBB, Incomings);
624 if (PIA.isSource(IMBB)) {
645 if (NewReg != DstReg) {
647 MI->eraseFromParent();
655bool Vreg1LoweringHelper::lowerCopiesToI1() {
656 bool Changed =
false;
658 LoopFinder LF(*DT, *PDT);
665 if (
MI.getOpcode() != AMDGPU::IMPLICIT_DEF &&
666 MI.getOpcode() != AMDGPU::COPY)
670 if (!isVreg1(DstReg))
675 if (
MRI->use_empty(DstReg)) {
682 markAsLaneMask(DstReg);
683 initializeLaneMaskRegisterAttributes(DstReg);
685 if (
MI.getOpcode() == AMDGPU::IMPLICIT_DEF)
690 assert(!
MI.getOperand(1).getSubReg());
692 if (!SrcReg.
isVirtual() || (!isLaneMaskReg(SrcReg) && !isVreg1(SrcReg))) {
693 assert(
TII->getRegisterInfo().getRegSizeInBits(SrcReg, *
MRI) == 32);
698 MI.getOperand(1).setReg(TmpReg);
702 MI.getOperand(1).setIsKill(
false);
707 std::vector<MachineBasicBlock *> DomBlocks = {&
MBB};
713 unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
714 if (FoundLoopLevel) {
717 LF.addLoopEntries(FoundLoopLevel,
SSAUpdater, *
MRI, LaneMaskRegAttrs);
719 buildMergeLaneMasks(
MBB,
MI,
DL, DstReg,
726 MI->eraseFromParent();
736 if (
MI->getOpcode() == AMDGPU::IMPLICIT_DEF)
739 if (
MI->getOpcode() != AMDGPU::COPY)
742 Reg =
MI->getOperand(1).getReg();
743 if (!Reg.isVirtual())
752 if (!
MI->getOperand(1).isImm())
755 int64_t Imm =
MI->getOperand(1).getImm();
773 if (MO.isReg() && MO.getReg() == AMDGPU::SCC) {
787 bool TerminatorsUseSCC =
false;
788 for (
auto I = InsertionPt, E =
MBB.
end();
I != E; ++
I) {
791 if (TerminatorsUseSCC || DefsSCC)
795 if (!TerminatorsUseSCC)
798 while (InsertionPt !=
MBB.
begin()) {
812void Vreg1LoweringHelper::markAsLaneMask(
Register DstReg)
const {
813 MRI->setRegClass(DstReg, ST->getBoolRC());
816void Vreg1LoweringHelper::getCandidatesForLowering(
820 if (isVreg1(
MI.getOperand(0).getReg()))
826void Vreg1LoweringHelper::collectIncomingValuesFromPhi(
828 for (
unsigned i = 1; i <
MI->getNumOperands(); i += 2) {
829 assert(i + 1 <
MI->getNumOperands());
830 Register IncomingReg =
MI->getOperand(i).getReg();
834 if (IncomingDef->
getOpcode() == AMDGPU::COPY) {
836 assert(isLaneMaskReg(IncomingReg) || isVreg1(IncomingReg));
838 }
else if (IncomingDef->
getOpcode() == AMDGPU::IMPLICIT_DEF) {
841 assert(IncomingDef->
isPHI() || PhiRegisters.count(IncomingReg));
850 MRI->replaceRegWith(NewReg, OldReg);
858 bool PrevVal =
false;
859 bool PrevConstant = isConstantLaneMask(PrevReg, PrevVal);
861 bool CurConstant = isConstantLaneMask(CurReg, CurVal);
863 if (PrevConstant && CurConstant) {
864 if (PrevVal == CurVal) {
879 if (CurConstant && CurVal) {
880 PrevMaskedReg = PrevReg;
890 if (PrevConstant && PrevVal) {
891 CurMaskedReg = CurReg;
900 if (PrevConstant && !PrevVal) {
903 }
else if (CurConstant && !CurVal) {
906 }
else if (PrevConstant && PrevVal) {
913 .
addReg(CurMaskedReg ? CurMaskedReg : ExecReg);
/// Intentionally empty: this implementation performs no work on \p In.
917void Vreg1LoweringHelper::constrainAsLaneMask(
Incoming &In) {}
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
const HexagonInstrInfo * TII
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static void instrDefsUsesSCC(const MachineInstr &MI, bool &Def, bool &Use)
static Register insertUndefLaneMask(MachineBasicBlock *MBB, MachineRegisterInfo *MRI, MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs)
static bool isVRegCompatibleReg(const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI, Register Reg)
Interface definition of the PhiLoweringHelper class that implements lane mask merging algorithm for d...
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Implements a dense probed hash-table based set.
Base class for the actual dominator tree node.
DomTreeNodeBase * getIDom() const
unsigned getDFSNumIn() const
getDFSNumIn/getDFSNumOut - These return the DFS visitation order for nodes in the dominator tree.
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
void updateDFSNumbers() const
updateDFSNumbers - Assign In and Out numbers to the nodes while walking the dominator tree in DFS order.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
FunctionPass class - This class is used to implement most global optimizations.
const SIInstrInfo * getInstrInfo() const override
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
void push_back(MachineInstr *MI)
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineBasicBlock * findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B)
findNearestCommonDominator - Find nearest common dominator basic block for basic block A and B.
MachineDomTreeNode * getNode(MachineBasicBlock *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
MachineDominatorTree & getBase()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool hasProperty(Property P) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
Register getReg() const
getReg - Returns the register number.
MachinePostDominatorTree - an analysis pass wrapper for DominatorTree used to compute the post-domina...
MachineBasicBlock * findNearestCommonDominator(ArrayRef< MachineBasicBlock * > Blocks) const
Returns the nearest common dominator of the given blocks.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
MachineSSAUpdater - This class updates SSA form for a set of virtual registers defined in multiple bl...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
PhiLoweringHelper(MachineFunction *MF, MachineDominatorTree *DT, MachinePostDominatorTree *PDT)
bool isLaneMaskReg(Register Reg) const
MachineRegisterInfo * MRI
MachineDominatorTree * DT
DenseSet< Register > PhiRegisters
virtual void getCandidatesForLowering(SmallVectorImpl< MachineInstr * > &Vreg1Phis) const =0
virtual void constrainAsLaneMask(Incoming &In)=0
virtual void collectIncomingValuesFromPhi(const MachineInstr *MI, SmallVectorImpl< Incoming > &Incomings) const =0
virtual void markAsLaneMask(Register DstReg) const =0
MachinePostDominatorTree * PDT
MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs
MachineBasicBlock::iterator getSaluInsertionAtEnd(MachineBasicBlock &MBB) const
Return a point at the end of the given MBB to insert SALU instructions for lane mask calculation.
void initializeLaneMaskRegisterAttributes(Register LaneMask)
bool isConstantLaneMask(Register Reg, bool &Val) const
virtual void buildMergeLaneMasks(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, Register PrevReg, Register CurReg)=0
virtual void replaceDstReg(Register NewReg, Register OldReg, MachineBasicBlock *MBB)=0
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Helper class for SSA formation on a set of values defined in multiple blocks.
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
Value * GetValueInMiddleOfBlock(BasicBlock *BB)
Construct SSA form, materializing a value that is live in the middle of the specified block.
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
A Use represents the edge between a Value definition and its users.
std::pair< iterator, bool > insert(const ValueT &V)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Reg
All possible values of the reg field in the ModR/M byte.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void sort(IteratorTy Start, IteratorTy End)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Register createLaneMaskReg(MachineRegisterInfo *MRI, MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs)
FunctionPass * createSILowerI1CopiesPass()
void initializeSILowerI1CopiesPass(PassRegistry &)
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
MachineBasicBlock * Block
All attributes(register class or bank and low-level type) a virtual register can have.