30#define DEBUG_TYPE "si-i1-copies"
63 bool lowerCopiesFromI1();
64 bool lowerCopiesToI1();
65 bool cleanConstrainRegs(
bool Changed);
67 return Reg.isVirtual() &&
MRI->getRegClass(Reg) == &AMDGPU::VReg_1RegClass;
76bool Vreg1LoweringHelper::cleanConstrainRegs(
bool Changed) {
79 MRI->constrainRegClass(Reg, &AMDGPU::SReg_1_XEXECRegClass);
80 ConstrainRegs.clear();
107class PhiIncomingAnalysis {
125 return ReachableMap.
find(&
MBB)->second;
132 ReachableMap.
clear();
133 ReachableOrdered.
clear();
134 Predecessors.
clear();
143 if (
MBB == &DefBlock) {
144 ReachableMap[&DefBlock] =
true;
157 while (!
Stack.empty()) {
167 bool HaveReachablePred =
false;
169 if (ReachableMap.
count(Pred)) {
170 HaveReachablePred =
true;
172 Stack.push_back(Pred);
175 if (!HaveReachablePred)
176 ReachableMap[
MBB] =
true;
177 if (HaveReachablePred) {
239 unsigned FoundLoopLevel = ~0
u;
247 : DT(DT), PDT(PDT) {}
251 CommonDominators.
clear();
254 VisitedPostDom =
nullptr;
255 FoundLoopLevel = ~0
u;
271 while (PDNode->
getBlock() != PostDom) {
272 if (PDNode->
getBlock() == VisitedPostDom)
276 if (FoundLoopLevel == Level)
296 if (!inLoopLevel(*Dom, LoopLevel, Incomings)) {
303 if (!inLoopLevel(*Pred, LoopLevel, Incomings))
313 auto DomIt = Visited.
find(&
MBB);
314 if (DomIt != Visited.
end() && DomIt->second <= LoopLevel)
324 void advanceLevel() {
327 if (!VisitedPostDom) {
328 VisitedPostDom = DefBlock;
329 VisitedDom = DefBlock;
330 Stack.push_back(DefBlock);
332 VisitedPostDom = PDT.
getNode(VisitedPostDom)->getIDom()->getBlock();
333 VisitedDom = CommonDominators.
back();
335 for (
unsigned i = 0; i < NextLevel.
size();) {
336 if (PDT.
dominates(VisitedPostDom, NextLevel[i])) {
337 Stack.push_back(NextLevel[i]);
339 NextLevel[i] = NextLevel.
back();
347 unsigned Level = CommonDominators.
size();
348 while (!
Stack.empty()) {
353 Visited[
MBB] = Level;
357 if (Succ == DefBlock) {
358 if (
MBB == VisitedPostDom)
359 FoundLoopLevel = std::min(FoundLoopLevel, Level + 1);
361 FoundLoopLevel = std::min(FoundLoopLevel, Level);
366 if (
MBB == VisitedPostDom)
369 Stack.push_back(Succ);
383 return MRI->createVirtualRegister(LaneMaskRegAttrs);
402 unsigned Size =
TRI.getRegSizeInBits(Reg,
MRI);
407bool Vreg1LoweringHelper::lowerCopiesFromI1() {
408 bool Changed =
false;
413 if (
MI.getOpcode() != AMDGPU::COPY)
418 if (!isVreg1(SrcReg))
421 if (isLaneMaskReg(DstReg) || isVreg1(DstReg))
431 assert(!
MI.getOperand(0).getSubReg());
433 ConstrainRegs.insert(SrcReg);
444 MI->eraseFromParent();
453 : MF(MF), DT(DT), PDT(PDT) {
462 MovOp = AMDGPU::S_MOV_B32;
463 AndOp = AMDGPU::S_AND_B32;
464 OrOp = AMDGPU::S_OR_B32;
465 XorOp = AMDGPU::S_XOR_B32;
467 OrN2Op = AMDGPU::S_ORN2_B32;
470 MovOp = AMDGPU::S_MOV_B64;
471 AndOp = AMDGPU::S_AND_B64;
472 OrOp = AMDGPU::S_OR_B64;
473 XorOp = AMDGPU::S_XOR_B64;
475 OrN2Op = AMDGPU::S_ORN2_B64;
481 LoopFinder LF(*
DT, *
PDT);
482 PhiIncomingAnalysis PIA(*
PDT,
TII);
487 if (Vreg1Phis.
empty())
494 if (&
MBB != PrevMBB) {
522 std::vector<MachineBasicBlock *> DomBlocks = {&
MBB};
533 unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
537 if (FoundLoopLevel) {
555 PIA.analyze(
MBB, Incomings);
563 if (PIA.isSource(IMBB)) {
584 if (NewReg != DstReg) {
586 MI->eraseFromParent();
594bool Vreg1LoweringHelper::lowerCopiesToI1() {
595 bool Changed =
false;
597 LoopFinder LF(*DT, *PDT);
604 if (
MI.getOpcode() != AMDGPU::IMPLICIT_DEF &&
605 MI.getOpcode() != AMDGPU::COPY)
609 if (!isVreg1(DstReg))
614 if (
MRI->use_empty(DstReg)) {
621 markAsLaneMask(DstReg);
622 initializeLaneMaskRegisterAttributes(DstReg);
624 if (
MI.getOpcode() == AMDGPU::IMPLICIT_DEF)
629 assert(!
MI.getOperand(1).getSubReg());
631 if (!SrcReg.
isVirtual() || (!isLaneMaskReg(SrcReg) && !isVreg1(SrcReg))) {
632 assert(
TII->getRegisterInfo().getRegSizeInBits(SrcReg, *
MRI) == 32);
637 MI.getOperand(1).setReg(TmpReg);
641 MI.getOperand(1).setIsKill(
false);
646 std::vector<MachineBasicBlock *> DomBlocks = {&
MBB};
652 unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
653 if (FoundLoopLevel) {
656 LF.addLoopEntries(FoundLoopLevel,
SSAUpdater, *
MRI, LaneMaskRegAttrs);
658 buildMergeLaneMasks(
MBB,
MI,
DL, DstReg,
665 MI->eraseFromParent();
675 if (
MI->getOpcode() == AMDGPU::IMPLICIT_DEF)
678 if (
MI->getOpcode() != AMDGPU::COPY)
681 Reg =
MI->getOperand(1).getReg();
682 if (!Reg.isVirtual())
691 if (!
MI->getOperand(1).isImm())
694 int64_t Imm =
MI->getOperand(1).getImm();
712 if (MO.isReg() && MO.getReg() == AMDGPU::SCC) {
726 bool TerminatorsUseSCC =
false;
727 for (
auto I = InsertionPt, E =
MBB.
end();
I != E; ++
I) {
730 if (TerminatorsUseSCC || DefsSCC)
734 if (!TerminatorsUseSCC)
737 while (InsertionPt !=
MBB.
begin()) {
751void Vreg1LoweringHelper::markAsLaneMask(
Register DstReg)
const {
752 MRI->setRegClass(DstReg, ST->getBoolRC());
755void Vreg1LoweringHelper::getCandidatesForLowering(
759 if (isVreg1(
MI.getOperand(0).getReg()))
765void Vreg1LoweringHelper::collectIncomingValuesFromPhi(
767 for (
unsigned i = 1; i <
MI->getNumOperands(); i += 2) {
768 assert(i + 1 <
MI->getNumOperands());
769 Register IncomingReg =
MI->getOperand(i).getReg();
773 if (IncomingDef->
getOpcode() == AMDGPU::COPY) {
775 assert(isLaneMaskReg(IncomingReg) || isVreg1(IncomingReg));
777 }
else if (IncomingDef->
getOpcode() == AMDGPU::IMPLICIT_DEF) {
780 assert(IncomingDef->
isPHI() || PhiRegisters.count(IncomingReg));
789 MRI->replaceRegWith(NewReg, OldReg);
797 bool PrevVal =
false;
798 bool PrevConstant = isConstantLaneMask(PrevReg, PrevVal);
800 bool CurConstant = isConstantLaneMask(CurReg, CurVal);
802 if (PrevConstant && CurConstant) {
803 if (PrevVal == CurVal) {
818 if (CurConstant && CurVal) {
819 PrevMaskedReg = PrevReg;
829 if (PrevConstant && PrevVal) {
830 CurMaskedReg = CurReg;
839 if (PrevConstant && !PrevVal) {
842 }
else if (CurConstant && !CurVal) {
845 }
else if (PrevConstant && PrevVal) {
852 .
addReg(CurMaskedReg ? CurMaskedReg : ExecReg);
// Empty body: this implementation does nothing with the Incoming value it is
// given.
// NOTE(review): presumably this satisfies the pure-virtual
// PhiLoweringHelper::constrainAsLaneMask(Incoming &) hook — confirm against
// the class declaration.
856void Vreg1LoweringHelper::constrainAsLaneMask(
Incoming &In) {}
874 Vreg1LoweringHelper Helper(&MF, &MDT, &MPDT);
875 bool Changed =
false;
876 Changed |= Helper.lowerCopiesFromI1();
877 Changed |= Helper.lowerPhis();
878 Changed |= Helper.lowerCopiesToI1();
879 return Helper.cleanConstrainRegs(Changed);
920 getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
922 getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
const HexagonInstrInfo * TII
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static void instrDefsUsesSCC(const MachineInstr &MI, bool &Def, bool &Use)
static Register insertUndefLaneMask(MachineBasicBlock *MBB, MachineRegisterInfo *MRI, MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs)
static bool runFixI1Copies(MachineFunction &MF, MachineDominatorTree &MDT, MachinePostDominatorTree &MPDT)
Lower all instructions that def or use vreg_1 registers.
static bool isVRegCompatibleReg(const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI, Register Reg)
Interface definition of the PhiLoweringHelper class that implements lane mask merging algorithm for d...
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Represents analyses that only rely on functions' control flow.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Implements a dense probed hash-table based set.
Base class for the actual dominator tree node.
DomTreeNodeBase * getIDom() const
unsigned getDFSNumIn() const
getDFSNumIn/getDFSNumOut - These return the DFS visitation order for nodes in the dominator tree.
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
void updateDFSNumbers() const
updateDFSNumbers - Assign In and Out numbers to the nodes while walking dominator tree in dfs order.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
FunctionPass class - This class is used to implement most global optimizations.
const SIInstrInfo * getInstrInfo() const override
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
void push_back(MachineInstr *MI)
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineBasicBlock * findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B)
findNearestCommonDominator - Find nearest common dominator basic block for basic block A and B.
MachineDomTreeNode * getNode(MachineBasicBlock *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
MachineDominatorTree & getBase()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
bool hasProperty(Property P) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
Register getReg() const
getReg - Returns the register number.
MachinePostDominatorTree - an analysis pass wrapper for DominatorTree used to compute the post-domina...
MachineBasicBlock * findNearestCommonDominator(ArrayRef< MachineBasicBlock * > Blocks) const
Returns the nearest common dominator of the given blocks.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
MachineSSAUpdater - This class updates SSA form for a set of virtual registers defined in multiple bl...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
PhiLoweringHelper(MachineFunction *MF, MachineDominatorTree *DT, MachinePostDominatorTree *PDT)
bool isLaneMaskReg(Register Reg) const
MachineRegisterInfo * MRI
MachineDominatorTree * DT
DenseSet< Register > PhiRegisters
virtual void getCandidatesForLowering(SmallVectorImpl< MachineInstr * > &Vreg1Phis) const =0
virtual void constrainAsLaneMask(Incoming &In)=0
virtual void collectIncomingValuesFromPhi(const MachineInstr *MI, SmallVectorImpl< Incoming > &Incomings) const =0
virtual void markAsLaneMask(Register DstReg) const =0
MachinePostDominatorTree * PDT
MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs
MachineBasicBlock::iterator getSaluInsertionAtEnd(MachineBasicBlock &MBB) const
Return a point at the end of the given MBB to insert SALU instructions for lane mask calculation.
void initializeLaneMaskRegisterAttributes(Register LaneMask)
bool isConstantLaneMask(Register Reg, bool &Val) const
virtual void buildMergeLaneMasks(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, Register PrevReg, Register CurReg)=0
virtual void replaceDstReg(Register NewReg, Register OldReg, MachineBasicBlock *MBB)=0
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
Helper class for SSA formation on a set of values defined in multiple blocks.
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
Value * GetValueInMiddleOfBlock(BasicBlock *BB)
Construct SSA form, materializing a value that is live in the middle of the specified block.
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
A Use represents the edge between a Value definition and its users.
std::pair< iterator, bool > insert(const ValueT &V)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void sort(IteratorTy Start, IteratorTy End)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Register createLaneMaskReg(MachineRegisterInfo *MRI, MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs)
void initializeSILowerI1CopiesLegacyPass(PassRegistry &)
FunctionPass * createSILowerI1CopiesLegacyPass()
char & SILowerI1CopiesLegacyID
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
MachineBasicBlock * Block
All attributes(register class or bank and low-level type) a virtual register can have.