Source listing excerpts from the AMDGPU "SI Lower i1 Copies" pass (SILowerI1Copies.cpp); elided code is marked with "// ...".
#define DEBUG_TYPE "si-i1-copies"

// ...

  bool IsWave32 = false;

// ...

  StringRef getPassName() const override { return "SI Lower i1 Copies"; }

// ...

  bool lowerCopiesFromI1();
  bool lowerCopiesToI1();
  bool isConstantLaneMask(Register Reg, bool &Val) const;
  // Trailing parameters of the buildMergeLaneMasks declaration; the leading
  // parameters are elided in this listing.
                           unsigned DstReg, unsigned PrevReg, unsigned CurReg);

// ...

  // A lane mask lives in an SGPR that is as wide as the wavefront.
  bool isLaneMaskReg(unsigned Reg) const {
    return TII->getRegisterInfo().isSGPRReg(*MRI, Reg) &&
           TII->getRegisterInfo().getRegSizeInBits(Reg, *MRI) ==
               ST->getWavefrontSize();
  }
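A wavefront-wide boolean is one bit per lane, which is why isLaneMaskReg above checks for an SGPR of wavefront-size bits. As a rough standalone illustration (plain integers, no LLVM API; the 64-bit width assumes wave64):

#include <cstdint>
#include <cstdio>

using LaneMask = uint64_t; // wave64; a wave32 mask would fit in uint32_t

// Read one lane's boolean out of the wave-wide mask.
static bool laneValue(LaneMask M, unsigned Lane) { return (M >> Lane) & 1; }

int main() {
  LaneMask Cond = 0x00000000FFFFFFFFull; // lanes 0..31 hold 'true'
  printf("lane 3: %d, lane 40: %d\n", laneValue(Cond, 3), laneValue(Cond, 40));
  return 0;
}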
// PhiIncomingAnalysis: reachability bookkeeping over the CFG, rooted at the
// PHI's block (DefBlock), used when lowering i1 PHIs.
class PhiIncomingAnalysis {
// ...

    return ReachableMap.find(&MBB)->second;

// ...
    ReachableMap.clear();
    ReachableOrdered.clear();
    Predecessors.clear();

// ...
    ReachableOrdered.push_back(&DefBlock);

// ...
      if (MBB == &DefBlock) {
        ReachableMap[&DefBlock] = true;
// ...
        ReachableOrdered.push_back(MBB);

// ...
        bool Divergent = false;
// ...
          if (MI.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO ||
              MI.getOpcode() == AMDGPU::SI_IF ||
              MI.getOpcode() == AMDGPU::SI_ELSE ||
              MI.getOpcode() == AMDGPU::SI_LOOP) {

// ...
    while (!Stack.empty()) {
// ...
      ReachableOrdered.push_back(MBB);

// ...
      bool HaveReachablePred = false;
// ...
        if (ReachableMap.count(Pred)) {
          HaveReachablePred = true;
        } else {
          Stack.push_back(Pred);
        }
// ...
      if (!HaveReachablePred)
        ReachableMap[MBB] = true;
      if (HaveReachablePred) {
// ...
            Predecessors.push_back(UnreachablePred);
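PhiIncomingAnalysis fills its ReachableMap with a stack-based walk over the CFG. The toy program below sketches only that general worklist shape on a hypothetical four-block diamond; it does not reproduce the pass's exact criteria for marking blocks.

#include <cstdio>
#include <map>
#include <set>
#include <vector>

int main() {
  // Successor lists for a diamond CFG: 0 -> {1,2}, 1 -> {3}, 2 -> {3}.
  std::map<int, std::vector<int>> Succs = {{0, {1, 2}}, {1, {3}}, {2, {3}}, {3, {}}};
  std::set<int> Reachable;
  std::vector<int> Stack = {0}; // start the walk at the defining block
  while (!Stack.empty()) {
    int BB = Stack.back();
    Stack.pop_back();
    if (!Reachable.insert(BB).second)
      continue; // already visited
    for (int S : Succs[BB])
      Stack.push_back(S);
  }
  for (int BB : Reachable)
    printf("block %d reachable from block 0\n", BB);
  return 0;
}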
// LoopFinder: uses the dominator and post-dominator trees to find cycles
// around the defining block that force lane-mask merging.
// ...
  unsigned FoundLoopLevel = ~0u;

// ...
      : DT(DT), PDT(PDT) {}

// ...
    CommonDominators.clear();
// ...
    VisitedPostDom = nullptr;
    FoundLoopLevel = ~0u;

// ...
    while (PDNode->getBlock() != PostDom) {
      if (PDNode->getBlock() == VisitedPostDom)
// ...
      if (FoundLoopLevel == Level)

// ...
    assert(LoopLevel < CommonDominators.size());
// ...
      if (!inLoopLevel(*Dom, LoopLevel, Blocks)) {
// ...
        if (!inLoopLevel(*Pred, LoopLevel, Blocks))

// ...
    auto DomIt = Visited.find(&MBB);
    if (DomIt != Visited.end() && DomIt->second <= LoopLevel)

// ...
  void advanceLevel() {
// ...
    if (!VisitedPostDom) {
      VisitedPostDom = DefBlock;
      VisitedDom = DefBlock;
      Stack.push_back(DefBlock);
// ...
      VisitedDom = CommonDominators.back();
// ...
      for (unsigned i = 0; i < NextLevel.size();) {
        if (PDT.dominates(VisitedPostDom, NextLevel[i])) {
          Stack.push_back(NextLevel[i]);
// ...
          NextLevel[i] = NextLevel.back();
          NextLevel.pop_back();

// ...
    unsigned Level = CommonDominators.size();
    while (!Stack.empty()) {
// ...
      NextLevel.push_back(MBB);
// ...
        if (Succ == DefBlock) {
          if (MBB == VisitedPostDom)
// ...
        if (MBB == VisitedPostDom)
          NextLevel.push_back(Succ);
// ...
          Stack.push_back(Succ);

// ...
    CommonDominators.push_back(VisitedDom);
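LoopFinder repeatedly extends CommonDominators, which at its core relies on nearest-common-dominator queries. The sketch below shows that basic operation on a hypothetical dominator tree stored as parent indices; the real pass queries MachineDominatorTree/MachinePostDominatorTree instead.

#include <cstdio>
#include <set>

int main() {
  // Immediate dominator of each node; node 0 is the root and dominates itself.
  int IDom[] = {0, 0, 0, 1, 1, 2};
  auto CommonDom = [&](int A, int B) {
    std::set<int> Chain;
    // Collect A's dominator chain, then walk up from B until the chains meet.
    for (int N = A;; N = IDom[N]) { Chain.insert(N); if (N == 0) break; }
    for (int N = B;; N = IDom[N]) { if (Chain.count(N)) return N; if (N == 0) break; }
    return 0;
  };
  printf("common dominator of 3 and 5: %d\n", CommonDom(3, 5)); // expect 0
  printf("common dominator of 3 and 4: %d\n", CommonDom(3, 4)); // expect 1
  return 0;
}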
char SILowerI1Copies::ID = 0;

// ...
  return new SILowerI1Copies();

// ...
  // Tail of createLaneMaskReg(): SReg_32 for wave32, SReg_64 otherwise.
                                        : &AMDGPU::SReg_64RegClass);

// ...
  DT = &getAnalysis<MachineDominatorTree>();
  PDT = &getAnalysis<MachinePostDominatorTree>();
// ...
  TII = ST->getInstrInfo();
  IsWave32 = ST->isWave32();

  // Cache the wave-size dependent scalar opcodes and exec register once.
  if (IsWave32) {
    ExecReg = AMDGPU::EXEC_LO;
    MovOp = AMDGPU::S_MOV_B32;
    AndOp = AMDGPU::S_AND_B32;
    OrOp = AMDGPU::S_OR_B32;
    XorOp = AMDGPU::S_XOR_B32;
    AndN2Op = AMDGPU::S_ANDN2_B32;
    OrN2Op = AMDGPU::S_ORN2_B32;
  } else {
    ExecReg = AMDGPU::EXEC;
    MovOp = AMDGPU::S_MOV_B64;
    AndOp = AMDGPU::S_AND_B64;
    OrOp = AMDGPU::S_OR_B64;
    XorOp = AMDGPU::S_XOR_B64;
    AndN2Op = AMDGPU::S_ANDN2_B64;
    OrN2Op = AMDGPU::S_ORN2_B64;
  }

// ...
  bool Changed = false;
  Changed |= lowerCopiesFromI1();
  Changed |= lowerPhis();
  Changed |= lowerCopiesToI1();

// ...
  for (unsigned Reg : ConstrainRegs)
// ...
  ConstrainRegs.clear();

// ...
  return Size == 1 || Size == 32;
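The wave-size switch above caches the scalar opcodes and exec register once so the rest of the pass is wave-size agnostic. A standalone sketch of that selection pattern (strings stand in for the real opcode enums, and selectOps is a hypothetical helper, not part of the pass):

#include <cstdio>
#include <string>

struct LaneMaskOps {
  std::string Mov, And, Or, Xor, AndN2, OrN2, Exec;
};

// Pick the 32-bit scalar ops for wave32, the 64-bit ones for wave64.
static LaneMaskOps selectOps(bool IsWave32) {
  if (IsWave32)
    return {"S_MOV_B32", "S_AND_B32", "S_OR_B32", "S_XOR_B32",
            "S_ANDN2_B32", "S_ORN2_B32", "EXEC_LO"};
  return {"S_MOV_B64", "S_AND_B64", "S_OR_B64", "S_XOR_B64",
          "S_ANDN2_B64", "S_ORN2_B64", "EXEC"};
}

int main() {
  LaneMaskOps Ops = selectOps(/*IsWave32=*/false);
  printf("merge uses %s / %s with exec register %s\n", Ops.AndN2.c_str(),
         Ops.And.c_str(), Ops.Exec.c_str());
  return 0;
}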
bool SILowerI1Copies::lowerCopiesFromI1() {
  bool Changed = false;
// ...
      if (MI.getOpcode() != AMDGPU::COPY)
// ...
      if (!isVreg1(SrcReg))
// ...
      if (isLaneMaskReg(DstReg) || isVreg1(DstReg))
// ...
      assert(!MI.getOperand(0).getSubReg());
// ...
      ConstrainRegs.insert(SrcReg);
// ...
      DeadCopies.push_back(&MI);
// ...
    MI->eraseFromParent();
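A copy *from* an i1 turns the wave-wide mask into a per-lane value. The loop below is only a semantic model with plain integers (a pretend 4-lane wave; the 0/-1 per-lane encoding is an assumption of this sketch), not the instruction sequence the pass emits:

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Mask = 0x5ull; // lanes 0 and 2 hold 'true'
  int32_t Vgpr[4];        // pretend per-lane destination register
  // Each lane reads its own bit of the mask and materializes a full-width value.
  for (unsigned Lane = 0; Lane < 4; ++Lane)
    Vgpr[Lane] = (Mask >> Lane) & 1 ? -1 : 0;
  for (unsigned Lane = 0; Lane < 4; ++Lane)
    printf("lane %u -> %d\n", Lane, Vgpr[Lane]);
  return 0;
}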
// Lower PHIs of vreg_1 values into lane-mask merges, with MachineSSAUpdater
// providing the per-block reaching values.
bool SILowerI1Copies::lowerPhis() {
// ...
  LoopFinder LF(*DT, *PDT);
  PhiIncomingAnalysis PIA(*PDT);

// ...
      if (isVreg1(MI.getOperand(0).getReg()))
        Vreg1Phis.push_back(&MI);

// ...
  if (Vreg1Phis.empty())
// ...
    if (&MBB != PrevMBB) {
// ...
    // Retype the PHI result as a lane-mask register class.
                                      : &AMDGPU::SReg_64RegClass);

// ...
    for (unsigned i = 1; i < MI->getNumOperands(); i += 2) {
// ...
      if (IncomingDef->getOpcode() == AMDGPU::COPY) {
// ...
        assert(isLaneMaskReg(IncomingReg) || isVreg1(IncomingReg));
// ...
      } else if (IncomingDef->getOpcode() == AMDGPU::IMPLICIT_DEF) {
// ...
      IncomingBlocks.push_back(IncomingMBB);
      IncomingRegs.push_back(IncomingReg);
// ...
    PhiRegisters.insert(DstReg);

// ...
    std::vector<MachineBasicBlock *> DomBlocks = {&MBB};
// ...
    unsigned FoundLoopLevel = LF.findLoop(PostDomBound);

// ...
    if (FoundLoopLevel) {
      LF.addLoopEntries(FoundLoopLevel, SSAUpdater, IncomingBlocks);
// ...
      for (unsigned i = 0; i < IncomingRegs.size(); ++i) {
// ...
                                     IncomingUpdated.back());
// ...
      for (unsigned i = 0; i < IncomingRegs.size(); ++i) {
// ...
            IMBB, getSaluInsertionAtEnd(IMBB), {}, IncomingUpdated[i],
// ...
    } else {
      PIA.analyze(MBB, IncomingBlocks);
// ...
      for (unsigned i = 0; i < IncomingRegs.size(); ++i) {
// ...
        if (PIA.isSource(IMBB)) {
          IncomingUpdated.push_back(0);
// ...
      for (unsigned i = 0; i < IncomingRegs.size(); ++i) {
        if (!IncomingUpdated[i])
// ...
            IMBB, getSaluInsertionAtEnd(IMBB), {}, IncomingUpdated[i],

// ...
    if (NewReg != DstReg) {
// ...
      MI->eraseFromParent();

// ...
    IncomingBlocks.clear();
    IncomingRegs.clear();
    IncomingUpdated.clear();
bool SILowerI1Copies::lowerCopiesToI1() {
  bool Changed = false;
// ...
  LoopFinder LF(*DT, *PDT);

// ...
      if (MI.getOpcode() != AMDGPU::IMPLICIT_DEF &&
          MI.getOpcode() != AMDGPU::COPY)
// ...
      if (!isVreg1(DstReg))
// ...
        DeadCopies.push_back(&MI);

// ...
      // Retype the vreg_1 destination as a lane-mask register class.
                                        : &AMDGPU::SReg_64RegClass);
      if (MI.getOpcode() == AMDGPU::IMPLICIT_DEF)
// ...
      assert(!MI.getOperand(1).getSubReg());

// ...
      if (!SrcReg.isVirtual() || (!isLaneMaskReg(SrcReg) && !isVreg1(SrcReg))) {
        assert(TII->getRegisterInfo().getRegSizeInBits(SrcReg, *MRI) == 32);
// ...
        MI.getOperand(1).setReg(TmpReg);

// ...
      std::vector<MachineBasicBlock *> DomBlocks = {&MBB};
// ...
      unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
      if (FoundLoopLevel) {
// ...
        LF.addLoopEntries(FoundLoopLevel, SSAUpdater);
// ...
        buildMergeLaneMasks(MBB, MI, DL, DstReg,
// ...
        DeadCopies.push_back(&MI);

// ...
    MI->eraseFromParent();
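lowerCopiesToI1 also accepts plain 32-bit virtual-register sources; conceptually each lane then contributes one mask bit, set when its value is non-zero. The standalone loop below models that conversion only at the semantic level (pretend 4-lane wave; the non-zero test is an assumption of this sketch, not a statement about which compare the pass emits):

#include <cstdint>
#include <cstdio>

int main() {
  int32_t Vgpr[4] = {0, 7, 0, -1}; // hypothetical per-lane source values
  uint64_t Mask = 0;
  // Fold each lane's truth value into its bit of the lane mask.
  for (unsigned Lane = 0; Lane < 4; ++Lane)
    if (Vgpr[Lane] != 0)
      Mask |= 1ull << Lane;
  printf("lane mask = 0x%llx\n", (unsigned long long)Mask); // 0xa
  return 0;
}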
// Follow COPY chains from Reg to decide whether the lane mask is a known
// constant, i.e. all lanes true or all lanes false.
bool SILowerI1Copies::isConstantLaneMask(Register Reg, bool &Val) const {
// ...
    if (MI->getOpcode() == AMDGPU::IMPLICIT_DEF)
// ...
    if (MI->getOpcode() != AMDGPU::COPY)
// ...
    Reg = MI->getOperand(1).getReg();
    if (!Reg.isVirtual())
// ...
    if (!isLaneMaskReg(Reg))
// ...
  if (MI->getOpcode() != MovOp)
// ...
  if (!MI->getOperand(1).isImm())
// ...
  int64_t Imm = MI->getOperand(1).getImm();
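isConstantLaneMask chases COPYs back to a move-immediate to decide whether every lane of the mask is known. The toy def-chain walk below mirrors that shape on a hypothetical three-field instruction record, not on MachineInstr:

#include <cstdio>
#include <map>

enum Op { Copy, MovImm, Other };
struct Def { Op Opcode; int Src; long Imm; }; // Src: source register id for Copy

// Follow Copy defs until a MovImm (constant) or anything else (unknown).
static bool isConstant(const std::map<int, Def> &Defs, int Reg, long &Val) {
  for (;;) {
    auto It = Defs.find(Reg);
    if (It == Defs.end())
      return false;
    if (It->second.Opcode == Copy) { Reg = It->second.Src; continue; }
    if (It->second.Opcode == MovImm) { Val = It->second.Imm; return true; }
    return false;
  }
}

int main() {
  std::map<int, Def> Defs = {{3, {Copy, 2, 0}}, {2, {MovImm, 0, -1}}};
  long Val;
  if (isConstant(Defs, 3, Val))
    printf("register 3 is constant %ld\n", Val); // -1, i.e. all lanes true
  return 0;
}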
  // Part of getSaluInsertionAtEnd(): if the block's terminators read SCC, back
  // the insertion point up so the lane-mask update does not clobber SCC.
  bool TerminatorsUseSCC = false;
  for (auto I = InsertionPt, E = MBB.end(); I != E; ++I) {
// ...
    if (TerminatorsUseSCC || DefsSCC)
// ...
  if (!TerminatorsUseSCC)
// ...
  while (InsertionPt != MBB.begin()) {
// buildMergeLaneMasks: combine the previous and current lane masks under EXEC.
// Trailing parameters of the definition; the leading parameters are elided.
                                          unsigned PrevReg, unsigned CurReg) {
  bool PrevVal = false;
  bool PrevConstant = isConstantLaneMask(PrevReg, PrevVal);
  bool CurVal = false;
  bool CurConstant = isConstantLaneMask(CurReg, CurVal);

// ...
  if (PrevConstant && CurConstant) {
    if (PrevVal == CurVal) {
// ...

  unsigned PrevMaskedReg = 0;
  unsigned CurMaskedReg = 0;
// ...
    if (CurConstant && CurVal) {
      PrevMaskedReg = PrevReg;
// ...
    if (PrevConstant && PrevVal) {
      CurMaskedReg = CurReg;
// ...

  if (PrevConstant && !PrevVal) {
// ...
  } else if (CurConstant && !CurVal) {
// ...
  } else if (PrevConstant && PrevVal) {
// ...
        .addReg(CurMaskedReg ? CurMaskedReg : ExecReg);
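In the general case the merge above combines the two masks under EXEC using the cached AndN2/And/Or opcodes: lanes outside EXEC keep the previous boolean, active lanes take the new one. A standalone model of that bit math (wave64 masks as plain 64-bit integers, not the pass's code):

#include <cstdint>
#include <cstdio>

// Keep the old boolean in inactive lanes, take the new one in active lanes.
static uint64_t mergeLaneMasks(uint64_t Prev, uint64_t Cur, uint64_t Exec) {
  return (Prev & ~Exec) | (Cur & Exec);
}

int main() {
  uint64_t Prev = 0xFF00000000000000ull; // old value of the i1 variable
  uint64_t Cur  = 0x00000000000000FFull; // value produced by the active lanes
  uint64_t Exec = 0x00000000FFFFFFFFull; // lanes 0..31 currently active
  printf("merged = 0x%016llx\n",
         (unsigned long long)mergeLaneMasks(Prev, Cur, Exec));
  // merged = 0xff000000000000ff
  return 0;
}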