29#define DEBUG_TYPE "si-i1-copies" 
   48  void markAsLaneMask(
Register DstReg) 
const override;
 
   49  void getCandidatesForLowering(
 
   51  void collectIncomingValuesFromPhi(
 
   60  void constrainAsLaneMask(
Incoming &In) 
override;
 
   62  bool lowerCopiesFromI1();
 
   63  bool lowerCopiesToI1();
 
   64  bool cleanConstrainRegs(
bool Changed);
 
   66    return Reg.isVirtual() && 
MRI->getRegClass(
Reg) == &AMDGPU::VReg_1RegClass;
 
   75bool Vreg1LoweringHelper::cleanConstrainRegs(
bool Changed) {
 
   78    MRI->constrainRegClass(
Reg, &AMDGPU::SReg_1_XEXECRegClass);
 
   79  ConstrainRegs.clear();
 
  106class PhiIncomingAnalysis {
 
  107  MachinePostDominatorTree &PDT;
 
  108  const SIInstrInfo *
TII;
 
  112  MapVector<MachineBasicBlock *, bool> ReachableMap;
 
  113  SmallVector<MachineBasicBlock *, 4> Stack;
 
  114  SmallVector<MachineBasicBlock *, 4> Predecessors;
 
  117  PhiIncomingAnalysis(MachinePostDominatorTree &PDT, 
const SIInstrInfo *
TII)
 
  122  bool isSource(MachineBasicBlock &
MBB)
 const {
 
  123    return ReachableMap.
find(&
MBB)->second;
 
  130    ReachableMap.
clear();
 
  131    Predecessors.
clear();
 
  137    for (
auto Incoming : Incomings) {
 
  138      MachineBasicBlock *
MBB = Incoming.Block;
 
  139      if (
MBB == &DefBlock) {
 
  140        ReachableMap[&DefBlock] = 
true; 
 
  152    while (!
Stack.empty()) {
 
  153      MachineBasicBlock *
MBB = 
Stack.pop_back_val();
 
  158    for (
auto &[
MBB, Reachable] : ReachableMap) {
 
  159      bool HaveReachablePred = 
false;
 
  161        if (ReachableMap.count(Pred)) {
 
  162          HaveReachablePred = 
true;
 
  164          Stack.push_back(Pred);
 
  167      if (!HaveReachablePred)
 
  169      if (HaveReachablePred) {
 
  170        for (MachineBasicBlock *UnreachablePred : Stack) {
 
  213  MachineDominatorTree &DT;
 
  214  MachinePostDominatorTree &PDT;
 
  219  DenseMap<MachineBasicBlock *, unsigned> Visited;
 
  223  SmallVector<MachineBasicBlock *, 4> CommonDominators;
 
  226  MachineBasicBlock *VisitedPostDom = 
nullptr;
 
  231  unsigned FoundLoopLevel = ~0
u;
 
  233  MachineBasicBlock *DefBlock = 
nullptr;
 
  234  SmallVector<MachineBasicBlock *, 4> 
Stack;
 
  235  SmallVector<MachineBasicBlock *, 4> NextLevel;
 
  238  LoopFinder(MachineDominatorTree &DT, MachinePostDominatorTree &PDT)
 
  239      : DT(DT), PDT(PDT) {}
 
  243    CommonDominators.
clear();
 
  246    VisitedPostDom = 
nullptr;
 
  247    FoundLoopLevel = ~0
u;
 
  256  unsigned findLoop(MachineBasicBlock *PostDom) {
 
  263    while (PDNode->
getBlock() != PostDom) {
 
  264      if (PDNode->
getBlock() == VisitedPostDom)
 
  268      if (FoundLoopLevel == Level)
 
  278  void addLoopEntries(
unsigned LoopLevel, MachineSSAUpdater &SSAUpdater,
 
  279                      MachineRegisterInfo &
MRI,
 
  280                      MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs,
 
  284    MachineBasicBlock *Dom = CommonDominators[LoopLevel];
 
  285    for (
auto &Incoming : Incomings)
 
  288    if (!inLoopLevel(*Dom, LoopLevel, Incomings)) {
 
  295        if (!inLoopLevel(*Pred, LoopLevel, Incomings))
 
  303  bool inLoopLevel(MachineBasicBlock &
MBB, 
unsigned LoopLevel,
 
  305    auto DomIt = Visited.
find(&
MBB);
 
  306    if (DomIt != Visited.
end() && DomIt->second <= LoopLevel)
 
  309    for (
auto &Incoming : Incomings)
 
  310      if (Incoming.Block == &
MBB)
 
  316  void advanceLevel() {
 
  317    MachineBasicBlock *VisitedDom;
 
  319    if (!VisitedPostDom) {
 
  320      VisitedPostDom = DefBlock;
 
  321      VisitedDom = DefBlock;
 
  322      Stack.push_back(DefBlock);
 
  324      VisitedPostDom = PDT.
getNode(VisitedPostDom)->getIDom()->getBlock();
 
  325      VisitedDom = CommonDominators.
back();
 
  327      for (
unsigned i = 0; i < NextLevel.
size();) {
 
  328        if (PDT.
dominates(VisitedPostDom, NextLevel[i])) {
 
  329          Stack.push_back(NextLevel[i]);
 
  331          NextLevel[i] = NextLevel.
back();
 
  339    unsigned Level = CommonDominators.
size();
 
  340    while (!
Stack.empty()) {
 
  341      MachineBasicBlock *
MBB = 
Stack.pop_back_val();
 
  349        if (Succ == DefBlock) {
 
  350          if (
MBB == VisitedPostDom)
 
  351            FoundLoopLevel = std::min(FoundLoopLevel, Level + 1);
 
  353            FoundLoopLevel = std::min(FoundLoopLevel, Level);
 
  358          if (
MBB == VisitedPostDom)
 
  361            Stack.push_back(Succ);
 
  375  return MRI->createVirtualRegister(LaneMaskRegAttrs);
 
 
  385  BuildMI(*
MBB, 
MBB->getFirstTerminator(), {}, 
TII->get(AMDGPU::IMPLICIT_DEF),
 
 
  399bool Vreg1LoweringHelper::lowerCopiesFromI1() {
 
  401  SmallVector<MachineInstr *, 4> DeadCopies;
 
  403  for (MachineBasicBlock &
MBB : *MF) {
 
  404    for (MachineInstr &
MI : 
MBB) {
 
  405      if (
MI.getOpcode() != AMDGPU::COPY)
 
  410      if (!isVreg1(SrcReg))
 
  413      if (isLaneMaskReg(DstReg) || isVreg1(DstReg))
 
  423      assert(!
MI.getOperand(0).getSubReg());
 
  425      ConstrainRegs.insert(SrcReg);
 
  435    for (MachineInstr *
MI : DeadCopies)
 
  436      MI->eraseFromParent();
 
  446  MRI = &
MF->getRegInfo();
 
  449  TII = 
ST->getInstrInfo();
 
  454    MovOp = AMDGPU::S_MOV_B32;
 
  455    AndOp = AMDGPU::S_AND_B32;
 
  456    OrOp = AMDGPU::S_OR_B32;
 
  457    XorOp = AMDGPU::S_XOR_B32;
 
  459    OrN2Op = AMDGPU::S_ORN2_B32;
 
  462    MovOp = AMDGPU::S_MOV_B64;
 
  463    AndOp = AMDGPU::S_AND_B64;
 
  464    OrOp = AMDGPU::S_OR_B64;
 
  465    XorOp = AMDGPU::S_XOR_B64;
 
  467    OrN2Op = AMDGPU::S_ORN2_B64;
 
 
  473  LoopFinder LF(*
DT, *
PDT);
 
  474  PhiIncomingAnalysis PIA(*
PDT, 
TII);
 
  479  if (Vreg1Phis.
empty())
 
  482  DT->updateDFSNumbers();
 
  486    if (&
MBB != PrevMBB) {
 
  504      return DT->getNode(LHS.Block)->getDFSNumIn() <
 
  505             DT->getNode(RHS.Block)->getDFSNumIn();
 
  514    std::vector<MachineBasicBlock *> DomBlocks = {&
MBB};
 
  516      DomBlocks.push_back(
Use.getParent());
 
  519        PDT->findNearestCommonDominator(DomBlocks);
 
  525    unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
 
  529    if (FoundLoopLevel) {
 
  547      PIA.analyze(
MBB, Incomings);
 
  555        if (PIA.isSource(IMBB)) {
 
  576    if (NewReg != DstReg) {
 
  578      MI->eraseFromParent();
 
 
  586bool Vreg1LoweringHelper::lowerCopiesToI1() {
 
  589  LoopFinder LF(*DT, *PDT);
 
  596      if (
MI.getOpcode() != AMDGPU::IMPLICIT_DEF &&
 
  597          MI.getOpcode() != AMDGPU::COPY)
 
  601      if (!isVreg1(DstReg))
 
  606      if (
MRI->use_empty(DstReg)) {
 
  613      markAsLaneMask(DstReg);
 
  614      initializeLaneMaskRegisterAttributes(DstReg);
 
  616      if (
MI.getOpcode() == AMDGPU::IMPLICIT_DEF)
 
  621      assert(!
MI.getOperand(1).getSubReg());
 
  623      if (!SrcReg.
isVirtual() || (!isLaneMaskReg(SrcReg) && !isVreg1(SrcReg))) {
 
  624        assert(
TII->getRegisterInfo().getRegSizeInBits(SrcReg, *
MRI) == 32);
 
  629        MI.getOperand(1).setReg(TmpReg);
 
  633        MI.getOperand(1).setIsKill(
false);
 
  638      std::vector<MachineBasicBlock *> DomBlocks = {&
MBB};
 
  640        DomBlocks.push_back(
Use.getParent());
 
  644      unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
 
  645      if (FoundLoopLevel) {
 
  648        LF.addLoopEntries(FoundLoopLevel, 
SSAUpdater, *
MRI, LaneMaskRegAttrs);
 
  650        buildMergeLaneMasks(
MBB, 
MI, 
DL, DstReg,
 
  657      MI->eraseFromParent();
 
  666    MI = 
MRI->getUniqueVRegDef(Reg);
 
  667    if (
MI->getOpcode() == AMDGPU::IMPLICIT_DEF)
 
  670    if (
MI->getOpcode() != AMDGPU::COPY)
 
  673    Reg = 
MI->getOperand(1).getReg();
 
  674    if (!Reg.isVirtual())
 
  683  if (!
MI->getOperand(1).isImm())
 
  686  int64_t Imm = 
MI->getOperand(1).getImm();
 
 
  704    if (MO.isReg() && MO.getReg() == AMDGPU::SCC) {
 
 
  717  auto InsertionPt = 
MBB.getFirstTerminator();
 
  718  bool TerminatorsUseSCC = 
false;
 
  719  for (
auto I = InsertionPt, E = 
MBB.end(); 
I != E; ++
I) {
 
  722    if (TerminatorsUseSCC || DefsSCC)
 
  726  if (!TerminatorsUseSCC)
 
  729  while (InsertionPt != 
MBB.begin()) {
 
 
  743void Vreg1LoweringHelper::markAsLaneMask(
Register DstReg)
 const {
 
  744  MRI->setRegClass(DstReg, ST->getBoolRC());
 
  747void Vreg1LoweringHelper::getCandidatesForLowering(
 
  751      if (isVreg1(
MI.getOperand(0).getReg()))
 
  757void Vreg1LoweringHelper::collectIncomingValuesFromPhi(
 
  759  for (
unsigned i = 1; i < 
MI->getNumOperands(); i += 2) {
 
  760    assert(i + 1 < 
MI->getNumOperands());
 
  761    Register IncomingReg = 
MI->getOperand(i).getReg();
 
  765    if (IncomingDef->
getOpcode() == AMDGPU::COPY) {
 
  767      assert(isLaneMaskReg(IncomingReg) || isVreg1(IncomingReg));
 
  769    } 
else if (IncomingDef->
getOpcode() == AMDGPU::IMPLICIT_DEF) {
 
  772      assert(IncomingDef->
isPHI() || PhiRegisters.count(IncomingReg));
 
  781  MRI->replaceRegWith(NewReg, OldReg);
 
  789  bool PrevVal = 
false;
 
  790  bool PrevConstant = isConstantLaneMask(PrevReg, PrevVal);
 
  792  bool CurConstant = isConstantLaneMask(CurReg, CurVal);
 
  794  if (PrevConstant && CurConstant) {
 
  795    if (PrevVal == CurVal) {
 
  810    if (CurConstant && CurVal) {
 
  811      PrevMaskedReg = PrevReg;
 
  821    if (PrevConstant && PrevVal) {
 
  822      CurMaskedReg = CurReg;
 
  831  if (PrevConstant && !PrevVal) {
 
  834  } 
else if (CurConstant && !CurVal) {
 
  837  } 
else if (PrevConstant && PrevVal) {
 
  844        .
addReg(CurMaskedReg ? CurMaskedReg : ExecReg);
 
   // Vreg1LoweringHelper's override of constrainAsLaneMask. The body is empty,
   // so the Incoming passed in is left untouched by this helper.
   848void Vreg1LoweringHelper::constrainAsLaneMask(
Incoming &In) {}
 
  865  Vreg1LoweringHelper Helper(&MF, &MDT, &MPDT);
 
  867  Changed |= Helper.lowerCopiesFromI1();
 
  869  Changed |= Helper.lowerCopiesToI1();
 
  870  return Helper.cleanConstrainRegs(
Changed);
 
 
unsigned const MachineRegisterInfo * MRI
 
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
 
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
 
const HexagonInstrInfo * TII
 
Register const TargetRegisterInfo * TRI
 
Promote Memory to Register
 
#define INITIALIZE_PASS_DEPENDENCY(depName)
 
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
 
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
 
static void instrDefsUsesSCC(const MachineInstr &MI, bool &Def, bool &Use)
 
static Register insertUndefLaneMask(MachineBasicBlock *MBB, MachineRegisterInfo *MRI, MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs)
 
static bool runFixI1Copies(MachineFunction &MF, MachineDominatorTree &MDT, MachinePostDominatorTree &MPDT)
Lower all instructions that def or use vreg_1 registers.
 
static bool isVRegCompatibleReg(const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI, Register Reg)
 
Interface definition of the PhiLoweringHelper class that implements lane mask merging algorithm for d...
 
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
 
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
 
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
 
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
 
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
 
Represent the analysis usage information of a pass.
 
AnalysisUsage & addRequired()
 
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
 
Represents analyses that only rely on functions' control flow.
 
iterator find(const_arg_type_t< KeyT > Val)
 
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
 
Implements a dense probed hash-table based set.
 
DomTreeNodeBase * getIDom() const
 
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
 
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
 
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
 
FunctionPass class - This class is used to implement most global optimizations.
 
iterator_range< succ_iterator > successors()
 
iterator_range< pred_iterator > predecessors()
 
MachineInstrBundleIterator< MachineInstr > iterator
 
Analysis pass which computes a MachineDominatorTree.
 
Analysis pass which computes a MachineDominatorTree.
 
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
 
MachineFunctionPass(char &ID)
 
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
 
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
 
const MachineFunctionProperties & getProperties() const
Get the function properties.
 
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
 
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
 
Representation of each machine instruction.
 
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
 
const MachineOperand & getOperand(unsigned i) const
 
MachineOperand class - Representation of each machine instruction operand.
 
unsigned getSubReg() const
 
Register getReg() const
getReg - Returns the register number.
 
MachinePostDominatorTree - an analysis pass wrapper for DominatorTree used to compute the post-domina...
 
LLVM_ABI MachineBasicBlock * findNearestCommonDominator(ArrayRef< MachineBasicBlock * > Blocks) const
Returns the nearest common dominator of the given blocks.
 
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
 
MachineSSAUpdater - This class updates SSA form for a set of virtual registers defined in multiple bl...
 
void AddAvailableValue(MachineBasicBlock *BB, Register V)
AddAvailableValue - Indicate that a rewritten value is available at the end of the specified block wi...
 
iterator find(const KeyT &Key)
 
std::pair< iterator, bool > try_emplace(const KeyT &Key, Ts &&...Args)
 
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
 
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
 
PhiLoweringHelper(MachineFunction *MF, MachineDominatorTree *DT, MachinePostDominatorTree *PDT)
 
bool isLaneMaskReg(Register Reg) const
 
MachineRegisterInfo * MRI
 
MachineDominatorTree * DT
 
DenseSet< Register > PhiRegisters
 
virtual void getCandidatesForLowering(SmallVectorImpl< MachineInstr * > &Vreg1Phis) const =0
 
virtual void constrainAsLaneMask(Incoming &In)=0
 
virtual void collectIncomingValuesFromPhi(const MachineInstr *MI, SmallVectorImpl< Incoming > &Incomings) const =0
 
virtual void markAsLaneMask(Register DstReg) const =0
 
MachinePostDominatorTree * PDT
 
MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs
 
MachineBasicBlock::iterator getSaluInsertionAtEnd(MachineBasicBlock &MBB) const
Return a point at the end of the given MBB to insert SALU instructions for lane mask calculation.
 
void initializeLaneMaskRegisterAttributes(Register LaneMask)
 
bool isConstantLaneMask(Register Reg, bool &Val) const
 
virtual void buildMergeLaneMasks(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, Register PrevReg, Register CurReg)=0
 
virtual void replaceDstReg(Register NewReg, Register OldReg, MachineBasicBlock *MBB)=0
 
A set of analyses that are preserved following a run of a transformation pass.
 
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
 
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
 
Wrapper class representing virtual and physical registers.
 
constexpr bool isValid() const
 
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
 
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
 
Helper class for SSA formation on a set of values defined in multiple blocks.
 
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
 
Value * GetValueInMiddleOfBlock(BasicBlock *BB)
Construct SSA form, materializing a value that is live in the middle of the specified block.
 
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
 
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
 
reference emplace_back(ArgTypes &&... Args)
 
void push_back(const T &Elt)
 
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
 
StringRef - Represent a constant reference to a string, i.e.
 
A Use represents the edge between a Value definition and its users.
 
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
 
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
 
This is an optimization pass for GlobalISel generic memory operations.
 
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
 
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
 
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
 
void sort(IteratorTy Start, IteratorTy End)
 
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
 
Register createLaneMaskReg(MachineRegisterInfo *MRI, MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs)
 
DomTreeNodeBase< MachineBasicBlock > MachineDomTreeNode
 
void initializeSILowerI1CopiesLegacyPass(PassRegistry &)
 
ArrayRef(const T &OneElt) -> ArrayRef< T >
 
FunctionPass * createSILowerI1CopiesLegacyPass()
 
char & SILowerI1CopiesLegacyID
 
auto predecessors(const MachineBasicBlock *BB)
 
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
 
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
 
MachineBasicBlock * Block
 
All attributes(register class or bank and low-level type) a virtual register can have.