doxygen/SIPreEmitPeephole_8cpp_source.html

//===-- SIPreEmitPeephole.cpp ------------------------------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

/// \file

/// This pass performs the peephole optimizations before code emission.

///

/// Additionally, this pass also unpacks packed instructions (V_PK_MUL_F32/F16,

/// V_PK_ADD_F32/F16, V_PK_FMA_F32) adjacent to MFMAs such that they can be

/// co-issued. This helps with overlapping MFMA and certain vector instructions

/// in machine schedules and is expected to improve performance. Only those

/// packed instructions are unpacked that are overlapped by the MFMA latency.

/// Rest should remain untouched.

/// TODO: Add support for F16 packed instructions

//===----------------------------------------------------------------------===//


#include "AMDGPU.h"

#include "GCNSubtarget.h"

#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#include "llvm/ADT/SetVector.h"

#include "llvm/CodeGen/MachineFunctionPass.h"

#include "llvm/CodeGen/TargetSchedule.h"

#include "llvm/Support/BranchProbability.h"


using namespace llvm;


#define DEBUG_TYPE "si-pre-emit-peephole"


namespace {


class SIPreEmitPeephole {

private:

  const SIInstrInfo *TII = nullptr;

  const SIRegisterInfo *TRI = nullptr;


  bool optimizeVccBranch(MachineInstr &MI) const;

  bool optimizeSetGPR(MachineInstr &First, MachineInstr &MI) const;

  bool getBlockDestinations(MachineBasicBlock &SrcMBB,

                            MachineBasicBlock *&TrueMBB,

                            MachineBasicBlock *&FalseMBB,

                            SmallVectorImpl<MachineOperand> &Cond);

  bool mustRetainExeczBranch(const MachineInstr &Branch,

                             const MachineBasicBlock &From,

                             const MachineBasicBlock &To) const;

  bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB);

  // Creates a list of packed instructions following an MFMA that are suitable

  // for unpacking.

  void collectUnpackingCandidates(MachineInstr &BeginMI,

                                  SetVector<MachineInstr *> &InstrsToUnpack,

                                  uint16_t NumMFMACycles);

  // v_pk_fma_f32 v[0:1], v[0:1], v[2:3], v[2:3] op_sel:[1,1,1]

  // op_sel_hi:[0,0,0]

  // ==>

  // v_fma_f32 v0, v1, v3, v3

  // v_fma_f32 v1, v0, v2, v2

  // Here, we have overwritten v0 before we use it. This function checks if

  // unpacking can lead to such a situation.

  bool canUnpackingClobberRegister(const MachineInstr &MI);

  // Unpack and insert F32 packed instructions, such as V_PK_MUL, V_PK_ADD, and

  // V_PK_FMA. Currently, only V_PK_MUL, V_PK_ADD, V_PK_FMA are supported for

  // this transformation.

  void performF32Unpacking(MachineInstr &I);

  // Select corresponding unpacked instruction

  uint16_t mapToUnpackedOpcode(MachineInstr &I);

  // Creates the unpacked instruction to be inserted. Adds source modifiers to

  // the unpacked instructions based on the source modifiers in the packed

  // instruction.

  MachineInstrBuilder createUnpackedMI(MachineInstr &I, uint16_t UnpackedOpcode,

                                       bool IsHiBits);

  // Process operands/source modifiers from packed instructions and insert the

  // appropriate source modifers and operands into the unpacked instructions.

  void addOperandAndMods(MachineInstrBuilder &NewMI, unsigned SrcMods,

                         bool IsHiBits, const MachineOperand &SrcMO);


public:

  bool run(MachineFunction &MF);

};


class SIPreEmitPeepholeLegacy : public MachineFunctionPass {

public:

  static char ID;


  SIPreEmitPeepholeLegacy() : MachineFunctionPass(ID) {

    initializeSIPreEmitPeepholeLegacyPass(*PassRegistry::getPassRegistry());

  }


  bool runOnMachineFunction(MachineFunction &MF) override {

    return SIPreEmitPeephole().run(MF);

  }

};


} // End anonymous namespace.


INITIALIZE_PASS(SIPreEmitPeepholeLegacy, DEBUG_TYPE,

                "SI peephole optimizations", false, false)


char SIPreEmitPeepholeLegacy::ID = 0;


char &llvm::SIPreEmitPeepholeID = SIPreEmitPeepholeLegacy::ID;


bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {

  // Match:

  // sreg = -1 or 0

  // vcc = S_AND_B64 exec, sreg or S_ANDN2_B64 exec, sreg

  // S_CBRANCH_VCC[N]Z

  // =>

  // S_CBRANCH_EXEC[N]Z

  // We end up with this pattern sometimes after basic block placement.

  // It happens while combining a block which assigns -1 or 0 to a saved mask

  // and another block which consumes that saved mask and then a branch.

  //

  // While searching this also performs the following substitution:

  // vcc = V_CMP

  // vcc = S_AND exec, vcc

  // S_CBRANCH_VCC[N]Z

  // =>

  // vcc = V_CMP

  // S_CBRANCH_VCC[N]Z


  bool Changed = false;

  MachineBasicBlock &MBB = *MI.getParent();

  const GCNSubtarget &ST = MBB.getParent()->getSubtarget<GCNSubtarget>();

  const bool IsWave32 = ST.isWave32();

  const unsigned CondReg = TRI->getVCC();

  const unsigned ExecReg = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;

  const unsigned And = IsWave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;

  const unsigned AndN2 = IsWave32 ? AMDGPU::S_ANDN2_B32 : AMDGPU::S_ANDN2_B64;

  const unsigned Mov = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;


  MachineBasicBlock::reverse_iterator A = MI.getReverseIterator(),

                                      E = MBB.rend();

  bool ReadsCond = false;

  unsigned Threshold = 5;

  for (++A; A != E; ++A) {

    if (!--Threshold)

      return false;

    if (A->modifiesRegister(ExecReg, TRI))

      return false;

    if (A->modifiesRegister(CondReg, TRI)) {

      if (!A->definesRegister(CondReg, TRI) ||

          (A->getOpcode() != And && A->getOpcode() != AndN2))

        return false;

      break;

    }

    ReadsCond |= A->readsRegister(CondReg, TRI);

  }

  if (A == E)

    return false;


  MachineOperand &Op1 = A->getOperand(1);

  MachineOperand &Op2 = A->getOperand(2);

  if (Op1.getReg() != ExecReg && Op2.isReg() && Op2.getReg() == ExecReg) {

    TII->commuteInstruction(*A);

    Changed = true;

  }

  if (Op1.getReg() != ExecReg)

    return Changed;

  if (Op2.isImm() && !(Op2.getImm() == -1 || Op2.getImm() == 0))

    return Changed;


  int64_t MaskValue = 0;

  Register SReg;

  if (Op2.isReg()) {

    SReg = Op2.getReg();

    auto M = std::next(A);

    bool ReadsSreg = false;

    bool ModifiesExec = false;

    for (; M != E; ++M) {

      if (M->definesRegister(SReg, TRI))

        break;

      if (M->modifiesRegister(SReg, TRI))

        return Changed;

      ReadsSreg |= M->readsRegister(SReg, TRI);

      ModifiesExec |= M->modifiesRegister(ExecReg, TRI);

    }

    if (M == E)

      return Changed;

    // If SReg is VCC and SReg definition is a VALU comparison.

    // This means S_AND with EXEC is not required.

    // Erase the S_AND and return.

    // Note: isVOPC is used instead of isCompare to catch V_CMP_CLASS

    if (A->getOpcode() == And && SReg == CondReg && !ModifiesExec &&

        TII->isVOPC(*M)) {

      A->eraseFromParent();

      return true;

    }

    if (!M->isMoveImmediate() || !M->getOperand(1).isImm() ||

        (M->getOperand(1).getImm() != -1 && M->getOperand(1).getImm() != 0))

      return Changed;

    MaskValue = M->getOperand(1).getImm();

    // First if sreg is only used in the AND instruction fold the immediate

    // into the AND.

    if (!ReadsSreg && Op2.isKill()) {

      A->getOperand(2).ChangeToImmediate(MaskValue);

      M->eraseFromParent();

    }

  } else if (Op2.isImm()) {

    MaskValue = Op2.getImm();

  } else {

    llvm_unreachable("Op2 must be register or immediate");

  }


  // Invert mask for s_andn2

  assert(MaskValue == 0 || MaskValue == -1);

  if (A->getOpcode() == AndN2)

    MaskValue = ~MaskValue;


  if (!ReadsCond && A->registerDefIsDead(AMDGPU::SCC, /*TRI=*/nullptr)) {

    if (!MI.killsRegister(CondReg, TRI)) {

      // Replace AND with MOV

      if (MaskValue == 0) {

        BuildMI(*A->getParent(), *A, A->getDebugLoc(), TII->get(Mov), CondReg)

            .addImm(0);

      } else {

        BuildMI(*A->getParent(), *A, A->getDebugLoc(), TII->get(Mov), CondReg)

            .addReg(ExecReg);

      }

    }

    // Remove AND instruction

    A->eraseFromParent();

  }


  bool IsVCCZ = MI.getOpcode() == AMDGPU::S_CBRANCH_VCCZ;

  if (SReg == ExecReg) {

    // EXEC is updated directly

    if (IsVCCZ) {

      MI.eraseFromParent();

      return true;

    }

    MI.setDesc(TII->get(AMDGPU::S_BRANCH));

  } else if (IsVCCZ && MaskValue == 0) {

    // Will always branch

    // Remove all successors shadowed by new unconditional branch

    MachineBasicBlock *Parent = MI.getParent();

    SmallVector<MachineInstr *, 4> ToRemove;

    bool Found = false;

    for (MachineInstr &Term : Parent->terminators()) {

      if (Found) {

        if (Term.isBranch())

          ToRemove.push_back(&Term);

      } else {

        Found = Term.isIdenticalTo(MI);

      }

    }

    assert(Found && "conditional branch is not terminator");

    for (auto *BranchMI : ToRemove) {

      MachineOperand &Dst = BranchMI->getOperand(0);

      assert(Dst.isMBB() && "destination is not basic block");

      Parent->removeSuccessor(Dst.getMBB());

      BranchMI->eraseFromParent();

    }


    if (MachineBasicBlock *Succ = Parent->getFallThrough()) {

      Parent->removeSuccessor(Succ);

    }


    // Rewrite to unconditional branch

    MI.setDesc(TII->get(AMDGPU::S_BRANCH));

  } else if (!IsVCCZ && MaskValue == 0) {

    // Will never branch

    MachineOperand &Dst = MI.getOperand(0);

    assert(Dst.isMBB() && "destination is not basic block");

    MI.getParent()->removeSuccessor(Dst.getMBB());

    MI.eraseFromParent();

    return true;

  } else if (MaskValue == -1) {

    // Depends only on EXEC

    MI.setDesc(

        TII->get(IsVCCZ ? AMDGPU::S_CBRANCH_EXECZ : AMDGPU::S_CBRANCH_EXECNZ));

  }


  MI.removeOperand(MI.findRegisterUseOperandIdx(CondReg, TRI, false /*Kill*/));

  MI.addImplicitDefUseOperands(*MBB.getParent());


  return true;

}


bool SIPreEmitPeephole::optimizeSetGPR(MachineInstr &First,

                                       MachineInstr &MI) const {

  MachineBasicBlock &MBB = *MI.getParent();

  const MachineFunction &MF = *MBB.getParent();

  const MachineRegisterInfo &MRI = MF.getRegInfo();

  MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::src0);

  Register IdxReg = Idx->isReg() ? Idx->getReg() : Register();

  SmallVector<MachineInstr *, 4> ToRemove;

  bool IdxOn = true;


  if (!MI.isIdenticalTo(First))

    return false;


  // Scan back to find an identical S_SET_GPR_IDX_ON

  for (MachineBasicBlock::instr_iterator I = std::next(First.getIterator()),

                                         E = MI.getIterator();

       I != E; ++I) {

    if (I->isBundle() || I->isDebugInstr())

      continue;

    switch (I->getOpcode()) {

    case AMDGPU::S_SET_GPR_IDX_MODE:

      return false;

    case AMDGPU::S_SET_GPR_IDX_OFF:

      IdxOn = false;

      ToRemove.push_back(&*I);

      break;

    default:

      if (I->modifiesRegister(AMDGPU::M0, TRI))

        return false;

      if (IdxReg && I->modifiesRegister(IdxReg, TRI))

        return false;

      if (llvm::any_of(I->operands(), [&MRI, this](const MachineOperand &MO) {

            return MO.isReg() && TRI->isVectorRegister(MRI, MO.getReg());

          })) {

        // The only exception allowed here is another indirect vector move

        // with the same mode.

        if (!IdxOn || !(I->getOpcode() == AMDGPU::V_MOV_B32_indirect_write ||

                        I->getOpcode() == AMDGPU::V_MOV_B32_indirect_read))

          return false;

      }

    }

  }


  MI.eraseFromBundle();

  for (MachineInstr *RI : ToRemove)

    RI->eraseFromBundle();

  return true;

}


bool SIPreEmitPeephole::getBlockDestinations(

    MachineBasicBlock &SrcMBB, MachineBasicBlock *&TrueMBB,

    MachineBasicBlock *&FalseMBB, SmallVectorImpl<MachineOperand> &Cond) {

  if (TII->analyzeBranch(SrcMBB, TrueMBB, FalseMBB, Cond))

    return false;


  if (!FalseMBB)

    FalseMBB = SrcMBB.getNextNode();


  return true;

}


namespace {

class BranchWeightCostModel {

  const SIInstrInfo &TII;

  const TargetSchedModel &SchedModel;

  BranchProbability BranchProb;

  static constexpr uint64_t BranchNotTakenCost = 1;

  uint64_t BranchTakenCost;

  uint64_t ThenCyclesCost = 0;


public:

  BranchWeightCostModel(const SIInstrInfo &TII, const MachineInstr &Branch,

                        const MachineBasicBlock &Succ)

      : TII(TII), SchedModel(TII.getSchedModel()) {

    const MachineBasicBlock &Head = *Branch.getParent();

    const auto *FromIt = find(Head.successors(), &Succ);

    assert(FromIt != Head.succ_end());


    BranchProb = Head.getSuccProbability(FromIt);

    if (BranchProb.isUnknown())

      BranchProb = BranchProbability::getZero();

    BranchTakenCost = SchedModel.computeInstrLatency(&Branch);

  }


  bool isProfitable(const MachineInstr &MI) {

    if (TII.isWaitcnt(MI.getOpcode()))

      return false;


    ThenCyclesCost += SchedModel.computeInstrLatency(&MI);


    // Consider `P = N/D` to be the probability of execz being false (skipping

    // the then-block) The transformation is profitable if always executing the

    // 'then' block is cheaper than executing sometimes 'then' and always

    // executing s_cbranch_execz:

    // * ThenCost <= P*ThenCost + (1-P)*BranchTakenCost + P*BranchNotTakenCost

    // * (1-P) * ThenCost <= (1-P)*BranchTakenCost + P*BranchNotTakenCost

    // * (D-N)/D * ThenCost <= (D-N)/D * BranchTakenCost + N/D *

    // BranchNotTakenCost

    uint64_t Numerator = BranchProb.getNumerator();

    uint64_t Denominator = BranchProb.getDenominator();

    return (Denominator - Numerator) * ThenCyclesCost <=

           ((Denominator - Numerator) * BranchTakenCost +

            Numerator * BranchNotTakenCost);

  }

};


bool SIPreEmitPeephole::mustRetainExeczBranch(

    const MachineInstr &Branch, const MachineBasicBlock &From,

    const MachineBasicBlock &To) const {

  assert(is_contained(Branch.getParent()->successors(), &From));

  BranchWeightCostModel CostModel{*TII, Branch, From};


  const MachineFunction *MF = From.getParent();

  for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end();

       MBBI != End && MBBI != ToI; ++MBBI) {

    const MachineBasicBlock &MBB = *MBBI;


    for (const MachineInstr &MI : MBB) {

      // When a uniform loop is inside non-uniform control flow, the branch

      // leaving the loop might never be taken when EXEC = 0.

      // Hence we should retain cbranch out of the loop lest it become infinite.

      if (MI.isConditionalBranch())

        return true;


      if (MI.isUnconditionalBranch() &&

          TII->getBranchDestBlock(MI) != MBB.getNextNode())

        return true;


      if (MI.isMetaInstruction())

        continue;


      if (TII->hasUnwantedEffectsWhenEXECEmpty(MI))

        return true;


      if (!CostModel.isProfitable(MI))

        return true;

    }

  }


  return false;

}

} // namespace


// Returns true if the skip branch instruction is removed.

bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,

                                          MachineBasicBlock &SrcMBB) {


  if (!TII->getSchedModel().hasInstrSchedModel())

    return false;


  MachineBasicBlock *TrueMBB = nullptr;

  MachineBasicBlock *FalseMBB = nullptr;

  SmallVector<MachineOperand, 1> Cond;


  if (!getBlockDestinations(SrcMBB, TrueMBB, FalseMBB, Cond))

    return false;


  // Consider only the forward branches.

  if (SrcMBB.getNumber() >= TrueMBB->getNumber())

    return false;


  // Consider only when it is legal and profitable

  if (mustRetainExeczBranch(MI, *FalseMBB, *TrueMBB))

    return false;


  LLVM_DEBUG(dbgs() << "Removing the execz branch: " << MI);

  MI.eraseFromParent();

  SrcMBB.removeSuccessor(TrueMBB);


  return true;

}


bool SIPreEmitPeephole::canUnpackingClobberRegister(const MachineInstr &MI) {

  unsigned OpCode = MI.getOpcode();

  Register DstReg = MI.getOperand(0).getReg();

  // Only the first register in the register pair needs to be checked due to the

  // unpacking order. Packed instructions are unpacked such that the lower 32

  // bits (i.e., the first register in the pair) are written first. This can

  // introduce dependencies if the first register is written in one instruction

  // and then read as part of the higher 32 bits in the subsequent instruction.

  // Such scenarios can arise due to specific combinations of op_sel and

  // op_sel_hi modifiers.

  Register UnpackedDstReg = TRI->getSubReg(DstReg, AMDGPU::sub0);


  const MachineOperand *Src0MO = TII->getNamedOperand(MI, AMDGPU::OpName::src0);

  if (Src0MO && Src0MO->isReg()) {

    Register SrcReg0 = Src0MO->getReg();

    unsigned Src0Mods =

        TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm();

    Register HiSrc0Reg = (Src0Mods & SISrcMods::OP_SEL_1)

                             ? TRI->getSubReg(SrcReg0, AMDGPU::sub1)

                             : TRI->getSubReg(SrcReg0, AMDGPU::sub0);

    // Check if the register selected by op_sel_hi is the same as the first

    // register in the destination register pair.

    if (TRI->regsOverlap(UnpackedDstReg, HiSrc0Reg))

      return true;

  }


  const MachineOperand *Src1MO = TII->getNamedOperand(MI, AMDGPU::OpName::src1);

  if (Src1MO && Src1MO->isReg()) {

    Register SrcReg1 = Src1MO->getReg();

    unsigned Src1Mods =

        TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm();

    Register HiSrc1Reg = (Src1Mods & SISrcMods::OP_SEL_1)

                             ? TRI->getSubReg(SrcReg1, AMDGPU::sub1)

                             : TRI->getSubReg(SrcReg1, AMDGPU::sub0);

    if (TRI->regsOverlap(UnpackedDstReg, HiSrc1Reg))

      return true;

  }


  // Applicable for packed instructions with 3 source operands, such as

  // V_PK_FMA.

  if (AMDGPU::hasNamedOperand(OpCode, AMDGPU::OpName::src2)) {

    const MachineOperand *Src2MO =

        TII->getNamedOperand(MI, AMDGPU::OpName::src2);

    if (Src2MO && Src2MO->isReg()) {

      Register SrcReg2 = Src2MO->getReg();

      unsigned Src2Mods =

          TII->getNamedOperand(MI, AMDGPU::OpName::src2_modifiers)->getImm();

      Register HiSrc2Reg = (Src2Mods & SISrcMods::OP_SEL_1)

                               ? TRI->getSubReg(SrcReg2, AMDGPU::sub1)

                               : TRI->getSubReg(SrcReg2, AMDGPU::sub0);

      if (TRI->regsOverlap(UnpackedDstReg, HiSrc2Reg))

        return true;

    }

  }

  return false;

}


uint16_t SIPreEmitPeephole::mapToUnpackedOpcode(MachineInstr &I) {

  unsigned Opcode = I.getOpcode();

  // Use 64 bit encoding to allow use of VOP3 instructions.

  // VOP3 e64 instructions allow source modifiers

  // e32 instructions don't allow source modifiers.

  switch (Opcode) {

  case AMDGPU::V_PK_ADD_F32:

    return AMDGPU::V_ADD_F32_e64;

  case AMDGPU::V_PK_MUL_F32:

    return AMDGPU::V_MUL_F32_e64;

  case AMDGPU::V_PK_FMA_F32:

    return AMDGPU::V_FMA_F32_e64;

  default:

    return std::numeric_limits<uint16_t>::max();

  }

  llvm_unreachable("Fully covered switch");

}


void SIPreEmitPeephole::addOperandAndMods(MachineInstrBuilder &NewMI,

                                          unsigned SrcMods, bool IsHiBits,

                                          const MachineOperand &SrcMO) {

  unsigned NewSrcMods = 0;

  unsigned NegModifier = IsHiBits ? SISrcMods::NEG_HI : SISrcMods::NEG;

  unsigned OpSelModifier = IsHiBits ? SISrcMods::OP_SEL_1 : SISrcMods::OP_SEL_0;

  // Packed instructions (VOP3P) do not support ABS. Hence, no checks are done

  // for ABS modifiers.

  // If NEG or NEG_HI is true, we need to negate the corresponding 32 bit

  // lane.

  // NEG_HI shares the same bit position with ABS. But packed instructions do

  // not support ABS. Therefore, NEG_HI must be translated to NEG source

  // modifier for the higher 32 bits. Unpacked VOP3 instructions support

  // ABS, but do not support NEG_HI. Therefore we need to explicitly add the

  // NEG modifier if present in the packed instruction.

  if (SrcMods & NegModifier)

    NewSrcMods |= SISrcMods::NEG;

  // Src modifiers. Only negative modifiers are added if needed. Unpacked

  // operations do not have op_sel, therefore it must be handled explicitly as

  // done below.

  NewMI.addImm(NewSrcMods);

  if (SrcMO.isImm()) {

    NewMI.addImm(SrcMO.getImm());

    return;

  }

  // If op_sel == 0, select register 0 of reg:sub0_sub1.

  Register UnpackedSrcReg = (SrcMods & OpSelModifier)

                                ? TRI->getSubReg(SrcMO.getReg(), AMDGPU::sub1)

                                : TRI->getSubReg(SrcMO.getReg(), AMDGPU::sub0);


  MachineOperand UnpackedSrcMO =

      MachineOperand::CreateReg(UnpackedSrcReg, /*isDef=*/false);

  if (SrcMO.isKill()) {

    // For each unpacked instruction, mark its source registers as killed if the

    // corresponding source register in the original packed instruction was

    // marked as killed.

    //

    // Exception:

    // If the op_sel and op_sel_hi modifiers require both unpacked instructions

    // to use the same register (e.g., due to overlapping access to low/high

    // bits of the same packed register), then only the *second* (latter)

    // instruction should mark the register as killed. This is because the

    // second instruction handles the higher bits and is effectively the last

    // user of the full register pair.


    bool OpSel = SrcMods & SISrcMods::OP_SEL_0;

    bool OpSelHi = SrcMods & SISrcMods::OP_SEL_1;

    bool KillState = true;

    if ((OpSel == OpSelHi) && !IsHiBits)

      KillState = false;

    UnpackedSrcMO.setIsKill(KillState);

  }

  NewMI.add(UnpackedSrcMO);

}


void SIPreEmitPeephole::collectUnpackingCandidates(

    MachineInstr &BeginMI, SetVector<MachineInstr *> &InstrsToUnpack,

    uint16_t NumMFMACycles) {

  auto *BB = BeginMI.getParent();

  auto E = BB->end();

  int TotalCyclesBetweenCandidates = 0;

  auto SchedModel = TII->getSchedModel();

  Register MFMADef = BeginMI.getOperand(0).getReg();


  for (auto I = std::next(BeginMI.getIterator()); I != E; ++I) {

    MachineInstr &Instr = *I;

    uint16_t UnpackedOpCode = mapToUnpackedOpcode(Instr);

    bool IsUnpackable =

        !(UnpackedOpCode == std::numeric_limits<uint16_t>::max());

    if (Instr.isMetaInstruction())

      continue;

    if ((Instr.isTerminator()) ||

        (TII->isNeverCoissue(Instr) && !IsUnpackable) ||

        (SIInstrInfo::modifiesModeRegister(Instr) &&

         Instr.modifiesRegister(AMDGPU::EXEC, TRI)))

      return;


    const MCSchedClassDesc *InstrSchedClassDesc =

        SchedModel.resolveSchedClass(&Instr);

    uint16_t Latency =

        SchedModel.getWriteProcResBegin(InstrSchedClassDesc)->ReleaseAtCycle;

    TotalCyclesBetweenCandidates += Latency;


    if (TotalCyclesBetweenCandidates >= NumMFMACycles - 1)

      return;

    // Identify register dependencies between those used by the MFMA

    // instruction and the following packed instructions. Also checks for

    // transitive dependencies between the MFMA def and candidate instruction

    // def and uses. Conservatively ensures that we do not incorrectly

    // read/write registers.

    for (const MachineOperand &InstrMO : Instr.operands()) {

      if (!InstrMO.isReg() || !InstrMO.getReg().isValid())

        continue;

      if (TRI->regsOverlap(MFMADef, InstrMO.getReg()))

        return;

    }

    if (!IsUnpackable)

      continue;


    if (canUnpackingClobberRegister(Instr))

      return;

    // If it's a packed instruction, adjust latency: remove the packed

    // latency, add latency of two unpacked instructions (currently estimated

    // as 2 cycles).

    TotalCyclesBetweenCandidates -= Latency;

    // TODO: improve latency handling based on instruction modeling.

    TotalCyclesBetweenCandidates += 2;

    // Subtract 1 to account for MFMA issue latency.

    if (TotalCyclesBetweenCandidates < NumMFMACycles - 1)

      InstrsToUnpack.insert(&Instr);

  }

}


void SIPreEmitPeephole::performF32Unpacking(MachineInstr &I) {

  const MachineOperand &DstOp = I.getOperand(0);


  uint16_t UnpackedOpcode = mapToUnpackedOpcode(I);

  assert(UnpackedOpcode != std::numeric_limits<uint16_t>::max() &&

         "Unsupported Opcode");


  MachineInstrBuilder Op0LOp1L =

      createUnpackedMI(I, UnpackedOpcode, /*IsHiBits=*/false);

  MachineOperand LoDstOp = Op0LOp1L->getOperand(0);


  LoDstOp.setIsUndef(DstOp.isUndef());


  MachineInstrBuilder Op0HOp1H =

      createUnpackedMI(I, UnpackedOpcode, /*IsHiBits=*/true);

  MachineOperand HiDstOp = Op0HOp1H->getOperand(0);


  uint32_t IFlags = I.getFlags();

  Op0LOp1L->setFlags(IFlags);

  Op0HOp1H->setFlags(IFlags);

  LoDstOp.setIsRenamable(DstOp.isRenamable());

  HiDstOp.setIsRenamable(DstOp.isRenamable());


  I.eraseFromParent();

}


MachineInstrBuilder SIPreEmitPeephole::createUnpackedMI(MachineInstr &I,

                                                        uint16_t UnpackedOpcode,

                                                        bool IsHiBits) {

  MachineBasicBlock &MBB = *I.getParent();

  const DebugLoc &DL = I.getDebugLoc();

  const MachineOperand *SrcMO0 = TII->getNamedOperand(I, AMDGPU::OpName::src0);

  const MachineOperand *SrcMO1 = TII->getNamedOperand(I, AMDGPU::OpName::src1);

  Register DstReg = I.getOperand(0).getReg();

  unsigned OpCode = I.getOpcode();

  Register UnpackedDstReg = IsHiBits ? TRI->getSubReg(DstReg, AMDGPU::sub1)

                                     : TRI->getSubReg(DstReg, AMDGPU::sub0);


  int64_t ClampVal = TII->getNamedOperand(I, AMDGPU::OpName::clamp)->getImm();

  unsigned Src0Mods =

      TII->getNamedOperand(I, AMDGPU::OpName::src0_modifiers)->getImm();

  unsigned Src1Mods =

      TII->getNamedOperand(I, AMDGPU::OpName::src1_modifiers)->getImm();


  MachineInstrBuilder NewMI = BuildMI(MBB, I, DL, TII->get(UnpackedOpcode));

  NewMI.addDef(UnpackedDstReg); // vdst

  addOperandAndMods(NewMI, Src0Mods, IsHiBits, *SrcMO0);

  addOperandAndMods(NewMI, Src1Mods, IsHiBits, *SrcMO1);


  if (AMDGPU::hasNamedOperand(OpCode, AMDGPU::OpName::src2)) {

    const MachineOperand *SrcMO2 =

        TII->getNamedOperand(I, AMDGPU::OpName::src2);

    unsigned Src2Mods =

        TII->getNamedOperand(I, AMDGPU::OpName::src2_modifiers)->getImm();

    addOperandAndMods(NewMI, Src2Mods, IsHiBits, *SrcMO2);

  }

  NewMI.addImm(ClampVal); // clamp

  // Packed instructions do not support output modifiers. safe to assign them 0

  // for this use case

  NewMI.addImm(0); // omod

  return NewMI;

}


PreservedAnalyses


llvm::SIPreEmitPeepholePass::run(MachineFunction &MF,

                                 MachineFunctionAnalysisManager &MFAM) {

  if (!SIPreEmitPeephole().run(MF))

    return PreservedAnalyses::all();


  return getMachineFunctionPassPreservedAnalyses();

}


bool SIPreEmitPeephole::run(MachineFunction &MF) {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  TII = ST.getInstrInfo();

  TRI = &TII->getRegisterInfo();

  bool Changed = false;


  MF.RenumberBlocks();


  for (MachineBasicBlock &MBB : MF) {

    MachineBasicBlock::iterator TermI = MBB.getFirstTerminator();

    // Check first terminator for branches to optimize

    if (TermI != MBB.end()) {

      MachineInstr &MI = *TermI;

      switch (MI.getOpcode()) {

      case AMDGPU::S_CBRANCH_VCCZ:

      case AMDGPU::S_CBRANCH_VCCNZ:

        Changed |= optimizeVccBranch(MI);

        break;

      case AMDGPU::S_CBRANCH_EXECZ:

        Changed |= removeExeczBranch(MI, MBB);

        break;

      }

    }


    if (!ST.hasVGPRIndexMode())

      continue;


    MachineInstr *SetGPRMI = nullptr;

    const unsigned Threshold = 20;

    unsigned Count = 0;

    // Scan the block for two S_SET_GPR_IDX_ON instructions to see if a

    // second is not needed. Do expensive checks in the optimizeSetGPR()

    // and limit the distance to 20 instructions for compile time purposes.

    // Note: this needs to work on bundles as S_SET_GPR_IDX* instructions

    // may be bundled with the instructions they modify.

    for (auto &MI : make_early_inc_range(MBB.instrs())) {

      if (Count == Threshold)

        SetGPRMI = nullptr;

      else

        ++Count;


      if (MI.getOpcode() != AMDGPU::S_SET_GPR_IDX_ON)

        continue;


      Count = 0;

      if (!SetGPRMI) {

        SetGPRMI = &MI;

        continue;

      }


      if (optimizeSetGPR(*SetGPRMI, MI))

        Changed = true;

      else

        SetGPRMI = &MI;

    }

  }


  // TODO: Fold this into previous block, if possible. Evaluate and handle any

  // side effects.


  // Perform the extra MF scans only for supported archs

  if (!ST.hasGFX940Insts())

    return Changed;

  for (MachineBasicBlock &MBB : MF) {

    // Unpack packed instructions overlapped by MFMAs. This allows the

    // compiler to co-issue unpacked instructions with MFMA

    auto SchedModel = TII->getSchedModel();

    SetVector<MachineInstr *> InstrsToUnpack;

    for (auto &MI : make_early_inc_range(MBB.instrs())) {

      if (!SIInstrInfo::isMFMA(MI))

        continue;

      const MCSchedClassDesc *SchedClassDesc =

          SchedModel.resolveSchedClass(&MI);

      uint16_t NumMFMACycles =

          SchedModel.getWriteProcResBegin(SchedClassDesc)->ReleaseAtCycle;

      collectUnpackingCandidates(MI, InstrsToUnpack, NumMFMACycles);

    }

    for (MachineInstr *MI : InstrsToUnpack) {

      performF32Unpacking(*MI);

    }

  }


  return Changed;

}

MRI
unsigned const MachineRegisterInfo * MRI
Definition AArch64AdvSIMDScalarPass.cpp:103

for
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
Definition AArch64ExpandPseudoInsts.cpp:123

assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

TII
const TargetInstrInfo & TII
Definition AArch64RedundantCondBranchPass.cpp:49

AMDGPUMCTargetDesc.h
Provides AMDGPU specific target descriptions.

AMDGPU.h

ToRemove
ReachingDefInfo InstSet & ToRemove
Definition ARMLowOverheadLoops.cpp:527

MBB
MachineBasicBlock & MBB
Definition ARMSLSHardening.cpp:71

DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition ARMSLSHardening.cpp:73

MBBI
MachineBasicBlock MachineBasicBlock::iterator MBBI
Definition ARMSLSHardening.cpp:72

BranchProbability.h

A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

GCNSubtarget.h
AMD GCN specific subclass of TargetSubtarget.

DEBUG_TYPE
#define DEBUG_TYPE
Definition GenericCycleImpl.h:31

MI
IRTranslator LLVM IR MI
Definition IRTranslator.cpp:110

I
#define I(x, y, z)
Definition MD5.cpp:57

MachineFunctionPass.h

TRI
Register const TargetRegisterInfo * TRI
Definition MachineSink.cpp:2118

Register
Promote Memory to Register
Definition Mem2Reg.cpp:110

if
if(PassOpts->AAPipeline)
Definition PassBuilderBindings.cpp:64

INITIALIZE_PASS
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56

Cond
const SmallVectorImpl< MachineOperand > & Cond
Definition RISCVRedundantCopyElimination.cpp:71

SetVector.h
This file implements a set that has insertion order iteration characteristics.

isProfitable
static bool isProfitable(const StableFunctionMap::StableFunctionEntries &SFS)
Definition StableFunctionMap.cpp:203

LLVM_DEBUG
#define LLVM_DEBUG(...)
Definition Debug.h:114

TargetSchedule.h

char

llvm::BranchProbability::getDenominator
static uint32_t getDenominator()
Definition BranchProbability.h:75

llvm::BranchProbability::getNumerator
uint32_t getNumerator() const
Definition BranchProbability.h:74

llvm::BranchProbability::isUnknown
bool isUnknown() const
Definition BranchProbability.h:49

llvm::BranchProbability::getZero
static BranchProbability getZero()
Definition BranchProbability.h:51

llvm::GCNSubtarget
Definition GCNSubtarget.h:34

llvm::MCInstrInfo::get
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90

llvm::MachineBasicBlock
Definition MachineBasicBlock.h:122

llvm::MachineBasicBlock::getFallThrough
LLVM_ABI MachineBasicBlock * getFallThrough(bool JumpToFallThrough=true)
Return the fallthrough block if the block can implicitly transfer control to the block after it by fa...
Definition MachineBasicBlock.cpp:995

llvm::MachineBasicBlock::getSuccProbability
LLVM_ABI BranchProbability getSuccProbability(const_succ_iterator Succ) const
Return probability of the edge from this block to MBB.
Definition MachineBasicBlock.cpp:1612

llvm::MachineBasicBlock::getNumber
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
Definition MachineBasicBlock.h:1267

llvm::MachineBasicBlock::succ_end
succ_iterator succ_end()
Definition MachineBasicBlock.h:445

llvm::MachineBasicBlock::instrs
instr_range instrs()
Definition MachineBasicBlock.h:372

llvm::MachineBasicBlock::removeSuccessor
LLVM_ABI void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
Definition MachineBasicBlock.cpp:854

llvm::MachineBasicBlock::instr_iterator
Instructions::iterator instr_iterator
Definition MachineBasicBlock.h:336

llvm::MachineBasicBlock::reverse_iterator
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
Definition MachineBasicBlock.h:343

llvm::MachineBasicBlock::end
iterator end()
Definition MachineBasicBlock.h:379

llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition MachineBasicBlock.h:323

llvm::MachineBasicBlock::terminators
iterator_range< iterator > terminators()
Definition MachineBasicBlock.h:397

llvm::MachineBasicBlock::successors
iterator_range< succ_iterator > successors()
Definition MachineBasicBlock.h:466

llvm::MachineBasicBlock::iterator
MachineInstrBundleIterator< MachineInstr > iterator
Definition MachineBasicBlock.h:341

llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition MachineFunctionPass.h:31

llvm::MachineFunction
Definition MachineFunction.h:286

llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition MachineFunction.h:762

llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition MachineFunction.h:772

llvm::MachineFunction::end
iterator end()
Definition MachineFunction.h:986

llvm::MachineFunction::RenumberBlocks
void RenumberBlocks(MachineBasicBlock *MBBFrom=nullptr)
RenumberBlocks - This discards all of the MachineBasicBlock numbers and recomputes them.
Definition MachineFunction.cpp:343

llvm::MachineFunction::const_iterator
BasicBlockListType::const_iterator const_iterator
Definition MachineFunction.h:967

llvm::MachineInstrBuilder
Definition MachineInstrBuilder.h:113

llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition MachineInstrBuilder.h:175

llvm::MachineInstrBuilder::add
const MachineInstrBuilder & add(const MachineOperand &MO) const
Definition MachineInstrBuilder.h:268

llvm::MachineInstrBuilder::addDef
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Definition MachineInstrBuilder.h:160

llvm::MachineInstr
Representation of each machine instruction.
Definition MachineInstr.h:72

llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition MachineInstr.h:370

llvm::MachineInstr::setFlags
void setFlags(unsigned flags)
Definition MachineInstr.h:433

llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition MachineInstr.h:606

llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition MachineOperand.h:49

llvm::MachineOperand::isUndef
bool isUndef() const
Definition MachineOperand.h:407

llvm::MachineOperand::getImm
int64_t getImm() const
Definition MachineOperand.h:560

llvm::MachineOperand::isKill
bool isKill() const
Definition MachineOperand.h:402

llvm::MachineOperand::setIsRenamable
LLVM_ABI void setIsRenamable(bool Val=true)
Definition MachineOperand.cpp:141

llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition MachineOperand.h:331

llvm::MachineOperand::isImm
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Definition MachineOperand.h:333

llvm::MachineOperand::setIsKill
void setIsKill(bool Val=true)
Definition MachineOperand.h:523

llvm::MachineOperand::isRenamable
LLVM_ABI bool isRenamable() const
isRenamable - Returns true if this register may be renamed, i.e.
Definition MachineOperand.cpp:123

llvm::MachineOperand::setIsUndef
void setIsUndef(bool Val=true)
Definition MachineOperand.h:534

llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition MachineOperand.h:372

llvm::MachineOperand::CreateReg
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
Definition MachineOperand.h:851

llvm::PassRegistry::getPassRegistry
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition PassRegistry.cpp:23

llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118

llvm::Register
Wrapper class representing virtual and physical registers.
Definition Register.h:20

llvm::SIInstrInfo
Definition SIInstrInfo.h:90

llvm::SIInstrInfo::isMFMA
static bool isMFMA(const MachineInstr &MI)
Definition SIInstrInfo.h:934

llvm::SIInstrInfo::modifiesModeRegister
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.q.
Definition SIInstrInfo.cpp:4485

llvm::SIPreEmitPeepholePass::run
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
Definition SIPreEmitPeephole.cpp:706

llvm::SIRegisterInfo
Definition SIRegisterInfo.h:40

llvm::SetVector
A vector that has set insertion semantics.
Definition SetVector.h:57

llvm::SetVector::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151

llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition SmallVector.h:574

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition SmallVector.h:1203

llvm::TargetInstrInfo::analyzeBranch
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
Definition TargetInstrInfo.h:710

llvm::TargetInstrInfo::getBranchDestBlock
virtual MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const
Definition TargetInstrInfo.h:667

llvm::TargetSchedModel::resolveSchedClass
LLVM_ABI const MCSchedClassDesc * resolveSchedClass(const MachineInstr *MI) const
Return the MCSchedClassDesc for this instruction.
Definition TargetSchedule.cpp:117

llvm::TargetSchedModel::getWriteProcResBegin
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
Definition TargetSchedule.h:146

llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition ilist_node.h:123

llvm::ilist_node_with_parent::getNextNode
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348

uint16_t

Changed
Changed
Definition ObjCARCOpts.cpp:2369

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition ErrorHandling.h:164

llvm::AMDGPU::VOP3PEncoding::OpSel
OpSel
Definition SIDefines.h:1063

llvm::AMDGPU::hasNamedOperand
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
Definition AMDGPUBaseInfo.h:414

llvm::ARM_MB::ST
@ ST
Definition ARMBaseInfo.h:73

llvm::ARM_PROC::IFlags
IFlags
Definition ARMBaseInfo.h:31

llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24

llvm::M68kBeads::Term
@ Term
Definition M68kBaseInfo.h:116

llvm::MCID::Branch
@ Branch
Definition MCInstrDesc.h:160

llvm::SISrcMods::OP_SEL_0
@ OP_SEL_0
Definition SIDefines.h:277

llvm::SISrcMods::NEG_HI
@ NEG_HI
Definition SIDefines.h:276

llvm::SISrcMods::OP_SEL_1
@ OP_SEL_1
Definition SIDefines.h:278

llvm::SISrcMods::NEG
@ NEG
Definition SIDefines.h:273

llvm::dwarf_linker::DebugSectionKind::DebugLoc
@ DebugLoc
Definition DWARFLinkerBase.h:34

llvm::dxil::OpCode
OpCode
Definition DXILConstants.h:18

llvm::rdf::Instr
NodeAddr< InstrNode * > Instr
Definition RDFGraph.h:389

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition AddressRanges.h:18

llvm::find
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751

llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition MachineInstrBuilder.h:391

llvm::Latency
@ Latency
Definition SIMachineScheduler.h:34

llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632

llvm::MachineFunctionAnalysisManager
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
Definition MachineFunctionAnalysisManager.h:24

llvm::getMachineFunctionPassPreservedAnalyses
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
Definition MachinePassManager.cpp:162

llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732

llvm::dbgs
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207

llvm::Count
FunctionAddr VTableAddr Count
Definition InstrProf.h:139

llvm::SmallVector
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
Definition SmallVector.h:1129

llvm::SIPreEmitPeepholeID
char & SIPreEmitPeepholeID

llvm::IRMemLocation::First
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74

llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897

llvm::initializeSIPreEmitPeepholeLegacyPass
void initializeSIPreEmitPeepholeLegacyPass(PassRegistry &)

llvm::MCWriteProcResEntry::ReleaseAtCycle
uint16_t ReleaseAtCycle
Cycle at which the resource will be released by an instruction, relatively to the cycle in which the ...
Definition MCSchedule.h:73

llvm::MIPatternMatch::And
Matching combinators.
Definition MIPatternMatch.h:314