#define DEBUG_TYPE "si-peephole-sdwa"
STATISTIC(NumSDWAPatternsFound,
          "Number of SDWA patterns found.");
STATISTIC(NumSDWAInstructionsPeepholed,
          "Number of instruction converted to SDWA.");
  bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
  SDWAOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp)
      : Target(TargetOp), Replaced(ReplacedOp) {
    assert(Target->isReg());
    assert(Replaced->isReg());
  }

  virtual ~SDWAOperand() = default;

  MachineRegisterInfo *getMRI() const {
    return &getParentInst()->getParent()->getParent()->getRegInfo();
  }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
using namespace AMDGPU::SDWA;
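// An SDWA source match: fold the replaced operand into the target
// instruction as a src with the given sub-dword selection and abs/neg/sext
// modifiers.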
class SDWASrcOperand : public SDWAOperand {
private:
  SdwaSel SrcSel;
  bool Abs;
  bool Neg;
  bool Sext;

public:
  SDWASrcOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
                 SdwaSel SrcSel_ = DWORD, bool Abs_ = false, bool Neg_ = false,
                 bool Sext_ = false)
      : SDWAOperand(TargetOp, ReplacedOp),
        SrcSel(SrcSel_), Abs(Abs_), Neg(Neg_), Sext(Sext_) {}
  SdwaSel getSrcSel() const { return SrcSel; }
  bool getAbs() const { return Abs; }
  bool getNeg() const { return Neg; }
  bool getSext() const { return Sext; }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
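// An SDWA destination match: write only the selected sub-dword of the target
// register, with DstUn saying what happens to the unselected lanes.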
class SDWADstOperand : public SDWAOperand {
private:
  SdwaSel DstSel;
  DstUnused DstUn;

public:
  SDWADstOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
                 SdwaSel DstSel_ = DWORD, DstUnused DstUn_ = UNUSED_PAD)
      : SDWAOperand(TargetOp, ReplacedOp), DstSel(DstSel_), DstUn(DstUn_) {}
  SdwaSel getDstSel() const { return DstSel; }
  DstUnused getDstUnused() const { return DstUn; }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
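// Like SDWADstOperand, but keeps the unselected lanes of a preserved register
// (the other v_or_b32 input) alive via UNUSED_PRESERVE.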
class SDWADstPreserveOperand : public SDWADstOperand {
private:
  MachineOperand *Preserve;

public:
  SDWADstPreserveOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
                         MachineOperand *PreserveOp, SdwaSel DstSel_ = DWORD)
      : SDWADstOperand(TargetOp, ReplacedOp, DstSel_, UNUSED_PRESERVE),
        Preserve(PreserveOp) {}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
char SIPeepholeSDWA::ID = 0;
FunctionPass *llvm::createSIPeepholeSDWAPass() {
  return new SIPeepholeSDWA();
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  case DWORD: OS << "DWORD"; break;
  OS << "SDWA src: " << *getTargetOperand()
     << " src_sel:" << getSrcSel()
     << " abs:" << getAbs() << " neg:" << getNeg()
     << " sext:" << getSext() << '\n';
  OS << "SDWA dst: " << *getTargetOperand()
     << " dst_sel:" << getDstSel()
     << " dst_unused:" << getDstUnused() << '\n';
  OS << "SDWA preserve dst: " << *getTargetOperand()
     << " dst_sel:" << getDstSel()
     << " preserve:" << *getPreservedOperand() << '\n';
  return LHS.isReg() &&
         RHS.isReg() &&
         LHS.getReg() == RHS.getReg() &&
         LHS.getSubReg() == RHS.getSubReg();
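// findSingleRegDef/findSingleRegUse return the unique def/use operand of Reg,
// or nullptr when it does not exist or is not unique.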
  if (!Reg->isReg() || !Reg->isDef())
    return nullptr;
  for (auto &DefMO : DefInstr->defs()) {
    if (DefMO.isReg() && DefMO.getReg() == Reg->getReg())
      return &DefMO;
  }
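// SDWASrcOperand::getSrcMods: read the src modifiers already present on MI
// for the operand being replaced, then merge in this operand's abs/neg/sext.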
  const auto *MI = SrcOp->getParent();
  if (TII->getNamedOperand(*MI, AMDGPU::OpName::src0) == SrcOp) {
    if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) {
      Mods = Mod->getImm();
    }
  } else if (TII->getNamedOperand(*MI, AMDGPU::OpName::src1) == SrcOp) {
    if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers)) {
      Mods = Mod->getImm();
    }
  }
  if (Abs || Neg) {
    assert(!Sext &&
           "Float and integer src modifiers can't be set simultaneously");
    Mods |= Abs ? SISrcMods::ABS : 0u;
    Mods ^= Neg ? SISrcMods::NEG : 0u;
  } else if (Sext) {
    Mods |= SISrcMods::SEXT;
  }

  return Mods;
  bool IsPreserveSrc = false;
  MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
  MachineOperand *SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel);
  MachineOperand *SrcMods =
      TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
  assert(Src && (Src->isReg() || Src->isImm()));
  if (!isSameReg(*Src, *getReplacedOperand())) {
    // If this is not src0 then it could be src1
    Src = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
    SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src1_sel);
    SrcMods = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);

    if (!Src ||
        !isSameReg(*Src, *getReplacedOperand())) {
      MachineOperand *DstUnused =
          TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);
      if (DstUnused &&
          DstUnused->getImm() == AMDGPU::SDWA::DstUnused::UNUSED_PRESERVE) {
        // Legal only when the tied src reads WORD_0 and the dst writes WORD_1.
        auto DstSel = static_cast<AMDGPU::SDWA::SdwaSel>(
            TII->getNamedImmOperand(MI, AMDGPU::OpName::dst_sel));
        if (DstSel == AMDGPU::SDWA::SdwaSel::WORD_1 &&
            getSrcSel() == AMDGPU::SDWA::SdwaSel::WORD_0) {
          IsPreserveSrc = true;
          auto DstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                                   AMDGPU::OpName::vdst);
          auto TiedIdx = MI.findTiedOperandIdx(DstIdx);
          Src = &MI.getOperand(TiedIdx);
        }
      }
  assert(Src && Src->isReg());
  if ((MI.getOpcode() == AMDGPU::V_FMAC_F16_sdwa ||
       MI.getOpcode() == AMDGPU::V_FMAC_F32_sdwa ||
       MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
       MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
      !isSameReg(*Src, *getReplacedOperand()))
    return false;

  assert(isSameReg(*Src, *getReplacedOperand()) &&
         (IsPreserveSrc || (SrcSel && SrcMods)));
  if (!IsPreserveSrc) {
    SrcSel->setImm(getSrcSel());
    SrcMods->setImm(getSrcMods(TII, Src));
  }
  getTargetOperand()->setIsKill(false);
  return true;
    if (&UseInst != ParentMI)
      return nullptr;
  if ((MI.getOpcode() == AMDGPU::V_FMAC_F16_sdwa ||
       MI.getOpcode() == AMDGPU::V_FMAC_F32_sdwa ||
       MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
       MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
      getDstSel() != AMDGPU::SDWA::DWORD)
    return false;

  MachineOperand *Operand = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
  assert(Operand && Operand->isReg() &&
         isSameReg(*Operand, *getReplacedOperand()));
  copyRegOperand(*Operand, *getTargetOperand());
  MachineOperand *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel);
  assert(DstSel);
  DstSel->setImm(getDstSel());
  getParentInst()->eraseFromParent();
  return true;
    getMRI()->clearKillFlags(MO.getReg());
  // Move MI right before the v_or_b32 it feeds.
  auto MBB = MI.getParent();
  MBB->remove(&MI);
  MBB->insert(getParentInst(), &MI);

  // Add an implicit use of the preserved register.
  MachineInstrBuilder MIB(*MBB->getParent(), MI);
  MIB.addReg(getPreservedOperand()->getReg(),
             RegState::ImplicitKill,
             getPreservedOperand()->getSubReg());

  // Tie dst to the implicit use.
  MI.tieOperands(AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst),
                 MI.getNumOperands() - 1);

  // Convert MI as any other SDWADstOperand and remove the v_or_b32.
  return SDWADstOperand::convertToSDWA(MI, TII);
std::optional<int64_t>
SIPeepholeSDWA::foldToImm(const MachineOperand &Op) const {
      const MachineInstr *DefInst = Def.getParent();
      if (!TII->isFoldableCopy(*DefInst))
        return std::nullopt;
std::unique_ptr<SDWAOperand>
SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
  unsigned Opcode = MI.getOpcode();
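  // Match one instruction against the shift/BFE/AND/OR patterns that encode
  // a sub-dword access.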
  switch (Opcode) {
  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_ASHRREV_I32_e32:
  case AMDGPU::V_LSHLREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_LSHLREV_B32_e64: {
    auto Imm = foldToImm(*Src0);
    if (!Imm)
      break;
    if (*Imm != 16 && *Imm != 24)
      break;

    if (Opcode == AMDGPU::V_LSHLREV_B32_e32 ||
        Opcode == AMDGPU::V_LSHLREV_B32_e64) {
      return std::make_unique<SDWADstOperand>(
          Dst, Src1, *Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD);
    }
    return std::make_unique<SDWASrcOperand>(
        Src1, Dst, *Imm == 16 ? WORD_1 : BYTE_3, false, false,
        Opcode != AMDGPU::V_LSHRREV_B32_e32 &&
        Opcode != AMDGPU::V_LSHRREV_B32_e64);
  }
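  // For 16-bit shifts only a shift amount of 8 is expressible: it selects
  // BYTE_1 of the low word.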
  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_LSHLREV_B16_e64: {
    auto Imm = foldToImm(*Src0);
    if (!Imm || *Imm != 8)
      break;

    if (Opcode == AMDGPU::V_LSHLREV_B16_e32 ||
        Opcode == AMDGPU::V_LSHLREV_B16_e64) {
      return std::make_unique<SDWADstOperand>(Dst, Src1, BYTE_1, UNUSED_PAD);
    }
    return std::make_unique<SDWASrcOperand>(
        Src1, Dst, BYTE_1, false, false,
        Opcode != AMDGPU::V_LSHRREV_B16_e32 &&
        Opcode != AMDGPU::V_LSHRREV_B16_e64);
  }
  case AMDGPU::V_BFE_I32_e64:
  case AMDGPU::V_BFE_U32_e64: {
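    // A bitfield extract maps onto a src_sel only when it starts on a byte
    // boundary and covers exactly a byte, word, or dword.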
    auto Offset = foldToImm(*Src1);
    if (!Offset)
      break;

    auto Width = foldToImm(*Src2);
    if (!Width)
      break;

    SdwaSel SrcSel = DWORD;
    if (*Offset == 0 && *Width == 8)
      SrcSel = BYTE_0;
    else if (*Offset == 0 && *Width == 16)
      SrcSel = WORD_0;
    else if (*Offset == 0 && *Width == 32)
      SrcSel = DWORD;
    else if (*Offset == 8 && *Width == 8)
      SrcSel = BYTE_1;
    else if (*Offset == 16 && *Width == 8)
      SrcSel = BYTE_2;
    else if (*Offset == 16 && *Width == 16)
      SrcSel = WORD_1;
    else if (*Offset == 24 && *Width == 8)
      SrcSel = BYTE_3;
    else
      break;

    return std::make_unique<SDWASrcOperand>(
        Src0, Dst, SrcSel, false, false, Opcode != AMDGPU::V_BFE_U32_e64);
  }
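  // v_and_b32 with a 0xffff or 0xff mask is a WORD_0 or BYTE_0 selection of
  // the other operand.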
  case AMDGPU::V_AND_B32_e32:
  case AMDGPU::V_AND_B32_e64: {
    auto *ValSrc = Src1;
    auto Imm = foldToImm(*Src0);
    if (!Imm) {
      Imm = foldToImm(*Src1);
      ValSrc = Src0;
    }
    if (!Imm || (*Imm != 0x0000ffff && *Imm != 0x000000ff))
      break;
    if (ValSrc->getReg().isPhysical() || Dst->getReg().isPhysical())
      break;

    return std::make_unique<SDWASrcOperand>(
        ValSrc, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0);
  }
  case AMDGPU::V_OR_B32_e32:
  case AMDGPU::V_OR_B32_e64: {
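    // v_or_b32 of two SDWA results can become a dst-preserve: both operands
    // must be single defs, and at least one must come from an SDWA
    // instruction.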
    using CheckRetType =
        std::optional<std::pair<MachineOperand *, MachineOperand *>>;
    auto CheckOROperandsForSDWA =
        [&](const MachineOperand *Op1, const MachineOperand *Op2) -> CheckRetType {
      if (!Op1 || !Op1->isReg() || !Op2 || !Op2->isReg())
        return CheckRetType(std::nullopt);

      MachineOperand *Op1Def = findSingleRegDef(Op1, MRI);
      if (!Op1Def)
        return CheckRetType(std::nullopt);

      MachineInstr *Op1Inst = Op1Def->getParent();
      if (!TII->isSDWA(*Op1Inst))
        return CheckRetType(std::nullopt);

      MachineOperand *Op2Def = findSingleRegDef(Op2, MRI);
      if (!Op2Def)
        return CheckRetType(std::nullopt);

      return CheckRetType(std::pair(Op1Def, Op2Def));
    };
    assert(OrSDWA && OrOther);
    auto Res = CheckOROperandsForSDWA(OrSDWA, OrOther);
    if (!Res) {
      OrSDWA = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
      OrOther = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
      assert(OrSDWA && OrOther);
      Res = CheckOROperandsForSDWA(OrSDWA, OrOther);
      if (!Res)
        break;
    }

    MachineOperand *OrSDWADef = Res->first;
    MachineOperand *OrOtherDef = Res->second;
    assert(OrSDWADef && OrOtherDef);
    MachineInstr *SDWAInst = OrSDWADef->getParent();
    MachineInstr *OtherInst = OrOtherDef->getParent();
    if (!TII->isSDWA(*OtherInst))
      break;

    SdwaSel DstSel = static_cast<SdwaSel>(
        TII->getNamedImmOperand(*SDWAInst, AMDGPU::OpName::dst_sel));
    SdwaSel OtherDstSel = static_cast<SdwaSel>(
        TII->getNamedImmOperand(*OtherInst, AMDGPU::OpName::dst_sel));
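    // The two writes may only be merged when their dst_sel lanes are
    // disjoint.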
    bool DstSelAgree = false;
    switch (DstSel) {
    case WORD_0: DstSelAgree = ((OtherDstSel == BYTE_2) ||
                                (OtherDstSel == BYTE_3) ||
                                (OtherDstSel == WORD_1));
      break;
    case WORD_1: DstSelAgree = ((OtherDstSel == BYTE_0) ||
                                (OtherDstSel == BYTE_1) ||
                                (OtherDstSel == WORD_0));
      break;
    case BYTE_0: DstSelAgree = ((OtherDstSel == BYTE_1) ||
                                (OtherDstSel == BYTE_2) ||
                                (OtherDstSel == BYTE_3) ||
                                (OtherDstSel == WORD_1));
      break;
    case BYTE_1: DstSelAgree = ((OtherDstSel == BYTE_0) ||
                                (OtherDstSel == BYTE_2) ||
                                (OtherDstSel == BYTE_3) ||
                                (OtherDstSel == WORD_1));
      break;
    case BYTE_2: DstSelAgree = ((OtherDstSel == BYTE_0) ||
                                (OtherDstSel == BYTE_1) ||
                                (OtherDstSel == BYTE_3) ||
                                (OtherDstSel == WORD_0));
      break;
    case BYTE_3: DstSelAgree = ((OtherDstSel == BYTE_0) ||
                                (OtherDstSel == BYTE_1) ||
                                (OtherDstSel == BYTE_2) ||
                                (OtherDstSel == WORD_0));
      break;
    default: DstSelAgree = false;
    }

    if (!DstSelAgree)
      break;
    DstUnused OtherDstUnused = static_cast<DstUnused>(
        TII->getNamedImmOperand(*OtherInst, AMDGPU::OpName::dst_unused));
    if (OtherDstUnused != DstUnused::UNUSED_PAD)
      break;

    MachineOperand *OrDst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
    assert(OrDst && OrDst->isReg());

    return std::make_unique<SDWADstPreserveOperand>(
        OrDst, OrSDWADef, OrOtherDef, DstSel);
  }
  }

  return std::unique_ptr<SDWAOperand>(nullptr);
}
void SIPeepholeSDWA::matchSDWAOperands(MachineBasicBlock &MBB) {
  for (MachineInstr &MI : MBB) {
    if (auto Operand = matchSDWAOperand(MI)) {
      SDWAOperands[&MI] = std::move(Operand);
      ++NumSDWAPatternsFound;
    }
  }
}
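// pseudoOpConvertToVOP2: rewrite V_{ADD|SUB}_CO_U32_e64 into its e32 VOP2
// form (carry implicitly in VCC) so that it can later take SDWA; bails out
// if VCC is live at this point.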
  int Opc = MI.getOpcode();
  assert((Opc == AMDGPU::V_ADD_CO_U32_e64 || Opc == AMDGPU::V_SUB_CO_U32_e64) &&
         "Currently only handles V_ADD_CO_U32_e64 or V_SUB_CO_U32_e64");
    if (I->modifiesRegister(AMDGPU::VCC, TRI))
      return;
  BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(AMDGPU::getVOPe32(Opc)))
      .add(*TII->getNamedOperand(MI, AMDGPU::OpName::vdst))
      .add(*TII->getNamedOperand(MI, AMDGPU::OpName::src0))
      .add(*TII->getNamedOperand(MI, AMDGPU::OpName::src1))
      .setMIFlags(MI.getFlags());

  MI.eraseFromParent();
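// isConvertibleToSDWA: reject opcodes and operand/modifier combinations the
// subtarget cannot encode in SDWA form.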
  unsigned Opc = MI.getOpcode();
  if (TII->isSDWA(Opc))
    return true;
  if (!ST.hasSDWAOmod() && TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
    return false;
  if (TII->isVOPC(Opc)) {
    if (!ST.hasSDWASdst()) {
      const MachineOperand *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
      if (SDst && (SDst->getReg() != AMDGPU::VCC &&
                   SDst->getReg() != AMDGPU::VCC_LO))
        return false;
    }

    if (!ST.hasSDWAOutModsVOPC() &&
        (TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) ||
         TII->hasModifiersSet(MI, AMDGPU::OpName::omod)))
      return false;

  } else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst) ||
             !TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) {
    return false;
  }
  if (!ST.hasSDWAMac() && (Opc == AMDGPU::V_FMAC_F16_e32 ||
                           Opc == AMDGPU::V_FMAC_F32_e32 ||
                           Opc == AMDGPU::V_MAC_F16_e32 ||
                           Opc == AMDGPU::V_MAC_F32_e32))
    return false;
  if (TII->pseudoToMCOpcode(Opc) == -1)
    return false;

  if (Opc == AMDGPU::V_CNDMASK_B32_e32)
    return false;

  return true;
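// convertToSDWA: build the SDWA variant of MI operand by operand, then let
// each matched SDWAOperand fold itself in.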
bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
                                   const SDWAOperandsVector &SDWAOperands) {
  int SDWAOpcode;
  unsigned Opcode = MI.getOpcode();
  if (TII->isSDWA(Opcode)) {
    SDWAOpcode = Opcode;
  } else {
    SDWAOpcode = AMDGPU::getSDWAOp(Opcode);
    if (SDWAOpcode == -1)
      SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(Opcode));
  }
  assert(SDWAOpcode != -1);
  } else if ((Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst))) {
  if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers))
    SDWAInst.addImm(Mod->getImm());
  else
    SDWAInst.addImm(0);
  SDWAInst.add(*Src0);
  if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers))
    SDWAInst.addImm(Mod->getImm());
  else
    SDWAInst.addImm(0);
  SDWAInst.add(*Src1);
  if (SDWAOpcode == AMDGPU::V_FMAC_F16_sdwa ||
      SDWAOpcode == AMDGPU::V_FMAC_F32_sdwa ||
      SDWAOpcode == AMDGPU::V_MAC_F16_sdwa ||
      SDWAOpcode == AMDGPU::V_MAC_F32_sdwa) {
    // v_mac_f16/32 has an additional src2 operand tied to vdst.
    MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
    assert(Src2);
    SDWAInst.add(*Src2);
  }
  if (Clamp)
    SDWAInst.add(*Clamp);
  else
    SDWAInst.addImm(0);

  if (OMod)
    SDWAInst.add(*OMod);
  else
    SDWAInst.addImm(0);
  if (DstSel)
    SDWAInst.add(*DstSel);
  else
    SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);

  if (DstUnused)
    SDWAInst.add(*DstUnused);
  else
    SDWAInst.addImm(AMDGPU::SDWA::DstUnused::UNUSED_PAD);

  if (Src0Sel)
    SDWAInst.add(*Src0Sel);
  else
    SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);

  if (Src1Sel)
    SDWAInst.add(*Src1Sel);
  else
    SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
  auto DstUnused = TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);
  if (DstUnused &&
      DstUnused->getImm() == AMDGPU::SDWA::DstUnused::UNUSED_PRESERVE) {
    assert(Dst && Dst->isTied());
    assert(Opcode == static_cast<unsigned int>(SDWAOpcode));

    int PreserveDstIdx =
        AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst);
    assert(PreserveDstIdx != -1);

    auto TiedIdx = MI.findTiedOperandIdx(PreserveDstIdx);
    auto Tied = MI.getOperand(TiedIdx);

    SDWAInst.add(Tied);
    SDWAInst->tieOperands(PreserveDstIdx, SDWAInst->getNumOperands() - 1);
  bool Converted = false;
  for (auto &Operand : SDWAOperands) {
    if (PotentialMatches.count(Operand->getParentInst()) == 0)
      Converted |= Operand->convertToSDWA(*SDWAInst, TII);
  }
  if (Converted) {
    ConvertedInstructions.push_back(SDWAInst);
    ++NumSDWAInstructionsPeepholed;
    MI.eraseFromParent();
    return true;
  }
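// legalizeScalarOperands: an SDWA instruction may use at most one scalar
// operand on the constant bus (and none without SDWA-scalar support); copy
// offending immediates/SGPRs into fresh VGPRs.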
  const MCInstrDesc &Desc = TII->get(MI.getOpcode());
  unsigned ConstantBusCount = 0;
  for (MachineOperand &Op : MI.explicit_uses()) {
    if (!Op.isImm() && !(Op.isReg() && !TRI->isVGPR(*MRI, Op.getReg())))
      continue;
    unsigned I = Op.getOperandNo();
    if (Desc.operands()[I].RegClass == -1 ||
        !TRI->isVSSuperClass(TRI->getRegClass(Desc.operands()[I].RegClass)))
      continue;
    if (ST.hasSDWAScalar() && ConstantBusCount == 0 && Op.isReg() &&
        TRI->isSGPRReg(*MRI, Op.getReg())) {
      ++ConstantBusCount;
      continue;
    }
    Register VGPR = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    auto Copy = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
                        TII->get(AMDGPU::V_MOV_B32_e32), VGPR);
    if (Op.isImm())
      Copy.addImm(Op.getImm());
    else if (Op.isReg())
      Copy.addReg(Op.getReg(), Op.isKill() ? RegState::Kill : 0,
                  Op.getSubReg());
    Op.ChangeToRegister(VGPR, false);
  TRI = ST.getRegisterInfo();
  TII = ST.getInstrInfo();

  bool Changed = false;
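  // Two passes per block: first demote convertible V_{ADD|SUB}_CO_U32_e64 to
  // e32, then re-match and apply the SDWA conversions.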
  for (MachineBasicBlock &MBB : MF) {
    matchSDWAOperands(MBB);
    for (const auto &OperandPair : SDWAOperands) {
      const auto &Operand = OperandPair.second;
      MachineInstr *PotentialMI = Operand->potentialToConvert(TII);
      if (PotentialMI &&
          (PotentialMI->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 ||
           PotentialMI->getOpcode() == AMDGPU::V_SUB_CO_U32_e64))
        pseudoOpConvertToVOP2(*PotentialMI, ST);
    }
    SDWAOperands.clear();
    matchSDWAOperands(MBB);

    for (const auto &OperandPair : SDWAOperands) {
      const auto &Operand = OperandPair.second;
      MachineInstr *PotentialMI = Operand->potentialToConvert(TII);
      if (PotentialMI && isConvertibleToSDWA(*PotentialMI, ST)) {
        PotentialMatches[PotentialMI].push_back(Operand.get());
      }
    }

    for (auto &PotentialPair : PotentialMatches) {
      MachineInstr &PotentialMI = *PotentialPair.first;
      convertToSDWA(PotentialMI, PotentialPair.second);
    }

    PotentialMatches.clear();
    SDWAOperands.clear();
    Changed = !ConvertedInstructions.empty();

    while (!ConvertedInstructions.empty())
      legalizeScalarOperands(*ConvertedInstructions.pop_back_val(), ST);
  }

  return Changed;