35#define DEBUG_TYPE "amdgpu-regbanklegalize"
44template <
typename SrcTy>
46m_GAMDGPUReadAnyLane(
const SrcTy &Src) {
60 return "AMDGPU Register Bank Legalize";
81 "AMDGPU Register Bank Legalize",
false,
false)
89char AMDGPURegBankLegalize::
ID = 0;
94 return new AMDGPURegBankLegalize();
99 static std::mutex GlobalMutex;
102 std::lock_guard<std::mutex> Lock(GlobalMutex);
103 auto [It, Inserted] = CacheForRuleSet.
try_emplace(ST.getGeneration());
105 It->second = std::make_unique<RegBankLegalizeRules>(ST, MRI);
107 It->second->refreshRefs(ST, MRI);
127 : B(B), MRI(*B.getMRI()), TRI(TRI),
128 SgprRB(&RBI.getRegBank(
AMDGPU::SGPRRegBankID)),
129 VgprRB(&RBI.getRegBank(
AMDGPU::VGPRRegBankID)),
130 VccRB(&RBI.getRegBank(
AMDGPU::VCCRegBankID)) {};
133 std::pair<MachineInstr *, Register>
tryMatch(
Register Src,
unsigned Opcode);
146 if (RB && RB->
getID() == AMDGPU::VCCRegBankID)
150 return RC && TRI.isSGPRClass(RC) && MRI.getType(Reg) ==
LLT::scalar(1);
153std::pair<MachineInstr *, Register>
166 if (!UnMerge || UnMerge->getNumDefs() != DefRegs.
size())
168 for (
unsigned I = 1;
I < DefRegs.
size(); ++
I) {
169 if (UnMerge->getReg(
I) != DefRegs[
I])
172 return UnMerge->getSourceReg();
180 for (
unsigned i = 0; i <
Merge->getNumSources(); ++i) {
183 m_GAMDGPUReadAnyLane(
m_Reg(Src))))
187 return ReadAnyLaneSrcs;
220 if (ReadAnyLaneSrcs.
empty())
228 return ReadAnyLaneSrcs;
238 int Idx = UnMerge->findRegisterDefOperandIdx(Src,
nullptr);
240 if (!
Merge || UnMerge->getNumDefs() !=
Merge->getNumSources())
244 if (MRI.getType(Src) != MRI.getType(SrcRegIdx))
247 auto [RALEl, RALElSrc] =
tryMatch(SrcRegIdx, AMDGPU::G_AMDGPU_READANYLANE);
257 MRI.replaceRegWith(Dst, Src);
259 B.buildCopy(Dst, Src);
264 Register Dst = Copy.getOperand(0).getReg();
265 Register Src = Copy.getOperand(1).getReg();
268 if (Dst.isVirtual() ? (MRI.getRegBankOrNull(Dst) != VgprRB)
269 : !TRI.isVGPR(MRI, Dst))
273 if (!Src.isVirtual() || MRI.getRegClassOrNull(Src))
278 if (SrcMI.
getOpcode() == AMDGPU::G_BITCAST)
281 B.setInstrAndDebugLoc(Copy);
283 if (ReadAnyLaneSrcRegs.
empty())
287 if (ReadAnyLaneSrcRegs.
size() == 1) {
288 ReadAnyLaneSrc = ReadAnyLaneSrcRegs[0];
291 auto Merge = B.buildMergeLikeInstr({VgprRB, MRI.getType(RALDst)},
293 ReadAnyLaneSrc =
Merge.getReg(0);
296 if (SrcMI.
getOpcode() != AMDGPU::G_BITCAST) {
309 auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, ReadAnyLaneSrc);
324 if (!Dst.isVirtual() || !Src.isVirtual())
334 if (
isLaneMask(Dst) && MRI.getRegBankOrNull(Src) == SgprRB) {
335 auto [Trunc, TruncS32Src] =
tryMatch(Src, AMDGPU::G_TRUNC);
336 assert(Trunc && MRI.getType(TruncS32Src) == S32 &&
337 "sgpr S1 must be result of G_TRUNC of sgpr S32");
341 auto One = B.buildConstant({SgprRB, S32}, 1);
342 auto BoolSrc = B.buildAnd({SgprRB, S32}, TruncS32Src, One);
343 B.buildInstr(AMDGPU::G_AMDGPU_COPY_VCC_SCC, {Dst}, {BoolSrc});
355 if (MRI.getType(Src) != S1)
358 auto [Trunc, TruncSrc] =
tryMatch(Src, AMDGPU::G_TRUNC);
362 LLT DstTy = MRI.getType(Dst);
363 LLT TruncSrcTy = MRI.getType(TruncSrc);
365 if (DstTy == TruncSrcTy) {
366 MRI.replaceRegWith(Dst, TruncSrc);
373 if (DstTy == S32 && TruncSrcTy == S64) {
374 auto Unmerge = B.buildUnmerge({SgprRB, S32}, TruncSrc);
375 MRI.replaceRegWith(Dst, Unmerge.getReg(0));
380 if (DstTy == S64 && TruncSrcTy == S32) {
381 B.buildMergeLikeInstr(
MI.getOperand(0).getReg(),
382 {TruncSrc, B.buildUndef({SgprRB, S32})});
387 if (DstTy ==
S32 && TruncSrcTy ==
S16) {
388 B.buildAnyExt(Dst, TruncSrc);
393 if (DstTy ==
S16 && TruncSrcTy ==
S32) {
394 B.buildTrunc(Dst, TruncSrc);
411 if (RB && RB->
getID() == AMDGPU::SGPRRegBankID) {
426 const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
427 GISelCSEAnalysisWrapper &
Wrapper =
428 getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
430 GISelObserverWrapper Observer;
434 B.setCSEInfo(&CSEInfo);
435 B.setChangeObserver(Observer);
437 RAIIDelegateInstaller DelegateInstaller(MF, &Observer);
438 RAIIMFObserverInstaller MFObserverInstaller(MF, Observer);
442 const RegisterBankInfo &RBI = *
ST.getRegBankInfo();
444 getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();
445 GISelValueTracking &VT =
446 getAnalysis<GISelValueTrackingAnalysisLegacy>().get(MF);
449 const RegBankLegalizeRules &RBLRules =
getRules(ST, MRI);
452 RegBankLegalizeHelper RBLHelper(
B, MUI, &VT, RBI, RBLRules);
456 for (MachineBasicBlock &
MBB : MF) {
457 for (MachineInstr &
MI :
MBB) {
462 for (MachineInstr *
MI : AllInst) {
463 if (!
MI->isPreISelOpcode())
466 if (!RBLHelper.findRuleAndApplyMapping(*
MI))
493 AMDGPURegBankLegalizeCombiner Combiner(
B, *
ST.getRegisterInfo(), RBI);
495 for (MachineBasicBlock &
MBB : MF) {
497 if (
MI.getOpcode() == AMDGPU::COPY) {
498 Combiner.tryCombineCopy(
MI);
501 if (
MI.getOpcode() == AMDGPU::G_ANYEXT) {
502 Combiner.tryCombineS1AnyExt(
MI);
509 "Registers with sgpr reg bank and S1 LLT are not legal after "
510 "AMDGPURegBankLegalize. Should lower to sgpr S32");
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
static Register getAnySgprS1(const MachineRegisterInfo &MRI)
const RegBankLegalizeRules & getRules(const GCNSubtarget &ST, MachineRegisterInfo &MRI)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Provides analysis for continuously CSEing during GISel passes.
This file implements a version of MachineIRBuilder which CSEs insts within a MachineBasicBlock.
AMD GCN specific subclass of TargetSubtarget.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
Contains matchers for matching SSA Machine Instructions.
Promote Memory to Register
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
Target-Independent Code Generator Pass Configuration Options pass.
Register tryMatchUnmergeDefs(SmallVectorImpl< Register > &DefRegs)
void replaceRegWithOrBuildCopy(Register Dst, Register Src)
AMDGPURegBankLegalizeCombiner(MachineIRBuilder &B, const SIRegisterInfo &TRI, const RegisterBankInfo &RBI)
bool isLaneMask(Register Reg)
void tryCombineS1AnyExt(MachineInstr &MI)
std::pair< MachineInstr *, Register > tryMatch(Register Src, unsigned Opcode)
SmallVector< Register > tryMatchMergeReadAnyLane(GMergeLikeInstr *Merge)
void tryCombineCopy(MachineInstr &MI)
bool tryEliminateReadAnyLane(MachineInstr &Copy)
SmallVector< Register > getReadAnyLaneSrcs(Register Src)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
FunctionPass class - This class is used to implement most global optimizations.
The actual analysis pass wrapper.
void addObserver(GISelChangeObserver *O)
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelValueTrackingInfoAnal...
Represents G_BUILD_VECTOR, G_CONCAT_VECTORS or G_MERGE_VALUES.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineFunctionProperties & getProperties() const
Get the function properties.
Helper class to build MachineInstr.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
bool def_empty(Register RegNo) const
def_empty - Return true if there are no instructions defining the specified register (it may be live-...
const RegisterBank * getRegBankOrNull(Register Reg) const
Return the register bank of Reg, or null if Reg has not been assigned a register bank or has been ass...
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
Holds all the information related to register banks.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Target-Independent Code Generator Pass Configuration Options.
virtual std::unique_ptr< CSEConfigBase > getCSEConfig() const
Returns the CSEConfig object to use for the current optimization level.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
operand_type_match m_Reg()
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
UnaryOp_match< SrcTy, TargetOpcode::G_ANYEXT > m_GAnyExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
This is an optimization pass for GlobalISel generic memory operations.
GenericUniformityInfo< MachineSSAContext > MachineUniformityInfo
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
FunctionPass * createAMDGPURegBankLegalizePass()
LLVM_ABI void eraseInstr(MachineInstr &MI, MachineRegisterInfo &MRI, LostDebugLocObserver *LocObserver=nullptr)
char & AMDGPURegBankLegalizeID