34#define DEBUG_TYPE "amdgpu-regbanklegalize"
43template <
typename SrcTy>
45m_GAMDGPUReadAnyLane(
const SrcTy &Src) {
59 return "AMDGPU Register Bank Legalize";
79 "AMDGPU Register Bank Legalize",
false,
false)
86char AMDGPURegBankLegalize::
ID = 0;
91 return new AMDGPURegBankLegalize();
96 static std::mutex GlobalMutex;
99 std::lock_guard<std::mutex> Lock(GlobalMutex);
100 auto [It, Inserted] = CacheForRuleSet.
try_emplace(ST.getGeneration());
102 It->second = std::make_unique<RegBankLegalizeRules>(ST,
MRI);
104 It->second->refreshRefs(ST,
MRI);
124 : B(B), MRI(*B.getMRI()), TRI(TRI),
125 SgprRB(&RBI.getRegBank(
AMDGPU::SGPRRegBankID)),
126 VgprRB(&RBI.getRegBank(
AMDGPU::VGPRRegBankID)),
127 VccRB(&RBI.getRegBank(
AMDGPU::VCCRegBankID)) {};
130 std::pair<MachineInstr *, Register>
tryMatch(
Register Src,
unsigned Opcode);
142 if (RB && RB->
getID() == AMDGPU::VCCRegBankID)
146 return RC && TRI.isSGPRClass(RC) && MRI.getType(Reg) ==
LLT::scalar(1);
149std::pair<MachineInstr *, Register>
157std::pair<GUnmerge *, int>
160 if (ReadAnyLane->
getOpcode() != AMDGPU::G_AMDGPU_READANYLANE)
161 return {
nullptr, -1};
165 return {UnMerge, UnMerge->findRegisterDefOperandIdx(RALSrc,
nullptr)};
167 return {
nullptr, -1};
198 unsigned NumElts =
Merge->getNumSources();
200 if (!Unmerge || Unmerge->getNumDefs() != NumElts || Idx != 0)
204 for (
unsigned i = 1; i < NumElts; ++i) {
206 if (UnmergeI != Unmerge || (
unsigned)IdxI != i)
209 return Unmerge->getSourceReg();
219 int Idx = UnMerge->findRegisterDefOperandIdx(Src,
nullptr);
221 if (!
Merge || UnMerge->getNumDefs() !=
Merge->getNumSources())
225 if (MRI.getType(Src) != MRI.getType(SrcRegIdx))
228 auto [RALEl, RALElSrc] =
tryMatch(SrcRegIdx, AMDGPU::G_AMDGPU_READANYLANE);
238 MRI.replaceRegWith(Dst, Src);
240 B.buildCopy(Dst, Src);
245 Register Dst = Copy.getOperand(0).getReg();
246 Register Src = Copy.getOperand(1).getReg();
249 if (Dst.isVirtual() ? (MRI.getRegBankOrNull(Dst) != VgprRB)
250 : !TRI.isVGPR(MRI, Dst))
254 if (!Src.isVirtual() || MRI.getRegClassOrNull(Src))
259 if (SrcMI.
getOpcode() == AMDGPU::G_BITCAST)
267 if (SrcMI.
getOpcode() != AMDGPU::G_BITCAST) {
280 auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, RALSrc);
295 if (!Dst.isVirtual() || !Src.isVirtual())
305 if (
isLaneMask(Dst) && MRI.getRegBankOrNull(Src) == SgprRB) {
306 auto [Trunc, TruncS32Src] =
tryMatch(Src, AMDGPU::G_TRUNC);
307 assert(Trunc && MRI.getType(TruncS32Src) == S32 &&
308 "sgpr S1 must be result of G_TRUNC of sgpr S32");
312 auto One = B.buildConstant({SgprRB, S32}, 1);
313 auto BoolSrc = B.buildAnd({SgprRB, S32}, TruncS32Src, One);
314 B.buildInstr(AMDGPU::G_AMDGPU_COPY_VCC_SCC, {Dst}, {BoolSrc});
326 if (MRI.getType(Src) != S1)
329 auto [Trunc, TruncSrc] =
tryMatch(Src, AMDGPU::G_TRUNC);
333 LLT DstTy = MRI.getType(Dst);
334 LLT TruncSrcTy = MRI.getType(TruncSrc);
336 if (DstTy == TruncSrcTy) {
337 MRI.replaceRegWith(Dst, TruncSrc);
344 if (DstTy == S32 && TruncSrcTy == S64) {
345 auto Unmerge = B.buildUnmerge({SgprRB, S32}, TruncSrc);
346 MRI.replaceRegWith(Dst, Unmerge.getReg(0));
351 if (DstTy == S64 && TruncSrcTy == S32) {
352 B.buildMergeLikeInstr(
MI.getOperand(0).getReg(),
353 {TruncSrc, B.buildUndef({SgprRB, S32})});
358 if (DstTy ==
S32 && TruncSrcTy ==
S16) {
359 B.buildAnyExt(Dst, TruncSrc);
364 if (DstTy ==
S16 && TruncSrcTy ==
S32) {
365 B.buildTrunc(Dst, TruncSrc);
376 for (
unsigned i = 0; i <
MRI.getNumVirtRegs(); ++i) {
382 if (RB && RB->
getID() == AMDGPU::SGPRRegBankID) {
397 const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
398 GISelCSEAnalysisWrapper &
Wrapper =
399 getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
401 GISelObserverWrapper Observer;
405 B.setCSEInfo(&CSEInfo);
406 B.setChangeObserver(Observer);
408 RAIIDelegateInstaller DelegateInstaller(MF, &Observer);
409 RAIIMFObserverInstaller MFObserverInstaller(MF, Observer);
413 const RegisterBankInfo &RBI = *
ST.getRegBankInfo();
415 getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();
418 const RegBankLegalizeRules &RBLRules =
getRules(ST,
MRI);
421 RegBankLegalizeHelper RBLHelper(
B, MUI, RBI, RBLRules);
425 for (MachineBasicBlock &
MBB : MF) {
426 for (MachineInstr &
MI :
MBB) {
431 for (MachineInstr *
MI : AllInst) {
432 if (!
MI->isPreISelOpcode())
435 unsigned Opc =
MI->getOpcode();
437 if (
Opc == AMDGPU::G_PHI) {
438 RBLHelper.applyMappingPHI(*
MI);
444 if (
Opc == AMDGPU::G_BUILD_VECTOR ||
Opc == AMDGPU::G_UNMERGE_VALUES ||
445 Opc == AMDGPU::G_MERGE_VALUES ||
Opc == AMDGPU::G_BITCAST) {
446 RBLHelper.applyMappingTrivial(*
MI);
451 if (
Opc == G_FREEZE &&
453 RBLHelper.applyMappingTrivial(*
MI);
457 if ((
Opc == AMDGPU::G_CONSTANT ||
Opc == AMDGPU::G_FCONSTANT ||
458 Opc == AMDGPU::G_IMPLICIT_DEF)) {
462 assert(
MRI.getRegBank(Dst)->getID() == AMDGPU::SGPRRegBankID);
469 RBLHelper.findRuleAndApplyMapping(*
MI);
495 AMDGPURegBankLegalizeCombiner Combiner(
B, *
ST.getRegisterInfo(), RBI);
497 for (MachineBasicBlock &
MBB : MF) {
499 if (
MI.getOpcode() == AMDGPU::COPY) {
500 Combiner.tryCombineCopy(
MI);
503 if (
MI.getOpcode() == AMDGPU::G_ANYEXT) {
504 Combiner.tryCombineS1AnyExt(
MI);
511 "Registers with sgpr reg bank and S1 LLT are not legal after "
512 "AMDGPURegBankLegalize. Should lower to sgpr S32");
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
static Register getAnySgprS1(const MachineRegisterInfo &MRI)
const RegBankLegalizeRules & getRules(const GCNSubtarget &ST, MachineRegisterInfo &MRI)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Provides analysis for continuously CSEing during GISel passes.
This file implements a version of MachineIRBuilder which CSEs insts within a MachineBasicBlock.
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic operations.
Contains matchers for matching SSA Machine Instructions.
Promote Memory to Register
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
Target-Independent Code Generator Pass Configuration Options pass.
std::pair< GUnmerge *, int > tryMatchRALFromUnmerge(Register Src)
void replaceRegWithOrBuildCopy(Register Dst, Register Src)
AMDGPURegBankLegalizeCombiner(MachineIRBuilder &B, const SIRegisterInfo &TRI, const RegisterBankInfo &RBI)
bool isLaneMask(Register Reg)
void tryCombineS1AnyExt(MachineInstr &MI)
std::pair< MachineInstr *, Register > tryMatch(Register Src, unsigned Opcode)
Register getReadAnyLaneSrc(Register Src)
void tryCombineCopy(MachineInstr &MI)
bool tryEliminateReadAnyLane(MachineInstr &Copy)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
FunctionPass class - This class is used to implement most global optimizations.
The actual analysis pass wrapper.
void addObserver(GISelChangeObserver *O)
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineFunctionProperties & getProperties() const
Get the function properties.
Helper class to build MachineInstr.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Holds all the information related to register banks.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
void push_back(const T &Elt)
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Target-Independent Code Generator Pass Configuration Options.
virtual std::unique_ptr< CSEConfigBase > getCSEConfig() const
Returns the CSEConfig object to use for the current optimization level.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
operand_type_match m_Reg()
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
UnaryOp_match< SrcTy, TargetOpcode::G_ANYEXT > m_GAnyExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
This is an optimization pass for GlobalISel generic memory operations.
GenericUniformityInfo< MachineSSAContext > MachineUniformityInfo
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
FunctionPass * createAMDGPURegBankLegalizePass()
LLVM_ABI void eraseInstr(MachineInstr &MI, MachineRegisterInfo &MRI, LostDebugLocObserver *LocObserver=nullptr)
char & AMDGPURegBankLegalizeID