#define DEBUG_TYPE "rewrite-partial-reg-uses"
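// This pass rewrites a virtual register that is only ever used through
// subregisters: the used subregs are shifted right as far as alignment
// allows and the register is replaced with one from a smaller register
// class, freeing the unused high lanes.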
class GCNRewritePartialRegUsesImpl {
  MachineRegisterInfo *MRI;
  const SIRegisterInfo *TRI;
  const TargetInstrInfo *TII;
  LiveIntervals *LIS;

  /// Map OldSubReg -> NewSubReg, used below as an in/out container.
  using SubRegMap = SmallDenseMap<unsigned, unsigned>;
 
  /// Given register class RC and the used subregs as keys of SubRegs, return a
  /// register class holding registers of minimal size and fill the SubRegs
  /// values with the corresponding right-shifted subreg indexes.
  const TargetRegisterClass *getMinSizeReg(const TargetRegisterClass *RC,
                                           SubRegMap &SubRegs) const;
 
  /// Find a register class with every OldSubReg shifted right by RShift bits.
  /// If CoverSubregIdx is non-zero, that subreg becomes the whole register.
  const TargetRegisterClass *
  getRegClassWithShiftedSubregs(const TargetRegisterClass *RC, unsigned RShift,
                                unsigned CoverSubregIdx,
                                SubRegMap &SubRegs) const;

  /// Update live intervals after OldReg has been replaced with NewReg.
  void updateLiveIntervals(Register OldReg, Register NewReg,
                           SubRegMap &SubRegs) const;
 
  /// Return the subreg index matching SubReg shifted right by RShift bits, or
  /// zero if there is no such index.
  unsigned shiftSubReg(unsigned SubReg, unsigned RShift) const;
 
  /// Return the subreg index with the given bit Offset and Size, or zero if
  /// none exists.
  unsigned getSubReg(unsigned Offset, unsigned Size) const;

  /// Cache for getSubReg method: {Offset, Size} -> SubReg index.
  mutable SmallDenseMap<std::pair<unsigned, unsigned>, unsigned> SubRegs;
 
  /// Return the register class mask enumerated by SuperRegClassIterator for
  /// super-register classes of RC reachable via SubRegIdx, or null if there is
  /// no such mask.
  const uint32_t *getSuperRegClassMask(const TargetRegisterClass *RC,
                                       unsigned SubRegIdx) const;

  /// Cache for getSuperRegClassMask method: {RC, SubRegIdx} -> Mask.
  mutable SmallDenseMap<std::pair<const TargetRegisterClass *, unsigned>,
                        const uint32_t *>
      SuperRegMasks;
 
  /// Return a bit vector of allocatable register classes aligned to at least
  /// AlignNumBits, computed lazily and cached per alignment.
  const BitVector &
  getAllocatableAndAlignedRegClassMask(unsigned AlignNumBits) const;

  /// Cache for getAllocatableAndAlignedRegClassMask method:
  /// AlignNumBits -> bit vector of register class IDs.
  mutable SmallDenseMap<unsigned, BitVector> AllocatableAndAlignedRegClassMasks;
 
public:
  GCNRewritePartialRegUsesImpl(LiveIntervals *LS) : LIS(LS) {}
  bool run(MachineFunction &MF);
};

class GCNRewritePartialRegUsesLegacy : public MachineFunctionPass {
public:
  static char ID;

  GCNRewritePartialRegUsesLegacy() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "Rewrite Partial Register Uses";
  }
};
 
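// Linear scan over all target subreg indexes, memoized in the SubRegs cache
// since the lookup is by bit offset and size rather than by index.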
unsigned GCNRewritePartialRegUsesImpl::getSubReg(unsigned Offset,
                                                 unsigned Size) const {
  const auto [I, Inserted] = SubRegs.try_emplace({Offset, Size}, 0);
  if (Inserted) {
    for (unsigned Idx = 1, E = TRI->getNumSubRegIndices(); Idx < E; ++Idx) {
      if (TRI->getSubRegIdxOffset(Idx) == Offset &&
          TRI->getSubRegIdxSize(Idx) == Size) {
        I->second = Idx;
        break;
      }
    }
  }
  return I->second;
}
 
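// Shifting keeps the subreg size and moves its bit offset down by RShift.
// For example, with AMDGPU's 32-bit subreg granularity, shifting sub2_sub3
// right by 64 bits yields sub0_sub1.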
unsigned GCNRewritePartialRegUsesImpl::shiftSubReg(unsigned SubReg,
                                                   unsigned RShift) const {
  unsigned Offset = TRI->getSubRegIdxOffset(SubReg) - RShift;
  return getSubReg(Offset, TRI->getSubRegIdxSize(SubReg));
}
 
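// Walk the super-register classes of RC and memoize the class mask recorded
// for SubRegIdx; the cached value stays null if the index is never found.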
const uint32_t *GCNRewritePartialRegUsesImpl::getSuperRegClassMask(
    const TargetRegisterClass *RC, unsigned SubRegIdx) const {
  const auto [I, Inserted] =
      SuperRegMasks.try_emplace({RC, SubRegIdx}, nullptr);
  if (Inserted) {
    for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI) {
      if (RCI.getSubReg() == SubRegIdx) {
        I->second = RCI.getMask();
        break;
      }
    }
  }
  return I->second;
}
 
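// Computed once per alignment value: a single pass over all register classes
// keeping those that are both allocatable and sufficiently aligned.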
const BitVector &
GCNRewritePartialRegUsesImpl::getAllocatableAndAlignedRegClassMask(
    unsigned AlignNumBits) const {
  const auto [I, Inserted] =
      AllocatableAndAlignedRegClassMasks.try_emplace(AlignNumBits);
  if (Inserted) {
    BitVector &BV = I->second;
    BV.resize(TRI->getNumRegClasses());
    for (unsigned ClassID = 0; ClassID < TRI->getNumRegClasses(); ++ClassID) {
      auto *RC = TRI->getRegClass(ClassID);
      if (RC->isAllocatable() && TRI->isRegClassAligned(RC, AlignNumBits))
        BV.set(ClassID);
    }
  }
  return I->second;
}
 
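// Candidate classes start as every allocatable class with the required
// alignment; each used subreg then prunes the set through its super-register
// class mask, and the smallest surviving class is selected.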
const TargetRegisterClass *
GCNRewritePartialRegUsesImpl::getRegClassWithShiftedSubregs(
    const TargetRegisterClass *RC, unsigned RShift, unsigned CoverSubregIdx,
    SubRegMap &SubRegs) const {

  unsigned RCAlign = TRI->getRegClassAlignmentNumBits(RC);
  LLVM_DEBUG(dbgs() << "  Shift " << RShift << ", reg align " << RCAlign
                    << '\n');

  BitVector ClassMask(getAllocatableAndAlignedRegClassMask(RCAlign));
  for (auto &[OldSubReg, NewSubReg] : SubRegs) {
    LLVM_DEBUG(dbgs() << "  " << TRI->getSubRegIndexName(OldSubReg) << ':');

    auto *SubRegRC = TRI->getSubRegisterClass(RC, OldSubReg);
    if (!SubRegRC)
      return nullptr;

    LLVM_DEBUG(dbgs() << TRI->getRegClassName(SubRegRC)
                      << (SubRegRC->isAllocatable() ? "" : " not alloc")
                      << " -> ");

    if (OldSubReg == CoverSubregIdx) {
      // The covering subreg becomes the whole register, so its class must be
      // allocatable.
      assert(SubRegRC->isAllocatable());
      NewSubReg = AMDGPU::NoSubRegister;
      LLVM_DEBUG(dbgs() << "whole reg");
    } else {
      NewSubReg = shiftSubReg(OldSubReg, RShift);
      if (!NewSubReg) {
        LLVM_DEBUG(dbgs() << "none\n");
        return nullptr;
      }
      LLVM_DEBUG(dbgs() << TRI->getSubRegIndexName(NewSubReg));
    }

    const uint32_t *Mask = NewSubReg ? getSuperRegClassMask(SubRegRC, NewSubReg)
                                     : SubRegRC->getSubClassMask();
    if (!Mask)
      llvm_unreachable("no register class mask?");

    ClassMask.clearBitsNotInMask(Mask);
    // Don't try to early exit: checking whether ClassMask still has set bits
    // isn't that cheap and the intersection is expected to stay non-empty.
    LLVM_DEBUG(dbgs() << ", num regclasses " << ClassMask.count() << '\n');
  }

  // ClassMask now holds every allocatable, sufficiently aligned class that
  // supports all shifted subregs with the required subreg classes. Pick the
  // one with registers of minimal size.
  const TargetRegisterClass *MinRC = nullptr;
  unsigned MinNumBits = std::numeric_limits<unsigned>::max();
  for (unsigned ClassID : ClassMask.set_bits()) {
    auto *RC = TRI->getRegClass(ClassID);
    unsigned NumBits = TRI->getRegSizeInBits(*RC);
    if (NumBits < MinNumBits) {
      MinNumBits = NumBits;
      MinRC = RC;
    }
  }
#ifndef NDEBUG
  if (MinRC) {
    assert(MinRC->isAllocatable() && TRI->isRegClassAligned(MinRC, RCAlign));
    for (auto [OldSubReg, NewSubReg] : SubRegs)
      // Check that all registers in MinRC support the NewSubReg subregister.
      assert(MinRC == TRI->getSubClassWithSubReg(MinRC, NewSubReg));
  }
#endif
  // RShift may be zero, in which case we were just looking for a smaller
  // register class.
  return (MinRC != RC || RShift != 0) ? MinRC : nullptr;
}
 
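// First look for a "covering" subreg spanning from the lowest used offset to
// the highest used end: shifting by Offset turns it into the whole register.
// Otherwise the shift is limited by the most strictly aligned used subreg.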
const TargetRegisterClass *
GCNRewritePartialRegUsesImpl::getMinSizeReg(const TargetRegisterClass *RC,
                                            SubRegMap &SubRegs) const {
  unsigned CoverSubreg = AMDGPU::NoSubRegister;
  unsigned Offset = std::numeric_limits<unsigned>::max();
  unsigned End = 0;
  for (auto [SubReg, SRI] : SubRegs) {
    unsigned SubRegOffset = TRI->getSubRegIdxOffset(SubReg);
    unsigned SubRegEnd = SubRegOffset + TRI->getSubRegIdxSize(SubReg);
    if (SubRegOffset < Offset) {
      Offset = SubRegOffset;
      CoverSubreg = AMDGPU::NoSubRegister;
    }
    if (SubRegEnd > End) {
      End = SubRegEnd;
      CoverSubreg = AMDGPU::NoSubRegister;
    }
    if (SubRegOffset == Offset && SubRegEnd == End)
      CoverSubreg = SubReg;
  }
  // If a covering subreg was found, shift everything so that it lands at
  // offset zero and becomes the whole register.
  if (CoverSubreg != AMDGPU::NoSubRegister)
    return getRegClassWithShiftedSubregs(RC, Offset, CoverSubreg, SubRegs);

  // Otherwise find the subreg with the maximum required alignment and shift
  // it (and all other subregs) to the rightmost position that respects that
  // alignment.
  unsigned MaxAlign = 0;
  for (auto [SubReg, SRI] : SubRegs)
    MaxAlign = std::max(MaxAlign, TRI->getSubRegAlignmentNumBits(RC, SubReg));

  unsigned FirstMaxAlignedSubRegOffset = std::numeric_limits<unsigned>::max();
  for (auto [SubReg, SRI] : SubRegs) {
    if (TRI->getSubRegAlignmentNumBits(RC, SubReg) != MaxAlign)
      continue;
    FirstMaxAlignedSubRegOffset =
        std::min(FirstMaxAlignedSubRegOffset, TRI->getSubRegIdxOffset(SubReg));
    if (FirstMaxAlignedSubRegOffset == Offset)
      break;
  }

  unsigned NewOffsetOfMaxAlignedSubReg =
      alignTo(FirstMaxAlignedSubRegOffset - Offset, MaxAlign);
  if (NewOffsetOfMaxAlignedSubReg > FirstMaxAlignedSubRegOffset)
    llvm_unreachable("misaligned subreg");

  unsigned RShift = FirstMaxAlignedSubRegOffset - NewOffsetOfMaxAlignedSubReg;
  return getRegClassWithShiftedSubregs(RC, RShift, 0, SubRegs);
}
 
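// Transfer the old interval's subranges to NewReg by matching lane masks.
// If some subrange has no exactly matching used subreg, fall back to
// recomputing the interval from scratch.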
void GCNRewritePartialRegUsesImpl::updateLiveIntervals(
    Register OldReg, Register NewReg, SubRegMap &SubRegs) const {
  if (!LIS->hasInterval(OldReg))
    return;

  auto &OldLI = LIS->getInterval(OldReg);
  auto &NewLI = LIS->createEmptyInterval(NewReg);

  auto &Allocator = LIS->getVNInfoAllocator();
  NewLI.setWeight(OldLI.weight());

  for (auto &SR : OldLI.subranges()) {
    // Find the used subreg whose lane mask matches this subrange.
    auto I = find_if(SubRegs, [&](auto &P) {
      return SR.LaneMask == TRI->getSubRegIndexLaneMask(P.first);
    });

    if (I == SubRegs.end()) {
      // Subranges don't exactly match the used subregs: drop both intervals
      // and recompute the new one from scratch.
      LIS->removeInterval(OldReg);
      LIS->removeInterval(NewReg);
      LIS->createAndComputeVirtRegInterval(NewReg);
      return;
    }

    if (unsigned NewSubReg = I->second)
      NewLI.createSubRangeFrom(Allocator,
                               TRI->getSubRegIndexLaneMask(NewSubReg), SR);
    else // This is the covering subreg: it becomes the main range.
      NewLI.assign(SR, Allocator);
  }

  LIS->removeInterval(OldReg);
}
 
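// Per-register driver: collect the subregs actually used on Reg, compute a
// minimal-size replacement class, create the new virtual register and rewrite
// every operand, updating live intervals when they are available.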
bool GCNRewritePartialRegUsesImpl::rewriteReg(Register Reg) const {
  // Collect used subregs; bail out if the whole register is used directly.
  SubRegMap SubRegs;
  for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
    if (MO.getSubReg() == AMDGPU::NoSubRegister)
      return false; // Whole reg used.
    SubRegs.try_emplace(MO.getSubReg());
  }
  if (SubRegs.empty())
    return false;

  auto *RC = MRI->getRegClass(Reg);
  LLVM_DEBUG(dbgs() << "Try to rewrite partial reg " << printReg(Reg, TRI)
                    << ':' << TRI->getRegClassName(RC) << '\n');

  auto *NewRC = getMinSizeReg(RC, SubRegs);
  if (!NewRC) {
    LLVM_DEBUG(dbgs() << "  No improvement achieved\n");
    return false;
  }

  Register NewReg = MRI->createVirtualRegister(NewRC);
  LLVM_DEBUG(dbgs() << "  Success " << printReg(Reg, TRI) << ':'
                    << TRI->getRegClassName(RC) << " -> "
                    << printReg(NewReg, TRI) << ':'
                    << TRI->getRegClassName(NewRC) << '\n');

  for (auto &MO : make_early_inc_range(MRI->reg_operands(Reg))) {
    MO.setReg(NewReg);
    // Debug info can refer to the whole reg; leave such operands as they are.
    if (MO.isDebug() && MO.getSubReg() == 0)
      continue;
    unsigned NewSubReg = SubRegs[MO.getSubReg()];
    MO.setSubReg(NewSubReg);
    if (NewSubReg == AMDGPU::NoSubRegister && MO.isDef())
      MO.setIsUndef(false);
  }

  if (LIS)
    updateLiveIntervals(Reg, NewReg, SubRegs);

  return true;
}
 
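// Entry point shared by the legacy and new pass manager wrappers: try to
// rewrite every virtual register in the function.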
bool GCNRewritePartialRegUsesImpl::run(MachineFunction &MF) {
  MRI = &MF.getRegInfo();
  TRI = static_cast<const SIRegisterInfo *>(MRI->getTargetRegisterInfo());
  TII = MF.getSubtarget().getInstrInfo();
  bool Changed = false;
  for (size_t I = 0, E = MRI->getNumVirtRegs(); I < E; ++I) {
    Changed |= rewriteReg(Register::index2VirtReg(I));
  }
  return Changed;
}
 
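// Legacy PM wrapper. LiveIntervals is optional here, so it is queried with
// getAnalysisIfAvailable and may be null.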
bool GCNRewritePartialRegUsesLegacy::runOnMachineFunction(MachineFunction &MF) {
  LiveIntervalsWrapperPass *LISWrapper =
      getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
  LiveIntervals *LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
  GCNRewritePartialRegUsesImpl Impl(LIS);
  return Impl.run(MF);
}
 
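// New PM wrapper. LiveIntervals is taken from the analysis manager cache if
// present; when nothing was rewritten, all analyses are preserved.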
PreservedAnalyses
GCNRewritePartialRegUsesPass::run(MachineFunction &MF,
                                  MachineFunctionAnalysisManager &MFAM) {
  LiveIntervals *LIS = MFAM.getCachedResult<LiveIntervalsAnalysis>(MF);
  if (!GCNRewritePartialRegUsesImpl(LIS).run(MF))
    return PreservedAnalyses::all();
  auto PA = getMachineFunctionPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}
 
 
char GCNRewritePartialRegUsesLegacy::ID;
 
INITIALIZE_PASS_BEGIN(GCNRewritePartialRegUsesLegacy, DEBUG_TYPE,
                      "Rewrite Partial Register Uses", false, false)
INITIALIZE_PASS_END(GCNRewritePartialRegUsesLegacy, DEBUG_TYPE,
                    "Rewrite Partial Register Uses", false, false)
 