#define DEBUG_TYPE "rewrite-partial-reg-uses"

namespace {

class GCNRewritePartialRegUses : public MachineFunctionPass {
public:
  static char ID;
  GCNRewritePartialRegUses() : MachineFunctionPass(ID) {}
  StringRef getPassName() const override {
    return "Rewrite Partial Register Uses";
  }
  bool runOnMachineFunction(MachineFunction &MF) override;

private:
  MachineRegisterInfo *MRI;
  const SIRegisterInfo *TRI;
  const TargetInstrInfo *TII;
  LiveIntervals *LIS;

  /// Rewrite partially used register Reg; return true if it was changed.
  bool rewriteReg(Register Reg) const;

  /// Register class of a used subreg and the subreg index it maps to after
  /// the right shift; SubReg == 0 marks the covering subreg.
  struct SubRegInfo {
    const TargetRegisterClass *RC = nullptr;
    unsigned SubReg = AMDGPU::NoSubRegister;
  };
  /// Map OldSubReg -> {SubRegRC, NewSubReg}; used as an in/out container.
  using SubRegMap = SmallDenseMap<unsigned, SubRegInfo>;

  const TargetRegisterClass *getMinSizeReg(const TargetRegisterClass *RC,
                                           SubRegMap &SubRegs) const;
  const TargetRegisterClass *
  getRegClassWithShiftedSubregs(const TargetRegisterClass *RC, unsigned RShift,
                                unsigned RegNumBits, unsigned CoverSubregIdx,
                                SubRegMap &SubRegs) const;
  void updateLiveIntervals(Register OldReg, Register NewReg,
                           SubRegMap &SubRegs) const;
  const TargetRegisterClass *getOperandRegClass(MachineOperand &MO) const;
  unsigned shiftSubReg(unsigned SubReg, unsigned RShift) const;
  unsigned getSubReg(unsigned Offset, unsigned Size) const;
  const uint32_t *getSuperRegClassMask(const TargetRegisterClass *RC,
                                       unsigned SubRegIdx) const;
  const BitVector &
  getAllocatableAndAlignedRegClassMask(unsigned AlignNumBits) const;
};

} // anonymous namespace
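// A rough illustration of the rewrite this pass performs (a sketch only; the
// exact register classes and subreg indexes depend on the target):
//
//   Before: only a 64-bit slice of a 1024-bit register is ever used.
//     undef %0.sub4:vreg_1024 = ...
//     %0.sub5:vreg_1024 = ...
//     use %0.sub4_sub5:vreg_1024
//
//   After: the used subregs are shifted right and the register is shrunk.
//     undef %0.sub0:vreg_64 = ...
//     %0.sub1:vreg_64 = ...
//     use %0.sub0_sub1:vreg_64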
// Find the subreg index with the given Offset and Size; cache the result.
unsigned GCNRewritePartialRegUses::getSubReg(unsigned Offset,
                                             unsigned Size) const {
  const auto [I, Inserted] = SubRegs.try_emplace({Offset, Size}, 0);
  if (Inserted) {
    for (unsigned Idx = 1, E = TRI->getNumSubRegIndices(); Idx < E; ++Idx) {
      if (TRI->getSubRegIdxOffset(Idx) == Offset &&
          TRI->getSubRegIdxSize(Idx) == Size) {
        I->second = Idx;
        break;
      }
    }
  }
  return I->second;
}
unsigned GCNRewritePartialRegUses::shiftSubReg(unsigned SubReg,
                                               unsigned RShift) const {
  unsigned Offset = TRI->getSubRegIdxOffset(SubReg) - RShift;
  return getSubReg(Offset, TRI->getSubRegIdxSize(SubReg));
}

const uint32_t *
GCNRewritePartialRegUses::getSuperRegClassMask(const TargetRegisterClass *RC,
                                               unsigned SubRegIdx) const {
  const auto [I, Inserted] = SuperRegMasks.try_emplace({RC, SubRegIdx}, nullptr);
  if (Inserted) {
    for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI) {
      if (RCI.getSubReg() == SubRegIdx) {
        I->second = RCI.getMask();
        break;
      }
    }
  }
  return I->second;
}
const BitVector &
GCNRewritePartialRegUses::getAllocatableAndAlignedRegClassMask(
    unsigned AlignNumBits) const {
  const auto [I, Inserted] =
      AllocatableAndAlignedRegClassMasks.try_emplace(AlignNumBits);
  if (Inserted) {
    BitVector &BV = I->second;
    BV.resize(TRI->getNumRegClasses());
    for (unsigned ClassID = 0; ClassID < TRI->getNumRegClasses(); ++ClassID) {
      auto *RC = TRI->getRegClass(ClassID);
      if (RC->isAllocatable() && TRI->isRegClassAligned(RC, AlignNumBits))
        BV.set(ClassID);
    }
  }
  return I->second;
}
const TargetRegisterClass *
GCNRewritePartialRegUses::getRegClassWithShiftedSubregs(
    const TargetRegisterClass *RC, unsigned RShift, unsigned RegNumBits,
    unsigned CoverSubregIdx, SubRegMap &SubRegs) const {

  unsigned RCAlign = TRI->getRegClassAlignmentNumBits(RC);
  LLVM_DEBUG(dbgs() << "  Shift " << RShift << ", reg align " << RCAlign
                    << '\n');

  BitVector ClassMask(getAllocatableAndAlignedRegClassMask(RCAlign));
  for (auto &[OldSubReg, SRI] : SubRegs) {
    auto &[SubRegRC, NewSubReg] = SRI;
    assert(SubRegRC);

    LLVM_DEBUG(dbgs() << "  " << TRI->getSubRegIndexName(OldSubReg) << ':'
                      << TRI->getRegClassName(SubRegRC)
                      << (SubRegRC->isAllocatable() ? "" : " not alloc")
                      << " -> ");

    if (OldSubReg == CoverSubregIdx) {
      // The covering subreg becomes the whole register, so its class must be
      // allocatable.
      assert(SubRegRC->isAllocatable());
      NewSubReg = AMDGPU::NoSubRegister;
      LLVM_DEBUG(dbgs() << "whole reg");
    } else {
      NewSubReg = shiftSubReg(OldSubReg, RShift);
      if (!NewSubReg) {
        LLVM_DEBUG(dbgs() << "none\n");
        return nullptr;
      }
      LLVM_DEBUG(dbgs() << TRI->getSubRegIndexName(NewSubReg));
    }

    // Intersect the set of candidate classes with the classes that support
    // this subreg at its new position.
    const uint32_t *Mask = NewSubReg ? getSuperRegClassMask(SubRegRC, NewSubReg)
                                     : SubRegRC->getSubClassMask();
    if (!Mask)
      llvm_unreachable("no register class mask?");

    ClassMask.clearBitsNotInMask(Mask);
    // Don't bother early-exiting on an empty mask: counting set bits isn't
    // cheap and the intersection is expected to stay non-empty in most cases.
    LLVM_DEBUG(dbgs() << ", num regclasses " << ClassMask.count() << '\n');
  }

  // Select the smallest register class that is still at least RegNumBits wide.
  const TargetRegisterClass *MinRC = nullptr;
  unsigned MinNumBits = std::numeric_limits<unsigned>::max();
  for (unsigned ClassID : ClassMask.set_bits()) {
    auto *RC = TRI->getRegClass(ClassID);
    unsigned NumBits = TRI->getRegSizeInBits(*RC);
    if (NumBits < MinNumBits && NumBits >= RegNumBits) {
      MinNumBits = NumBits;
      MinRC = RC;
    }
    if (MinNumBits == RegNumBits)
      break;
  }
#ifndef NDEBUG
  if (MinRC) {
    assert(MinRC->isAllocatable() && TRI->isRegClassAligned(MinRC, RCAlign));
    for (auto [SubReg, SRI] : SubRegs)
      // All registers in MinRC must support the new SRI.SubReg subregister.
      assert(MinRC == TRI->getSubClassWithSubReg(MinRC, SRI.SubReg));
  }
#endif
  // RShift may be zero, in which case we are merely looking for a smaller
  // class; report "no change" if nothing better than RC was found.
  return (MinRC != RC || RShift != 0) ? MinRC : nullptr;
}
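// Note on the filtering above: each register class corresponds to one bit in
// ClassMask, so every used subreg narrows the candidate set with a single
// mask intersection. For example (a sketch, not actual target data), if the
// first shifted subreg is supported by {VReg_64, VReg_96, VReg_128} and the
// second only by {VReg_64, VReg_128}, the intersection leaves those two
// classes and the minimum-size loop then selects VReg_64.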
const TargetRegisterClass *
GCNRewritePartialRegUses::getMinSizeReg(const TargetRegisterClass *RC,
                                        SubRegMap &SubRegs) const {
  unsigned CoverSubreg = AMDGPU::NoSubRegister;
  unsigned Offset = std::numeric_limits<unsigned>::max();
  unsigned End = 0;
  for (auto [SubReg, SRI] : SubRegs) {
    unsigned SubRegOffset = TRI->getSubRegIdxOffset(SubReg);
    unsigned SubRegEnd = SubRegOffset + TRI->getSubRegIdxSize(SubReg);
    if (SubRegOffset < Offset) {
      Offset = SubRegOffset;
      CoverSubreg = AMDGPU::NoSubRegister;
    }
    if (SubRegEnd > End) {
      End = SubRegEnd;
      CoverSubreg = AMDGPU::NoSubRegister;
    }
    if (SubRegOffset == Offset && SubRegEnd == End)
      CoverSubreg = SubReg;
  }
  // If a covering subreg was found, shift all subregs so that the covering
  // subreg lands at offset zero and becomes the whole register.
  if (CoverSubreg != AMDGPU::NoSubRegister)
    return getRegClassWithShiftedSubregs(RC, Offset, End - Offset, CoverSubreg,
                                         SubRegs);

  // Otherwise find the subreg with the maximum required alignment and shift
  // everything to the rightmost position that still respects that alignment.
  unsigned MaxAlign = 0;
  for (auto [SubReg, SRI] : SubRegs)
    MaxAlign = std::max(MaxAlign, TRI->getSubRegAlignmentNumBits(RC, SubReg));

  unsigned FirstMaxAlignedSubRegOffset = std::numeric_limits<unsigned>::max();
  for (auto [SubReg, SRI] : SubRegs) {
    if (TRI->getSubRegAlignmentNumBits(RC, SubReg) != MaxAlign)
      continue;
    FirstMaxAlignedSubRegOffset =
        std::min(FirstMaxAlignedSubRegOffset, TRI->getSubRegIdxOffset(SubReg));
    if (FirstMaxAlignedSubRegOffset == Offset)
      break;
  }

  unsigned NewOffsetOfMaxAlignedSubReg =
      alignTo(FirstMaxAlignedSubRegOffset - Offset, MaxAlign);

  if (NewOffsetOfMaxAlignedSubReg > FirstMaxAlignedSubRegOffset)
    llvm_unreachable("misaligned subreg");

  unsigned RShift = FirstMaxAlignedSubRegOffset - NewOffsetOfMaxAlignedSubReg;
  return getRegClassWithShiftedSubregs(RC, RShift, End - RShift, 0, SubRegs);
}
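// A worked example of the logic above (a sketch; the concrete offsets are
// TableGen-defined, 32 bits per 32-bit subreg here): if a 1024-bit register
// is only used as sub4 (offset 128), sub5 (offset 160) and sub4_sub5
// (offset 128, size 64), then Offset = 128, End = 192 and sub4_sub5 covers
// the whole used span, so getRegClassWithShiftedSubregs is asked to shift
// everything right by 128 bits into a class of at least 192 - 128 = 64 bits:
// sub4 becomes sub0, sub5 becomes sub1, and sub4_sub5 becomes the register
// itself.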
void GCNRewritePartialRegUses::updateLiveIntervals(Register OldReg,
                                                   Register NewReg,
                                                   SubRegMap &SubRegs) const {
  if (!LIS->hasInterval(OldReg))
    return;

  auto &OldLI = LIS->getInterval(OldReg);
  auto &NewLI = LIS->createEmptyInterval(NewReg);

  auto &Allocator = LIS->getVNInfoAllocator();
  NewLI.setWeight(OldLI.weight());

  for (auto &SR : OldLI.subranges()) {
    auto I = find_if(SubRegs, [&](auto &P) {
      return SR.LaneMask == TRI->getSubRegIndexLaneMask(P.first);
    });

    if (I == SubRegs.end()) {
      // The subranges of the old interval may not exactly match the used
      // subregs (e.g. a subrange may exist for a subreg that is only covered
      // by larger uses). Recompute the whole new interval from scratch.
      LIS->removeInterval(OldReg);
      LIS->removeInterval(NewReg);
      LIS->createAndComputeVirtRegInterval(NewReg);
      return;
    }

    if (unsigned NewSubReg = I->second.SubReg)
      NewLI.createSubRangeFrom(Allocator,
                               TRI->getSubRegIndexLaneMask(NewSubReg), SR);
    else // This is the covering subreg: its subrange becomes the main range.
      NewLI.assign(SR, Allocator);
  }
  if (NewLI.empty())
    NewLI.assign(OldLI, Allocator);
  LIS->removeInterval(OldReg);
}
const TargetRegisterClass *
GCNRewritePartialRegUses::getOperandRegClass(MachineOperand &MO) const {
  MachineInstr *MI = MO.getParent();
  return TII->getRegClass(TII->get(MI->getOpcode()), MI->getOperandNo(&MO),
                          TRI, *MI->getParent()->getParent());
}
bool GCNRewritePartialRegUses::rewriteReg(Register Reg) const {
  auto Range = MRI->reg_nodbg_operands(Reg);
  // Bail out if the register is unused or if the whole register is used.
  if (Range.empty() || any_of(Range, [](MachineOperand &MO) {
        return MO.getSubReg() == AMDGPU::NoSubRegister; // Whole reg used. [1]
      }))
    return false;

  auto *RC = MRI->getRegClass(Reg);
  LLVM_DEBUG(dbgs() << "Try to rewrite partial reg " << printReg(Reg, TRI)
                    << ':' << TRI->getRegClassName(RC) << '\n');

  // Collect used subregs and constrain their classes by the operand classes.
  SubRegMap SubRegs;
  for (MachineOperand &MO : Range) {
    const unsigned SubReg = MO.getSubReg();
    assert(SubReg != AMDGPU::NoSubRegister); // Due to [1].
    const auto [I, Inserted] = SubRegs.try_emplace(SubReg);
    const TargetRegisterClass *&SubRegRC = I->second.RC;
    if (Inserted)
      SubRegRC = TRI->getSubRegisterClass(RC, SubReg);
    if (SubRegRC) {
      if (const TargetRegisterClass *OpDescRC = getOperandRegClass(MO)) {
        LLVM_DEBUG(dbgs() << "  " << TRI->getRegClassName(SubRegRC) << " & "
                          << TRI->getRegClassName(OpDescRC) << " = ");
        SubRegRC = TRI->getCommonSubClass(SubRegRC, OpDescRC);
      }
    }
    if (!SubRegRC)
      return false;
  }

  auto *NewRC = getMinSizeReg(RC, SubRegs);
  if (!NewRC)
    return false;

  Register NewReg = MRI->createVirtualRegister(NewRC);
  LLVM_DEBUG(dbgs() << "  Success " << printReg(Reg, TRI) << ':'
                    << TRI->getRegClassName(RC) << " -> "
                    << printReg(NewReg, TRI) << ':'
                    << TRI->getRegClassName(NewRC) << '\n');

  // Rewrite every operand to use NewReg with the shifted subreg index.
  for (auto &MO : make_early_inc_range(MRI->reg_operands(Reg))) {
    MO.setReg(NewReg);
    // Debug info may refer to the whole reg; leave such operands as-is.
    if (MO.isDebug() && MO.getSubReg() == 0)
      continue;
    unsigned SubReg = SubRegs[MO.getSubReg()].SubReg;
    MO.setSubReg(SubReg);
    if (SubReg == AMDGPU::NoSubRegister && MO.isDef())
      MO.setIsUndef(false);
  }

  if (LIS)
    updateLiveIntervals(Reg, NewReg, SubRegs);

  return true;
}
bool GCNRewritePartialRegUses::runOnMachineFunction(MachineFunction &MF) {
  MRI = &MF.getRegInfo();
  TRI = static_cast<const SIRegisterInfo *>(MRI->getTargetRegisterInfo());
  TII = MF.getSubtarget().getInstrInfo();
  auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
  LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
  bool Changed = false;
  for (size_t I = 0, E = MRI->getNumVirtRegs(); I < E; ++I)
    Changed |= rewriteReg(Register::index2VirtReg(I));
  return Changed;
}
char GCNRewritePartialRegUses::ID;

char &llvm::GCNRewritePartialRegUsesID = GCNRewritePartialRegUses::ID;

INITIALIZE_PASS_BEGIN(GCNRewritePartialRegUses, DEBUG_TYPE,
                      "Rewrite Partial Register Uses", false, false)
INITIALIZE_PASS_END(GCNRewritePartialRegUses, DEBUG_TYPE,
                    "Rewrite Partial Register Uses", false, false)
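// The pass is registered under the DEBUG_TYPE name above, so it can be
// exercised in isolation on MIR input; assuming the usual llc -run-pass
// plumbing, an invocation would look roughly like this (triple, CPU and file
// name are placeholders):
//
//   llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=rewrite-partial-reg-uses \
//       -verify-machineinstrs -o - input.mir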