#define DEBUG_TYPE "rewrite-partial-reg-uses"

namespace {

class GCNRewritePartialRegUses : public MachineFunctionPass {
public:
  static char ID;

  StringRef getPassName() const override {
    return "Rewrite Partial Register Uses";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

private:
  MachineRegisterInfo *MRI;
  const SIRegisterInfo *TRI;
  const TargetInstrInfo *TII;
  LiveIntervals *LIS;

  bool rewriteReg(Register Reg) const;

  /// Per-subreg info: the register class required for the value kept in the
  /// subreg and its new (right-shifted) index; AMDGPU::NoSubRegister means
  /// the subreg becomes the whole register.
  struct SubRegInfo {
    const TargetRegisterClass *RC;
    unsigned SubReg = AMDGPU::NoSubRegister;
    SubRegInfo(const TargetRegisterClass *RC_ = nullptr) : RC(RC_) {}
  };

  /// Map from an old subreg index to its SubRegInfo.
  using SubRegMap = SmallDenseMap<unsigned, SubRegInfo>;

  const TargetRegisterClass *getMinSizeReg(const TargetRegisterClass *RC,
                                           SubRegMap &SubRegs) const;
  const TargetRegisterClass *
  getRegClassWithShiftedSubregs(const TargetRegisterClass *RC, unsigned RShift,
                                unsigned RegNumBits, unsigned CoverSubregIdx,
                                SubRegMap &SubRegs) const;
  void updateLiveIntervals(Register OldReg, Register NewReg,
                           SubRegMap &SubRegs) const;
  const TargetRegisterClass *getOperandRegClass(MachineOperand &MO) const;
  unsigned shiftSubReg(unsigned SubReg, unsigned RShift) const;
  unsigned getSubReg(unsigned Offset, unsigned Size) const;
  const uint32_t *getSuperRegClassMask(const TargetRegisterClass *RC,
                                       unsigned SubRegIdx) const;
  const BitVector &
  getAllocatableAndAlignedRegClassMask(unsigned AlignNumBits) const;

  /// Caches used by the getters above (only the two referenced by name in the
  /// visible fragments are listed; the getSubReg cache is elided).
  mutable SmallDenseMap<std::pair<const TargetRegisterClass *, unsigned>,
                        const uint32_t *>
      SuperRegMasks;
  mutable SmallDenseMap<unsigned, BitVector> AllocatableAndAlignedRegClassMasks;
};

} // anonymous namespace
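
// getSubReg(Offset, Size): look up the target subregister index that covers
// the bit range [Offset, Offset + Size). Only the search loop survives in this
// excerpt; the elided lines memoize the result so the scan over all subreg
// indices runs at most once per (Offset, Size) pair.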
unsigned GCNRewritePartialRegUses::getSubReg(unsigned Offset,
                                             unsigned Size) const {
  // ... (cache lookup elided)
  for (unsigned Idx = 1, E = TRI->getNumSubRegIndices(); Idx < E; ++Idx) {
    // ... remember Idx if its offset and size match Offset and Size.
  }
  // ...
}
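
// shiftSubReg(SubReg, RShift): return the subreg index that spans the same
// number of bits as SubReg but starts RShift bits further to the right, or 0
// if no such index exists. The two-line body is reconstructed from the
// surrounding helpers and may differ cosmetically from the upstream source.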
unsigned GCNRewritePartialRegUses::shiftSubReg(unsigned SubReg,
                                               unsigned RShift) const {
  unsigned Offset = TRI->getSubRegIdxOffset(SubReg) - RShift;
  return getSubReg(Offset, TRI->getSubRegIdxSize(SubReg));
}
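
// getSuperRegClassMask(RC, SubRegIdx): return the register-class bit mask of
// all classes whose SubRegIdx subregister projects into RC, caching the answer
// per (RC, SubRegIdx) pair. The SuperRegClassIterator loop is the standard way
// to enumerate these masks; its use here is inferred from the RCI fragments.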
const uint32_t *
GCNRewritePartialRegUses::getSuperRegClassMask(const TargetRegisterClass *RC,
                                               unsigned SubRegIdx) const {
  auto [I, Inserted] = SuperRegMasks.try_emplace({RC, SubRegIdx}, nullptr);
  if (Inserted)
    for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI)
      if (RCI.getSubReg() == SubRegIdx) {
        I->second = RCI.getMask();
        break;
      }
  return I->second;
}
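
// getAllocatableAndAlignedRegClassMask(AlignNumBits): build (and cache) a
// BitVector with one bit per register class, set for every allocatable class
// whose registers satisfy the requested alignment. The alignment test itself
// is elided in this excerpt.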
const BitVector &GCNRewritePartialRegUses::getAllocatableAndAlignedRegClassMask(
    unsigned AlignNumBits) const {
  auto [I, Inserted] =
      AllocatableAndAlignedRegClassMasks.try_emplace(AlignNumBits);
  if (Inserted) {
    BitVector &BV = I->second;
    BV.resize(TRI->getNumRegClasses());
    for (unsigned ClassID = 0; ClassID < TRI->getNumRegClasses(); ++ClassID) {
      auto *RC = TRI->getRegClass(ClassID);
      // ... set BV[ClassID] when RC is allocatable and its registers are
      //     aligned at AlignNumBits.
    }
  }
  return I->second;
}
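
// getRegClassWithShiftedSubregs(RC, RShift, RegNumBits, CoverSubregIdx,
// SubRegs): starting from all allocatable classes with RC's alignment,
// intersect away every class that cannot provide each used subreg shifted
// right by RShift (with the per-subreg class each value requires), then pick
// the class with the smallest registers that still hold at least RegNumBits.
// New subreg indexes are written back into SubRegs. Parts of the body not
// present in the fragments (the !SubRegRC fallback and the debug output around
// it) follow the visible structure and are best-effort reconstructions.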
const TargetRegisterClass *
GCNRewritePartialRegUses::getRegClassWithShiftedSubregs(
    const TargetRegisterClass *RC, unsigned RShift, unsigned RegNumBits,
    unsigned CoverSubregIdx, SubRegMap &SubRegs) const {

  unsigned RCAlign = TRI->getRegClassAlignmentNumBits(RC);
  LLVM_DEBUG(dbgs() << "  Shift " << RShift << ", reg align " << RCAlign
                    << '\n');

  BitVector ClassMask(getAllocatableAndAlignedRegClassMask(RCAlign));
  for (auto &[OldSubReg, SRI] : SubRegs) {
    auto &[SubRegRC, NewSubReg] = SRI;

    // No class constraint was collected from the operands for this subreg:
    // fall back to the subregister class of RC itself.
    if (!SubRegRC)
      SubRegRC = TRI->getSubRegisterClass(RC, OldSubReg);
    if (!SubRegRC)
      return nullptr;

    LLVM_DEBUG(dbgs() << "  " << TRI->getSubRegIndexName(OldSubReg) << ':'
                      << TRI->getRegClassName(SubRegRC)
                      << (SubRegRC->isAllocatable() ? "" : " not alloc")
                      << " -> ");

    if (OldSubReg == CoverSubregIdx) {
      // The covering subreg becomes the whole register.
      NewSubReg = AMDGPU::NoSubRegister;
    } else {
      NewSubReg = shiftSubReg(OldSubReg, RShift);
      if (!NewSubReg)
        return nullptr; // No subreg index exists at the shifted position.
    }

    // Keep only the classes that have NewSubReg projecting into SubRegRC (or,
    // for the covering subreg, the subclasses of SubRegRC itself).
    const uint32_t *Mask = NewSubReg ? getSuperRegClassMask(SubRegRC, NewSubReg)
                                     : SubRegRC->getSubClassMask();
    if (!Mask)
      llvm_unreachable("no register class mask?");

    ClassMask.clearBitsNotInMask(Mask);
    LLVM_DEBUG(dbgs() << ", num regclasses " << ClassMask.count() << '\n');
  }

  // Among the remaining classes pick the one with the smallest registers that
  // can still hold RegNumBits bits.
  const TargetRegisterClass *MinRC = nullptr;
  unsigned MinNumBits = std::numeric_limits<unsigned>::max();
  for (unsigned ClassID : ClassMask.set_bits()) {
    auto *RC = TRI->getRegClass(ClassID);
    unsigned NumBits = TRI->getRegSizeInBits(*RC);
    if (NumBits < MinNumBits && NumBits >= RegNumBits) {
      MinNumBits = NumBits;
      MinRC = RC;
    }
    if (MinNumBits == RegNumBits)
      break;
  }

#ifndef NDEBUG
  if (MinRC)
    for (auto [SubReg, SRI] : SubRegs)
      assert(MinRC == TRI->getSubClassWithSubReg(MinRC, SRI.SubReg));
#endif

  // With a zero shift and an unchanged class there is no improvement to report.
  return (MinRC != RC || RShift != 0) ? MinRC : nullptr;
}
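
// getMinSizeReg(RC, SubRegs): decide how far the used subregs can be shifted
// to the right. If one used subreg covers all the others, it is shifted to
// offset zero and becomes the whole register; otherwise the shift is limited
// by the most strictly aligned used subreg so that every shifted subreg index
// still exists. The actual register class search is delegated to
// getRegClassWithShiftedSubregs.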
const TargetRegisterClass *
GCNRewritePartialRegUses::getMinSizeReg(const TargetRegisterClass *RC,
                                        SubRegMap &SubRegs) const {
  unsigned CoverSubreg = AMDGPU::NoSubRegister;
  unsigned Offset = std::numeric_limits<unsigned>::max();
  unsigned End = 0;
  for (auto [SubReg, SRI] : SubRegs) {
    unsigned SubRegOffset = TRI->getSubRegIdxOffset(SubReg);
    unsigned SubRegEnd = SubRegOffset + TRI->getSubRegIdxSize(SubReg);
    if (SubRegOffset < Offset) {
      Offset = SubRegOffset;
      CoverSubreg = AMDGPU::NoSubRegister;
    }
    if (SubRegEnd > End) {
      End = SubRegEnd;
      CoverSubreg = AMDGPU::NoSubRegister;
    }
    if (SubRegOffset == Offset && SubRegEnd == End)
      CoverSubreg = SubReg;
  }

  // If a covering subreg was found, shift everything so that it lands at
  // offset zero, i.e. becomes the whole register.
  if (CoverSubreg != AMDGPU::NoSubRegister)
    return getRegClassWithShiftedSubregs(RC, Offset, End - Offset, CoverSubreg,
                                         SubRegs);

  // Otherwise the shift is limited by the used subreg with the largest
  // alignment requirement: find that alignment and the first offset at which
  // it occurs.
  unsigned MaxAlign = 0;
  for (auto [SubReg, SRI] : SubRegs)
    MaxAlign = std::max(MaxAlign, TRI->getSubRegAlignmentNumBits(RC, SubReg));

  unsigned FirstMaxAlignedSubRegOffset = std::numeric_limits<unsigned>::max();
  for (auto [SubReg, SRI] : SubRegs) {
    if (TRI->getSubRegAlignmentNumBits(RC, SubReg) != MaxAlign)
      continue;
    FirstMaxAlignedSubRegOffset =
        std::min(FirstMaxAlignedSubRegOffset, TRI->getSubRegIdxOffset(SubReg));
    if (FirstMaxAlignedSubRegOffset == Offset)
      break;
  }

  unsigned NewOffsetOfMaxAlignedSubReg =
      alignTo(FirstMaxAlignedSubRegOffset - Offset, MaxAlign);
  if (NewOffsetOfMaxAlignedSubReg > FirstMaxAlignedSubRegOffset)
    llvm_unreachable("misaligned subreg");

  unsigned RShift = FirstMaxAlignedSubRegOffset - NewOffsetOfMaxAlignedSubReg;
  return getRegClassWithShiftedSubregs(RC, RShift, End - RShift, 0, SubRegs);
}
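
// updateLiveIntervals(OldReg, NewReg, SubRegs): transfer the live interval of
// OldReg to NewReg, remapping each subrange's lane mask from the old subreg
// index to the new one; the subrange matching the covering subreg becomes the
// main range. If the old subranges cannot be matched to the used subregs, the
// interval for NewReg is simply recomputed from scratch. Lines elided from the
// fragments are marked with "// ..." below.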
void GCNRewritePartialRegUses::updateLiveIntervals(Register OldReg,
                                                   Register NewReg,
                                                   SubRegMap &SubRegs) const {
  if (!LIS->hasInterval(OldReg))
    return;

  auto &OldLI = LIS->getInterval(OldReg);
  auto &NewLI = LIS->createEmptyInterval(NewReg);

  auto &Allocator = LIS->getVNInfoAllocator();
  NewLI.setWeight(OldLI.weight());

  for (auto &SR : OldLI.subranges()) {
    // Find the used subreg whose lane mask matches this subrange.
    auto I = find_if(SubRegs, [&](auto &P) {
      return SR.LaneMask == TRI->getSubRegIndexLaneMask(P.first);
    });

    if (I == SubRegs.end()) {
      // The old subranges don't line up with the used subregs (e.g. a used
      // subreg is split over several subranges with different lifetimes):
      // recreate the interval for NewReg from scratch.
      LIS->removeInterval(OldReg);
      LIS->removeInterval(NewReg);
      LIS->createAndComputeVirtRegInterval(NewReg);
      return;
    }

    if (unsigned NewSubReg = I->second.SubReg)
      NewLI.createSubRangeFrom(Allocator,
                               TRI->getSubRegIndexLaneMask(NewSubReg), SR);
    else // This is the covering subreg: it becomes the main range.
      NewLI.assign(SR, Allocator);
    // ...
  }

  // ... elided guard: the whole interval is copied only when no subranges
  //     were transferred above.
  NewLI.assign(OldLI, Allocator);

  // ...
  LIS->removeInterval(OldReg);
}
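
// getOperandRegClass(MO): the register class the parent instruction's operand
// description expects for MO; used to constrain the class required for each
// subreg value.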
const TargetRegisterClass *
GCNRewritePartialRegUses::getOperandRegClass(MachineOperand &MO) const {
  MachineInstr *MI = MO.getParent();
  return TII->getRegClass(TII->get(MI->getOpcode()), MI->getOperandNo(&MO),
                          TRI, *MI->getParent()->getParent());
}
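
// rewriteReg(Reg): the driver for a single virtual register. It collects the
// subregs through which Reg is used together with the register class each
// subreg value must have, asks getMinSizeReg for a smaller class with shifted
// subregs, and if one exists creates the replacement register and updates the
// operands and live intervals. The operand-rewriting loop and a few guards are
// elided below ("// ...") because they are not part of the visible fragments.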
bool GCNRewritePartialRegUses::rewriteReg(Register Reg) const {
  auto Range = MRI->reg_nodbg_operands(Reg);
  // ... give up unless every non-debug operand accesses Reg through a subreg,
  //     i.e. bail out if MO.getSubReg() == AMDGPU::NoSubRegister for some MO.

  auto *RC = MRI->getRegClass(Reg);
  LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
                    << ':' << TRI->getRegClassName(RC) << '\n');

  // Collect the used subregs and the register class constraints imposed on
  // them by the operand descriptions.
  SubRegMap SubRegs;
  for (MachineOperand &MO : Range) {
    auto *OpDescRC = getOperandRegClass(MO);
    auto [I, Inserted] = SubRegs.try_emplace(MO.getSubReg(), OpDescRC);
    if (!Inserted && OpDescRC) {
      SubRegInfo &SRI = I->second;
      SRI.RC = SRI.RC ? TRI->getCommonSubClass(SRI.RC, OpDescRC) : OpDescRC;
      // ... give up if no common subclass exists.
    }
  }

  auto *NewRC = getMinSizeReg(RC, SubRegs);
  // ... return false if no smaller or shifted class was found.

  Register NewReg = MRI->createVirtualRegister(NewRC);
  LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << ':'
                    << TRI->getRegClassName(RC) << " -> "
                    << printReg(NewReg, TRI) << ':'
                    << TRI->getRegClassName(NewRC) << '\n');

  // ... rewrite each operand of Reg to NewReg with the shifted subreg index.

  if (LIS)
    updateLiveIntervals(Reg, NewReg, SubRegs);

  return true;
}
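
// runOnMachineFunction: cache the target hooks, then try to rewrite every
// virtual register in the function. The initialization of MRI/TRI/TII shown
// here is reconstructed from the referenced accessors and may differ in detail
// from the upstream file.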
bool GCNRewritePartialRegUses::runOnMachineFunction(MachineFunction &MF) {
  MRI = &MF.getRegInfo();
  TRI = static_cast<const SIRegisterInfo *>(MRI->getTargetRegisterInfo());
  TII = MF.getSubtarget().getInstrInfo();
  LIS = getAnalysisIfAvailable<LiveIntervals>();
  bool Changed = false;
  for (size_t I = 0, E = MRI->getNumVirtRegs(); I < E; ++I) {
    Changed |= rewriteReg(Register::index2VirtReg(I));
  }
  return Changed;
}

char GCNRewritePartialRegUses::ID;

char &llvm::GCNRewritePartialRegUsesID = GCNRewritePartialRegUses::ID;

INITIALIZE_PASS_BEGIN(GCNRewritePartialRegUses, DEBUG_TYPE,
                      "Rewrite Partial Register Uses", false, false)
INITIALIZE_PASS_END(GCNRewritePartialRegUses, DEBUG_TYPE,
                    "Rewrite Partial Register Uses", false, false)