43#define DEBUG_TYPE "rewrite-partial-reg-uses"
53 return "Rewrite Partial Register Uses";
84 unsigned SubReg = AMDGPU::NoSubRegister;
96 SubRegMap &SubRegs)
const;
115 unsigned RegNumBits,
unsigned CoverSubregIdx,
116 SubRegMap &SubRegs)
const;
121 SubRegMap &SubRegs)
const;
130 unsigned shiftSubReg(
unsigned SubReg,
unsigned RShift)
const;
134 unsigned getSubReg(
unsigned Offset,
unsigned Size)
const;
142 unsigned SubRegIdx)
const;
153 getAllocatableAndAlignedRegClassMask(
unsigned AlignNumBits)
const;
163unsigned GCNRewritePartialRegUses::getSubReg(
unsigned Offset,
164 unsigned Size)
const {
167 for (
unsigned Idx = 1, E =
TRI->getNumSubRegIndices();
Idx < E; ++
Idx) {
178unsigned GCNRewritePartialRegUses::shiftSubReg(
unsigned SubReg,
179 unsigned RShift)
const {
186 unsigned SubRegIdx)
const {
188 SuperRegMasks.try_emplace({RC, SubRegIdx},
nullptr);
191 if (RCI.getSubReg() == SubRegIdx) {
192 I->second = RCI.getMask();
200const BitVector &GCNRewritePartialRegUses::getAllocatableAndAlignedRegClassMask(
201 unsigned AlignNumBits)
const {
203 AllocatableAndAlignedRegClassMasks.try_emplace(AlignNumBits);
207 for (
unsigned ClassID = 0; ClassID <
TRI->getNumRegClasses(); ++ClassID) {
208 auto *RC =
TRI->getRegClass(ClassID);
217GCNRewritePartialRegUses::getRegClassWithShiftedSubregs(
219 unsigned CoverSubregIdx, SubRegMap &SubRegs)
const {
221 unsigned RCAlign =
TRI->getRegClassAlignmentNumBits(RC);
222 LLVM_DEBUG(
dbgs() <<
" Shift " << RShift <<
", reg align " << RCAlign
225 BitVector ClassMask(getAllocatableAndAlignedRegClassMask(RCAlign));
226 for (
auto &[OldSubReg, SRI] : SubRegs) {
227 auto &[SubRegRC, NewSubReg] = SRI;
231 <<
TRI->getRegClassName(SubRegRC)
232 << (SubRegRC->isAllocatable() ?
"" :
" not alloc")
235 if (OldSubReg == CoverSubregIdx) {
237 assert(SubRegRC->isAllocatable());
238 NewSubReg = AMDGPU::NoSubRegister;
241 NewSubReg = shiftSubReg(OldSubReg, RShift);
249 const uint32_t *
Mask = NewSubReg ? getSuperRegClassMask(SubRegRC, NewSubReg)
250 : SubRegRC->getSubClassMask();
254 ClassMask.clearBitsNotInMask(Mask);
257 LLVM_DEBUG(
dbgs() <<
", num regclasses " << ClassMask.count() <<
'\n');
267 unsigned MinNumBits = std::numeric_limits<unsigned>::max();
268 for (
unsigned ClassID : ClassMask.set_bits()) {
269 auto *RC =
TRI->getRegClass(ClassID);
270 unsigned NumBits =
TRI->getRegSizeInBits(*RC);
271 if (NumBits < MinNumBits && NumBits >= RegNumBits) {
272 MinNumBits = NumBits;
275 if (MinNumBits == RegNumBits)
281 for (
auto [
SubReg, SRI] : SubRegs)
283 assert(MinRC ==
TRI->getSubClassWithSubReg(MinRC, SRI.SubReg));
288 return (MinRC != RC || RShift != 0) ? MinRC :
nullptr;
293 SubRegMap &SubRegs)
const {
294 unsigned CoverSubreg = AMDGPU::NoSubRegister;
295 unsigned Offset = std::numeric_limits<unsigned>::max();
297 for (
auto [
SubReg, SRI] : SubRegs) {
298 unsigned SubRegOffset =
TRI->getSubRegIdxOffset(
SubReg);
299 unsigned SubRegEnd = SubRegOffset +
TRI->getSubRegIdxSize(
SubReg);
300 if (SubRegOffset <
Offset) {
302 CoverSubreg = AMDGPU::NoSubRegister;
304 if (SubRegEnd >
End) {
306 CoverSubreg = AMDGPU::NoSubRegister;
308 if (SubRegOffset ==
Offset && SubRegEnd ==
End)
313 if (CoverSubreg != AMDGPU::NoSubRegister)
314 return getRegClassWithShiftedSubregs(RC,
Offset,
End -
Offset, CoverSubreg,
320 unsigned MaxAlign = 0;
321 for (
auto [
SubReg, SRI] : SubRegs)
322 MaxAlign = std::max(MaxAlign,
TRI->getSubRegAlignmentNumBits(RC,
SubReg));
324 unsigned FirstMaxAlignedSubRegOffset = std::numeric_limits<unsigned>::max();
325 for (
auto [
SubReg, SRI] : SubRegs) {
326 if (
TRI->getSubRegAlignmentNumBits(RC,
SubReg) != MaxAlign)
328 FirstMaxAlignedSubRegOffset =
329 std::min(FirstMaxAlignedSubRegOffset,
TRI->getSubRegIdxOffset(
SubReg));
330 if (FirstMaxAlignedSubRegOffset ==
Offset)
334 unsigned NewOffsetOfMaxAlignedSubReg =
337 if (NewOffsetOfMaxAlignedSubReg > FirstMaxAlignedSubRegOffset)
340 unsigned RShift = FirstMaxAlignedSubRegOffset - NewOffsetOfMaxAlignedSubReg;
341 return getRegClassWithShiftedSubregs(RC, RShift,
End - RShift, 0, SubRegs);
346void GCNRewritePartialRegUses::updateLiveIntervals(
Register OldReg,
348 SubRegMap &SubRegs)
const {
349 if (!LIS->hasInterval(OldReg))
352 auto &OldLI = LIS->getInterval(OldReg);
353 auto &NewLI = LIS->createEmptyInterval(NewReg);
355 auto &
Allocator = LIS->getVNInfoAllocator();
356 NewLI.setWeight(OldLI.weight());
358 for (
auto &SR : OldLI.subranges()) {
360 return SR.LaneMask ==
TRI->getSubRegIndexLaneMask(
P.first);
363 if (
I == SubRegs.end()) {
382 LIS->removeInterval(OldReg);
383 LIS->removeInterval(NewReg);
384 LIS->createAndComputeVirtRegInterval(NewReg);
388 if (
unsigned NewSubReg =
I->second.SubReg)
389 NewLI.createSubRangeFrom(Allocator,
390 TRI->getSubRegIndexLaneMask(NewSubReg), SR);
392 NewLI.assign(SR, Allocator);
397 NewLI.assign(OldLI, Allocator);
399 LIS->removeInterval(OldReg);
403GCNRewritePartialRegUses::getOperandRegClass(
MachineOperand &MO)
const {
405 return TII->getRegClass(
TII->get(
MI->getOpcode()),
MI->getOperandNo(&MO),
TRI,
406 *
MI->getParent()->getParent());
409bool GCNRewritePartialRegUses::rewriteReg(
Register Reg)
const {
410 auto Range =
MRI->reg_nodbg_operands(Reg);
412 return MO.
getSubReg() == AMDGPU::NoSubRegister;
416 auto *RC =
MRI->getRegClass(Reg);
418 <<
':' <<
TRI->getRegClassName(RC) <<
'\n');
432 SubRegRC =
TRI->getSubRegisterClass(RC,
SubReg);
437 <<
TRI->getRegClassName(OpDescRC) <<
" = ");
438 SubRegRC =
TRI->getCommonSubClass(SubRegRC, OpDescRC);
449 auto *NewRC = getMinSizeReg(RC, SubRegs);
455 Register NewReg =
MRI->createVirtualRegister(NewRC);
457 <<
TRI->getRegClassName(RC) <<
" -> "
459 <<
TRI->getRegClassName(NewRC) <<
'\n');
474 updateLiveIntervals(Reg, NewReg, SubRegs);
479bool GCNRewritePartialRegUses::runOnMachineFunction(
MachineFunction &MF) {
483 auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
484 LIS = LISWrapper ? &LISWrapper->getLIS() :
nullptr;
485 bool Changed =
false;
486 for (
size_t I = 0, E =
MRI->getNumVirtRegs();
I < E; ++
I) {
492char GCNRewritePartialRegUses::ID;
497 "Rewrite Partial Register Uses",
false,
false)
unsigned const MachineRegisterInfo * MRI
Provides AMDGPU specific target descriptions.
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
Rewrite Partial Register Uses
const HexagonInstrInfo * TII
unsigned const TargetRegisterInfo * TRI
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Interface definition for SIRegisterInfo.
Represent the analysis usage information of a pass.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformation or analysis.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setIsUndef(bool Val=true)
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Wrapper class representing virtual and physical registers.
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
bool isValid() const
Returns true if this iterator is still pointing at a valid entry.
TargetInstrInfo - Interface to description of machine instruction set.
bool isAllocatable() const
Return true if this register class may be used to create virtual registers.
virtual const TargetInstrInfo * getInstrInfo() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
char & GCNRewritePartialRegUsesID
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.