#define DEBUG_TYPE "si-load-store-opt"
  static bool offsetsCanBeCombined(unsigned Offset0, unsigned Offset1,
                                   unsigned EltSize);

  void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx);

  SILoadStoreOptimizer()
      : MachineFunctionPass(ID), TII(nullptr), TRI(nullptr), MRI(nullptr),
        LIS(nullptr) {}

  const char *getPassName() const override {
    return "SI Load / Store Optimizer";
  }
119 "SI Load / Store Optimizer",
false,
false)
126 char SILoadStoreOptimizer::
ID = 0;
131 return new SILoadStoreOptimizer(TM);
bool SILoadStoreOptimizer::offsetsCanBeCombined(unsigned Offset0,
                                                unsigned Offset1,
                                                unsigned Size) {
  if (Offset0 == Offset1)
    return false;

  // The offsets must be aligned to the access size.
  if ((Offset0 % Size != 0) || (Offset1 % Size != 0))
    return false;

  unsigned EltOffset0 = Offset0 / Size;
  unsigned EltOffset1 = Offset1 / Size;

  // Accept if both element offsets fit the 8-bit offset fields; otherwise the
  // st64 forms can still be used when both are multiples of 64 elements.
  if (isUInt<8>(EltOffset0) && isUInt<8>(EltOffset1))
    return true;

  if ((EltOffset0 % 64 != 0) || (EltOffset1 % 64 != 0))
    return false;

  return isUInt<8>(EltOffset0 / 64) && isUInt<8>(EltOffset1 / 64);
}
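
// --- Illustrative sketch (not part of the pass) ----------------------------
// The 8-bit offset fields on the ds_read2/ds_write2 family are expressed in
// units of the element size, and the *st64 variants scale them by a further
// factor of 64; that mapping is what offsetsCanBeCombined checks against.
// The self-contained program below makes the arithmetic concrete.
// decodeDS2Offset is an invented helper name, not an LLVM API.
#include <cassert>

// EltSize is 4 for the _b32 forms and 8 for the _b64 forms; UseST64 selects
// the *st64 variants, which stride in units of 64 elements.
static unsigned decodeDS2Offset(unsigned Field, unsigned EltSize, bool UseST64) {
  assert(Field <= 0xff && "offset fields are 8 bits wide");
  return Field * EltSize * (UseST64 ? 64 : 1);
}

int main() {
  // ds_read2_b32 offset0:4 offset1:8 addresses bytes 16 and 32, i.e. the pair
  // of loads that offsetsCanBeCombined accepts with Size == 4.
  assert(decodeDS2Offset(4, 4, false) == 16);
  assert(decodeDS2Offset(8, 4, false) == 32);
  // ds_read2st64_b32 reaches byte 2048 with the same 8-bit field value of 8.
  assert(decodeDS2Offset(8, 4, true) == 2048);
  return 0;
}
// ----------------------------------------------------------------------------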
// In SILoadStoreOptimizer::findMatchingDSInst():
  if (MBBI->getOpcode() != I->getOpcode())
    return E;

  // Don't merge with an ordered or volatile access.
  if (MBBI->hasOrderedMemoryRef())
    return E;

  // The two instructions must use the same base pointer, including any
  // subregister.
  if (AddrReg0.getReg() == AddrReg1.getReg() &&
      AddrReg0.getSubReg() == AddrReg1.getSubReg()) {
    int OffsetIdx = AMDGPU::getNamedOperandIdx(I->getOpcode(),
                                               AMDGPU::OpName::offset);
    unsigned Offset0 = I->getOperand(OffsetIdx).getImm() & 0xffff;
    unsigned Offset1 = MBBI->getOperand(OffsetIdx).getImm() & 0xffff;

    // Both offsets must be encodable in a single read2/write2.
    if (offsetsCanBeCombined(Offset0, Offset1, EltSize))
      return MBBI;
  }

  return E;
}
void SILoadStoreOptimizer::updateRegDefsUses(unsigned SrcReg, unsigned DstReg,
                                             unsigned SubIdx) {
  // Rewrite every def and use of SrcReg as the SubIdx subregister of DstReg.
  for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg),
       E = MRI->reg_end(); I != E; ) {
    MachineOperand &O = *I;
    ++I;
    O.substVirtReg(DstReg, SubIdx, *TRI);
  }
}
// In SILoadStoreOptimizer::mergeRead2Pair():
  const MachineOperand *AddrReg = TII->getNamedOperand(*I, AMDGPU::OpName::addr);
  const MachineOperand *Dest0 = TII->getNamedOperand(*I, AMDGPU::OpName::vdst);
  const MachineOperand *Dest1 =
      TII->getNamedOperand(*Paired, AMDGPU::OpName::vdst);

  unsigned Offset0 =
      TII->getNamedOperand(*I, AMDGPU::OpName::offset)->getImm() & 0xffff;
  unsigned Offset1 =
      TII->getNamedOperand(*Paired, AMDGPU::OpName::offset)->getImm() & 0xffff;

  unsigned NewOffset0 = Offset0 / EltSize;
  unsigned NewOffset1 = Offset1 / EltSize;
  unsigned Opc = (EltSize == 4) ? AMDGPU::DS_READ2_B32 : AMDGPU::DS_READ2_B64;

  // Prefer the st64 form whenever both offsets are multiples of 64 elements.
  bool UseST64 = (NewOffset0 % 64 == 0) && (NewOffset1 % 64 == 0);
  if (UseST64) {
    NewOffset0 /= 64;
    NewOffset1 /= 64;
    Opc = (EltSize == 4) ? AMDGPU::DS_READ2ST64_B32 : AMDGPU::DS_READ2ST64_B64;
  }

  assert((isUInt<8>(NewOffset0) && isUInt<8>(NewOffset1)) &&
         (NewOffset0 != NewOffset1) &&
         "Computed offset doesn't fit");

  const MCInstrDesc &Read2Desc = TII->get(Opc);
  const TargetRegisterClass *SuperRC =
      (EltSize == 4) ? &AMDGPU::VReg_64RegClass : &AMDGPU::VReg_128RegClass;
  unsigned DestReg = MRI->createVirtualRegister(SuperRC);

  MachineInstrBuilder Read2 =
      BuildMI(*MBB, I, DL, Read2Desc, DestReg)
          .addOperand(*AddrReg)     // base address
          .addImm(NewOffset0)       // offset0
          .addImm(NewOffset1)       // offset1
          .addImm(0)                // gds
          .addMemOperand(*I->memoperands_begin())
          .addMemOperand(*Paired->memoperands_begin());

  unsigned SubRegIdx0 = (EltSize == 4) ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
  unsigned SubRegIdx1 = (EltSize == 4) ? AMDGPU::sub1 : AMDGPU::sub2_sub3;

  // Copy each half of the wide result back into the original destinations.
  MachineInstr *Copy0 = BuildMI(*MBB, I, DL, CopyDesc)
                            .addOperand(*Dest0)
                            .addReg(DestReg, 0, SubRegIdx0);
  MachineInstr *Copy1 = BuildMI(*MBB, I, DL, CopyDesc)
                            .addOperand(*Dest1)
                            .addReg(DestReg, RegState::Kill, SubRegIdx1);

  LIS->InsertMachineInstrInMaps(Read2);

  // Note whether the M0 live range currently ends at the paired load, so it
  // can be extended to cover the new read2.
  SlotIndex PairedIndex = LIS->getInstructionIndex(Paired);
  bool UpdateM0Range = M0Segment->end == PairedIndex.getRegSlot();

  // The copies inherit the slots of the two original loads.
  LIS->ReplaceMachineInstrInMaps(I, Copy0);
  LIS->ReplaceMachineInstrInMaps(Paired, Copy1);

  I->eraseFromParent();
  Paired->eraseFromParent();

  LiveInterval &AddrRegLI = LIS->getInterval(AddrReg->getReg());
  LIS->shrinkToUses(&AddrRegLI);

  LIS->createAndComputeVirtRegInterval(DestReg);

  if (UpdateM0Range) {
    SlotIndex Read2Index = LIS->getInstructionIndex(Read2);
    M0Segment->end = Read2Index.getRegSlot();
  }

  DEBUG(dbgs() << "Inserted read2: " << *Read2 << '\n');
  return Read2.getInstr();
}
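
// --- Illustrative sketch (not part of the pass) ----------------------------
// The merged load defines one wider register (VReg_64 for a b32 pair,
// VReg_128 for a b64 pair) and the two original destinations become
// subregister copies out of it: sub0/sub1 for 4-byte elements,
// sub0_sub1/sub2_sub3 for 8-byte elements. The self-contained model below
// shows that lane split; Lane, SuperReg and splitReadPair are invented names.
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

using Lane = std::uint32_t;             // one 32-bit VGPR lane
using SuperReg = std::vector<Lane>;     // VReg_64 = 2 lanes, VReg_128 = 4 lanes

// Returns the lanes feeding the first and second original destinations.
static std::pair<SuperReg, SuperReg> splitReadPair(const SuperReg &Wide,
                                                   unsigned EltSize) {
  unsigned LanesPerElt = EltSize / 4;   // 1 for b32, 2 for b64
  assert(Wide.size() == 2 * LanesPerElt && "unexpected super-register width");
  SuperReg Lo(Wide.begin(), Wide.begin() + LanesPerElt);  // sub0 / sub0_sub1
  SuperReg Hi(Wide.begin() + LanesPerElt, Wide.end());    // sub1 / sub2_sub3
  return {Lo, Hi};
}

int main() {
  // A b64 pair loads four lanes; the first result gets lanes 0-1, the second
  // lanes 2-3, matching the sub0_sub1/sub2_sub3 indices used above.
  SuperReg Wide = {0x11, 0x22, 0x33, 0x44};
  std::pair<SuperReg, SuperReg> Halves = splitReadPair(Wide, 8);
  assert((Halves.first == SuperReg{0x11, 0x22}));
  assert((Halves.second == SuperReg{0x33, 0x44}));
  return 0;
}
// ----------------------------------------------------------------------------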
// In SILoadStoreOptimizer::mergeWrite2Pair():
  const MachineOperand *Addr = TII->getNamedOperand(*I, AMDGPU::OpName::addr);
  const MachineOperand *Data0 = TII->getNamedOperand(*I, AMDGPU::OpName::data0);
  const MachineOperand *Data1 =
      TII->getNamedOperand(*Paired, AMDGPU::OpName::data0);

  unsigned Offset0 =
      TII->getNamedOperand(*I, AMDGPU::OpName::offset)->getImm() & 0xffff;
  unsigned Offset1 =
      TII->getNamedOperand(*Paired, AMDGPU::OpName::offset)->getImm() & 0xffff;

  unsigned NewOffset0 = Offset0 / EltSize;
  unsigned NewOffset1 = Offset1 / EltSize;
  unsigned Opc = (EltSize == 4) ? AMDGPU::DS_WRITE2_B32 : AMDGPU::DS_WRITE2_B64;

  // Prefer the st64 form whenever both offsets are multiples of 64 elements.
  bool UseST64 = (NewOffset0 % 64 == 0) && (NewOffset1 % 64 == 0);
  if (UseST64) {
    NewOffset0 /= 64;
    NewOffset1 /= 64;
    Opc = (EltSize == 4) ? AMDGPU::DS_WRITE2ST64_B32
                         : AMDGPU::DS_WRITE2ST64_B64;
  }

  assert((isUInt<8>(NewOffset0) && isUInt<8>(NewOffset1)) &&
         (NewOffset0 != NewOffset1) &&
         "Computed offset doesn't fit");

  // Note whether the M0 live range currently ends at the paired store, so it
  // can be extended to cover the new write2.
  SlotIndex PairedIndex = LIS->getInstructionIndex(Paired);
  bool UpdateM0Range = M0Segment->end == PairedIndex.getRegSlot();

  const MCInstrDesc &Write2Desc = TII->get(Opc);
  MachineInstrBuilder Write2 =
      BuildMI(*MBB, I, DL, Write2Desc)
          .addOperand(*Addr)        // base address
          .addOperand(*Data0)       // data0
          .addOperand(*Data1)       // data1
          .addImm(NewOffset0)       // offset0
          .addImm(NewOffset1)       // offset1
          .addImm(0)                // gds
          .addMemOperand(*I->memoperands_begin())
          .addMemOperand(*Paired->memoperands_begin());

  // Both original stores are replaced by the single write2.
  LIS->RemoveMachineInstrFromMaps(I);
  LIS->RemoveMachineInstrFromMaps(Paired);
  I->eraseFromParent();
  Paired->eraseFromParent();

  // Recompute the intervals of the registers the new instruction touches.
  LIS->repairIntervalsInRange(MBB, Write2, Write2, OrigRegs);

  if (UpdateM0Range) {
    SlotIndex Write2Index = LIS->getInstructionIndex(Write2);
    M0Segment->end = Write2Index.getRegSlot();
  }

  DEBUG(dbgs() << "Inserted write2 inst: " << *Write2 << '\n');
  return Write2.getInstr();
}
// In SILoadStoreOptimizer::optimizeBlock():
  bool Modified = false;

  // Inside the per-instruction loop over the block:
    if (Opc == AMDGPU::DS_READ_B32 || Opc == AMDGPU::DS_READ_B64) {
      unsigned Size = (Opc == AMDGPU::DS_READ_B64) ? 8 : 4;
      MachineBasicBlock::iterator Match = findMatchingDSInst(I, Size);
      if (Match != E) {
        Modified = true;
        I = mergeRead2Pair(I, Match, Size);
      }
    } else if (Opc == AMDGPU::DS_WRITE_B32 || Opc == AMDGPU::DS_WRITE_B64) {
      unsigned Size = (Opc == AMDGPU::DS_WRITE_B64) ? 8 : 4;
      MachineBasicBlock::iterator Match = findMatchingDSInst(I, Size);
      if (Match != E) {
        Modified = true;
        I = mergeWrite2Pair(I, Match, Size);
      }
    }
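
// --- Illustrative sketch (not part of the pass) ----------------------------
// The block scan above only tries to pair a DS load/store with the
// instruction immediately following it (findMatchingDSInst starts at the next
// instruction), and merges greedily when the opcodes match and the offsets
// are encodable. The self-contained model below mirrors that strategy on a
// flat list; DSOp, canCombine and pairAdjacent are invented names.
#include <cassert>
#include <cstddef>
#include <vector>

struct DSOp {
  bool IsRead;        // ds_read_* vs ds_write_*
  unsigned EltSize;   // 4 for *_b32, 8 for *_b64
  unsigned Offset;    // byte offset
  bool Merged;        // set when folded into a read2/write2
};

// Same legality rule as offsetsCanBeCombined: distinct, element-aligned
// offsets that fit the 8-bit fields directly or via the st64 forms.
static bool canCombine(unsigned O0, unsigned O1, unsigned Size) {
  if (O0 == O1 || O0 % Size || O1 % Size)
    return false;
  unsigned E0 = O0 / Size, E1 = O1 / Size;
  if (E0 <= 0xff && E1 <= 0xff)
    return true;
  return E0 % 64 == 0 && E1 % 64 == 0 && E0 / 64 <= 0xff && E1 / 64 <= 0xff;
}

// Greedily merges each op with its immediate successor when legal.
static unsigned pairAdjacent(std::vector<DSOp> &Ops) {
  unsigned NumMerged = 0;
  for (std::size_t I = 0; I + 1 < Ops.size(); ++I) {
    DSOp &A = Ops[I], &B = Ops[I + 1];
    if (A.Merged || B.Merged || A.IsRead != B.IsRead ||
        A.EltSize != B.EltSize || !canCombine(A.Offset, B.Offset, A.EltSize))
      continue;
    A.Merged = B.Merged = true;   // would become one read2/write2
    ++NumMerged;
  }
  return NumMerged;
}

int main() {
  std::vector<DSOp> Ops = {
      {true, 4, 16, false},   // ds_read_b32 offset:16  } merged into one
      {true, 4, 32, false},   // ds_read_b32 offset:32  } ds_read2_b32
      {false, 4, 0, false},   // ds_write_b32: different direction, skipped
      {true, 8, 8, false},    // ds_read_b64: no matching neighbour
  };
  assert(pairAdjacent(Ops) == 1);
  return 0;
}
// ----------------------------------------------------------------------------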
// In SILoadStoreOptimizer::runOnMachineFunction():
  LIS = &getAnalysis<LiveIntervals>();

  DEBUG(dbgs() << "Running SILoadStoreOptimizer\n");

  // The function is expected to no longer be in SSA form at this point.
  assert(!MRI->isSSA());

  bool Modified = false;

  for (MachineBasicBlock &MBB : MF)
    Modified |= optimizeBlock(MBB);

  return Modified;
}