#define DEBUG_TYPE "si-load-store-opt"
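// This pass fuses adjacent DS (LDS) memory operations that use the same base
// address and nearby immediate offsets. For example, a pair of loads such as
//
//   ds_read_b32 v0, v2 offset:16
//   ds_read_b32 v1, v2 offset:32
//
// can be rewritten as a single two-result read (offsets are encoded in element
// units, so 16/4 = 4 and 32/4 = 8):
//
//   ds_read2_b32 v[0:1], v2 offset0:4 offset1:8
//
// The store path does the same with ds_write2. (Illustrative assembly; the
// exact operand syntax here is only a sketch.)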
  static bool offsetsCanBeCombined(unsigned Offset0,
                                   unsigned Offset1,
                                   unsigned EltSize);
  SILoadStoreOptimizer() : MachineFunctionPass(ID) {}
  StringRef getPassName() const override { return "SI Load / Store Optimizer"; }
114 "SI Load / Store Optimizer",
false,
false)
char SILoadStoreOptimizer::ID = 0;
  return new SILoadStoreOptimizer(TM);
    MI->removeFromParent();
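    // A pair of loads never conflicts, so two accesses can be reordered when
    // neither of them is a store.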
          !(A->mayStore() || B->mayStore()));
    if (!InstToMove->mayLoadOrStore())
      continue;
bool SILoadStoreOptimizer::offsetsCanBeCombined(unsigned Offset0,
                                                unsigned Offset1,
                                                unsigned Size) {
  if (Offset0 == Offset1)
    return false;
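  // Offsets that are not multiples of the element size cannot be expressed in
  // the merged instruction's per-element offset fields.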
  if ((Offset0 % Size != 0) || (Offset1 % Size != 0))
    return false;
  unsigned EltOffset0 = Offset0 / Size;
  unsigned EltOffset1 = Offset1 / Size;
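  // Element offsets that do not fit the normal encoding can still be merged
  // with the stride-64 (ST64) forms, but only if both are multiples of 64.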
  if ((EltOffset0 % 64 != 0) || (EltOffset1 % 64 != 0))
    return false;
  for ( ; MBBI != E; ++MBBI) {
    if (MBBI->getOpcode() != I->getOpcode()) {
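      // An instruction with unmodeled side effects can never be reordered
      // relative to a memory operation, so stop searching for a pair.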
      if (MBBI->hasUnmodeledSideEffects())
        return E;
      if (MBBI->mayLoadOrStore() &&
          !memAccessesCanBeReordered(*I, *MBBI, TII, AA)) {
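    // Don't merge volatile or otherwise ordered accesses.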
    if (MBBI->hasOrderedMemoryRef())
      return E;
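    // Only accesses that use the same base address register (and subregister,
    // which can differ for vectors of pointers) are merge candidates.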
    if (AddrReg0.getReg() == AddrReg1.getReg() &&
        AddrReg0.getSubReg() == AddrReg1.getSubReg()) {
      int OffsetIdx = AMDGPU::getNamedOperandIdx(I->getOpcode(),
                                                 AMDGPU::OpName::offset);
      unsigned Offset0 = I->getOperand(OffsetIdx).getImm() & 0xffff;
      unsigned Offset1 = MBBI->getOperand(OffsetIdx).getImm() & 0xffff;
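      // Merge only if the two offsets can be encoded together and every
      // instruction queued in InstsToMove can be hoisted past the paired
      // access.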
      if (offsetsCanBeCombined(Offset0, Offset1, EltSize) &&
          canMoveInstsAcrossMemOp(*MBBI, InstsToMove, TII, AA))
        return MBBI;
  const MachineOperand *Dest1
    = TII->getNamedOperand(*Paired, AMDGPU::OpName::vdst);
  unsigned Offset0
    = TII->getNamedOperand(*I, AMDGPU::OpName::offset)->getImm() & 0xffff;
  unsigned Offset1
    = TII->getNamedOperand(*Paired, AMDGPU::OpName::offset)->getImm() & 0xffff;
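  // ds_read2 encodes its two offsets in units of the element size.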
  unsigned NewOffset0 = Offset0 / EltSize;
  unsigned NewOffset1 = Offset1 / EltSize;
  unsigned Opc = (EltSize == 4) ? AMDGPU::DS_READ2_B32 : AMDGPU::DS_READ2_B64;
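  // Prefer the stride-64 (ST64) form when both element offsets are multiples
  // of 64; its encoded offsets are scaled by 64, extending the reachable range.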
  bool UseST64 = (NewOffset0 % 64 == 0) && (NewOffset1 % 64 == 0);
    Opc = (EltSize == 4) ? AMDGPU::DS_READ2ST64_B32 : AMDGPU::DS_READ2ST64_B64;
  unsigned SubRegIdx0 = (EltSize == 4) ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
  unsigned SubRegIdx1 = (EltSize == 4) ? AMDGPU::sub1 : AMDGPU::sub2_sub3;
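  // Canonicalize the merged instruction so the smaller offset is encoded first.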
  if (NewOffset0 > NewOffset1) {
  assert((isUInt<8>(NewOffset0) && isUInt<8>(NewOffset1)) &&
         (NewOffset0 != NewOffset1) &&
         "Computed offset doesn't fit");
  const TargetRegisterClass *SuperRC
    = (EltSize == 4) ? &AMDGPU::VReg_64RegClass : &AMDGPU::VReg_128RegClass;
  unsigned DestReg = MRI->createVirtualRegister(SuperRC);
  MachineInstrBuilder Read2
    = BuildMI(*MBB, Paired, DL, Read2Desc, DestReg)
        .addMemOperand(*Paired->memoperands_begin());
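  // Copy one half of the wide ds_read2 result, selected by SubRegIdx0, back to
  // the corresponding original destination register.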
  BuildMI(*MBB, Paired, DL, CopyDesc)
      .addReg(DestReg, 0, SubRegIdx0);
  I->eraseFromParent();
  Paired->eraseFromParent();
  DEBUG(dbgs() << "Inserted read2: " << *Read2 << '\n');
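// mergeWrite2Pair mirrors the read path: two DS writes to combinable offsets
// from the same base are rewritten as a single ds_write2 (or its stride-64
// variant).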
  const MachineOperand *Data1
    = TII->getNamedOperand(*Paired, AMDGPU::OpName::data0); // paired store's data
  unsigned Offset0
    = TII->getNamedOperand(*I, AMDGPU::OpName::offset)->getImm() & 0xffff;
  unsigned Offset1
    = TII->getNamedOperand(*Paired, AMDGPU::OpName::offset)->getImm() & 0xffff;
  unsigned NewOffset0 = Offset0 / EltSize;
  unsigned NewOffset1 = Offset1 / EltSize;
  unsigned Opc = (EltSize == 4) ? AMDGPU::DS_WRITE2_B32 : AMDGPU::DS_WRITE2_B64;
  bool UseST64 = (NewOffset0 % 64 == 0) && (NewOffset1 % 64 == 0);
    Opc = (EltSize == 4) ? AMDGPU::DS_WRITE2ST64_B32 : AMDGPU::DS_WRITE2ST64_B64;
  if (NewOffset0 > NewOffset1) {
  assert((isUInt<8>(NewOffset0) && isUInt<8>(NewOffset1)) &&
         (NewOffset0 != NewOffset1) &&
         "Computed offset doesn't fit");
  MachineInstrBuilder Write2
    = BuildMI(*MBB, Paired, DL, Write2Desc)
        .addMemOperand(*Paired->memoperands_begin());
  I->eraseFromParent();
  Paired->eraseFromParent();
  DEBUG(dbgs() << "Inserted write2 inst: " << *Write2 << '\n');
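// optimizeBlock scans a basic block for DS read/write pairs that can be merged.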
  bool Modified = false;
    if (Opc == AMDGPU::DS_READ_B32 || Opc == AMDGPU::DS_READ_B64) {
      unsigned Size = (Opc == AMDGPU::DS_READ_B64) ? 8 : 4;
        I = mergeRead2Pair(I, Match, Size, InstsToMove);
    } else if (Opc == AMDGPU::DS_WRITE_B32 || Opc == AMDGPU::DS_WRITE_B64) {
      unsigned Size = (Opc == AMDGPU::DS_WRITE_B64) ? 8 : 4;
        I = mergeWrite2Pair(I, Match, Size, InstsToMove);
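// runOnMachineFunction wires up alias analysis and then runs optimizeBlock over
// every block in the function.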
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  DEBUG(dbgs() << "Running SILoadStoreOptimizer\n");
  bool Modified = false;
  for (MachineBasicBlock &MBB : MF)
    Modified |= optimizeBlock(MBB);