34 #define DEBUG_TYPE "aarch64-ldst-opt"
39 STATISTIC(NumPairCreated,
"Number of load/store pair instructions generated");
40 STATISTIC(NumPostFolded,
"Number of post-index updates folded");
41 STATISTIC(NumPreFolded,
"Number of pre-index updates folded");
43 "Number of load/store from unscaled generated");
73 bool &MergeForward,
int &SExtIdx,
113 const char *getPassName()
const override {
114 return "AArch64 load / store optimization pass";
127 case AArch64::STURSi:
129 case AArch64::STURDi:
131 case AArch64::STURQi:
133 case AArch64::STURWi:
135 case AArch64::STURXi:
137 case AArch64::LDURSi:
139 case AArch64::LDURDi:
141 case AArch64::LDURQi:
143 case AArch64::LDURWi:
145 case AArch64::LDURXi:
147 case AArch64::LDURSWi:
153 int AArch64LoadStoreOpt::getMemSize(
MachineInstr *MemMI) {
157 case AArch64::STRSui:
158 case AArch64::STURSi:
160 case AArch64::STRDui:
161 case AArch64::STURDi:
163 case AArch64::STRQui:
164 case AArch64::STURQi:
166 case AArch64::STRWui:
167 case AArch64::STURWi:
169 case AArch64::STRXui:
170 case AArch64::STURXi:
172 case AArch64::LDRSui:
173 case AArch64::LDURSi:
175 case AArch64::LDRDui:
176 case AArch64::LDURDi:
178 case AArch64::LDRQui:
179 case AArch64::LDURQi:
181 case AArch64::LDRWui:
182 case AArch64::LDURWi:
184 case AArch64::LDRXui:
185 case AArch64::LDURXi:
187 case AArch64::LDRSWui:
188 case AArch64::LDURSWi:
194 bool *IsValidLdStrOpc =
nullptr) {
196 *IsValidLdStrOpc =
true;
200 *IsValidLdStrOpc =
false;
202 case AArch64::STRDui:
203 case AArch64::STURDi:
204 case AArch64::STRQui:
205 case AArch64::STURQi:
206 case AArch64::STRWui:
207 case AArch64::STURWi:
208 case AArch64::STRXui:
209 case AArch64::STURXi:
210 case AArch64::LDRDui:
211 case AArch64::LDURDi:
212 case AArch64::LDRQui:
213 case AArch64::LDURQi:
214 case AArch64::LDRWui:
215 case AArch64::LDURWi:
216 case AArch64::LDRXui:
217 case AArch64::LDURXi:
218 case AArch64::STRSui:
219 case AArch64::STURSi:
220 case AArch64::LDRSui:
221 case AArch64::LDURSi:
223 case AArch64::LDRSWui:
224 return AArch64::LDRWui;
225 case AArch64::LDURSWi:
226 return AArch64::LDURWi;
234 case AArch64::STRSui:
235 case AArch64::STURSi:
236 return AArch64::STPSi;
237 case AArch64::STRDui:
238 case AArch64::STURDi:
239 return AArch64::STPDi;
240 case AArch64::STRQui:
241 case AArch64::STURQi:
242 return AArch64::STPQi;
243 case AArch64::STRWui:
244 case AArch64::STURWi:
245 return AArch64::STPWi;
246 case AArch64::STRXui:
247 case AArch64::STURXi:
248 return AArch64::STPXi;
249 case AArch64::LDRSui:
250 case AArch64::LDURSi:
251 return AArch64::LDPSi;
252 case AArch64::LDRDui:
253 case AArch64::LDURDi:
254 return AArch64::LDPDi;
255 case AArch64::LDRQui:
256 case AArch64::LDURQi:
257 return AArch64::LDPQi;
258 case AArch64::LDRWui:
259 case AArch64::LDURWi:
260 return AArch64::LDPWi;
261 case AArch64::LDRXui:
262 case AArch64::LDURXi:
263 return AArch64::LDPXi;
264 case AArch64::LDRSWui:
265 case AArch64::LDURSWi:
266 return AArch64::LDPSWi;
274 case AArch64::STRSui:
275 return AArch64::STRSpre;
276 case AArch64::STRDui:
277 return AArch64::STRDpre;
278 case AArch64::STRQui:
279 return AArch64::STRQpre;
280 case AArch64::STRWui:
281 return AArch64::STRWpre;
282 case AArch64::STRXui:
283 return AArch64::STRXpre;
284 case AArch64::LDRSui:
285 return AArch64::LDRSpre;
286 case AArch64::LDRDui:
287 return AArch64::LDRDpre;
288 case AArch64::LDRQui:
289 return AArch64::LDRQpre;
290 case AArch64::LDRWui:
291 return AArch64::LDRWpre;
292 case AArch64::LDRXui:
293 return AArch64::LDRXpre;
294 case AArch64::LDRSWui:
295 return AArch64::LDRSWpre;
303 case AArch64::STRSui:
304 return AArch64::STRSpost;
305 case AArch64::STRDui:
306 return AArch64::STRDpost;
307 case AArch64::STRQui:
308 return AArch64::STRQpost;
309 case AArch64::STRWui:
310 return AArch64::STRWpost;
311 case AArch64::STRXui:
312 return AArch64::STRXpost;
313 case AArch64::LDRSui:
314 return AArch64::LDRSpost;
315 case AArch64::LDRDui:
316 return AArch64::LDRDpost;
317 case AArch64::LDRQui:
318 return AArch64::LDRQpost;
319 case AArch64::LDRWui:
320 return AArch64::LDRWpost;
321 case AArch64::LDRXui:
322 return AArch64::LDRXpost;
323 case AArch64::LDRSWui:
324 return AArch64::LDRSWpost;
331 bool MergeForward,
int SExtIdx) {
354 MergeForward ? Paired->getOperand(1) : I->getOperand(1);
358 if (I->getOperand(2).getImm() ==
359 Paired->getOperand(2).getImm() + OffsetStride) {
366 SExtIdx = (SExtIdx + 1) % 2;
374 OffsetImm /= OffsetStride;
378 I->getDebugLoc(),
TII->get(NewOpc))
381 .addOperand(BaseRegOp)
388 DEBUG(
dbgs() <<
"Creating pair load/store. Replacing instructions:\n ");
392 DEBUG(
dbgs() <<
" with instruction:\n ");
402 unsigned DstRegX = DstMO.
getReg();
404 unsigned DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
414 BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
421 BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
422 TII->get(AArch64::SBFMXri), DstRegX)
436 I->eraseFromParent();
437 Paired->eraseFromParent();
457 ModifiedRegs.
set(*AI);
459 assert(MO.
isUse() &&
"Reg operand not a def and not a use?!?");
467 if (!IsUnscaled && (Offset > 63 || Offset < -64))
472 int ElemOffset = Offset / OffsetStride;
473 if (ElemOffset > 63 || ElemOffset < -64)
484 return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
503 for (
auto &MIb : MemInsns)
514 bool &MergeForward,
int &SExtIdx,
543 ModifiedRegs.
resize(TRI->getNumRegs());
544 UsedRegs.
resize(TRI->getNumRegs());
549 for (
unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
559 bool CanMergeOpc = Opc == MI->
getOpcode();
562 bool IsValidLdStrOpc;
564 if (!IsValidLdStrOpc)
567 SExtIdx = NonSExtOpc == (
unsigned)Opc ? 1 : 0;
584 if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) ||
585 (Offset + OffsetStride == MIOffset))) {
586 int MinOffset = Offset < MIOffset ? Offset : MIOffset;
605 (
alignTo(MinOffset, OffsetStride) != MinOffset)) {
628 MergeForward =
false;
658 if (ModifiedRegs[BaseReg])
671 assert((Update->getOpcode() == AArch64::ADDXri ||
672 Update->getOpcode() == AArch64::SUBXri) &&
673 "Unexpected base register update instruction to merge!");
678 if (++NextI == Update)
681 int Value = Update->getOperand(2).getImm();
683 "Can't merge 1 << 12 offset into pre-indexed load / store");
684 if (Update->getOpcode() == AArch64::SUBXri)
689 BuildMI(*I->getParent(),
I, I->getDebugLoc(),
TII->get(NewOpc))
690 .addOperand(Update->getOperand(0))
691 .addOperand(I->getOperand(0))
692 .addOperand(I->getOperand(1))
696 DEBUG(
dbgs() <<
"Creating pre-indexed load/store.");
697 DEBUG(
dbgs() <<
" Replacing instructions:\n ");
701 DEBUG(
dbgs() <<
" with instruction:\n ");
706 I->eraseFromParent();
707 Update->eraseFromParent();
714 assert((Update->getOpcode() == AArch64::ADDXri ||
715 Update->getOpcode() == AArch64::SUBXri) &&
716 "Unexpected base register update instruction to merge!");
721 if (++NextI == Update)
724 int Value = Update->getOperand(2).getImm();
726 "Can't merge 1 << 12 offset into post-indexed load / store");
727 if (Update->getOpcode() == AArch64::SUBXri)
732 BuildMI(*I->getParent(),
I, I->getDebugLoc(),
TII->get(NewOpc))
733 .addOperand(Update->getOperand(0))
734 .addOperand(I->getOperand(0))
735 .addOperand(I->getOperand(1))
739 DEBUG(
dbgs() <<
"Creating post-indexed load/store.");
740 DEBUG(
dbgs() <<
" Replacing instructions:\n ");
744 DEBUG(
dbgs() <<
" with instruction:\n ");
749 I->eraseFromParent();
750 Update->eraseFromParent();
760 case AArch64::SUBXri:
764 case AArch64::ADDXri:
798 TII->getRegClass(MemMI->
getDesc(), 0, TRI, MF)->getSize();
802 if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
814 ModifiedRegs.
resize(TRI->getNumRegs());
815 UsedRegs.
resize(TRI->getNumRegs());
817 for (
unsigned Count = 0; MBBI != E; ++MBBI) {
836 if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg])
853 unsigned RegSize =
TII->getRegClass(MemMI->
getDesc(), 0, TRI, MF)->getSize();
857 if (MBBI == B || Offset != 0)
861 if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
867 ModifiedRegs.
resize(TRI->getNumRegs());
868 UsedRegs.
resize(TRI->getNumRegs());
870 for (
unsigned Count = 0; MBBI != B; --MBBI) {
889 if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg])
896 bool Modified =
false;
921 case AArch64::STRSui:
922 case AArch64::STRDui:
923 case AArch64::STRQui:
924 case AArch64::STRXui:
925 case AArch64::STRWui:
926 case AArch64::LDRSui:
927 case AArch64::LDRDui:
928 case AArch64::LDRQui:
929 case AArch64::LDRXui:
930 case AArch64::LDRWui:
931 case AArch64::LDRSWui:
933 case AArch64::STURSi:
934 case AArch64::STURDi:
935 case AArch64::STURQi:
936 case AArch64::STURWi:
937 case AArch64::STURXi:
938 case AArch64::LDURSi:
939 case AArch64::LDURDi:
940 case AArch64::LDURQi:
941 case AArch64::LDURWi:
942 case AArch64::LDURXi:
943 case AArch64::LDURSWi: {
956 if (
TII->isLdStPairSuppressed(MI)) {
961 bool MergeForward =
false;
964 findMatchingInsn(MBBI, MergeForward, SExtIdx,
ScanLimit);
969 MBBI = mergePairedInsns(MBBI, Paired, MergeForward, SExtIdx);
974 ++NumUnscaledPairCreated;
995 case AArch64::STRSui:
996 case AArch64::STRDui:
997 case AArch64::STRQui:
998 case AArch64::STRXui:
999 case AArch64::STRWui:
1000 case AArch64::LDRSui:
1001 case AArch64::LDRDui:
1002 case AArch64::LDRQui:
1003 case AArch64::LDRXui:
1004 case AArch64::LDRWui:
1006 case AArch64::STURSi:
1007 case AArch64::STURDi:
1008 case AArch64::STURQi:
1009 case AArch64::STURWi:
1010 case AArch64::STURXi:
1011 case AArch64::LDURSi:
1012 case AArch64::LDURDi:
1013 case AArch64::LDURQi:
1014 case AArch64::LDURWi:
1015 case AArch64::LDURXi: {
1023 findMatchingUpdateInsnForward(MBBI,
ScanLimit, 0);
1026 MBBI = mergePostIdxUpdateInsn(MBBI, Update);
1043 Update = findMatchingUpdateInsnBackward(MBBI,
ScanLimit);
1046 MBBI = mergePreIdxUpdateInsn(MBBI, Update);
1064 Update = findMatchingUpdateInsnForward(MBBI,
ScanLimit, Value);
1067 MBBI = mergePreIdxUpdateInsn(MBBI, Update);
1088 bool Modified =
false;
1089 for (
auto &MBB : Fn)
1090 Modified |= optimizeBlock(MBB);
1101 return new AArch64LoadStoreOpt();
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
void push_back(const T &Elt)
const MachineFunction * getParent() const
getParent - Return the MachineFunction containing this basic block.
STATISTIC(NumFunctions,"Total number of functions")
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the information describing the memory reference is not available.
void setBitsNotInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
setBitsNotInMask - Add a bit to this vector for every '0' bit in Mask.
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride)
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
static cl::opt< bool > EnableAArch64UnscaledMemOp("aarch64-unscaled-mem-op", cl::Hidden, cl::desc("Allow AArch64 unscaled load/store combining"), cl::init(true))
static bool isUnscaledLdst(unsigned Opc)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
static unsigned getMatchingPairOpcode(unsigned Opc)
const HexagonInstrInfo * TII
unsigned getSize(const MachineInstr *MI) const
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setImplicit(bool Val=true)
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Reg
All possible values of the reg field in the ModR/M byte.
static bool isMatchingUpdateInsn(MachineInstr *MI, unsigned BaseReg, int Offset)
const MachineInstrBuilder & addImm(int64_t Val) const
addImm - Add a new immediate operand.
unsigned getNumOperands() const
Access to explicit operands of the instruction.
FunctionPass * createAArch64LoadStoreOptimizationPass()
createAArch64LoadStoreOptimizationPass - returns an instance of the load / store optimization pass.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
bool isDebugValue() const
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
bundle_iterator< MachineInstr, instr_iterator > iterator
initializer< Ty > init(const Ty &Val)
static unsigned getMatchingNonSExtOpcode(unsigned Opc, bool *IsValidLdStrOpc=nullptr)
const MachineOperand & getOperand(unsigned i) const
MCRegAliasIterator enumerates all registers aliasing Reg.
FunctionPass class - This class is used to implement most global optimizations.
MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, const MCInstrDesc &MCID)
BuildMI - Builder interface.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
const uint32_t * getRegMask() const
getRegMask - Returns a bit mask of registers preserved by this RegMask operand.
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
static bool mayAlias(MachineInstr *MIa, MachineInstr *MIb, const AArch64InstrInfo *TII)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static int alignTo(int Num, int PowOf2)
KILL - This instruction is a noop that is used only to adjust the liveness of registers.
Representation of each machine instruction.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
void setReg(unsigned Reg)
Change the register this operand corresponds to.
static cl::opt< unsigned > ScanLimit("aarch64-load-store-scan-limit", cl::init(20), cl::Hidden)
bool isCall(QueryType Type=AnyInBundle) const
static unsigned getPreIndexedOpcode(unsigned Opc)
unsigned getReg() const
getReg - Returns the register number.
static unsigned getPostIndexedOpcode(unsigned Opc)
virtual const TargetInstrInfo * getInstrInfo() const
LLVM Value Representation.
bool areMemAccessesTriviallyDisjoint(MachineInstr *MIa, MachineInstr *MIb, AliasAnalysis *AA=nullptr) const override
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
static void trackRegDefsUses(MachineInstr *MI, BitVector &ModifiedRegs, BitVector &UsedRegs, const TargetRegisterInfo *TRI)
trackRegDefsUses - Remember what registers the specified instruction uses and modifies.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
addReg - Add a new virtual register operand...
bool modifiesRegister(unsigned Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register...