#define DEBUG_TYPE "aarch64-ldst-opt"
STATISTIC(NumPairCreated,
          "Number of load/store pair instructions generated");
STATISTIC(NumPostFolded, "Number of post-index updates folded");
STATISTIC(NumPreFolded, "Number of pre-index updates folded");
STATISTIC(NumUnscaledPairCreated,
          "Number of load/store from unscaled generated");
STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
typedef struct LdStPairFlags {
  // True when the merge keeps the second instruction's position and folds the
  // first instruction forward into it; false when the pair is formed at the
  // first instruction's position.
  bool MergeForward;

  // Index (0 or 1) of the paired result that must be sign-extended after the
  // merge, or -1 if no sign extension is needed.
  int SExtIdx;

  LdStPairFlags() : MergeForward(false), SExtIdx(-1) {}

  void setMergeForward(bool V = true) { MergeForward = V; }
  bool getMergeForward() const { return MergeForward; }

  void setSExtIdx(int V) { SExtIdx = V; }
  int getSExtIdx() const { return SExtIdx; }
  MachineBasicBlock::iterator
  findMatchingInsn(MachineBasicBlock::iterator I, LdStPairFlags &Flags,
                   unsigned Limit, bool FindNarrowMerge);
  MachineBasicBlock::iterator
  mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                        MachineBasicBlock::iterator MergeMI,
                        const LdStPairFlags &Flags);
  MachineBasicBlock::iterator
  mergePairedInsns(MachineBasicBlock::iterator I,
                   MachineBasicBlock::iterator Paired,
                   const LdStPairFlags &Flags);
  MachineBasicBlock::iterator
  findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
                                int UnscaledOffset, unsigned Limit);
  bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI,
                            unsigned BaseReg, int Offset);
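// Returns true for the byte and halfword store opcodes: stores narrower than
// 32 bits that are candidates for widening when they store zero.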
static bool isNarrowStore(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
    return true;
  }
}
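// Memory access size in bytes implied by a load/store opcode; this is also
// the scale applied to the immediate offset of the scaled addressing forms.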
static int getMemScale(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case AArch64::LDRBBui:
  case AArch64::LDURBBi:
  case AArch64::LDRSBWui:
  case AArch64::LDURSBWi:
  case AArch64::STRBBui:
  case AArch64::STURBBi:
    return 1;
  case AArch64::LDRHHui:
  case AArch64::LDURHHi:
  case AArch64::LDRSHWui:
  case AArch64::LDURSHWi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
    return 2;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
  case AArch64::LDRWui:
  case AArch64::LDURWi:
  case AArch64::STRSui:
  case AArch64::STURSi:
  case AArch64::STRWui:
  case AArch64::STURWi:
  case AArch64::LDPSWi:
    return 4;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
  case AArch64::LDRXui:
  case AArch64::LDURXi:
  case AArch64::STRDui:
  case AArch64::STURDi:
  case AArch64::STRXui:
  case AArch64::STURXi:
    return 8;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::STRQui:
  case AArch64::STURQi:
    return 16;
  }
}
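// Map a sign-extending load opcode to its non-extending counterpart; other
// pairable load/store opcodes map to themselves. IsValidLdStrOpc, when
// non-null, reports whether the opcode is handled at all.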
static unsigned getMatchingNonSExtOpcode(unsigned Opc,
                                         bool *IsValidLdStrOpc = nullptr) {
  if (IsValidLdStrOpc)
    *IsValidLdStrOpc = true;
  switch (Opc) {
  default:
    if (IsValidLdStrOpc)
      *IsValidLdStrOpc = false;
  case AArch64::STRDui:
  case AArch64::STURDi:
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
  case AArch64::STRWui:
  case AArch64::STURWi:
  case AArch64::STRXui:
  case AArch64::STURXi:
  case AArch64::LDRDui:
  case AArch64::LDURDi:
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::LDRWui:
  case AArch64::LDURWi:
  case AArch64::LDRXui:
  case AArch64::LDURXi:
  case AArch64::STRSui:
  case AArch64::STURSi:
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return Opc;
  case AArch64::LDRSWui:
    return AArch64::LDRWui;
  case AArch64::LDURSWi:
    return AArch64::LDURWi;
  }
}
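// Opcode of the single store that covers two adjacent narrow zero stores,
// i.e. the same store at twice the width.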
static unsigned getMatchingWideOpcode(unsigned Opc) {
  switch (Opc) {
  case AArch64::STRBBui:
    return AArch64::STRHHui;
  case AArch64::STRHHui:
    return AArch64::STRWui;
  case AArch64::STURBBi:
    return AArch64::STURHHi;
  case AArch64::STURHHi:
    return AArch64::STURWi;
  case AArch64::STURWi:
    return AArch64::STURXi;
  case AArch64::STRWui:
    return AArch64::STRXui;
  }
}
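// Load/store pair opcode (LDP/STP form) corresponding to a single scaled or
// unscaled load/store opcode.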
static unsigned getMatchingPairOpcode(unsigned Opc) {
  switch (Opc) {
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STPSi;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STPDi;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STPQi;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STPWi;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STPXi;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDPSi;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDPDi;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDPQi;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDPWi;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDPXi;
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
    return AArch64::LDPSWi;
  }
}
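// isMatchingStore(LoadInst, StoreInst): true when the store writes at least
// as many bytes as the load reads and both use the same (scaled or unscaled)
// addressing form, so the stored value can be forwarded to the load.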
  case AArch64::LDRBBui:
    return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
           StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
  case AArch64::LDURBBi:
    return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
           StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
  case AArch64::LDRHHui:
    return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
           StOpc == AArch64::STRXui;
  case AArch64::LDURHHi:
    return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
           StOpc == AArch64::STURXi;
  case AArch64::LDRWui:
    return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
  case AArch64::LDURWi:
    return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
  case AArch64::LDRXui:
    return StOpc == AArch64::STRXui;
  case AArch64::LDURXi:
    return StOpc == AArch64::STURXi;
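// Pre-indexed (base register writeback) variant of a load/store or
// load/store pair opcode.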
static unsigned getPreIndexedOpcode(unsigned Opc) {
  switch (Opc) {
  case AArch64::STRSui:
    return AArch64::STRSpre;
  case AArch64::STRDui:
    return AArch64::STRDpre;
  case AArch64::STRQui:
    return AArch64::STRQpre;
  case AArch64::STRBBui:
    return AArch64::STRBBpre;
  case AArch64::STRHHui:
    return AArch64::STRHHpre;
  case AArch64::STRWui:
    return AArch64::STRWpre;
  case AArch64::STRXui:
    return AArch64::STRXpre;
  case AArch64::LDRSui:
    return AArch64::LDRSpre;
  case AArch64::LDRDui:
    return AArch64::LDRDpre;
  case AArch64::LDRQui:
    return AArch64::LDRQpre;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpre;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpre;
  case AArch64::LDRWui:
    return AArch64::LDRWpre;
  case AArch64::LDRXui:
    return AArch64::LDRXpre;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpre;
  case AArch64::LDPSi:
    return AArch64::LDPSpre;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpre;
  case AArch64::LDPDi:
    return AArch64::LDPDpre;
  case AArch64::LDPQi:
    return AArch64::LDPQpre;
  case AArch64::LDPWi:
    return AArch64::LDPWpre;
  case AArch64::LDPXi:
    return AArch64::LDPXpre;
  case AArch64::STPSi:
    return AArch64::STPSpre;
  case AArch64::STPDi:
    return AArch64::STPDpre;
  case AArch64::STPQi:
    return AArch64::STPQpre;
  case AArch64::STPWi:
    return AArch64::STPWpre;
  case AArch64::STPXi:
    return AArch64::STPXpre;
  }
}
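// Post-indexed (base register writeback) variant of a load/store or
// load/store pair opcode.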
static unsigned getPostIndexedOpcode(unsigned Opc) {
  switch (Opc) {
  case AArch64::STRSui:
    return AArch64::STRSpost;
  case AArch64::STRDui:
    return AArch64::STRDpost;
  case AArch64::STRQui:
    return AArch64::STRQpost;
  case AArch64::STRBBui:
    return AArch64::STRBBpost;
  case AArch64::STRHHui:
    return AArch64::STRHHpost;
  case AArch64::STRWui:
    return AArch64::STRWpost;
  case AArch64::STRXui:
    return AArch64::STRXpost;
  case AArch64::LDRSui:
    return AArch64::LDRSpost;
  case AArch64::LDRDui:
    return AArch64::LDRDpost;
  case AArch64::LDRQui:
    return AArch64::LDRQpost;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpost;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpost;
  case AArch64::LDRWui:
    return AArch64::LDRWpost;
  case AArch64::LDRXui:
    return AArch64::LDRXpost;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpost;
  case AArch64::LDPSi:
    return AArch64::LDPSpost;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpost;
  case AArch64::LDPDi:
    return AArch64::LDPDpost;
  case AArch64::LDPQi:
    return AArch64::LDPQpost;
  case AArch64::LDPWi:
    return AArch64::LDPWpost;
  case AArch64::LDPXi:
    return AArch64::LDPXpost;
  case AArch64::STPSi:
    return AArch64::STPSpost;
  case AArch64::STPDi:
    return AArch64::STPDpost;
  case AArch64::STPQi:
    return AArch64::STPQpost;
  case AArch64::STPWi:
    return AArch64::STPWpost;
  case AArch64::STPXi:
    return AArch64::STPXpost;
  }
}
  case AArch64::LDPSWi:
static const MachineOperand &getLdStRegOp(const MachineInstr &MI,
                                          unsigned PairedRegOp = 0) {
  assert(PairedRegOp < 2 && "Unexpected register operand idx.");
  return (UnscaledStOffset <= UnscaledLdOffset) &&
         (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
  return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||
          isNarrowStore(Opc)) &&
         getLdStRegOp(MI).getReg() == AArch64::WZR;
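// Replace two adjacent narrow zero stores with a single zero store of twice
// the width, returning an iterator to the instruction after the merged store.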
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                                           MachineBasicBlock::iterator MergeMI,
                                           const LdStPairFlags &Flags) {
  assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
         "Expected promotable zero stores.");
  if (NextI == MergeMI)
    ++NextI;
  unsigned Opc = I->getOpcode();
  bool IsScaled = !TII->isUnscaledLdSt(Opc);
  bool MergeForward = Flags.getMergeForward();
  assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
          .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
          .setMemRefs(I->mergeMemRefsWith(*MergeMI));
  DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n ");
  DEBUG(dbgs() << " with instruction:\n ");
  I->eraseFromParent();
  MergeMI->eraseFromParent();
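// Combine the load/store at I with the matching instruction Paired into a
// single LDP/STP. The insertion point is chosen by Flags.getMergeForward(),
// and a sign extension (SBFMXri) is re-emitted if one of the merged loads
// was sign-extending.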
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator Paired,
                                      const LdStPairFlags &Flags) {
  int SExtIdx = Flags.getSExtIdx();
  bool IsUnscaled = TII->isUnscaledLdSt(Opc);
  int OffsetStride = IsUnscaled ? getMemScale(*I) : 1;
  bool MergeForward = Flags.getMergeForward();
  bool PairedIsUnscaled = TII->isUnscaledLdSt(Paired->getOpcode());
  if (IsUnscaled != PairedIsUnscaled) {
    if (PairedIsUnscaled) {
             "Offset should be a multiple of the stride!");
      PairedOffset /= MemSize;
      PairedOffset *= MemSize;
  if (Offset == PairedOffset + OffsetStride) {
      SExtIdx = (SExtIdx + 1) % 2;
         "Unscaled offset cannot be scaled.");
    if (RegOp0.isUse()) {
        MI.clearRegisterKills(Reg, TRI);
          .addOperand(BaseRegOp)
          .setMemRefs(I->mergeMemRefsWith(*Paired));
  DEBUG(dbgs() << "Creating pair load/store. Replacing instructions:\n ");
  DEBUG(dbgs() << " with instruction:\n ");
    unsigned DstRegX = DstMO.getReg();
    unsigned DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
    BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::SBFMXri), DstRegX)
  I->eraseFromParent();
  Paired->eraseFromParent();
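// promoteLoadFromStore: the load at LoadI reads bytes just written by StoreI,
// so forward the stored register instead -- a plain register copy (ORR with
// the zero register) when the sizes match, otherwise a bitfield extract
// (ANDri/UBFM) of the loaded bytes.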
  bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);
  assert((IsStoreXReg ||
          TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
         "Unexpected RegClass");
  if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
    if (StRt == LdRt && LoadSize == 8) {
      StoreI->clearRegisterKills(StRt, TRI);
      DEBUG(dbgs() << "Remove load instruction:\n ");
      LoadI->eraseFromParent();
    BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
            TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
        .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
    if (!Subtarget->isLittleEndian())
    bool IsUnscaled = TII->isUnscaledLdSt(*LoadI);
    assert(IsUnscaled == TII->isUnscaledLdSt(*StoreI) &&
           "Unsupported ld/st match");
    assert(LoadSize <= StoreSize && "Invalid load size");
    int UnscaledLdOffset = IsUnscaled
                               ? getLdStOffsetOp(*LoadI).getImm()
                               : getLdStOffsetOp(*LoadI).getImm() * LoadSize;
    int UnscaledStOffset = IsUnscaled
                               ? getLdStOffsetOp(*StoreI).getImm()
                               : getLdStOffsetOp(*StoreI).getImm() * StoreSize;
    int Width = LoadSize * 8;
    int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
    int Imms = Immr + Width - 1;
    unsigned DestReg = IsStoreXReg
                           ? TRI->getMatchingSuperReg(LdRt, AArch64::sub_32,
                                                      &AArch64::GPR64RegClass)
                           : LdRt;
    assert((UnscaledLdOffset >= UnscaledStOffset &&
            (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
    Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
    Imms = Immr + Width - 1;
    if (UnscaledLdOffset == UnscaledStOffset) {
      uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12)
      BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
              TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
      BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
              TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
      MI.clearRegisterKills(StRt, TRI);
  DEBUG(dbgs() << "Promoting load by replacing :\n ");
  DEBUG(dbgs() << " with instructions:\n ");
  LoadI->eraseFromParent();
      unsigned Reg = MO.getReg();
    if (Reg != AArch64::WZR && Reg != AArch64::XZR)
        ModifiedRegs.set(*AI);
      assert(MO.isUse() && "Reg operand not a def and not a use?!?");
  if (Offset % OffsetStride)
  Offset /= OffsetStride;
  return Offset <= 63 && Offset >= -64;
  return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
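// Scan backwards from a load, looking for a store to the same base register
// whose value can be forwarded to the load.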
bool AArch64LoadStoreOpt::findMatchingStore(
    MachineBasicBlock::iterator I, unsigned Limit,
    MachineBasicBlock::iterator &StoreI) {
  ModifiedRegs.reset();
    if (ModifiedRegs[BaseReg])
  } while (MBBI != B && Count < Limit);
static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
                                       LdStPairFlags &Flags,
                                       const AArch64InstrInfo *TII) {
         "FirstMI shouldn't get here if either of these checks are true.");
  bool IsValidLdStrOpc, PairIsValidLdStrOpc;
  assert(IsValidLdStrOpc &&
         "Given Opc should be a Load or Store with an immediate");
  Flags.setSExtIdx(NonSExtOpc == (unsigned)OpcA ? 1 : 0);
  if (!PairIsValidLdStrOpc)
  if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
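// Scan forward from FirstMI for a second load/store that can be combined
// with it, either into a wider zero store (FindNarrowMerge) or into a
// load/store pair, tracking modified and used registers along the way.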
MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
                                      LdStPairFlags &Flags, unsigned Limit,
                                      bool FindNarrowMerge) {
  bool IsUnscaled = TII->isUnscaledLdSt(FirstMI);
  int OffsetStride = IsUnscaled ? getMemScale(FirstMI) : 1;
  ModifiedRegs.reset();
  for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
    Flags.setSExtIdx(-1);
      bool MIIsUnscaled = TII->isUnscaledLdSt(MI);
      if (IsUnscaled != MIIsUnscaled) {
        if (MIOffset % MemSize) {
          MIOffset /= MemSize;
          MIOffset *= MemSize;
      if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) ||
                                   (Offset + OffsetStride == MIOffset))) {
        int MinOffset = Offset < MIOffset ? Offset : MIOffset;
        if (FindNarrowMerge) {
          if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
          if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
          Flags.setMergeForward(false);
          Flags.setMergeForward(true);
    if (ModifiedRegs[BaseReg])
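// Fold a matching ADDXri/SUBXri update of the base register into the
// load/store at I, producing its pre- or post-indexed form.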
  assert((Update->getOpcode() == AArch64::ADDXri ||
          Update->getOpcode() == AArch64::SUBXri) &&
         "Unexpected base register update instruction to merge!");
  if (++NextI == Update)
  int Value = Update->getOperand(2).getImm();
         "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
  if (Update->getOpcode() == AArch64::SUBXri)
    MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
        .setMemRefs(I->memoperands_begin(), I->memoperands_end());
    MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
        .addImm(Value / Scale)
        .setMemRefs(I->memoperands_begin(), I->memoperands_end());
    DEBUG(dbgs() << "Creating pre-indexed load/store.");
    DEBUG(dbgs() << "Creating post-indexed load/store.");
  DEBUG(dbgs() << " Replacing instructions:\n ");
  DEBUG(dbgs() << " with instruction:\n ");
  I->eraseFromParent();
  Update->eraseFromParent();
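// Check whether MI is an add/sub-immediate of the memory instruction's base
// register whose value fits the writeback immediate: signed 9-bit for single
// loads/stores, scaled signed 7-bit for paired ones.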
bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
                                               MachineInstr &MI,
                                               unsigned BaseReg, int Offset) {
  case AArch64::SUBXri:
  case AArch64::ADDXri:
      UpdateOffset = -UpdateOffset;
    if (!IsPairedInsn && (UpdateOffset > 255 || UpdateOffset < -256))
      if (UpdateOffset % Scale != 0)
      int ScaledOffset = UpdateOffset / Scale;
      if (ScaledOffset > 63 || ScaledOffset < -64)
    if (!Offset || Offset == UpdateOffset)
  if (MIUnscaledOffset != UnscaledOffset)
  for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
    if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
  ModifiedRegs.reset();
  for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
      if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
    if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg])
  if (MBBI == B || Offset != 0)
  for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
    if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
  ModifiedRegs.reset();
      if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset))
    if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg])
  } while (MBBI != B && Count < Limit);
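// Driver for store-to-load forwarding: find an earlier matching store and,
// if one exists, promote the load at MBBI.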
bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
    MachineBasicBlock::iterator &MBBI) {
  if (findMatchingStore(MBBI, LdStLimit, StoreI)) {
    ++NumLoadsFromStoresPromoted;
    MBBI = promoteLoadFromStore(MBBI, StoreI);
bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
    MachineBasicBlock::iterator &MBBI) {
  if (!TII->isCandidateToMergeOrPair(MI))
  LdStPairFlags Flags;
      findMatchingInsn(MBBI, Flags, LdStLimit, true);
    ++NumZeroStoresPromoted;
    MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags);
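// tryToPairLdStInst: find a second load/store that forms a pair with the one
// at MBBI and merge the two into an LDP/STP.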
  if (!TII->isCandidateToMergeOrPair(MI))
  bool IsUnscaled = TII->isUnscaledLdSt(MI);
  int OffsetStride = IsUnscaled ? getMemScale(MI) : 1;
    Offset -= OffsetStride;
  LdStPairFlags Flags;
      findMatchingInsn(MBBI, Flags, LdStLimit, false);
    if (TII->isUnscaledLdSt(Opc))
      ++NumUnscaledPairCreated;
    MBBI = mergePairedInsns(MBBI, Paired, Flags);
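// Run the transformations over a single basic block in order: store-to-load
// forwarding, narrow zero-store merging (when enabled), load/store pairing,
// and finally pre-/post-index update folding.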
bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
                                        bool EnableNarrowZeroStOpt) {
  bool Modified = false;
    case AArch64::LDRBBui:
    case AArch64::LDRHHui:
    case AArch64::LDRWui:
    case AArch64::LDRXui:
    case AArch64::LDURBBi:
    case AArch64::LDURHHi:
    case AArch64::LDURWi:
    case AArch64::LDURXi: {
      if (tryToPromoteLoadFromStore(MBBI)) {
       EnableNarrowZeroStOpt && MBBI != E;) {
    if (tryToMergeZeroStInst(MBBI)) {
    if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
    case AArch64::STRSui:
    case AArch64::STRDui:
    case AArch64::STRQui:
    case AArch64::STRXui:
    case AArch64::STRWui:
    case AArch64::STRHHui:
    case AArch64::STRBBui:
    case AArch64::LDRSui:
    case AArch64::LDRDui:
    case AArch64::LDRQui:
    case AArch64::LDRXui:
    case AArch64::LDRWui:
    case AArch64::LDRHHui:
    case AArch64::LDRBBui:
    case AArch64::STURSi:
    case AArch64::STURDi:
    case AArch64::STURQi:
    case AArch64::STURWi:
    case AArch64::STURXi:
    case AArch64::LDURSi:
    case AArch64::LDURDi:
    case AArch64::LDURQi:
    case AArch64::LDURWi:
    case AArch64::LDURXi:
    case AArch64::LDPSi:
    case AArch64::LDPSWi:
    case AArch64::LDPDi:
    case AArch64::LDPQi:
    case AArch64::LDPWi:
    case AArch64::LDPXi:
    case AArch64::STPSi:
    case AArch64::STPDi:
    case AArch64::STPQi:
    case AArch64::STPWi:
    case AArch64::STPXi: {
          findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
        MBBI = mergeUpdateInsn(MBBI, Update, false);
      if (TII->isUnscaledLdSt(Opc)) {
      Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit);
        MBBI = mergeUpdateInsn(MBBI, Update, true);
      Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
        MBBI = mergeUpdateInsn(MBBI, Update, true);
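// runOnMachineFunction: size the register bitvectors, then optimize each
// block. Narrow zero-store merging is only enabled when the subtarget does
// not require strict alignment.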
  ModifiedRegs.resize(TRI->getNumRegs());
  UsedRegs.resize(TRI->getNumRegs());
  bool Modified = false;
  bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
  for (auto &MBB : Fn)
    Modified |= optimizeBlock(MBB, enableNarrowZeroStOpt);
  return new AArch64LoadStoreOpt();