#define DEBUG_TYPE "aarch64-ldst-opt"

STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
STATISTIC(NumPostFolded, "Number of post-index updates folded");
STATISTIC(NumPreFolded, "Number of pre-index updates folded");
STATISTIC(NumUnscaledPairCreated,
          "Number of load/store from unscaled generated");
STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
STATISTIC(NumFailedAlignmentCheck, "Number of load/store pair transformation "
                                   "not passed the alignment check");
STATISTIC(NumConstOffsetFolded,
          "Number of const offset of index address folded");

DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
              "Controls which pairs are considered for renaming");
#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
struct LdStPairFlags {
  // If a matching instruction is found, MergeForward is set to true if the
  // merge is to remove the first instruction and replace the second with a
  // pair-wise insn, and false if the reverse is true.
  bool MergeForward = false;

  // SExtIdx gives the index of the result of the load pair that must be
  // extended: -1 means no value has to be extended, 0 means the first result,
  // and 1 means the second.
  int SExtIdx = -1;

  // If not std::nullopt, RenameReg can be used to rename the register used by
  // the first load/store in a pair so that the merge becomes legal.
  std::optional<MCPhysReg> RenameReg;

  LdStPairFlags() = default;

  void setMergeForward(bool V = true) { MergeForward = V; }
  bool getMergeForward() const { return MergeForward; }

  void setSExtIdx(int V) { SExtIdx = V; }
  int getSExtIdx() const { return SExtIdx; }

  void setRenameReg(MCPhysReg R) { RenameReg = R; }
  void clearRenameReg() { RenameReg = std::nullopt; }
  std::optional<MCPhysReg> getRenameReg() const { return RenameReg; }
};
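// Illustrative example (not part of the original source): with
// MergeForward == false the pair is inserted at the position of the first
// instruction, e.g.
//   ldr w0, [x2]
//   ldr w1, [x2, #4]
// becomes
//   ldp w0, w1, [x2]
// SExtIdx records which of the two results (if any) still needs a sign
// extension after such a merge.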
  // Scan the instructions looking for a load/store that can be combined
  // with the current instruction into a load/store pair.
  MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
                                               LdStPairFlags &Flags,
                                               unsigned Limit,
                                               bool FindNarrowMerge);

  // Merge the two instructions indicated into a wider narrow store / a
  // paired instruction.
  MachineBasicBlock::iterator
  mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                        MachineBasicBlock::iterator MergeMI,
                        const LdStPairFlags &Flags);
  MachineBasicBlock::iterator
  mergePairedInsns(MachineBasicBlock::iterator I,
                   MachineBasicBlock::iterator Paired,
                   const LdStPairFlags &Flags);

  // Scan the instructions looking for a base register update that can be
  // combined with the current instruction (a load or store) using
  // pre or post indexed addressing with writeback.
  MachineBasicBlock::iterator
  findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
                                int UnscaledOffset, unsigned Limit);

  bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI,
                            unsigned BaseReg, int Offset);
  bool isMatchingMovConstInsn(MachineInstr &MemMI, MachineInstr &MI,
                              unsigned IndexReg, unsigned &Offset);

  MachineBasicBlock::iterator
  doFoldSymmetryConstantLoad(MachineInstr &MI,
                             SmallVectorImpl<MachineBasicBlock::iterator> &MIs,
                             int UpperLoadIdx, int Accumulated);

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }
char AArch64LoadStoreOpt::ID = 0;
static bool isNarrowStore(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
    return true;
  }
}
// These instructions set a memory tag and either keep the memory contents
// unchanged or set them to zero, ignoring the address part of the source
// register.
static bool isTagStore(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::STGi:
  case AArch64::STZGi:
  case AArch64::ST2Gi:
  case AArch64::STZ2Gi:
    return true;
  }
}
static unsigned getMatchingNonSExtOpcode(unsigned Opc,
                                         bool *IsValidLdStrOpc = nullptr) {
  if (IsValidLdStrOpc)
    *IsValidLdStrOpc = true;
  switch (Opc) {
  default:
    if (IsValidLdStrOpc)
      *IsValidLdStrOpc = false;
    return std::numeric_limits<unsigned>::max();
  case AArch64::STRDui:
  case AArch64::STURDi:
  case AArch64::STRDpre:
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::STRQpre:
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
  case AArch64::STRWui:
  case AArch64::STRWpre:
  case AArch64::STURWi:
  case AArch64::STRXui:
  case AArch64::STRXpre:
  case AArch64::STURXi:
  case AArch64::LDRDui:
  case AArch64::LDURDi:
  case AArch64::LDRDpre:
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::LDRQpre:
  case AArch64::LDRWui:
  case AArch64::LDURWi:
  case AArch64::LDRWpre:
  case AArch64::LDRXui:
  case AArch64::LDURXi:
  case AArch64::LDRXpre:
  case AArch64::STRSui:
  case AArch64::STURSi:
  case AArch64::STRSpre:
  case AArch64::LDRSui:
  case AArch64::LDURSi:
  case AArch64::LDRSpre:
    return Opc;
  case AArch64::LDRSWui:
    return AArch64::LDRWui;
  case AArch64::LDURSWi:
    return AArch64::LDURWi;
  case AArch64::LDRSWpre:
    return AArch64::LDRWpre;
  }
}
static unsigned getMatchingWideOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no wide equivalent!");
  case AArch64::STRBBui:
    return AArch64::STRHHui;
  case AArch64::STRHHui:
    return AArch64::STRWui;
  case AArch64::STURBBi:
    return AArch64::STURHHi;
  case AArch64::STURHHi:
    return AArch64::STURWi;
  case AArch64::STURWi:
    return AArch64::STURXi;
  case AArch64::STRWui:
    return AArch64::STRXui;
  }
}
static unsigned getMatchingPairOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pairwise equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STPSi;
  case AArch64::STRSpre:
    return AArch64::STPSpre;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STPDi;
  case AArch64::STRDpre:
    return AArch64::STPDpre;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STPQi;
  case AArch64::STRQpre:
    return AArch64::STPQpre;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STPWi;
  case AArch64::STRWpre:
    return AArch64::STPWpre;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STPXi;
  case AArch64::STRXpre:
    return AArch64::STPXpre;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDPSi;
  case AArch64::LDRSpre:
    return AArch64::LDPSpre;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDPDi;
  case AArch64::LDRDpre:
    return AArch64::LDPDpre;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDPQi;
  case AArch64::LDRQpre:
    return AArch64::LDPQpre;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDPWi;
  case AArch64::LDRWpre:
    return AArch64::LDPWpre;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDPXi;
  case AArch64::LDRXpre:
    return AArch64::LDPXpre;
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
    return AArch64::LDPSWi;
  case AArch64::LDRSWpre:
    return AArch64::LDPSWpre;
  }
}
static unsigned isMatchingStore(MachineInstr &LoadInst,
                                MachineInstr &StoreInst) {
  unsigned LdOpc = LoadInst.getOpcode();
  unsigned StOpc = StoreInst.getOpcode();
  switch (LdOpc) {
  default:
    llvm_unreachable("Unsupported load instruction!");
  case AArch64::LDRBBui:
    return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
           StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
  case AArch64::LDURBBi:
    return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
           StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
  case AArch64::LDRHHui:
    return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
           StOpc == AArch64::STRXui;
  case AArch64::LDURHHi:
    return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
           StOpc == AArch64::STURXi;
  case AArch64::LDRWui:
    return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
  case AArch64::LDURWi:
    return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
  case AArch64::LDRXui:
    return StOpc == AArch64::STRXui;
  case AArch64::LDURXi:
    return StOpc == AArch64::STURXi;
  }
}
static unsigned getPreIndexedOpcode(unsigned Opc) {
  // FIXME: We don't currently support creating pre-indexed loads/stores when
  // the load or store is the unscaled version.
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pre-indexed equivalent!");
  case AArch64::STRSui:
    return AArch64::STRSpre;
  case AArch64::STRDui:
    return AArch64::STRDpre;
  case AArch64::STRQui:
    return AArch64::STRQpre;
  case AArch64::STRBBui:
    return AArch64::STRBBpre;
  case AArch64::STRHHui:
    return AArch64::STRHHpre;
  case AArch64::STRWui:
    return AArch64::STRWpre;
  case AArch64::STRXui:
    return AArch64::STRXpre;
  case AArch64::LDRSui:
    return AArch64::LDRSpre;
  case AArch64::LDRDui:
    return AArch64::LDRDpre;
  case AArch64::LDRQui:
    return AArch64::LDRQpre;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpre;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpre;
  case AArch64::LDRWui:
    return AArch64::LDRWpre;
  case AArch64::LDRXui:
    return AArch64::LDRXpre;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpre;
  case AArch64::LDPSi:
    return AArch64::LDPSpre;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpre;
  case AArch64::LDPDi:
    return AArch64::LDPDpre;
  case AArch64::LDPQi:
    return AArch64::LDPQpre;
  case AArch64::LDPWi:
    return AArch64::LDPWpre;
  case AArch64::LDPXi:
    return AArch64::LDPXpre;
  case AArch64::STPSi:
    return AArch64::STPSpre;
  case AArch64::STPDi:
    return AArch64::STPDpre;
  case AArch64::STPQi:
    return AArch64::STPQpre;
  case AArch64::STPWi:
    return AArch64::STPWpre;
  case AArch64::STPXi:
    return AArch64::STPXpre;
  case AArch64::STGi:
    return AArch64::STGPreIndex;
  case AArch64::STZGi:
    return AArch64::STZGPreIndex;
  case AArch64::ST2Gi:
    return AArch64::ST2GPreIndex;
  case AArch64::STZ2Gi:
    return AArch64::STZ2GPreIndex;
  case AArch64::STGPi:
    return AArch64::STGPpre;
  }
}
static unsigned getBaseAddressOpcode(unsigned Opc) {
  // TODO: Add more index address loads/stores.
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no base address equivalent!");
  case AArch64::LDRBBroX:
    return AArch64::LDRBBui;
  }
}
static unsigned getPostIndexedOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no post-indexed wise equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STRSpost;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STRDpost;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STRQpost;
  case AArch64::STRBBui:
    return AArch64::STRBBpost;
  case AArch64::STRHHui:
    return AArch64::STRHHpost;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STRWpost;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STRXpost;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDRSpost;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDRDpost;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDRQpost;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpost;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpost;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDRWpost;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDRXpost;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpost;
  case AArch64::LDPSi:
    return AArch64::LDPSpost;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpost;
  case AArch64::LDPDi:
    return AArch64::LDPDpost;
  case AArch64::LDPQi:
    return AArch64::LDPQpost;
  case AArch64::LDPWi:
    return AArch64::LDPWpost;
  case AArch64::LDPXi:
    return AArch64::LDPXpost;
  case AArch64::STPSi:
    return AArch64::STPSpost;
  case AArch64::STPDi:
    return AArch64::STPDpost;
  case AArch64::STPQi:
    return AArch64::STPQpost;
  case AArch64::STPWi:
    return AArch64::STPWpost;
  case AArch64::STPXi:
    return AArch64::STPXpost;
  case AArch64::STGi:
    return AArch64::STGPostIndex;
  case AArch64::STZGi:
    return AArch64::STZGPostIndex;
  case AArch64::ST2Gi:
    return AArch64::ST2GPostIndex;
  case AArch64::STZ2Gi:
    return AArch64::STZ2GPostIndex;
  case AArch64::STGPi:
    return AArch64::STGPpost;
  }
}
static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) {
  unsigned OpcA = FirstMI.getOpcode();
  unsigned OpcB = MI.getOpcode();

  switch (OpcA) {
  default:
    return false;
  case AArch64::STRSpre:
    return (OpcB == AArch64::STRSui) || (OpcB == AArch64::STURSi);
  case AArch64::STRDpre:
    return (OpcB == AArch64::STRDui) || (OpcB == AArch64::STURDi);
  case AArch64::STRQpre:
    return (OpcB == AArch64::STRQui) || (OpcB == AArch64::STURQi);
  case AArch64::STRWpre:
    return (OpcB == AArch64::STRWui) || (OpcB == AArch64::STURWi);
  case AArch64::STRXpre:
    return (OpcB == AArch64::STRXui) || (OpcB == AArch64::STURXi);
  case AArch64::LDRSpre:
    return (OpcB == AArch64::LDRSui) || (OpcB == AArch64::LDURSi);
  case AArch64::LDRDpre:
    return (OpcB == AArch64::LDRDui) || (OpcB == AArch64::LDURDi);
  case AArch64::LDRQpre:
    return (OpcB == AArch64::LDRQui) || (OpcB == AArch64::LDURQi);
  case AArch64::LDRWpre:
    return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi);
  case AArch64::LDRXpre:
    return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);
  case AArch64::LDRSWpre:
    return (OpcB == AArch64::LDRSWui) || (OpcB == AArch64::LDURSWi);
  }
}
// Returns the scale and offset range of pre/post indexed variants of MI.
static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
                                       int &MinOffset, int &MaxOffset) {
  // ... (derives Scale and the legal [MinOffset, MaxOffset] range from the
  // pre/post-indexed form of MI; body elided in this excerpt)
}
static MachineOperand &getLdStRegOp(MachineInstr &MI,
                                    unsigned PairedRegOp = 0) {
  assert(PairedRegOp < 2 && "Unexpected register operand idx.");
  bool IsPreLdSt = AArch64InstrInfo::isPreLdSt(MI);
  if (IsPreLdSt)
    PairedRegOp += 1;
  unsigned Idx =
      AArch64InstrInfo::isPairedLdSt(MI) || IsPreLdSt ? PairedRegOp : 0;
  return MI.getOperand(Idx);
}
static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
                                  MachineInstr &StoreInst,
                                  const AArch64InstrInfo *TII) {
  assert(isMatchingStore(LoadInst, StoreInst) && "Expect matching store");
  int LoadSize = TII->getMemScale(LoadInst);
  int StoreSize = TII->getMemScale(StoreInst);
  int UnscaledStOffset =
      TII->hasUnscaledLdStOffset(StoreInst)
          ? AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm()
          : AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm() * StoreSize;
  int UnscaledLdOffset =
      TII->hasUnscaledLdStOffset(LoadInst)
          ? AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm()
          : AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm() * LoadSize;
  return (UnscaledStOffset <= UnscaledLdOffset) &&
         (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
}
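// Worked example (illustrative): an 8-byte store at unscaled offset 16 covers
// bytes [16, 24); a 4-byte load at unscaled offset 20 reads [20, 24). Both
// 16 <= 20 and 20 + 4 <= 16 + 8 hold, so the load is fully contained in the
// stored bytes and the function returns true.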
static bool isPromotableZeroStoreInst(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||
          isNarrowStore(Opc)) &&
         getLdStRegOp(MI).getReg() == AArch64::WZR;
}
static bool isPromotableLoadFromStore(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::LDRBBui:
  case AArch64::LDRHHui:
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  // Unscaled instructions.
  case AArch64::LDURBBi:
  case AArch64::LDURHHi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
    return true;
  }
}
static bool isMergeableLdStUpdate(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
  case AArch64::STRXui:
  case AArch64::STRWui:
  case AArch64::STRHHui:
  case AArch64::STRBBui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
  case AArch64::LDRXui:
  case AArch64::LDRWui:
  case AArch64::LDRHHui:
  case AArch64::LDRBBui:
  case AArch64::STGi:
  case AArch64::STZGi:
  case AArch64::ST2Gi:
  case AArch64::STZ2Gi:
  case AArch64::STGPi:
  // Unscaled instructions.
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURWi:
  case AArch64::STURXi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  // Paired instructions.
  case AArch64::LDPSi:
  case AArch64::LDPSWi:
  case AArch64::LDPDi:
  case AArch64::LDPQi:
  case AArch64::LDPWi:
  case AArch64::LDPXi:
  case AArch64::STPSi:
  case AArch64::STPDi:
  case AArch64::STPQi:
  case AArch64::STPWi:
  case AArch64::STPXi:
    // Make sure this is a reg+imm (as opposed to an address reloc).
    if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm())
      return false;
    return true;
  }
}
static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    return false;
  // Scaled instructions.
  // TODO: Add more index address loads/stores.
  case AArch64::LDRBBroX:
    Scale = 1;
    return true;
  }
}

static bool isRewritableImplicitDef(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case AArch64::ORRWrs:
  case AArch64::ADDWri:
    return true;
  }
}
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                                           MachineBasicBlock::iterator MergeMI,
                                           const LdStPairFlags &Flags) {
  assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
         "Expected promotable zero stores.");

  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
  // If NextI is the second of the two instructions to be merged, we need
  // to skip one further, since merging will invalidate the iterator.
  if (NextI == MergeMI)
    NextI = next_nodbg(NextI, E);

  unsigned Opc = I->getOpcode();
  unsigned MergeMIOpc = MergeMI->getOpcode();
  bool IsScaled = !TII->hasUnscaledLdStOffset(Opc);
  bool IsMergedMIScaled = !TII->hasUnscaledLdStOffset(MergeMIOpc);
  int OffsetStride = IsScaled ? TII->getMemScale(*I) : 1;
  int MergeMIOffsetStride = IsMergedMIScaled ? TII->getMemScale(*MergeMI) : 1;

  bool MergeForward = Flags.getMergeForward();
  // Insert our new paired instruction after whichever of the paired
  // instructions MergeForward indicates.
  MachineBasicBlock::iterator InsertionPoint = MergeForward ? MergeMI : I;
  // Also based on MergeForward is from where we copy the base register operand
  // so we get the flags compatible with the input code.
  const MachineOperand &BaseRegOp =
      MergeForward ? AArch64InstrInfo::getLdStBaseOp(*MergeMI)
                   : AArch64InstrInfo::getLdStBaseOp(*I);

  // The lower of the two byte offsets becomes the offset of the wider store.
  int64_t IOffsetInBytes =
      AArch64InstrInfo::getLdStOffsetOp(*I).getImm() * OffsetStride;
  int64_t MIOffsetInBytes =
      AArch64InstrInfo::getLdStOffsetOp(*MergeMI).getImm() *
      MergeMIOffsetStride;
  int64_t OffsetImm;
  if (IOffsetInBytes > MIOffsetInBytes)
    OffsetImm = MIOffsetInBytes;
  else
    OffsetImm = IOffsetInBytes;

  int NewOpcode = getMatchingWideOpcode(Opc);
  bool FinalIsScaled = !TII->hasUnscaledLdStOffset(NewOpcode);

  // Adjust final offset if the result opcode is a scaled store.
  if (FinalIsScaled) {
    int NewOffsetStride = FinalIsScaled ? TII->getMemScale(NewOpcode) : 1;
    assert(((OffsetImm % NewOffsetStride) == 0) &&
           "Offset should be a multiple of the store memory scale");
    OffsetImm = OffsetImm / NewOffsetStride;
  }

  // Construct the new instruction.
  DebugLoc DL = I->getDebugLoc();
  MachineBasicBlock *MBB = I->getParent();
  MachineInstrBuilder MIB;
  MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(NewOpcode))
            .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
            .add(BaseRegOp)
            .addImm(OffsetImm)
            .cloneMergedMemRefs({&*I, &*MergeMI})
            .setMIFlags(I->mergeFlagsWith(*MergeMI));
  (void)MIB;

  LLVM_DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n    ");
  // ... (debug printing of the replaced and new instructions elided)

  // Erase the old instructions.
  I->eraseFromParent();
  MergeMI->eraseFromParent();
  return NextI;
}
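// Illustrative example (not from the original source): two adjacent narrow
// zero stores
//   strh wzr, [x0]
//   strh wzr, [x0, #2]
// are replaced by a single wider zero store
//   str wzr, [x0]
// using getMatchingWideOpcode(STRHHui) == STRWui.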
// Apply Fn to all instructions between MI and the beginning of the block,
// until a def for DefReg is reached. Returns true if Fn returned true for all
// visited instructions.
static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg,
                              const TargetRegisterInfo *TRI, unsigned Limit,
                              std::function<bool(MachineInstr &, bool)> &Fn) {
  auto MBB = MI.getParent();
  for (MachineInstr &I :
       instructionsWithoutDebug(MI.getReverseIterator(), MBB->instr_rend())) {
    if (!Limit)
      return false;
    --Limit;

    bool isDef = any_of(I.operands(), [DefReg, TRI](MachineOperand &MOP) {
      return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
             TRI->regsOverlap(MOP.getReg(), DefReg);
    });
    if (!Fn(I, isDef))
      return false;
    if (isDef)
      break;
  }
  return true;
}

static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units,
                                   const TargetRegisterInfo *TRI) {
  // First drop all killed registers, then add all defined ones.
  for (const MachineOperand &MOP : phys_regs_and_masks(MI))
    if (MOP.isReg() && MOP.isKill())
      Units.removeReg(MOP.getReg());

  for (const MachineOperand &MOP : phys_regs_and_masks(MI))
    if (MOP.isReg() && !MOP.isKill())
      Units.addReg(MOP.getReg());
}
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator Paired,
                                      const LdStPairFlags &Flags) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
  if (NextI == Paired)
    NextI = next_nodbg(NextI, E);

  int SExtIdx = Flags.getSExtIdx();
  unsigned Opc =
      SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
  bool IsUnscaled = TII->hasUnscaledLdStOffset(Opc);
  int OffsetStride = IsUnscaled ? TII->getMemScale(*I) : 1;

  bool MergeForward = Flags.getMergeForward();

  std::optional<MCPhysReg> RenameReg = Flags.getRenameReg();
  if (RenameReg) {
    MCRegister RegToRename = getLdStRegOp(*I).getReg();
    DefinedInBB.addReg(*RenameReg);

    // Return the sub/super register of *RenameReg contained in class C, or
    // NoRegister if there is none.
    auto GetMatchingSubReg =
        [this, RenameReg](const TargetRegisterClass *C) -> MCPhysReg {
      for (MCPhysReg SubOrSuper :
           TRI->sub_and_superregs_inclusive(*RenameReg)) {
        if (C->contains(SubOrSuper))
          return SubOrSuper;
      }
      return AArch64::NoRegister;
    };

    std::function<bool(MachineInstr &, bool)> UpdateMIs =
        [this, RegToRename, GetMatchingSubReg, MergeForward](MachineInstr &MI,
                                                             bool IsDef) {
          if (IsDef) {
            bool SeenDef = false;
            for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {
              MachineOperand &MOP = MI.getOperand(OpIdx);
              if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                  (!MergeForward || !SeenDef ||
                   (MOP.isDef() && MOP.isImplicit())) &&
                  TRI->regsOverlap(MOP.getReg(), RegToRename)) {
                assert((MOP.isImplicit() ||
                        (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
                       "Need renamable operands");
                Register MatchingReg;
                if (const TargetRegisterClass *RC =
                        MI.getRegClassConstraint(OpIdx, TII, TRI))
                  MatchingReg = GetMatchingSubReg(RC);
                else {
                  if (!isRewritableImplicitDef(MI.getOpcode()))
                    continue;
                  MatchingReg = GetMatchingSubReg(
                      TRI->getMinimalPhysRegClass(MOP.getReg()));
                }
                if (MOP.isDef())
                  SeenDef = true;
                MOP.setReg(MatchingReg);
              }
            }
          } else {
            for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {
              MachineOperand &MOP = MI.getOperand(OpIdx);
              if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                  TRI->regsOverlap(MOP.getReg(), RegToRename)) {
                assert((MOP.isImplicit() ||
                        (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
                       "Need renamable operands");
                Register MatchingReg;
                if (const TargetRegisterClass *RC =
                        MI.getRegClassConstraint(OpIdx, TII, TRI))
                  MatchingReg = GetMatchingSubReg(RC);
                else
                  MatchingReg = GetMatchingSubReg(
                      TRI->getMinimalPhysRegClass(MOP.getReg()));
                assert(MatchingReg != AArch64::NoRegister &&
                       "Cannot find matching regs for renaming");
                MOP.setReg(MatchingReg);
              }
            }
          }
          LLVM_DEBUG(dbgs() << "Renamed " << MI);
          return true;
        };
    forAllMIsUntilDef(MergeForward ? *I : *std::prev(Paired), RegToRename, TRI,
                      UINT32_MAX, UpdateMIs);

#if !defined(NDEBUG)
    // For forward merging stores, make sure the rename register is never used
    // between the paired instructions; for backward merging, check the
    // register being renamed instead.
    MCPhysReg RegToCheck = *RenameReg;
    if (!MergeForward)
      RegToCheck = RegToRename;
    for (auto &MI : iterator_range<MachineBasicBlock::iterator>(
             MergeForward ? std::next(I) : I,
             MergeForward ? std::next(Paired) : Paired))
      assert(all_of(MI.operands(),
                    [this, RegToCheck](const MachineOperand &MOP) {
                      return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
                             MOP.isUndef() ||
                             !TRI->regsOverlap(MOP.getReg(), RegToCheck);
                    }) &&
             "Rename register used between paired instruction, trashing the "
             "content");
#endif
  }

  // Insert our new paired instruction after whichever of the paired
  // instructions MergeForward indicates.
  MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
  const MachineOperand &BaseRegOp =
      MergeForward ? AArch64InstrInfo::getLdStBaseOp(*Paired)
                   : AArch64InstrInfo::getLdStBaseOp(*I);

  int Offset = AArch64InstrInfo::getLdStOffsetOp(*I).getImm();
  int PairedOffset = AArch64InstrInfo::getLdStOffsetOp(*Paired).getImm();
  bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode());
  if (IsUnscaled != PairedIsUnscaled) {
    // We're trying to pair instructions that differ in how they are scaled.
    // If FirstMI is scaled then scale the offset of MI accordingly; otherwise,
    // do the opposite (i.e., make MI's offset unscaled).
    int MemSize = TII->getMemScale(*Paired);
    if (PairedIsUnscaled) {
      assert(!(PairedOffset % TII->getMemScale(*Paired)) &&
             "Offset should be a multiple of the stride!");
      PairedOffset /= MemSize;
    } else {
      PairedOffset *= MemSize;
    }
  }

  // Which register is Rt and which is Rt2 depends on the offset order.
  MachineInstr *RtMI, *Rt2MI;
  if (Offset == PairedOffset + OffsetStride &&
      !AArch64InstrInfo::isPreLdSt(*I)) {
    RtMI = &*Paired;
    Rt2MI = &*I;
    // We swapped the assumption made for SExtIdx: we turn
    // ldp I, Paired into ldp Paired, I. Update the index accordingly.
    if (SExtIdx != -1)
      SExtIdx = (SExtIdx + 1) % 2;
  } else {
    RtMI = &*I;
    Rt2MI = &*Paired;
  }
  int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm();
  // Scale the immediate offset, if necessary.
  if (TII->hasUnscaledLdStOffset(RtMI->getOpcode())) {
    assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
           "Unscaled offset cannot be scaled.");
    OffsetImm /= TII->getMemScale(*RtMI);
  }

  // Construct the new instruction.
  MachineInstrBuilder MIB;
  DebugLoc DL = I->getDebugLoc();
  MachineBasicBlock *MBB = I->getParent();
  MachineOperand RegOp0 = getLdStRegOp(*RtMI);
  MachineOperand RegOp1 = getLdStRegOp(*Rt2MI);
  MachineOperand &PairedRegOp = RtMI == &*Paired ? RegOp0 : RegOp1;
  // Kill flags may become invalid when moving stores for pairing.
  if (RegOp0.isUse()) {
    if (!MergeForward) {
      // Clear kill flags on store if moving upwards.
      RegOp0.setIsKill(false);
      RegOp1.setIsKill(false);
    } else {
      // Clear kill flags of the first store's register between the pair.
      Register Reg = getLdStRegOp(*I).getReg();
      for (auto It = std::next(I); It != Paired && PairedRegOp.isKill(); ++It)
        if (It->readsRegister(PairedRegOp.getReg(), TRI))
          PairedRegOp.setIsKill(false);
      for (MachineInstr &MI : make_range(std::next(I), Paired))
        MI.clearRegisterKills(Reg, TRI);
    }
  }

  MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingPairOpcode(Opc)))
            .add(RegOp0)
            .add(RegOp1)
            .add(BaseRegOp)
            .addImm(OffsetImm)
            .cloneMergedMemRefs({&*I, &*Paired})
            .setMIFlags(I->mergeFlagsWith(*Paired));

  LLVM_DEBUG(
      dbgs() << "Creating pair load/store. Replacing instructions:\n    ");
  // ... (debug printing elided)

  if (SExtIdx != -1) {
    // Generate the sign extension for the proper result of the ldp.
    // I.e., with X1, that would be:
    //   %w1 = KILL %w1, implicit-def %x1
    //   %x1 = SBFMXri killed %x1, 0, 31
    MachineOperand &DstMO = MIB->getOperand(SExtIdx);
    Register DstRegX = DstMO.getReg();
    // Get the W variant of that register.
    Register DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
    // Update the result of LDP to use the W instead of the X variant.
    DstMO.setReg(DstRegW);
    // Make the machine verifier happy by providing a definition for
    // the X register.
    BuildMI(*MBB, InsertionPoint, DL, TII->get(TargetOpcode::KILL), DstRegW)
        .addReg(DstRegW)
        .addReg(DstRegX, RegState::Define);
    // Insert the SBFM after the paired instruction.
    BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::SBFMXri), DstRegX)
        .addReg(DstRegX)
        .addImm(0)
        .addImm(31);
  }

  if (MergeForward)
    for (const MachineOperand &MOP : phys_regs_and_masks(*I))
      if (MOP.isReg() && MOP.isKill())
        DefinedInBB.addReg(MOP.getReg());

  // Erase the old instructions.
  I->eraseFromParent();
  Paired->eraseFromParent();

  return NextI;
}
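// Illustrative example of the SExtIdx fix-up (assumed, not from the source):
// pairing
//   ldrsw x0, [x2]
//   ldr   w1, [x2, #4]
// produces
//   ldp  w0, w1, [x2]
//   KILL w0, implicit-def x0
//   sbfm x0, x0, #0, #31   ; re-establish the sign extension of the first
//                          ; result, since the pair only loads 32 bits.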
MachineBasicBlock::iterator
AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
                                          MachineBasicBlock::iterator StoreI) {
  MachineBasicBlock::iterator NextI =
      next_nodbg(LoadI, LoadI->getParent()->end());

  int LoadSize = TII->getMemScale(*LoadI);
  int StoreSize = TII->getMemScale(*StoreI);
  Register LdRt = getLdStRegOp(*LoadI).getReg();
  const MachineOperand &StMO = getLdStRegOp(*StoreI);
  Register StRt = getLdStRegOp(*StoreI).getReg();
  bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);

  assert((IsStoreXReg ||
          TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
         "Unexpected RegClass");

  MachineInstr *BitExtMI;
  if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
    // Remove the load if its destination is the same register as the stored
    // value.
    if (StRt == LdRt && LoadSize == 8) {
      for (MachineInstr &MI : make_range(StoreI->getIterator(),
                                         LoadI->getIterator())) {
        if (MI.killsRegister(StRt, TRI)) {
          MI.clearRegisterKills(StRt, TRI);
          break;
        }
      }
      LoadI->eraseFromParent();
      return NextI;
    }
    // Replace the load with a mov if the load and store are the same size.
    BitExtMI =
        BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
            .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
            .add(StMO)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
            .setMIFlags(LoadI->getFlags());
  } else {
    // FIXME: Currently we disable this transformation in big-endian targets.
    if (!Subtarget->isLittleEndian())
      return NextI;
    bool IsUnscaled = TII->hasUnscaledLdStOffset(*LoadI);
    assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) &&
           "Unsupported ld/st match");
    assert(LoadSize <= StoreSize && "Invalid load size");
    int UnscaledLdOffset =
        IsUnscaled
            ? AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm()
            : AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm() * LoadSize;
    int UnscaledStOffset =
        IsUnscaled
            ? AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm()
            : AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm() * StoreSize;
    int Width = LoadSize * 8;
    Register DestReg =
        IsStoreXReg ? Register(TRI->getMatchingSuperReg(
                          LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))
                    : LdRt;

    assert((UnscaledLdOffset >= UnscaledStOffset &&
            (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
           "Invalid offset");

    int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
    int Imms = Immr + Width - 1;
    if (UnscaledLdOffset == UnscaledStOffset) {
      uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N
                                | ((Immr) << 6)               // immr
                                | ((Imms) << 0);              // imms
      BitExtMI =
          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                  TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
                  DestReg)
              .add(StMO)
              .addImm(AndMaskEncoded)
              .setMIFlags(LoadI->getFlags());
    } else {
      BitExtMI =
          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                  TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
                  DestReg)
              .add(StMO)
              .addImm(Immr)
              .addImm(Imms)
              .setMIFlags(LoadI->getFlags());
    }
  }

  // Clear kill flags between store and load.
  for (MachineInstr &MI :
       make_range(StoreI->getIterator(), BitExtMI->getIterator()))
    if (MI.killsRegister(StRt, TRI)) {
      MI.clearRegisterKills(StRt, TRI);
      break;
    }

  // Erase the old load.
  LoadI->eraseFromParent();
  return NextI;
}
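// Worked example (illustrative): promoting
//   str  w1, [x2]      ; StoreSize = 4, UnscaledStOffset = 0
//   ldrh w0, [x2, #2]  ; LoadSize = 2, UnscaledLdOffset = 2 (little-endian)
// gives Width = 16, Immr = 8 * (2 - 0) = 16, Imms = 16 + 16 - 1 = 31, i.e.
//   ubfm w0, w1, #16, #31  ; extract bits [31:16] of the stored value,
//                          ; equivalent to lsr w0, w1, #16.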
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
  // Convert the byte-offset used by unscaled into an "element" offset used
  // by the scaled pair instructions.
  if (IsUnscaled) {
    // If the byte offset isn't a multiple of the stride, there's no point
    // trying to match it.
    if (Offset % OffsetStride)
      return false;
    Offset /= OffsetStride;
  }
  // Paired instructions have a 7-bit signed offset field.
  return Offset <= 63 && Offset >= -64;
}
// Do alignment, specialized to power of 2 and for signed ints,
// avoiding having to do a C-style cast from uint_64t to int when
// using alignTo from include/llvm/Support/MathExtras.h.
static int alignTo(int Num, int PowOf2) {
  return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
}
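// Example (illustrative): alignTo(5, 4) == 8 and alignTo(8, 4) == 8; PowOf2
// must be a power of two for the mask trick to be valid.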
static bool mayAlias(MachineInstr &MIa,
                     SmallVectorImpl<MachineInstr *> &MemInsns,
                     AliasAnalysis *AA) {
  for (MachineInstr *MIb : MemInsns) {
    if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false)) {
      LLVM_DEBUG(dbgs() << "Aliasing with: "; MIb->dump());
      return true;
    }
  }
  LLVM_DEBUG(dbgs() << "No aliases found\n");
  return false;
}
bool AArch64LoadStoreOpt::findMatchingStore(
    MachineBasicBlock::iterator I, unsigned Limit,
    MachineBasicBlock::iterator &StoreI) {
  MachineBasicBlock::iterator B = I->getParent()->begin();
  MachineBasicBlock::iterator MBBI = I;
  MachineInstr &LoadMI = *I;
  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(LoadMI).getReg();

  // If the load is the first instruction in the block, there's nothing to
  // find.
  if (MBBI == B)
    return false;

  // Track which register units have been modified and used between the first
  // insn and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();

  unsigned Count = 0;
  do {
    MBBI = prev_nodbg(MBBI, B);
    MachineInstr &MI = *MBBI;

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (!MI.isTransient())
      ++Count;

    // If the load instruction reads directly from the address to which the
    // store instruction writes and the stored value is not modified, we can
    // promote the load.
    if (MI.mayStore() && isMatchingStore(LoadMI, MI) &&
        BaseReg == AArch64InstrInfo::getLdStBaseOp(MI).getReg() &&
        isLdOffsetInRangeOfSt(LoadMI, MI, TII) &&
        ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) {
      StoreI = MBBI;
      return true;
    }

    if (MI.isCall())
      return false;

    // Update modified / used register units.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is modified, we have no match, so
    // return early.
    if (!ModifiedRegUnits.available(BaseReg))
      return false;

    // If we encounter a store aliased with the load, return early.
    if (MI.mayStore() && LoadMI.mayAlias(AA, MI, /*UseTBAA*/ false))
      return false;
  } while (MBBI != B && Count < Limit);
  return false;
}
static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
                                       LdStPairFlags &Flags,
                                       const AArch64InstrInfo *TII) {
  // If this is volatile or if pairing is suppressed, not a candidate.
  if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
    return false;

  // We should have already checked FirstMI for pair suppression and
  // volatility.
  assert(!FirstMI.hasOrderedMemoryRef() &&
         !TII->isLdStPairSuppressed(FirstMI) &&
         "FirstMI shouldn't get here if either of these checks are true.");

  if (needsWinCFI(MI.getMF()) && (MI.getFlag(MachineInstr::FrameSetup) ||
                                  MI.getFlag(MachineInstr::FrameDestroy)))
    return false;

  unsigned OpcA = FirstMI.getOpcode();
  unsigned OpcB = MI.getOpcode();

  // Opcodes match: if the opcodes are pre ld/st there is nothing more to
  // check.
  if (OpcA == OpcB)
    return !AArch64InstrInfo::isPreLdSt(FirstMI);

  // Two pre ld/st of different opcodes cannot be merged either.
  if (AArch64InstrInfo::isPreLdSt(FirstMI) && AArch64InstrInfo::isPreLdSt(MI))
    return false;

  // Try to match a sign-extended load/store with a zero-extended load/store.
  bool IsValidLdStrOpc, PairIsValidLdStrOpc;
  unsigned NonSExtOpc = getMatchingNonSExtOpcode(OpcA, &IsValidLdStrOpc);
  assert(IsValidLdStrOpc &&
         "Given Opc should be a Load or Store with an immediate");
  // OpcA will be the first instruction in the pair.
  if (NonSExtOpc == getMatchingNonSExtOpcode(OpcB, &PairIsValidLdStrOpc)) {
    Flags.setSExtIdx(NonSExtOpc == (unsigned)OpcA ? 1 : 0);
    return true;
  }

  // If the second instruction isn't even a mergable/pairable load/store, bail.
  if (!PairIsValidLdStrOpc)
    return false;

  // Narrow stores do not have a matching pair opcode, so constrain their
  // merging to zero stores.
  if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
    return false;

  // The STR<S,D,Q,W,X>pre - STR<S,D,Q,W,X>ui and
  // LDR<S,D,Q,W,X,SW>pre - LDR<S,D,Q,W,X,SW>ui
  // are candidate pairs that can be merged.
  if (isPreLdStPairCandidate(FirstMI, MI))
    return true;

  // Try to match an unscaled load/store with a scaled load/store.
  return TII->hasUnscaledLdStOffset(OpcA) != TII->hasUnscaledLdStOffset(OpcB) &&
         getMatchingPairOpcode(OpcA) == getMatchingPairOpcode(OpcB);
}
static bool canRenameMOP(const MachineOperand &MOP,
                         const TargetRegisterInfo *TRI) {
  if (MOP.isReg()) {
    auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg());
    // Renaming registers with multiple disjunct sub-registers (e.g. the
    // result of a LD3) means that all sub-registers are renamed, potentially
    // impacting other instructions we did not check. Bail out.
    if (RegClass->HasDisjunctSubRegs) {
      LLVM_DEBUG(
          dbgs()
          << "  Cannot rename operands with multiple disjunct subregisters ("
          << MOP << ")\n");
      return false;
    }

    // We cannot rename arbitrary implicit-defs; the specific rule to rewrite
    // them must be known. For example, in ORRWrs the implicit-def corresponds
    // to the result register.
    if (MOP.isImplicit() && MOP.isDef()) {
      if (!isRewritableImplicitDef(MOP.getParent()->getOpcode()))
        return false;
      return TRI->isSuperOrSubRegisterEq(
          MOP.getParent()->getOperand(0).getReg(), MOP.getReg());
    }
  }
  return MOP.isImplicit() ||
         (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied());
}
static bool canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,
                             SmallPtrSetImpl<const TargetRegisterClass *>
                                 &RequiredClasses,
                             const TargetRegisterInfo *TRI) {
  if (!FirstMI.mayStore())
    return false;

  Register RegToRename = getLdStRegOp(FirstMI).getReg();

  // Only implicit operands that are killed uses of RegToRename need rewriting.
  auto IsKilledImplicitUse = [TRI, RegToRename](const MachineOperand &MOP) {
    return MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
           MOP.isImplicit() && MOP.isKill() &&
           TRI->regsOverlap(RegToRename, MOP.getReg());
  };
  (void)IsKilledImplicitUse;

  bool FoundDef = false;

  // Walk backwards until the first def of RegToRename, checking that every
  // operand that would have to be rewritten can actually be renamed.
  std::function<bool(MachineInstr &, bool)> CheckMIs = [&](MachineInstr &MI,
                                                           bool IsDef) {
    // Pseudo and bundle instructions may expand to operands we cannot check.
    if (MI.isPseudo()) {
      LLVM_DEBUG(dbgs() << "  Cannot rename pseudo/bundle instruction\n");
      return false;
    }

    if (IsDef) {
      FoundDef = true;
      for (auto &MOP : MI.operands()) {
        if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
            !TRI->regsOverlap(MOP.getReg(), RegToRename))
          continue;
        if (!canRenameMOP(MOP, TRI))
          return false;
        RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
      }
    } else {
      for (auto &MOP : MI.operands()) {
        if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
            !TRI->regsOverlap(MOP.getReg(), RegToRename))
          continue;
        if (!canRenameMOP(MOP, TRI))
          return false;
        RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
      }
    }
    return true;
  };

  if (!forAllMIsUntilDef(FirstMI, RegToRename, TRI, LdStLimit, CheckMIs))
    return false;

  if (!FoundDef) {
    LLVM_DEBUG(dbgs() << "  Did not find definition for register in BB\n");
    return false;
  }
  return true;
}
// Check whether the register used by FirstLoad can be renamed in all
// instructions between FirstLoad and SecondLoad.
static bool canRenameUntilSecondLoad(
    MachineInstr &FirstLoad, MachineInstr &SecondLoad,
    LiveRegUnits &UsedInBetween,
    SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
    const TargetRegisterInfo *TRI) {
  if (FirstLoad.isPseudo())
    return false;

  UsedInBetween.accumulate(FirstLoad);
  Register RegToRename = getLdStRegOp(FirstLoad).getReg();
  bool Success = std::all_of(
      FirstLoad.getIterator(), SecondLoad.getIterator(),
      [&](MachineInstr &MI) {
        LLVM_DEBUG(dbgs() << "Checking " << MI);
        // Currently we do not try to rename across frame-setup instructions.
        if (MI.getFlag(MachineInstr::FrameSetup)) {
          LLVM_DEBUG(dbgs() << "  Cannot rename framesetup instructions "
                            << "currently\n");
          return false;
        }

        for (auto &MOP : MI.operands()) {
          if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
              !TRI->regsOverlap(MOP.getReg(), RegToRename))
            continue;
          if (!canRenameMOP(MOP, TRI)) {
            LLVM_DEBUG(dbgs() << "  Cannot rename " << MOP << " in " << MI);
            return false;
          }
          RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
        }
        return true;
      });
  return Success;
}
static std::optional<MCPhysReg> tryToFindRegisterToRename(
    const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB,
    LiveRegUnits &UsedInBetween,
    SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
    const TargetRegisterInfo *TRI) {
  const MachineRegisterInfo &RegInfo = MF.getRegInfo();

  // Checks if any sub- or super-register of PR is callee saved.
  auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) {
    return any_of(TRI->sub_and_superregs_inclusive(PR),
                  [&MF, TRI](MCPhysReg SubOrSuper) {
                    return TRI->isCalleeSavedPhysReg(SubOrSuper, MF);
                  });
  };

  // Check if a register is renamable and safe to use for all classes in
  // RequiredClasses.
  auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) {
    return all_of(RequiredClasses, [PR, TRI](const TargetRegisterClass *C) {
      return any_of(
          TRI->sub_and_superregs_inclusive(PR),
          [C](MCPhysReg SubOrSuper) { return C->contains(SubOrSuper); });
    });
  };

  auto *RegClass = TRI->getMinimalPhysRegClass(Reg);
  for (const MCPhysReg &PR : *RegClass) {
    if (DefinedInBB.available(PR) && UsedInBetween.available(PR) &&
        !RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) &&
        CanBeUsedForAllClasses(PR)) {
      DefinedInBB.addReg(PR);
      LLVM_DEBUG(dbgs() << "Found rename register " << printReg(PR, TRI)
                        << "\n");
      return {PR};
    }
  }
  LLVM_DEBUG(dbgs() << "No rename register found from "
                    << TRI->getRegClassName(RegClass) << "\n");
  return std::nullopt;
}
static std::optional<MCPhysReg> findRenameRegForSameLdStRegPair(
    std::optional<bool> MaybeCanRename, MachineInstr &FirstMI,
    MachineInstr &MI, Register Reg, LiveRegUnits &DefinedInBB,
    LiveRegUnits &UsedInBetween,
    SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
    const TargetRegisterInfo *TRI) {
  std::optional<MCPhysReg> RenameReg;
  if (!DebugCounter::shouldExecute(RegRenamingCounter))
    return RenameReg;

  auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg());
  MachineFunction &MF = *FirstMI.getParent()->getParent();
  if (!RegClass || !MF.getRegInfo().tracksLiveness())
    return RenameReg;

  const bool IsLoad = FirstMI.mayLoad();

  if (!MaybeCanRename) {
    if (IsLoad)
      MaybeCanRename = {canRenameUntilSecondLoad(FirstMI, MI, UsedInBetween,
                                                 RequiredClasses, TRI)};
    else
      MaybeCanRename = {
          canRenameUpToDef(FirstMI, UsedInBetween, RequiredClasses, TRI)};
  }

  if (*MaybeCanRename) {
    RenameReg = tryToFindRegisterToRename(MF, Reg, DefinedInBB, UsedInBetween,
                                          RequiredClasses, TRI);
  }
  return RenameReg;
}
/// Scan the instructions looking for a load/store that can be combined with
/// the current instruction into a wider load/store pair.
MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
                                      LdStPairFlags &Flags, unsigned Limit,
                                      bool FindNarrowMerge) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator MBBI = I;
  MachineBasicBlock::iterator MBBIWithRenameReg;
  MachineInstr &FirstMI = *I;
  MBBI = next_nodbg(MBBI, E);

  bool MayLoad = FirstMI.mayLoad();
  bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI);
  Register Reg = getLdStRegOp(FirstMI).getReg();
  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(FirstMI).getReg();
  int Offset = AArch64InstrInfo::getLdStOffsetOp(FirstMI).getImm();
  int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;
  bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);

  std::optional<bool> MaybeCanRename;
  if (!EnableRenaming)
    MaybeCanRename = {false};

  SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;
  LiveRegUnits UsedInBetween;
  UsedInBetween.init(*TRI);

  Flags.clearRenameReg();

  // Track which register units have been modified and used between the first
  // insn (inclusive) and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();

  // Remember any instructions that read/write memory between FirstMI and MI.
  SmallVector<MachineInstr *, 4> MemInsns;

  for (unsigned Count = 0; MBBI != E && Count < Limit;
       MBBI = next_nodbg(MBBI, E)) {
    MachineInstr &MI = *MBBI;

    UsedInBetween.accumulate(MI);

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (!MI.isTransient())
      ++Count;

    Flags.setSExtIdx(-1);
    if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&
        AArch64InstrInfo::getLdStOffsetOp(MI).isImm()) {
      assert(MI.mayLoadOrStore() && "Expected memory operation.");
      // If we've found another instruction with the same opcode, check to see
      // if the base and offset are compatible with our starting instruction.
      // These instructions all have scaled immediate operands, so we just
      // check for +1/-1. Make sure to check the new instruction offset is
      // actually an immediate and not a symbolic reference destined for
      // a relocation.
      Register MIBaseReg = AArch64InstrInfo::getLdStBaseOp(MI).getReg();
      int MIOffset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
      bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI);
      if (IsUnscaled != MIIsUnscaled) {
        // We're trying to pair instructions that differ in how they are
        // scaled. If FirstMI is scaled then scale the offset of MI
        // accordingly. Otherwise, do the opposite (i.e., make MI's offset
        // unscaled).
        int MemSize = TII->getMemScale(MI);
        if (MIIsUnscaled) {
          // If the unscaled offset isn't a multiple of the MemSize, we can't
          // pair the operations together: bail and keep looking.
          if (MIOffset % MemSize) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
          MIOffset /= MemSize;
        } else {
          MIOffset *= MemSize;
        }
      }

      bool IsPreLdSt = AArch64InstrInfo::isPreLdSt(MI);

      if (BaseReg == MIBaseReg) {
        // If the offset of the second ld/st is not equal to the size of the
        // destination register it can't be paired with a pre-index ld/st
        // pair. Additionally, if the base reg is used or modified the
        // operations can't be paired: bail and keep looking.
        if (IsPreLdSt) {
          bool IsOutOfBounds = MIOffset != TII->getMemScale(MI);
          bool IsBaseRegUsed = !UsedRegUnits.available(
              AArch64InstrInfo::getLdStBaseOp(MI).getReg());
          bool IsBaseRegModified = !ModifiedRegUnits.available(
              AArch64InstrInfo::getLdStBaseOp(MI).getReg());
          // If the stored value and the address of the second instruction is
          // the same, it needs to be using the updated register and therefore
          // it must not be folded.
          bool IsMIRegTheSame =
              TRI->regsOverlap(getLdStRegOp(MI).getReg(),
                               AArch64InstrInfo::getLdStBaseOp(MI).getReg());
          if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
              IsMIRegTheSame) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
        } else {
          if ((Offset != MIOffset + OffsetStride) &&
              (Offset + OffsetStride != MIOffset)) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
        }

        int MinOffset = Offset < MIOffset ? Offset : MIOffset;
        if (FindNarrowMerge) {
          // If the alignment requirements of the scaled wide load/store
          // instruction can't express the offset of the scaled narrow input,
          // bail and keep looking. For promotable zero stores, allow only when
          // the stored value is the same (i.e., WZR).
          if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
              (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
            LLVM_DEBUG(dbgs() << "Offset doesn't fit in immediate, "
                              << "keep looking.\n");
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
        } else {
          // Pairwise instructions have a 7-bit signed offset field. Single
          // insns have a 12-bit unsigned offset field. If the resultant
          // immediate offset of merging these instructions is out of range
          // for a pairwise instruction, bail and keep looking.
          if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            LLVM_DEBUG(dbgs() << "Offset doesn't fit in immediate, "
                              << "keep looking.\n");
            continue;
          }
          // If the alignment requirements of the paired (scaled) instruction
          // can't express the offset of the unscaled input, bail and keep
          // looking.
          if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            LLVM_DEBUG(dbgs()
                       << "Offset doesn't fit due to alignment requirements, "
                       << "keep looking.\n");
            continue;
          }
        }

        // If the BaseReg has been modified, then we cannot do the
        // optimization.
        if (!ModifiedRegUnits.available(BaseReg))
          return E;

        const bool SameLoadReg =
            MayLoad &&
            TRI->isSuperOrSubRegisterEq(Reg, getLdStRegOp(MI).getReg());

        // If the Rt of the second instruction (destination register of the
        // load) was not modified or used between the two instructions and
        // none of the instructions between the second and first alias with
        // the second, we can combine the second into the first.
        bool RtNotModified =
            ModifiedRegUnits.available(getLdStRegOp(MI).getReg());
        bool RtNotUsed = !(MI.mayLoad() && !SameLoadReg &&
                           !UsedRegUnits.available(getLdStRegOp(MI).getReg()));

        LLVM_DEBUG(dbgs() << "Checking, can combine 2nd into 1st insn:\n"
                          << "Reg '" << getLdStRegOp(MI) << "' not modified: "
                          << (RtNotModified ? "true" : "false") << "\n"
                          << "Reg '" << getLdStRegOp(MI) << "' not used: "
                          << (RtNotUsed ? "true" : "false") << "\n");

        if (RtNotModified && RtNotUsed && !mayAlias(MI, MemInsns, AA)) {
          // For pairs loading into the same reg, try to find a renaming
          // opportunity to allow the renaming of Reg between FirstMI and MI
          // and combine MI into FirstMI; otherwise bail and keep looking.
          if (SameLoadReg) {
            std::optional<MCPhysReg> RenameReg =
                findRenameRegForSameLdStRegPair(MaybeCanRename, FirstMI, MI,
                                                Reg, DefinedInBB,
                                                UsedInBetween, RequiredClasses,
                                                TRI);
            if (!RenameReg) {
              LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                                UsedRegUnits, TRI);
              MemInsns.push_back(&MI);
              LLVM_DEBUG(dbgs() << "Can't find reg for renaming, "
                                << "keep looking.\n");
              continue;
            }
            Flags.setRenameReg(*RenameReg);
          }

          Flags.setMergeForward(false);
          if (!SameLoadReg)
            Flags.clearRenameReg();
          return MBBI;
        }

        // Likewise, if the Rt of the first instruction is not modified or
        // used between the two instructions and none of the instructions
        // between the first and the second alias with the first, we can
        // combine the first into the second.
        RtNotModified = !(
            MayLoad && !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg()));

        LLVM_DEBUG(dbgs() << "Checking, can combine 1st into 2nd insn:\n"
                          << "Reg '" << getLdStRegOp(FirstMI)
                          << "' not modified: "
                          << (RtNotModified ? "true" : "false") << "\n");

        if (RtNotModified && !mayAlias(FirstMI, MemInsns, AA)) {
          if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) {
            Flags.setMergeForward(true);
            Flags.clearRenameReg();
            return MBBI;
          }

          std::optional<MCPhysReg> RenameReg = findRenameRegForSameLdStRegPair(
              MaybeCanRename, FirstMI, MI, Reg, DefinedInBB, UsedInBetween,
              RequiredClasses, TRI);
          if (RenameReg) {
            Flags.setMergeForward(true);
            Flags.setRenameReg(*RenameReg);
            MBBIWithRenameReg = MBBI;
          }
        }
        LLVM_DEBUG(dbgs() << "Unable to combine these instructions due to "
                          << "interference in between, keep looking.\n");
      }
    }

    if (Flags.getRenameReg())
      return MBBIWithRenameReg;

    // If the instruction wasn't a matching load or store, stop searching if
    // we encounter a call instruction that might modify memory.
    if (MI.isCall())
      return E;

    // Update modified / used register units.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is modified, we have no match, so
    // return early.
    if (!ModifiedRegUnits.available(BaseReg))
      return E;

    // Update list of instructions that read/write memory.
    if (MI.mayLoadOrStore())
      MemInsns.push_back(&MI);
  }
  return E;
}
static MachineBasicBlock::iterator
maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI) {
  assert((MI.getOpcode() == AArch64::SUBXri ||
          MI.getOpcode() == AArch64::ADDXri) &&
         "Expected a register update instruction");
  auto End = MI.getParent()->end();
  if (MaybeCFI == End ||
      MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||
      !(MI.getFlag(MachineInstr::FrameSetup) ||
        MI.getFlag(MachineInstr::FrameDestroy)) ||
      MI.getOperand(0).getReg() != AArch64::SP)
    return End;

  const MachineFunction &MF = *MI.getParent()->getParent();
  unsigned CFIIndex = MaybeCFI->getOperand(0).getCFIIndex();
  const MCCFIInstruction &CFI = MF.getFrameInstructions()[CFIIndex];
  switch (CFI.getOperation()) {
  case MCCFIInstruction::OpDefCfa:
  case MCCFIInstruction::OpDefCfaOffset:
    return MaybeCFI;
  default:
    return End;
  }
}
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
                                     MachineBasicBlock::iterator Update,
                                     bool IsPreIdx) {
  assert((Update->getOpcode() == AArch64::ADDXri ||
          Update->getOpcode() == AArch64::SUBXri) &&
         "Unexpected base register update instruction to merge!");
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);

  // If updating the SP and the following instruction is CFA offset related
  // CFI, move the CFI after the merged instruction.
  MachineBasicBlock::iterator CFI =
      IsPreIdx ? maybeMoveCFI(*Update, next_nodbg(Update, E)) : E;

  // Return the instruction following the merged instruction, which is
  // the instruction following our unmerged load. Unless that's the add/sub
  // instruction we're merging, in which case it's the one after that.
  if (NextI == Update)
    NextI = next_nodbg(NextI, E);

  int Value = Update->getOperand(2).getImm();
  assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
         "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
  if (Update->getOpcode() == AArch64::SUBXri)
    Value = -Value;

  unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
                             : getPostIndexedOpcode(I->getOpcode());
  MachineInstrBuilder MIB;
  int Scale, MinOffset, MaxOffset;
  getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
  if (!AArch64InstrInfo::isPairedLdSt(*I)) {
    // Non-paired instruction.
    MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
              .add(Update->getOperand(0))
              .add(getLdStRegOp(*I))
              .add(AArch64InstrInfo::getLdStBaseOp(*I))
              .addImm(Value / Scale)
              .setMemRefs(I->memoperands())
              .setMIFlags(I->mergeFlagsWith(*Update));
  } else {
    // Paired instruction: also copy the second register operand.
    MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
              .add(Update->getOperand(0))
              .add(getLdStRegOp(*I, 0))
              .add(getLdStRegOp(*I, 1))
              .add(AArch64InstrInfo::getLdStBaseOp(*I))
              .addImm(Value / Scale)
              .setMemRefs(I->memoperands())
              .setMIFlags(I->mergeFlagsWith(*Update));
  }
  if (CFI != E) {
    MachineBasicBlock *MBB = I->getParent();
    MBB->splice(std::next(MIB.getInstr()->getIterator()), MBB, CFI);
  }

  if (IsPreIdx) {
    ++NumPreFolded;
    LLVM_DEBUG(dbgs() << "Creating pre-indexed load/store.");
  } else {
    ++NumPostFolded;
    LLVM_DEBUG(dbgs() << "Creating post-indexed load/store.");
  }
  // ... (debug printing of old/new instructions elided)

  // Erase the old instructions for the block.
  I->eraseFromParent();
  Update->eraseFromParent();

  return NextI;
}
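// Illustrative example (assumed, not from the source): folding
//   ldr x0, [x20]
//   sub x20, x20, #16
// produces the post-indexed form
//   ldr x0, [x20], #-16
// because the SUBXri update amount is negated before the merge.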
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeConstOffsetInsn(MachineBasicBlock::iterator I,
                                          MachineBasicBlock::iterator Update,
                                          unsigned Offset, int Scale) {
  assert((Update->getOpcode() == AArch64::MOVKWi) &&
         "Unexpected const mov instruction to merge!");
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
  MachineBasicBlock::iterator PrevI = prev_nodbg(Update, E);
  MachineInstr &MemMI = *I;
  unsigned Mask = (1 << 12) * Scale - 1;
  unsigned Low = Offset & Mask;
  unsigned High = Offset - Low;
  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
  Register IndexReg = AArch64InstrInfo::getLdStOffsetOp(MemMI).getReg();
  MachineInstrBuilder AddMIB, MemMIB;

  // Add IndexReg, BaseReg, High (the BaseReg may be SP).
  AddMIB =
      BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(AArch64::ADDXri))
          .addDef(IndexReg)
          .addUse(BaseReg)
          .addImm(High >> 12) // shifted value
          .addImm(12);        // shift 12
  (void)AddMIB;
  // Ld/St DestReg, IndexReg, Imm12
  unsigned NewOpc = getBaseAddressOpcode(I->getOpcode());
  MemMIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
               .add(getLdStRegOp(MemMI))
               .add(AArch64InstrInfo::getLdStOffsetOp(MemMI))
               .addImm(Low / Scale)
               .setMemRefs(I->memoperands())
               .setMIFlags(I->mergeFlagsWith(*Update));
  (void)MemMIB;
  ++NumConstOffsetFolded;

  LLVM_DEBUG(dbgs() << "Creating base address load/store.\n");
  // ... (debug printing elided)

  // Erase the old instructions for the block.
  I->eraseFromParent();
  PrevI->eraseFromParent();
  Update->eraseFromParent();

  return NextI;
}
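// Worked example (illustrative), assuming Scale = 1 so Mask = 0xFFF: an index
// of 0x125678 splits into High = 0x125000, materialized as
//   add x8, x0, #0x125, lsl #12
// and Low = 0x678, which becomes the immediate of the rewritten access:
//   ldrb w1, [x8, #0x678]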
bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
                                               MachineInstr &MI,
                                               unsigned BaseReg, int Offset) {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::SUBXri:
  case AArch64::ADDXri: {
    // Make sure it's a vanilla immediate operand, not a relocation or
    // anything else we can't handle.
    if (!MI.getOperand(2).isImm())
      break;
    // Watch out for 1 << 12 shifted value.
    if (AArch64_AM::getShiftValue(MI.getOperand(3).getImm()))
      break;

    // The update instruction source and destination register must be the
    // same as the load/store base register.
    if (MI.getOperand(0).getReg() != BaseReg ||
        MI.getOperand(1).getReg() != BaseReg)
      break;

    int UpdateOffset = MI.getOperand(2).getImm();
    if (MI.getOpcode() == AArch64::SUBXri)
      UpdateOffset = -UpdateOffset;

    // The immediate must be a multiple of the scaling factor of the pre/post
    // indexed instruction.
    int Scale, MinOffset, MaxOffset;
    getPrePostIndexedMemOpInfo(MemMI, Scale, MinOffset, MaxOffset);
    if (UpdateOffset % Scale != 0)
      break;

    // Scaled offset must fit in the instruction immediate.
    int ScaledOffset = UpdateOffset / Scale;
    if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)
      break;

    // If we have a non-zero Offset, check that it matches the amount we're
    // adding to the register.
    if (!Offset || Offset == UpdateOffset)
      return true;
    break;
  }
  }
  return false;
}
bool AArch64LoadStoreOpt::isMatchingMovConstInsn(MachineInstr &MemMI,
                                                 MachineInstr &MI,
                                                 unsigned IndexReg,
                                                 unsigned &Offset) {
  // The update instruction source and destination register must be the
  // same as the load/store index register.
  if (MI.getOpcode() == AArch64::MOVKWi &&
      TRI->isSuperOrSubRegisterEq(IndexReg, MI.getOperand(1).getReg())) {
    // movz + movk hold a large offset of a Ld/St instruction.
    MachineBasicBlock::iterator B = MI.getParent()->begin();
    MachineBasicBlock::iterator MBBI = &MI;
    // Skip the case when the MOVK is the first instruction of the block.
    if (MBBI == B)
      return false;
    MBBI = prev_nodbg(MBBI, B);
    MachineInstr &MovzMI = *MBBI;
    // Make sure the MOVKWi and MOVZWi set the same register.
    if (MovzMI.getOpcode() == AArch64::MOVZWi &&
        MovzMI.getOperand(0).getReg() == MI.getOperand(1).getReg()) {
      unsigned Low = MovzMI.getOperand(1).getImm();
      unsigned High = MI.getOperand(2).getImm() << MI.getOperand(3).getImm();
      Offset = High + Low;
      // 12-bit optionally shifted immediates are legal for adds.
      return Offset >> 24 == 0;
    }
  }
  return false;
}
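// Worked example (illustrative): for
//   movz w8, #0x5678
//   movk w8, #0x12, lsl #16
// High = 0x12 << 16 = 0x120000 and Low = 0x5678, so Offset = 0x125678. The
// candidate is accepted because Offset >> 24 == 0, i.e. the offset fits in
// the 24 bits an ADD-with-shift plus a 12-bit immediate can cover.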
MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
    MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineInstr &MemMI = *I;
  MachineBasicBlock::iterator MBBI = I;

  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
  int MIUnscaledOffset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm() *
                         TII->getMemScale(MemMI);

  // Scan forward looking for post-index opportunities. Updating instructions
  // can't be formed if the memory instruction doesn't have the offset we're
  // looking for.
  if (MIUnscaledOffset != UnscaledOffset)
    return E;

  // If the base register overlaps a source/destination register, we can't
  // merge the update. This does not apply to tag store instructions which
  // ignore the address part of the source register.
  if (!isTagStore(MemMI)) {
    bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
    for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
      Register DestReg = getLdStRegOp(MemMI, i).getReg();
      if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
        return E;
    }
  }

  // Track which register units have been modified and used between the first
  // insn (inclusive) and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  MBBI = next_nodbg(MBBI, E);

  // We can't post-increment the stack pointer if any instruction between
  // the memory access (I) and the increment (MBBI) can access the memory
  // region defined by [SP, MBBI].
  const bool BaseRegSP = BaseReg == AArch64::SP;
  if (BaseRegSP && needsWinCFI(I->getMF())) {
    // FIXME: For now, we always block the optimization over SP in windows
    // targets as it requires to adjust the unwind/debug info, messing up
    // the unwind info can actually cause a miscompile.
    return E;
  }

  for (unsigned Count = 0; MBBI != E && Count < Limit;
       MBBI = next_nodbg(MBBI, E)) {
    MachineInstr &MI = *MBBI;

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (!MI.isTransient())
      ++Count;

    // If we found a match, return it.
    if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
      return MBBI;

    // Update the status of what the instruction clobbered and used.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is used or modified, we have no match,
    // so return early. If we are optimizing SP, do not allow instructions
    // that may load or store in between the load and the optimized value
    // update.
    if (!ModifiedRegUnits.available(BaseReg) ||
        !UsedRegUnits.available(BaseReg) ||
        (BaseRegSP && MBBI->mayLoadOrStore()))
      return E;
  }
  return E;
}
MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
    MachineBasicBlock::iterator I, unsigned Limit) {
  MachineBasicBlock::iterator B = I->getParent()->begin();
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineInstr &MemMI = *I;
  MachineBasicBlock::iterator MBBI = I;
  MachineFunction &MF = *MemMI.getMF();

  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
  int Offset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm();

  // If the load/store is the first instruction in the block, there's nothing
  // to find, and a nonzero offset can't be merged with a pre-increment.
  if (MBBI == B || Offset != 0)
    return E;

  // If the base register overlaps a destination register, we can't merge the
  // update.
  if (!isTagStore(MemMI)) {
    bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
    for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
      Register DestReg = getLdStRegOp(MemMI, i).getReg();
      if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
        return E;
    }
  }

  const bool BaseRegSP = BaseReg == AArch64::SP;
  if (BaseRegSP && needsWinCFI(I->getMF())) {
    // FIXME: blocked over SP on windows targets (adjusting the unwind info
    // is not supported yet and getting it wrong can miscompile).
    return E;
  }

  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  unsigned RedZoneSize =
      Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction());

  // Track which register units have been modified and used between the first
  // insn (inclusive) and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  unsigned Count = 0;
  bool MemAcessBeforeSPPreInc = false;
  do {
    MBBI = prev_nodbg(MBBI, B);
    MachineInstr &MI = *MBBI;

    // Don't count transient instructions towards the search limit.
    if (!MI.isTransient())
      ++Count;

    // If we found a match, return it.
    if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset)) {
      // Check that the update value is within our red zone limit (which may
      // be zero).
      if (MemAcessBeforeSPPreInc && MBBI->getOperand(2).getImm() > RedZoneSize)
        return E;
      return MBBI;
    }

    // Update the status of what the instruction clobbered and used.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is used or modified, we have no match,
    // so return early.
    if (!ModifiedRegUnits.available(BaseReg) ||
        !UsedRegUnits.available(BaseReg))
      return E;

    // Keep track if we have a memory access before an SP pre-increment; in
    // that case we need to validate later that the update amount respects
    // the red zone.
    if (BaseRegSP && MBBI->mayLoadOrStore())
      MemAcessBeforeSPPreInc = true;
  } while (MBBI != B && Count < Limit);
  return E;
}
MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingConstOffsetBackward(
    MachineBasicBlock::iterator I, unsigned Limit, unsigned &Offset) {
  MachineBasicBlock::iterator B = I->getParent()->begin();
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineInstr &MemMI = *I;
  MachineBasicBlock::iterator MBBI = I;

  // If the load is the first instruction in the block, there's nothing to
  // look at.
  if (MBBI == B)
    return E;

  // Make sure the IndexReg is killed and the shift amount is zero.
  // TODO: Relax this restriction to extend, simplify processing now.
  if (!AArch64InstrInfo::getLdStOffsetOp(MemMI).isKill() ||
      !AArch64InstrInfo::getLdStAmountOp(MemMI).isImm() ||
      (AArch64InstrInfo::getLdStAmountOp(MemMI).getImm() != 0))
    return E;

  Register IndexReg = AArch64InstrInfo::getLdStOffsetOp(MemMI).getReg();

  // Track which register units have been modified and used between the first
  // insn (inclusive) and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  unsigned Count = 0;
  do {
    MBBI = prev_nodbg(MBBI, B);
    MachineInstr &MI = *MBBI;

    // Don't count transient instructions towards the search limit.
    if (!MI.isTransient())
      ++Count;

    // If we found a match, return it.
    if (isMatchingMovConstInsn(*I, MI, IndexReg, Offset)) {
      return MBBI;
    }

    // Update the status of what the instruction clobbered and used.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the index register is used or modified, we have no match,
    // so return early.
    if (!ModifiedRegUnits.available(IndexReg) ||
        !UsedRegUnits.available(IndexReg))
      return E;
  } while (MBBI != B && Count < Limit);
  return E;
}
static bool isSymmetricLoadCandidate(MachineInstr &MI, Register BaseReg) {
  // Check that the first Count operands of MI are either BaseReg or
  // non-register operands.
  auto MatchBaseReg = [&](unsigned Count) {
    for (unsigned I = 0; I < Count; I++) {
      auto OpI = MI.getOperand(I);
      if (OpI.isReg() && OpI.getReg() != BaseReg)
        return false;
    }
    return true;
  };

  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    return false;
  case AArch64::MOVZXi:
    return MatchBaseReg(1);
  case AArch64::MOVKXi:
    return MatchBaseReg(2);
  case AArch64::ORRXrs:
    MachineOperand &Imm = MI.getOperand(3);
    // The fourth operand of ORR must be 32, which means a 32-bit symmetric
    // constant load.
    if (MatchBaseReg(3) && Imm.isImm() && Imm.getImm() == 32)
      return true;
  }
  return false;
}
MachineBasicBlock::iterator AArch64LoadStoreOpt::doFoldSymmetryConstantLoad(
    MachineInstr &MI, SmallVectorImpl<MachineBasicBlock::iterator> &MIs,
    int UpperLoadIdx, int Accumulated) {
  MachineBasicBlock::iterator I = MI.getIterator();
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
  MachineBasicBlock *MBB = MI.getParent();

  if (!UpperLoadIdx) {
    // ORRXrs mirrored the lower 32 bits into the upper half; only the first
    // materializing instruction (the ORR) has to go.
    (*MIs.begin())->eraseFromParent();
  } else {
    // Erase the instructions that only materialized the upper 32 bits.
    for (auto It = MIs.begin() + UpperLoadIdx; It != MIs.end(); ++It)
      (*It)->eraseFromParent();
  }
  // ... (re-materialization of the lower 32-bit half from Accumulated elided)

  // Store the 32-bit half twice with a paired store instead of storing the
  // full 64-bit register once.
  Register BaseReg = getLdStRegOp(MI).getReg();
  Register DstRegW = TRI->getSubReg(BaseReg, AArch64::sub_32);
  unsigned DstRegState = getRegState(MI.getOperand(0));
  int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
  BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(AArch64::STPWi))
      .addReg(DstRegW, DstRegState)
      .addReg(DstRegW, DstRegState)
      .add(AArch64InstrInfo::getLdStBaseOp(MI))
      .addImm(OffsetImm * 2)
      .setMemRefs(MI.memoperands())
      .setMIFlags(MI.getFlags());
  I->eraseFromParent();
  return NextI;
}
bool AArch64LoadStoreOpt::tryFoldSymmetryConstantLoad(
    MachineBasicBlock::iterator &I, unsigned Limit) {
  MachineInstr &MI = *I;
  if (MI.getOpcode() != AArch64::STRXui)
    return false;

  MachineBasicBlock::iterator MBBI = I;
  MachineBasicBlock::iterator B = I->getParent()->begin();
  if (MBBI == B)
    return false;

  TypeSize Scale(0U, false), Width(0U, false);
  int64_t MinOffset, MaxOffset;
  if (!AArch64InstrInfo::getMemOpInfo(AArch64::STPWi, Scale, Width, MinOffset,
                                      MaxOffset))
    return false;
  // ... (check the store offset is in range of a paired 32-bit store elided)

  Register BaseReg = getLdStRegOp(MI).getReg();
  unsigned Count = 0, UpperLoadIdx = 0;
  uint64_t Accumulated = 0, Mask = 0xFFFFUL;
  bool hasORR = false, Found = false;
  SmallVector<MachineBasicBlock::iterator> MIs;
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  do {
    MBBI = prev_nodbg(MBBI, B);
    MachineInstr &MI = *MBBI;
    if (!MI.isTransient())
      ++Count;
    if (!isSymmetricLoadCandidate(MI, BaseReg)) {
      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);
      if (!ModifiedRegUnits.available(BaseReg) ||
          !UsedRegUnits.available(BaseReg))
        return false;
      continue;
    }

    unsigned Opc = MI.getOpcode();
    if (Opc == AArch64::ORRXrs) {
      hasORR = true;
      MIs.push_back(MBBI);
      continue;
    }
    unsigned ValueOrder = Opc == AArch64::MOVZXi ? 1 : 2;
    MIs.push_back(MBBI);
    // Accumulate the constant being built by the MOVZXi/MOVKXi chain.
    uint64_t IShift = MI.getOperand(ValueOrder + 1).getImm();
    uint64_t Adder = MI.getOperand(ValueOrder).getImm() << IShift;
    if (IShift >= 32)
      UpperLoadIdx = MIs.size();
    Accumulated -= Accumulated & (Mask << IShift);
    Accumulated += Adder;
    // The constant is "symmetric" if its two 32-bit halves match, or if an
    // ORRXrs already mirrors the (non-zero) lower half into the upper half.
    if (Accumulated != 0 &&
        (((Accumulated >> 32) == (Accumulated & 0xffffffffULL)) ||
         (hasORR && (Accumulated >> 32 == 0)))) {
      Found = true;
      break;
    }
  } while (MBBI != B && Count < Limit);

  if (Found) {
    I = doFoldSymmetryConstantLoad(MI, MIs, UpperLoadIdx, Accumulated);
    return true;
  }
  return false;
}
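// Illustrative example (assumed, not from the source): a 64-bit constant
// whose halves are equal, e.g.
//   movz x8, #0x1234
//   orr  x8, x8, x8, lsl #32   ; x8 = 0x0000123400001234
//   str  x8, [x0]
// can drop the mirroring ORR and store the 32-bit half twice:
//   movz x8, #0x1234
//   stp  w8, w8, [x0]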
bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
    MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  // If this is a volatile load, don't mess with it.
  if (MI.hasOrderedMemoryRef())
    return false;

  if (needsWinCFI(MI.getMF()) && MI.getFlag(MachineInstr::FrameDestroy))
    return false;

  // Look backward up to LdStLimit instructions.
  MachineBasicBlock::iterator StoreI;
  if (findMatchingStore(MBBI, LdStLimit, StoreI)) {
    ++NumLoadsFromStoresPromoted;
    // Promote the load. Keeping the iterator straight is a pain, so we let
    // the merge routine tell us what the next instruction is after it's done
    // mucking about.
    MBBI = promoteLoadFromStore(MBBI, StoreI);
    return true;
  }
  return false;
}
bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
    MachineBasicBlock::iterator &MBBI) {
  assert(isPromotableZeroStoreInst(*MBBI) && "Expected narrow zero store.");
  MachineInstr &MI = *MBBI;
  MachineBasicBlock::iterator E = MI.getParent()->end();

  if (!TII->isCandidateToMergeOrPair(MI))
    return false;

  // Look ahead up to LdStLimit instructions for a mergeable instruction.
  LdStPairFlags Flags;
  MachineBasicBlock::iterator MergeMI =
      findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true);
  if (MergeMI != E) {
    ++NumZeroStoresPromoted;

    // Keeping the iterator straight is a pain, so we let the merge routine
    // tell us what the next instruction is after it's done mucking about.
    MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags);
    return true;
  }
  return false;
}
// Find loads and stores that can be merged into a single load or store pair
// instruction.
bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock::iterator E = MI.getParent()->end();

  if (!TII->isCandidateToMergeOrPair(MI))
    return false;

  // If disable-ldp feature is opted, do not emit ldp.
  if (MI.mayLoad() && Subtarget->hasDisableLdp())
    return false;

  // If disable-stp feature is opted, do not emit stp.
  if (MI.mayStore() && Subtarget->hasDisableStp())
    return false;

  // Early exit if the offset is not possible to match. (6 bits of positive
  // range, plus allow an extra one in case we find a later insn that matches
  // with Offset-1)
  bool IsUnscaled = TII->hasUnscaledLdStOffset(MI);
  int Offset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
  int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
  // Allow one more for offset.
  if (Offset > 0)
    Offset -= OffsetStride;
  if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
    return false;

  // Look ahead up to LdStLimit instructions for a pairable instruction.
  LdStPairFlags Flags;
  MachineBasicBlock::iterator Paired =
      findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ false);
  if (Paired != E) {
    // Keeping the iterator straight is a pain, so we let the merge routine
    // tell us what the next instruction is after it's done mucking about.
    auto Prev = std::prev(MBBI);

    // Fetch the memoperand of the load/store that is a candidate for
    // combination.
    MachineMemOperand *MemOp =
        MI.memoperands_empty() ? nullptr : MI.memoperands().front();

    // If a load/store arrives and the ldp/stp-aligned-only feature is opted,
    // check that the alignment of the source pointer is at least double the
    // alignment of the type.
    if ((MI.mayLoad() && Subtarget->hasLdpAlignedOnly()) ||
        (MI.mayStore() && Subtarget->hasStpAlignedOnly())) {
      // If there is no size/align information, can't perform the check.
      if (!MemOp || !MemOp->getMemoryType().isValid()) {
        NumFailedAlignmentCheck++;
        return false;
      }

      uint64_t MemAlignment = MemOp->getAlign().value();
      uint64_t TypeAlignment = Align(MemOp->getSize().getValue()).value();

      if (MemAlignment < 2 * TypeAlignment) {
        NumFailedAlignmentCheck++;
        return false;
      }
    }

    ++NumPairCreated;
    if (TII->hasUnscaledLdStOffset(MI))
      ++NumUnscaledPairCreated;

    MBBI = mergePairedInsns(MBBI, Paired, Flags);
    // Collect liveness info for instructions between Prev and the new
    // position MBBI.
    for (auto I = std::next(Prev); I != MBBI; I++)
      updateDefinedRegisters(*I, DefinedInBB, TRI);

    return true;
  }
  return false;
}
bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
    (MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock::iterator E = MI.getParent()->end();
  MachineBasicBlock::iterator Update;

  // Look forward to try to form a post-index instruction. For example,
  //   ldr x0, [x20]
  //   add x20, x20, #32
  // merged into:
  //   ldr x0, [x20], #32
  Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
  if (Update != E) {
    // Merge the update into the ld/st.
    MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
    return true;
  }

  // Don't know how to handle unscaled pre/post-index versions below, so bail.
  if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
    return false;

  // Look back to try to find a pre-index instruction. For example,
  //   add x0, x0, #8
  //   ldr x1, [x0]
  // merged into:
  //   ldr x1, [x0, #8]!
  Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit);
  if (Update != E) {
    // Merge the update into the ld/st.
    MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
    return true;
  }

  // The immediate in the load/store is scaled by the size of the memory
  // operation. The immediate in the add we're looking for, however, is not,
  // so adjust here.
  int UnscaledOffset =
      AArch64InstrInfo::getLdStOffsetOp(MI).getImm() * TII->getMemScale(MI);

  // Look forward to try to find a pre-index instruction. For example,
  //   ldr x1, [x0, #64]
  //   add x0, x0, #64
  // merged into:
  //   ldr x1, [x0, #64]!
  Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
  if (Update != E) {
    // Merge the update into the ld/st.
    MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
    return true;
  }

  return false;
}
bool AArch64LoadStoreOpt::tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI,
                                              int Scale) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock::iterator E = MI.getParent()->end();
  MachineBasicBlock::iterator Update;

  // Don't know how to handle unscaled pre/post-index versions below, so bail.
  if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
    return false;

  // Look back to try to find a const offset for an index load/store. For
  // example:
  //   mov x8, #LargeImm       ; = a * (1 << 12) + imm12
  //   ldr x1, [x0, x8]
  // merged into:
  //   add x8, x0, a * (1 << 12)
  //   ldr x1, [x8, imm12]
  unsigned Offset;
  Update = findMatchingConstOffsetBackward(MBBI, LdStConstLimit, Offset);
  if (Update != E && (Offset & (Scale - 1)) == 0) {
    // Merge the imm12 into the ld/st.
    MBBI = mergeConstOffsetInsn(MBBI, Update, Offset, Scale);
    return true;
  }

  return false;
}
bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
                                        bool EnableNarrowZeroStOpt) {
  bool Modified = false;

  // 1) Find loads that directly read from stores and promote them by
  //    replacing them with mov instructions. If the store is wider than the
  //    load, the load is replaced with a bitfield extract.
  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    if (isPromotableLoadFromStore(*MBBI) && tryToPromoteLoadFromStore(MBBI))
      Modified = true;
    else
      ++MBBI;
  }

  // 2) Merge adjacent zero stores into a wider store.
  if (EnableNarrowZeroStOpt)
    for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
         MBBI != E;) {
      if (isPromotableZeroStoreInst(*MBBI) && tryToMergeZeroStInst(MBBI))
        Modified = true;
      else
        ++MBBI;
    }

  // 3) Find loads and stores that can be merged into a single load or store
  //    pair instruction.
  if (MBB.getParent()->getRegInfo().tracksLiveness()) {
    DefinedInBB.clear();
    DefinedInBB.addLiveIns(MBB);
  }
  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    // Track currently live registers up to this point, to help with
    // searching for a rename register on demand.
    updateDefinedRegisters(*MBBI, DefinedInBB, TRI);
    if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
      Modified = true;
    else
      ++MBBI;
  }

  // 4) Find base register updates that can be merged into the load or store
  //    as a base-register writeback, and large index-register constants that
  //    can be folded into the addressing mode.
  // ... (update-folding and const-offset-folding loops elided)

  return Modified;
}
bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  if (skipFunction(Fn.getFunction()))
    return false;

  Subtarget = &Fn.getSubtarget<AArch64Subtarget>();
  TII = Subtarget->getInstrInfo();
  TRI = Subtarget->getRegisterInfo();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  // Resize the modified and used register unit trackers. We do this once
  // per function and then clear the register units each time we optimize a
  // load or store.
  ModifiedRegUnits.init(*TRI);
  UsedRegUnits.init(*TRI);
  DefinedInBB.init(*TRI);

  bool Modified = false;
  bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
  for (auto &MBB : Fn) {
    auto M = optimizeBlock(MBB, enableNarrowZeroStOpt);
    Modified |= M;
  }

  return Modified;
}

FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() {
  return new AArch64LoadStoreOpt();
}