55#define DEBUG_TYPE "aarch64-ldst-opt"
// Pass statistics. Each STATISTIC entry pairs a counter name (first argument)
// with the human-readable description reported for it (second argument).
// NumPairCreated: load/store pair instructions generated.
57STATISTIC(NumPairCreated,
"Number of load/store pair instructions generated");
// NumPostFolded: base-register updates folded into post-index forms.
58STATISTIC(NumPostFolded,
"Number of post-index updates folded");
// NumPreFolded: base-register updates folded into pre-index forms.
59STATISTIC(NumPreFolded,
"Number of pre-index updates folded");
61 "Number of load/store from unscaled generated");
// Incremented in tryToMergeZeroStInst, next to the mergeNarrowZeroStores call.
62STATISTIC(NumZeroStoresPromoted,
"Number of narrow zero stores promoted");
// Incremented in tryToPromoteLoadFromStore, next to the promoteLoadFromStore
// call.
63STATISTIC(NumLoadsFromStoresPromoted,
"Number of loads from stores promoted");
// The description below is written as two adjacent string literals.
// NOTE(review): the increment site for this counter is not visible in the
// lines available here -- confirm where the alignment check bumps it.
64STATISTIC(NumFailedAlignmentCheck,
"Number of load/store pair transformation "
65 "not passed the alignment check");
67 "Number of const offset of index address folded");
69 "Number of UMOV + GPR stores folded to FPR stores");
72 "Controls which pairs are considered for renaming");
97#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
// Small bundle of options describing how one candidate merge should be
// performed. Instances are passed by reference (const and non-const) to the
// AArch64LoadStoreOpt member functions.
101using LdStPairFlags =
struct LdStPairFlags {
// Merge-direction flag; starts out false.
105 bool MergeForward =
false;
// Optional physical register to rename to; empty unless setRenameReg() has
// been called (and not undone by clearRenameReg()).
116 std::optional<MCPhysReg> RenameReg;
// Default construction relies on the in-class member initializers.
118 LdStPairFlags() =
default;
// Sets MergeForward; calling with no argument sets it to true.
120 void setMergeForward(
bool V =
true) { MergeForward = V; }
// Returns the current MergeForward value.
121 bool getMergeForward()
const {
return MergeForward; }
// Stores V into SExtIdx.
// NOTE(review): the SExtIdx member declaration is not visible in these
// lines; callers in this file pass -1, 0 or 1 -- confirm the intended range.
123 void setSExtIdx(
int V) { SExtIdx = V; }
// Returns the current SExtIdx value.
124 int getSExtIdx()
const {
return SExtIdx; }
// Records R as the rename register.
126 void setRenameReg(
MCPhysReg R) { RenameReg = R; }
// Resets RenameReg to the empty state (std::nullopt).
127 void clearRenameReg() { RenameReg = std::nullopt; }
// Returns a copy of RenameReg; the result is empty when no register is set.
128 std::optional<MCPhysReg> getRenameReg()
const {
return RenameReg; }
131struct AArch64LoadStoreOpt {
145 LdStPairFlags &Flags,
147 bool FindNarrowMerge);
158 const LdStPairFlags &Flags);
164 const LdStPairFlags &Flags);
176 int UnscaledOffset,
unsigned Limit);
198 unsigned BaseReg,
int Offset);
201 unsigned IndexReg,
unsigned &
Offset);
204 std::optional<MachineBasicBlock::iterator>
207 bool IsPreIdx,
bool MergeEither);
256char AArch64LoadStoreOptLegacy::ID = 0;
263static
bool isNarrowStore(
unsigned Opc) {
267 case AArch64::STRBBui:
268 case AArch64::STURBBi:
269 case AArch64::STRHHui:
270 case AArch64::STURHHi:
278 switch (
MI.getOpcode()) {
284 case AArch64::STZ2Gi:
290 bool *IsValidLdStrOpc =
nullptr) {
292 *IsValidLdStrOpc =
true;
296 *IsValidLdStrOpc =
false;
297 return std::numeric_limits<unsigned>::max();
298 case AArch64::STRDui:
299 case AArch64::STURDi:
300 case AArch64::STRDpre:
301 case AArch64::STRQui:
302 case AArch64::STURQi:
303 case AArch64::STRQpre:
304 case AArch64::STRBBui:
305 case AArch64::STURBBi:
306 case AArch64::STRHHui:
307 case AArch64::STURHHi:
308 case AArch64::STRWui:
309 case AArch64::STRWpre:
310 case AArch64::STURWi:
311 case AArch64::STRXui:
312 case AArch64::STRXpre:
313 case AArch64::STURXi:
314 case AArch64::STR_ZXI:
315 case AArch64::LDRDui:
316 case AArch64::LDURDi:
317 case AArch64::LDRDpre:
318 case AArch64::LDRQui:
319 case AArch64::LDURQi:
320 case AArch64::LDRQpre:
321 case AArch64::LDRWui:
322 case AArch64::LDURWi:
323 case AArch64::LDRWpre:
324 case AArch64::LDRXui:
325 case AArch64::LDURXi:
326 case AArch64::LDRXpre:
327 case AArch64::STRSui:
328 case AArch64::STURSi:
329 case AArch64::STRSpre:
330 case AArch64::LDRSui:
331 case AArch64::LDURSi:
332 case AArch64::LDRSpre:
333 case AArch64::LDR_ZXI:
335 case AArch64::LDRSWui:
336 return AArch64::LDRWui;
337 case AArch64::LDURSWi:
338 return AArch64::LDURWi;
339 case AArch64::LDRSWpre:
340 return AArch64::LDRWpre;
348 case AArch64::STRBBui:
349 return AArch64::STRHHui;
350 case AArch64::STRHHui:
351 return AArch64::STRWui;
352 case AArch64::STURBBi:
353 return AArch64::STURHHi;
354 case AArch64::STURHHi:
355 return AArch64::STURWi;
356 case AArch64::STURWi:
357 return AArch64::STURXi;
358 case AArch64::STRWui:
359 return AArch64::STRXui;
367 case AArch64::STRSui:
368 case AArch64::STURSi:
369 return AArch64::STPSi;
370 case AArch64::STRSpre:
371 return AArch64::STPSpre;
372 case AArch64::STRDui:
373 case AArch64::STURDi:
374 return AArch64::STPDi;
375 case AArch64::STRDpre:
376 return AArch64::STPDpre;
377 case AArch64::STRQui:
378 case AArch64::STURQi:
379 case AArch64::STR_ZXI:
380 return AArch64::STPQi;
381 case AArch64::STRQpre:
382 return AArch64::STPQpre;
383 case AArch64::STRWui:
384 case AArch64::STURWi:
385 return AArch64::STPWi;
386 case AArch64::STRWpre:
387 return AArch64::STPWpre;
388 case AArch64::STRXui:
389 case AArch64::STURXi:
390 return AArch64::STPXi;
391 case AArch64::STRXpre:
392 return AArch64::STPXpre;
393 case AArch64::LDRSui:
394 case AArch64::LDURSi:
395 return AArch64::LDPSi;
396 case AArch64::LDRSpre:
397 return AArch64::LDPSpre;
398 case AArch64::LDRDui:
399 case AArch64::LDURDi:
400 return AArch64::LDPDi;
401 case AArch64::LDRDpre:
402 return AArch64::LDPDpre;
403 case AArch64::LDRQui:
404 case AArch64::LDURQi:
405 case AArch64::LDR_ZXI:
406 return AArch64::LDPQi;
407 case AArch64::LDRQpre:
408 return AArch64::LDPQpre;
409 case AArch64::LDRWui:
410 case AArch64::LDURWi:
411 return AArch64::LDPWi;
412 case AArch64::LDRWpre:
413 return AArch64::LDPWpre;
414 case AArch64::LDRXui:
415 case AArch64::LDURXi:
416 return AArch64::LDPXi;
417 case AArch64::LDRXpre:
418 return AArch64::LDPXpre;
419 case AArch64::LDRSWui:
420 case AArch64::LDURSWi:
421 return AArch64::LDPSWi;
422 case AArch64::LDRSWpre:
423 return AArch64::LDPSWpre;
434 case AArch64::LDRBBui:
435 return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
436 StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
437 case AArch64::LDURBBi:
438 return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
439 StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
440 case AArch64::LDRHHui:
441 return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
442 StOpc == AArch64::STRXui;
443 case AArch64::LDURHHi:
444 return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
445 StOpc == AArch64::STURXi;
446 case AArch64::LDRWui:
447 return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
448 case AArch64::LDURWi:
449 return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
450 case AArch64::LDRXui:
451 return StOpc == AArch64::STRXui;
452 case AArch64::LDURXi:
453 return StOpc == AArch64::STURXi;
465 case AArch64::STRBui:
466 return AArch64::STRBpre;
467 case AArch64::STRHui:
468 return AArch64::STRHpre;
469 case AArch64::STRSui:
470 return AArch64::STRSpre;
471 case AArch64::STRDui:
472 return AArch64::STRDpre;
473 case AArch64::STRQui:
474 return AArch64::STRQpre;
475 case AArch64::STRBBui:
476 return AArch64::STRBBpre;
477 case AArch64::STRHHui:
478 return AArch64::STRHHpre;
479 case AArch64::STRWui:
480 return AArch64::STRWpre;
481 case AArch64::STRXui:
482 return AArch64::STRXpre;
483 case AArch64::LDRBui:
484 return AArch64::LDRBpre;
485 case AArch64::LDRHui:
486 return AArch64::LDRHpre;
487 case AArch64::LDRSui:
488 return AArch64::LDRSpre;
489 case AArch64::LDRDui:
490 return AArch64::LDRDpre;
491 case AArch64::LDRQui:
492 return AArch64::LDRQpre;
493 case AArch64::LDRBBui:
494 return AArch64::LDRBBpre;
495 case AArch64::LDRHHui:
496 return AArch64::LDRHHpre;
497 case AArch64::LDRWui:
498 return AArch64::LDRWpre;
499 case AArch64::LDRXui:
500 return AArch64::LDRXpre;
501 case AArch64::LDRSWui:
502 return AArch64::LDRSWpre;
504 return AArch64::LDPSpre;
505 case AArch64::LDPSWi:
506 return AArch64::LDPSWpre;
508 return AArch64::LDPDpre;
510 return AArch64::LDPQpre;
512 return AArch64::LDPWpre;
514 return AArch64::LDPXpre;
516 return AArch64::STPSpre;
518 return AArch64::STPDpre;
520 return AArch64::STPQpre;
522 return AArch64::STPWpre;
524 return AArch64::STPXpre;
526 return AArch64::STGPreIndex;
528 return AArch64::STZGPreIndex;
530 return AArch64::ST2GPreIndex;
531 case AArch64::STZ2Gi:
532 return AArch64::STZ2GPreIndex;
534 return AArch64::STGPpre;
543 case AArch64::LDRBroX:
544 return AArch64::LDRBui;
545 case AArch64::LDRBBroX:
546 return AArch64::LDRBBui;
547 case AArch64::LDRSBXroX:
548 return AArch64::LDRSBXui;
549 case AArch64::LDRSBWroX:
550 return AArch64::LDRSBWui;
551 case AArch64::LDRHroX:
552 return AArch64::LDRHui;
553 case AArch64::LDRHHroX:
554 return AArch64::LDRHHui;
555 case AArch64::LDRSHXroX:
556 return AArch64::LDRSHXui;
557 case AArch64::LDRSHWroX:
558 return AArch64::LDRSHWui;
559 case AArch64::LDRWroX:
560 return AArch64::LDRWui;
561 case AArch64::LDRSroX:
562 return AArch64::LDRSui;
563 case AArch64::LDRSWroX:
564 return AArch64::LDRSWui;
565 case AArch64::LDRDroX:
566 return AArch64::LDRDui;
567 case AArch64::LDRXroX:
568 return AArch64::LDRXui;
569 case AArch64::LDRQroX:
570 return AArch64::LDRQui;
578 case AArch64::STRBui:
579 return AArch64::STRBpost;
580 case AArch64::STRHui:
581 return AArch64::STRHpost;
582 case AArch64::STRSui:
583 case AArch64::STURSi:
584 return AArch64::STRSpost;
585 case AArch64::STRDui:
586 case AArch64::STURDi:
587 return AArch64::STRDpost;
588 case AArch64::STRQui:
589 case AArch64::STURQi:
590 return AArch64::STRQpost;
591 case AArch64::STRBBui:
592 return AArch64::STRBBpost;
593 case AArch64::STRHHui:
594 return AArch64::STRHHpost;
595 case AArch64::STRWui:
596 case AArch64::STURWi:
597 return AArch64::STRWpost;
598 case AArch64::STRXui:
599 case AArch64::STURXi:
600 return AArch64::STRXpost;
601 case AArch64::LDRBui:
602 return AArch64::LDRBpost;
603 case AArch64::LDRHui:
604 return AArch64::LDRHpost;
605 case AArch64::LDRSui:
606 case AArch64::LDURSi:
607 return AArch64::LDRSpost;
608 case AArch64::LDRDui:
609 case AArch64::LDURDi:
610 return AArch64::LDRDpost;
611 case AArch64::LDRQui:
612 case AArch64::LDURQi:
613 return AArch64::LDRQpost;
614 case AArch64::LDRBBui:
615 return AArch64::LDRBBpost;
616 case AArch64::LDRHHui:
617 return AArch64::LDRHHpost;
618 case AArch64::LDRWui:
619 case AArch64::LDURWi:
620 return AArch64::LDRWpost;
621 case AArch64::LDRXui:
622 case AArch64::LDURXi:
623 return AArch64::LDRXpost;
624 case AArch64::LDRSWui:
625 return AArch64::LDRSWpost;
627 return AArch64::LDPSpost;
628 case AArch64::LDPSWi:
629 return AArch64::LDPSWpost;
631 return AArch64::LDPDpost;
633 return AArch64::LDPQpost;
635 return AArch64::LDPWpost;
637 return AArch64::LDPXpost;
639 return AArch64::STPSpost;
641 return AArch64::STPDpost;
643 return AArch64::STPQpost;
645 return AArch64::STPWpost;
647 return AArch64::STPXpost;
649 return AArch64::STGPostIndex;
651 return AArch64::STZGPostIndex;
653 return AArch64::ST2GPostIndex;
654 case AArch64::STZ2Gi:
655 return AArch64::STZ2GPostIndex;
657 return AArch64::STGPpost;
664 unsigned OpcB =
MI.getOpcode();
669 case AArch64::STRSpre:
670 return (OpcB == AArch64::STRSui) || (OpcB == AArch64::STURSi);
671 case AArch64::STRDpre:
672 return (OpcB == AArch64::STRDui) || (OpcB == AArch64::STURDi);
673 case AArch64::STRQpre:
674 return (OpcB == AArch64::STRQui) || (OpcB == AArch64::STURQi);
675 case AArch64::STRWpre:
676 return (OpcB == AArch64::STRWui) || (OpcB == AArch64::STURWi);
677 case AArch64::STRXpre:
678 return (OpcB == AArch64::STRXui) || (OpcB == AArch64::STURXi);
679 case AArch64::LDRSpre:
680 return (OpcB == AArch64::LDRSui) || (OpcB == AArch64::LDURSi);
681 case AArch64::LDRDpre:
682 return (OpcB == AArch64::LDRDui) || (OpcB == AArch64::LDURDi);
683 case AArch64::LDRQpre:
684 return (OpcB == AArch64::LDRQui) || (OpcB == AArch64::LDURQi);
685 case AArch64::LDRWpre:
686 return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi);
687 case AArch64::LDRXpre:
688 return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);
689 case AArch64::LDRSWpre:
690 return (OpcB == AArch64::LDRSWui) || (OpcB == AArch64::LDURSWi);
696 int &MinOffset,
int &MaxOffset) {
714 unsigned PairedRegOp = 0) {
715 assert(PairedRegOp < 2 &&
"Unexpected register operand idx.");
721 return MI.getOperand(Idx);
730 int UnscaledStOffset =
734 int UnscaledLdOffset =
738 return (UnscaledStOffset <= UnscaledLdOffset) &&
739 (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
743 unsigned Opc =
MI.getOpcode();
744 return (
Opc == AArch64::STRWui ||
Opc == AArch64::STURWi ||
745 isNarrowStore(
Opc)) &&
750 switch (
MI.getOpcode()) {
754 case AArch64::LDRBBui:
755 case AArch64::LDRHHui:
756 case AArch64::LDRWui:
757 case AArch64::LDRXui:
759 case AArch64::LDURBBi:
760 case AArch64::LDURHHi:
761 case AArch64::LDURWi:
762 case AArch64::LDURXi:
768 unsigned Opc =
MI.getOpcode();
773 case AArch64::STRBui:
774 case AArch64::STRHui:
775 case AArch64::STRSui:
776 case AArch64::STRDui:
777 case AArch64::STRQui:
778 case AArch64::STRXui:
779 case AArch64::STRWui:
780 case AArch64::STRHHui:
781 case AArch64::STRBBui:
782 case AArch64::LDRBui:
783 case AArch64::LDRHui:
784 case AArch64::LDRSui:
785 case AArch64::LDRDui:
786 case AArch64::LDRQui:
787 case AArch64::LDRXui:
788 case AArch64::LDRWui:
789 case AArch64::LDRHHui:
790 case AArch64::LDRBBui:
794 case AArch64::STZ2Gi:
797 case AArch64::STURSi:
798 case AArch64::STURDi:
799 case AArch64::STURQi:
800 case AArch64::STURWi:
801 case AArch64::STURXi:
802 case AArch64::LDURSi:
803 case AArch64::LDURDi:
804 case AArch64::LDURQi:
805 case AArch64::LDURWi:
806 case AArch64::LDURXi:
809 case AArch64::LDPSWi:
838 unsigned Opc =
MI.getOpcode();
844 case AArch64::LDRBroX:
845 case AArch64::LDRBBroX:
846 case AArch64::LDRSBXroX:
847 case AArch64::LDRSBWroX:
850 case AArch64::LDRHroX:
851 case AArch64::LDRHHroX:
852 case AArch64::LDRSHXroX:
853 case AArch64::LDRSHWroX:
856 case AArch64::LDRWroX:
857 case AArch64::LDRSroX:
858 case AArch64::LDRSWroX:
861 case AArch64::LDRDroX:
862 case AArch64::LDRXroX:
865 case AArch64::LDRQroX:
875 case AArch64::ORRWrs:
876 case AArch64::ADDWri:
884 const LdStPairFlags &Flags) {
886 "Expected promotable zero stores.");
894 if (NextI == MergeMI)
897 unsigned Opc =
I->getOpcode();
898 unsigned MergeMIOpc = MergeMI->getOpcode();
899 bool IsScaled = !
TII->hasUnscaledLdStOffset(
Opc);
900 bool IsMergedMIScaled = !
TII->hasUnscaledLdStOffset(MergeMIOpc);
901 int OffsetStride = IsScaled ?
TII->getMemScale(*
I) : 1;
902 int MergeMIOffsetStride = IsMergedMIScaled ?
TII->getMemScale(*MergeMI) : 1;
904 bool MergeForward =
Flags.getMergeForward();
910 const MachineOperand &BaseRegOp =
912 : AArch64InstrInfo::getLdStBaseOp(*
I);
915 int64_t IOffsetInBytes =
917 int64_t MIOffsetInBytes =
922 if (IOffsetInBytes > MIOffsetInBytes)
923 OffsetImm = MIOffsetInBytes;
925 OffsetImm = IOffsetInBytes;
930 if (!
TII->hasUnscaledLdStOffset(NewOpcode)) {
931 int NewOffsetStride =
TII->getMemScale(NewOpcode);
932 assert(((OffsetImm % NewOffsetStride) == 0) &&
933 "Offset should be a multiple of the store memory scale");
934 OffsetImm = OffsetImm / NewOffsetStride;
940 MachineInstrBuilder MIB;
942 .
addReg(isNarrowStore(
Opc) ? AArch64::WZR : AArch64::XZR)
946 .setMIFlags(
I->mergeFlagsWith(*MergeMI));
949 LLVM_DEBUG(
dbgs() <<
"Creating wider store. Replacing instructions:\n ");
958 I->eraseFromParent();
959 MergeMI->eraseFromParent();
969 auto MBB =
MI.getParent();
977 return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
978 TRI->regsOverlap(MOP.getReg(), DefReg);
992 if (MOP.isReg() && MOP.isKill())
996 if (MOP.isReg() && !MOP.isKill())
997 Units.
addReg(MOP.getReg());
1004 unsigned InstrNumToSet,
1011 unsigned OperandNo = 0;
1012 bool RegFound =
false;
1013 for (
const auto Op : MergedInstr.
operands()) {
1014 if (
Op.getReg() ==
Reg) {
1023 {InstrNumToSet, OperandNo});
1029 const LdStPairFlags &Flags) {
1036 if (NextI == Paired)
1039 int SExtIdx =
Flags.getSExtIdx();
1042 bool IsUnscaled =
TII->hasUnscaledLdStOffset(
Opc);
1043 int OffsetStride = IsUnscaled ?
TII->getMemScale(*
I) : 1;
1045 bool MergeForward =
Flags.getMergeForward();
1047 std::optional<MCPhysReg> RenameReg =
Flags.getRenameReg();
1050 DefinedInBB.
addReg(*RenameReg);
1054 auto GetMatchingSubReg =
1057 TRI->sub_and_superregs_inclusive(*RenameReg)) {
1058 if (
C->contains(SubOrSuper))
1064 std::function<bool(MachineInstr &,
bool)> UpdateMIs =
1065 [
this, RegToRename, GetMatchingSubReg, MergeForward](MachineInstr &
MI,
1068 bool SeenDef =
false;
1070 MachineOperand &MOP =
MI.getOperand(
OpIdx);
1074 (!MergeForward || !SeenDef ||
1076 TRI->regsOverlap(MOP.
getReg(), RegToRename)) {
1079 "Need renamable operands");
1083 MatchingReg = GetMatchingSubReg(RC);
1087 MatchingReg = GetMatchingSubReg(
1088 TRI->getMinimalPhysRegClass(MOP.
getReg()));
1096 MachineOperand &MOP =
MI.getOperand(
OpIdx);
1098 TRI->regsOverlap(MOP.
getReg(), RegToRename)) {
1101 "Need renamable operands");
1105 MatchingReg = GetMatchingSubReg(RC);
1107 MatchingReg = GetMatchingSubReg(
1108 TRI->getMinimalPhysRegClass(MOP.
getReg()));
1109 assert(MatchingReg != AArch64::NoRegister &&
1110 "Cannot find matching regs for renaming");
1119 TRI, UINT32_MAX, UpdateMIs);
1132 RegToCheck = RegToRename;
1135 MergeForward ? std::next(
I) :
I,
1136 MergeForward ? std::next(Paired) : Paired))
1138 [
this, RegToCheck](
const MachineOperand &MOP) {
1139 return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1141 !TRI->regsOverlap(MOP.getReg(), RegToCheck);
1143 "Rename register used between paired instruction, trashing the "
1153 const MachineOperand &BaseRegOp =
1155 : AArch64InstrInfo::getLdStBaseOp(*
I);
1159 bool PairedIsUnscaled =
TII->hasUnscaledLdStOffset(Paired->getOpcode());
1160 if (IsUnscaled != PairedIsUnscaled) {
1164 int MemSize =
TII->getMemScale(*Paired);
1165 if (PairedIsUnscaled) {
1168 assert(!(PairedOffset %
TII->getMemScale(*Paired)) &&
1169 "Offset should be a multiple of the stride!");
1170 PairedOffset /= MemSize;
1172 PairedOffset *= MemSize;
1179 MachineInstr *RtMI, *Rt2MI;
1180 if (
Offset == PairedOffset + OffsetStride &&
1188 SExtIdx = (SExtIdx + 1) % 2;
1196 assert(!(OffsetImm %
TII->getMemScale(*RtMI)) &&
1197 "Unscaled offset cannot be scaled.");
1198 OffsetImm /=
TII->getMemScale(*RtMI);
1202 MachineInstrBuilder MIB;
1207 MachineOperand &PairedRegOp = RtMI == &*Paired ? RegOp0 : RegOp1;
1209 if (RegOp0.
isUse()) {
1210 if (!MergeForward) {
1221 for (
auto It = std::next(
I); It != Paired && PairedRegOp.
isKill(); ++It)
1222 if (It->readsRegister(PairedRegOp.
getReg(),
TRI))
1230 for (MachineInstr &
MI :
1231 make_range(std::next(
I->getIterator()), Paired->getIterator()))
1248 .setMIFlags(
I->mergeFlagsWith(*Paired));
1253 dbgs() <<
"Creating pair load/store. Replacing instructions:\n ");
1258 if (SExtIdx != -1) {
1263 MachineOperand &DstMO = MIB->
getOperand(SExtIdx);
1268 Register DstRegW =
TRI->getSubReg(DstRegX, AArch64::sub_32);
1277 MachineInstrBuilder MIBKill =
1278 BuildMI(*
MBB, InsertionPoint,
DL,
TII->get(TargetOpcode::KILL), DstRegW)
1280 .
addReg(DstRegX, RegState::Define);
1283 MachineInstrBuilder MIBSXTW =
1284 BuildMI(*
MBB, InsertionPoint,
DL,
TII->get(AArch64::SBFMXri), DstRegX)
1324 if (
I->peekDebugInstrNum()) {
1332 unsigned NewInstrNum;
1333 if (DstRegX ==
I->getOperand(0).getReg()) {
1342 if (Paired->peekDebugInstrNum()) {
1350 unsigned NewInstrNum;
1351 if (DstRegX == Paired->getOperand(0).getReg()) {
1364 }
else if (
Opc == AArch64::LDR_ZXI ||
Opc == AArch64::STR_ZXI) {
1370 AArch64::ZPRRegClass.contains(MOp1.
getReg()) &&
"Invalid register.");
1371 MOp0.
setReg(AArch64::Q0 + (MOp0.
getReg() - AArch64::Z0));
1372 MOp1.
setReg(AArch64::Q0 + (MOp1.
getReg() - AArch64::Z0));
1403 if (
I->peekDebugInstrNum()) {
1408 if (Paired->peekDebugInstrNum()) {
1428 SmallSetVector<Register, 4>
Ops;
1429 for (
const MachineOperand &MO :
1431 if (MO.isReg() && MO.isImplicit() && MO.isDef())
1432 Ops.insert(MO.getReg());
1433 for (
const MachineOperand &MO :
1435 if (MO.isReg() && MO.isImplicit() && MO.isDef())
1436 Ops.insert(MO.getReg());
1438 MIB.addDef(
Op, RegState::Implicit);
1440 CopyImplicitOps(
I, Paired);
1443 I->eraseFromParent();
1444 Paired->eraseFromParent();
1453 next_nodbg(LoadI, LoadI->getParent()->end());
1455 int LoadSize =
TII->getMemScale(*LoadI);
1456 int StoreSize =
TII->getMemScale(*StoreI);
1460 bool IsStoreXReg =
TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);
1463 TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
1464 "Unexpected RegClass");
1466 MachineInstr *BitExtMI;
1467 if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
1470 if (StRt == LdRt && LoadSize == 8) {
1471 for (MachineInstr &
MI :
make_range(StoreI->getIterator(),
1472 LoadI->getIterator())) {
1473 if (
MI.killsRegister(StRt,
TRI)) {
1474 MI.clearRegisterKills(StRt,
TRI);
1481 LoadI->eraseFromParent();
1486 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1487 TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
1488 .
addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
1497 bool IsUnscaled =
TII->hasUnscaledLdStOffset(*LoadI);
1498 assert(IsUnscaled ==
TII->hasUnscaledLdStOffset(*StoreI) &&
1499 "Unsupported ld/st match");
1500 assert(LoadSize <= StoreSize &&
"Invalid load size");
1501 int UnscaledLdOffset =
1505 int UnscaledStOffset =
1509 int Width = LoadSize * 8;
1512 LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))
1515 assert((UnscaledLdOffset >= UnscaledStOffset &&
1516 (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
1519 int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
1520 int Imms = Immr + Width - 1;
1521 if (UnscaledLdOffset == UnscaledStOffset) {
1522 uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12)
1528 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1529 TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
1534 }
else if (IsStoreXReg && Imms == 31) {
1537 assert(Immr <= Imms &&
"Expected LSR alias of UBFM");
1538 BitExtMI =
BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1539 TII->get(AArch64::UBFMWri),
1540 TRI->getSubReg(DestReg, AArch64::sub_32))
1541 .
addReg(
TRI->getSubReg(StRt, AArch64::sub_32))
1547 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1548 TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
1558 for (MachineInstr &
MI :
make_range(StoreI->getIterator(),
1560 if (
MI.killsRegister(StRt,
TRI)) {
1561 MI.clearRegisterKills(StRt,
TRI);
1576 LoadI->eraseFromParent();
1586 if (
Offset % OffsetStride)
1598 return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
1615bool AArch64LoadStoreOpt::findMatchingStore(
1620 MachineInstr &LoadMI = *
I;
1630 ModifiedRegUnits.
clear();
1631 UsedRegUnits.
clear();
1636 MachineInstr &
MI = *
MBBI;
1640 if (!
MI.isTransient())
1666 if (!ModifiedRegUnits.
available(BaseReg))
1684 LdStPairFlags &Flags,
1687 if (
MI.hasOrderedMemoryRef() ||
TII->isLdStPairSuppressed(
MI))
1692 !
TII->isLdStPairSuppressed(FirstMI) &&
1693 "FirstMI shouldn't get here if either of these checks are true.");
1700 unsigned OpcB =
MI.getOpcode();
1708 if (OpcA == AArch64::LDR_ZXI || OpcA == AArch64::STR_ZXI ||
1709 OpcB == AArch64::LDR_ZXI || OpcB == AArch64::STR_ZXI)
1717 bool IsValidLdStrOpc, PairIsValidLdStrOpc;
1719 assert(IsValidLdStrOpc &&
1720 "Given Opc should be a Load or Store with an immediate");
1723 Flags.setSExtIdx(NonSExtOpc == OpcA ? 1 : 0);
1729 if (!PairIsValidLdStrOpc)
1734 if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
1737 TII->getMemScale(FirstMI) ==
TII->getMemScale(
MI);
1746 return TII->hasUnscaledLdStOffset(OpcA) !=
TII->hasUnscaledLdStOffset(OpcB) &&
1755 auto *RegClass =
TRI->getMinimalPhysRegClass(MOP.
getReg());
1762 if (RegClass->HasDisjunctSubRegs && RegClass->CoveredBySubRegs &&
1763 (
TRI->getSubRegisterClass(RegClass, AArch64::dsub0) ||
1764 TRI->getSubRegisterClass(RegClass, AArch64::qsub0) ||
1765 TRI->getSubRegisterClass(RegClass, AArch64::zsub0))) {
1768 <<
" Cannot rename operands with multiple disjunct subregisters ("
1779 return TRI->isSuperOrSubRegisterEq(
1802 return MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
1803 MOP.isImplicit() && MOP.isKill() &&
1804 TRI->regsOverlap(RegToRename, MOP.getReg());
1810 bool FoundDef =
false;
1841 if (
MI.isPseudo()) {
1842 LLVM_DEBUG(
dbgs() <<
" Cannot rename pseudo/bundle instruction\n");
1846 for (
auto &MOP :
MI.operands()) {
1848 !
TRI->regsOverlap(MOP.
getReg(), RegToRename))
1858 for (
auto &MOP :
MI.operands()) {
1860 !
TRI->regsOverlap(MOP.
getReg(), RegToRename))
1877 LLVM_DEBUG(
dbgs() <<
" Did not find definition for register in BB\n");
1905 LLVM_DEBUG(dbgs() <<
"Checking " << MI);
1907 if (MI.getFlag(MachineInstr::FrameSetup)) {
1908 LLVM_DEBUG(dbgs() <<
" Cannot rename framesetup instructions "
1913 for (
auto &MOP :
MI.operands()) {
1914 if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1915 !TRI->regsOverlap(MOP.getReg(), RegToRename))
1917 if (!canRenameMOP(MOP, TRI)) {
1918 LLVM_DEBUG(dbgs() <<
" Cannot rename " << MOP <<
" in " << MI);
1944 auto AnySubOrSuperRegCalleePreserved = [&MF,
TRI](
MCPhysReg PR) {
1945 return any_of(
TRI->sub_and_superregs_inclusive(PR),
1947 return TRI->isCalleeSavedPhysReg(SubOrSuper, MF);
1953 auto CanBeUsedForAllClasses = [&RequiredClasses,
TRI](
MCPhysReg PR) {
1956 TRI->sub_and_superregs_inclusive(PR),
1957 [
C](
MCPhysReg SubOrSuper) { return C->contains(SubOrSuper); });
1961 auto *RegClass =
TRI->getMinimalPhysRegClass(
Reg);
1964 !
RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) &&
1965 CanBeUsedForAllClasses(PR)) {
1973 <<
TRI->getRegClassName(RegClass) <<
"\n");
1974 return std::nullopt;
1985 std::optional<MCPhysReg> RenameReg;
1994 const bool IsLoad = FirstMI.
mayLoad();
1996 if (!MaybeCanRename) {
1999 RequiredClasses,
TRI)};
2005 if (*MaybeCanRename) {
2007 RequiredClasses,
TRI);
2016 LdStPairFlags &Flags,
unsigned Limit,
2017 bool FindNarrowMerge) {
2020 MachineInstr &FirstMI = *
I;
2024 bool IsUnscaled =
TII->hasUnscaledLdStOffset(FirstMI);
2028 int OffsetStride = IsUnscaled ?
TII->getMemScale(FirstMI) : 1;
2031 std::optional<bool> MaybeCanRename;
2033 MaybeCanRename = {
false};
2035 SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;
2036 LiveRegUnits UsedInBetween;
2039 Flags.clearRenameReg();
2043 ModifiedRegUnits.
clear();
2044 UsedRegUnits.
clear();
2047 SmallVector<MachineInstr *, 4> MemInsns;
2052 MachineInstr &
MI = *
MBBI;
2059 if (!
MI.isTransient())
2062 Flags.setSExtIdx(-1);
2065 assert(
MI.mayLoadOrStore() &&
"Expected memory operation.");
2074 bool MIIsUnscaled =
TII->hasUnscaledLdStOffset(
MI);
2075 if (IsUnscaled != MIIsUnscaled) {
2079 int MemSize =
TII->getMemScale(
MI);
2083 if (MIOffset % MemSize) {
2089 MIOffset /= MemSize;
2091 MIOffset *= MemSize;
2097 if (BaseReg == MIBaseReg) {
2103 bool IsOutOfBounds = MIOffset !=
TII->getMemScale(
MI);
2104 bool IsBaseRegUsed = !UsedRegUnits.
available(
2106 bool IsBaseRegModified = !ModifiedRegUnits.
available(
2111 bool IsMIRegTheSame =
2114 if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
2122 if ((
Offset != MIOffset + OffsetStride) &&
2123 (
Offset + OffsetStride != MIOffset)) {
2132 if (FindNarrowMerge) {
2137 if ((!IsUnscaled &&
alignTo(MinOffset, 2) != MinOffset) ||
2154 <<
"keep looking.\n");
2160 if (IsUnscaled && (
alignTo(MinOffset, OffsetStride) != MinOffset)) {
2165 <<
"Offset doesn't fit due to alignment requirements, "
2166 <<
"keep looking.\n");
2177 if (!ModifiedRegUnits.
available(BaseReg))
2180 const bool SameLoadReg =
MayLoad &&
TRI->isSuperOrSubRegisterEq(
2187 bool RtNotModified =
2189 bool RtNotUsed = !(
MI.mayLoad() && !SameLoadReg &&
2192 LLVM_DEBUG(
dbgs() <<
"Checking, can combine 2nd into 1st insn:\n"
2194 << (RtNotModified ?
"true" :
"false") <<
"\n"
2196 << (RtNotUsed ?
"true" :
"false") <<
"\n");
2198 if (RtNotModified && RtNotUsed && !
mayAlias(
MI, MemInsns, AA)) {
2203 std::optional<MCPhysReg> RenameReg =
2205 Reg, DefinedInBB, UsedInBetween,
2206 RequiredClasses,
TRI);
2212 <<
"keep looking.\n");
2215 Flags.setRenameReg(*RenameReg);
2218 Flags.setMergeForward(
false);
2220 Flags.clearRenameReg();
2231 LLVM_DEBUG(
dbgs() <<
"Checking, can combine 1st into 2nd insn:\n"
2233 <<
"' not modified: "
2234 << (RtNotModified ?
"true" :
"false") <<
"\n");
2236 if (RtNotModified && !
mayAlias(FirstMI, MemInsns, AA)) {
2238 Flags.setMergeForward(
true);
2239 Flags.clearRenameReg();
2244 MaybeCanRename, FirstMI,
MI,
Reg, DefinedInBB, UsedInBetween,
2245 RequiredClasses,
TRI);
2247 Flags.setMergeForward(
true);
2248 Flags.setRenameReg(*RenameReg);
2252 LLVM_DEBUG(
dbgs() <<
"Unable to combine these instructions due to "
2253 <<
"interference in between, keep looking.\n");
2269 if (!ModifiedRegUnits.
available(BaseReg)) {
2275 if (
MI.mayLoadOrStore())
2283 assert((
MI.getOpcode() == AArch64::SUBXri ||
2284 MI.getOpcode() == AArch64::ADDXri) &&
2285 "Expected a register update instruction");
2286 auto End =
MI.getParent()->end();
2287 if (MaybeCFI == End ||
2288 MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||
2291 MI.getOperand(0).getReg() != AArch64::SP)
2295 unsigned CFIIndex = MaybeCFI->getOperand(0).getCFIIndex();
2306std::optional<MachineBasicBlock::iterator> AArch64LoadStoreOpt::mergeUpdateInsn(
2308 bool IsForward,
bool IsPreIdx,
bool MergeEither) {
2309 assert((Update->getOpcode() == AArch64::ADDXri ||
2310 Update->getOpcode() == AArch64::SUBXri) &&
2311 "Unexpected base register update instruction to merge!");
2327 if (std::any_of(std::next(CFI),
I, [](
const auto &Insn) {
2328 return Insn.getOpcode() == TargetOpcode::CFI_INSTRUCTION;
2330 return std::nullopt;
2332 MachineBasicBlock *
MBB = InsertPt->getParent();
2341 if (NextI == Update)
2344 int Value = Update->getOperand(2).getImm();
2346 "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
2347 if (Update->getOpcode() == AArch64::SUBXri)
2352 MachineInstrBuilder MIB;
2353 int Scale, MinOffset, MaxOffset;
2357 MIB =
BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
2359 .
add(Update->getOperand(0))
2367 MIB =
BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
2369 .
add(Update->getOperand(0))
2394 I->eraseFromParent();
2395 Update->eraseFromParent();
2403 unsigned Offset,
int Scale) {
2404 assert((Update->getOpcode() == AArch64::MOVKWi) &&
2405 "Unexpected const mov instruction to merge!");
2409 MachineInstr &MemMI = *
I;
2410 unsigned Mask = (1 << 12) * Scale - 1;
2415 MachineInstrBuilder AddMIB, MemMIB;
2419 BuildMI(*
I->getParent(),
I,
I->getDebugLoc(),
TII->get(AArch64::ADDXri))
2427 MemMIB =
BuildMI(*
I->getParent(),
I,
I->getDebugLoc(),
TII->get(NewOpc))
2435 ++NumConstOffsetFolded;
2450 I->eraseFromParent();
2451 PrevI->eraseFromParent();
2452 Update->eraseFromParent();
2457bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
2459 unsigned BaseReg,
int Offset) {
2460 switch (
MI.getOpcode()) {
2463 case AArch64::SUBXri:
2464 case AArch64::ADDXri:
2467 if (!
MI.getOperand(2).isImm())
2475 if (
MI.getOperand(0).getReg() != BaseReg ||
2476 MI.getOperand(1).getReg() != BaseReg)
2479 int UpdateOffset =
MI.getOperand(2).getImm();
2480 if (
MI.getOpcode() == AArch64::SUBXri)
2481 UpdateOffset = -UpdateOffset;
2485 int Scale, MinOffset, MaxOffset;
2487 if (UpdateOffset % Scale != 0)
2491 int ScaledOffset = UpdateOffset / Scale;
2492 if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)
2504bool AArch64LoadStoreOpt::isMatchingMovConstInsn(MachineInstr &MemMI,
2510 if (
MI.getOpcode() == AArch64::MOVKWi &&
2511 TRI->isSuperOrSubRegisterEq(IndexReg,
MI.getOperand(1).getReg())) {
2520 MachineInstr &MovzMI = *
MBBI;
2522 if (MovzMI.
getOpcode() == AArch64::MOVZWi &&
2525 unsigned High =
MI.getOperand(2).getImm() <<
MI.getOperand(3).getImm();
2528 return Offset >> 24 == 0;
2537 MachineInstr &MemMI = *
I;
2542 TII->getMemScale(MemMI);
2547 if (MIUnscaledOffset != UnscaledOffset)
2558 for (
unsigned i = 0, e = IsPairedInsn ? 2 : 1; i !=
e; ++i) {
2560 if (DestReg == BaseReg ||
TRI->isSubRegister(BaseReg, DestReg))
2567 ModifiedRegUnits.
clear();
2568 UsedRegUnits.
clear();
2574 const bool BaseRegSP =
BaseReg == AArch64::SP;
2583 MachineBasicBlock *CurMBB =
I->getParent();
2590 MachineInstr &
MI = *
MBBI;
2594 if (!
MI.isTransient())
2598 if (isMatchingUpdateInsn(*
I,
MI, BaseReg, UnscaledOffset))
2609 if (!ModifiedRegUnits.
available(BaseReg) ||
2611 (BaseRegSP &&
MBBI->mayLoadOrStore()))
2615 if (!VisitSucc || Limit <=
Count)
2620 MachineBasicBlock *SuccToVisit =
nullptr;
2621 unsigned LiveSuccCount = 0;
2622 for (MachineBasicBlock *Succ : CurMBB->
successors()) {
2623 for (MCRegAliasIterator AI(BaseReg,
TRI,
true); AI.isValid(); ++AI) {
2624 if (Succ->isLiveIn(*AI)) {
2625 if (LiveSuccCount++)
2627 if (Succ->pred_size() == 1)
2635 CurMBB = SuccToVisit;
2646 MachineInstr &MemMI = *
I;
2648 MachineFunction &MF = *MemMI.
getMF();
2656 : AArch64::NoRegister};
2665 for (
unsigned i = 0, e = IsPairedInsn ? 2 : 1; i !=
e; ++i)
2666 if (DestReg[i] == BaseReg ||
TRI->isSubRegister(BaseReg, DestReg[i]))
2670 const bool BaseRegSP =
BaseReg == AArch64::SP;
2678 const AArch64Subtarget &Subtarget = MF.
getSubtarget<AArch64Subtarget>();
2679 unsigned RedZoneSize =
2684 ModifiedRegUnits.
clear();
2685 UsedRegUnits.
clear();
2687 bool MemAccessBeforeSPPreInc =
false;
2691 MachineInstr &
MI = *
MBBI;
2695 if (!
MI.isTransient())
2699 if (isMatchingUpdateInsn(*
I,
MI, BaseReg,
Offset)) {
2702 if (MemAccessBeforeSPPreInc &&
MBBI->getOperand(2).getImm() > RedZoneSize)
2712 if (!ModifiedRegUnits.
available(BaseReg) ||
2720 if (
MI.mayLoadOrStore() ||
MI.hasUnmodeledSideEffects() ||
2721 (DestReg[0] != AArch64::NoRegister &&
2722 !(ModifiedRegUnits.
available(DestReg[0]) &&
2724 (DestReg[1] != AArch64::NoRegister &&
2725 !(ModifiedRegUnits.
available(DestReg[1]) &&
2727 MergeEither =
false;
2732 if (BaseRegSP &&
MBBI->mayLoadOrStore())
2733 MemAccessBeforeSPPreInc =
true;
2739AArch64LoadStoreOpt::findMatchingConstOffsetBackward(
2743 MachineInstr &MemMI = *
I;
2762 ModifiedRegUnits.
clear();
2763 UsedRegUnits.
clear();
2767 MachineInstr &
MI = *
MBBI;
2771 if (!
MI.isTransient())
2775 if (isMatchingMovConstInsn(*
I,
MI, IndexReg,
Offset)) {
2784 if (!ModifiedRegUnits.
available(IndexReg) ||
2792bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
2794 MachineInstr &
MI = *
MBBI;
2796 if (
MI.hasOrderedMemoryRef())
2810 ++NumLoadsFromStoresPromoted;
2814 MBBI = promoteLoadFromStore(
MBBI, StoreI);
2821bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
2824 MachineInstr &
MI = *
MBBI;
2827 if (!
TII->isCandidateToMergeOrPair(
MI))
2831 LdStPairFlags
Flags;
2835 ++NumZeroStoresPromoted;
2839 MBBI = mergeNarrowZeroStores(
MBBI, MergeMI, Flags);
2848 MachineInstr &
MI = *
MBBI;
2851 if (!
TII->isCandidateToMergeOrPair(
MI))
2855 if (
MI.mayLoad() && Subtarget->hasDisableLdp())
2859 if (
MI.mayStore() && Subtarget->hasDisableStp())
2865 bool IsUnscaled =
TII->hasUnscaledLdStOffset(
MI);
2867 int OffsetStride = IsUnscaled ?
TII->getMemScale(
MI) : 1;
2875 LdStPairFlags
Flags;
2884 auto Prev = std::prev(
MBBI);
2887 MachineMemOperand *MemOp =
2888 MI.memoperands_empty() ? nullptr :
MI.memoperands().front();
2893 if ((
MI.mayLoad() && Subtarget->hasLdpAlignedOnly()) ||
2894 (
MI.mayStore() && Subtarget->hasStpAlignedOnly())) {
2896 if (!MemOp || !MemOp->getMemoryType().isValid()) {
2897 NumFailedAlignmentCheck++;
2903 uint64_t MemAlignment = MemOp->getAlign().value();
2904 uint64_t TypeAlignment =
2905 Align(MemOp->getSize().getValue().getKnownMinValue()).value();
2907 if (MemAlignment < 2 * TypeAlignment) {
2908 NumFailedAlignmentCheck++;
2914 if (
TII->hasUnscaledLdStOffset(
MI))
2915 ++NumUnscaledPairCreated;
2917 MBBI = mergePairedInsns(
MBBI, Paired, Flags);
2920 for (
auto I = std::next(Prev);
I !=
MBBI;
I++)
2926bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
2928 MachineInstr &
MI = *
MBBI;
2937 if (
MBBI->hasOrderedMemoryRef() &&
2950 if (
auto NextI = mergeUpdateInsn(
MBBI, Update,
false,
2959 if (
TII->hasUnscaledLdStOffset(
MI.getOpcode()))
2968 Update = findMatchingUpdateInsnBackward(
MBBI,
UpdateLimit, MergeEither);
2971 if (
auto NextI = mergeUpdateInsn(
MBBI, Update,
true,
2972 true, MergeEither)) {
2981 int UnscaledOffset =
2989 Update = findMatchingUpdateInsnForward(
MBBI, UnscaledOffset,
UpdateLimit);
2992 if (
auto NextI = mergeUpdateInsn(
MBBI, Update,
false,
3005 MachineInstr &
MI = *
MBBI;
3010 if (
TII->hasUnscaledLdStOffset(
MI.getOpcode()))
3022 if (Update !=
E && (
Offset & (Scale - 1)) == 0) {
3034 switch (GPRStoreOpc) {
3036 case AArch64::STRBBui:
3037 return AArch64::STRBui;
3038 case AArch64::STRHHui:
3039 return AArch64::STRHui;
3040 case AArch64::STRWui:
3041 return AArch64::STRSui;
3042 case AArch64::STRXui:
3043 return AArch64::STRDui;
3045 case AArch64::STURBBi:
3046 return AArch64::STURBi;
3047 case AArch64::STURHHi:
3048 return AArch64::STURHi;
3049 case AArch64::STURWi:
3050 return AArch64::STURSi;
3051 case AArch64::STURXi:
3052 return AArch64::STURDi;
3054 case AArch64::STRBBroW:
3055 return AArch64::STRBroW;
3056 case AArch64::STRBBroX:
3057 return AArch64::STRBroX;
3058 case AArch64::STRHHroW:
3059 return AArch64::STRHroW;
3060 case AArch64::STRHHroX:
3061 return AArch64::STRHroX;
3062 case AArch64::STRWroW:
3063 return AArch64::STRSroW;
3064 case AArch64::STRWroX:
3065 return AArch64::STRSroX;
3066 case AArch64::STRXroW:
3067 return AArch64::STRDroW;
3068 case AArch64::STRXroX:
3069 return AArch64::STRDroX;
3079 case AArch64::UMOVvi8_idx0:
3080 return AArch64::bsub;
3081 case AArch64::UMOVvi16_idx0:
3082 return AArch64::hsub;
3083 case AArch64::UMOVvi32_idx0:
3084 return AArch64::ssub;
3085 case AArch64::UMOVvi64_idx0:
3086 return AArch64::dsub;
3092bool AArch64LoadStoreOpt::tryToReplaceUMOVStore(
3094 MachineInstr &StoreMI = *
MBBI;
3117 MachineInstr *UMOVMI =
nullptr;
3119 unsigned SubRegIdx = 0;
3121 for (
auto It =
MBBI; It !=
B;) {
3122 MachineInstr &
MI = *--It;
3123 if (
MI.isDebugInstr())
3127 if (
MI.readsRegister(StoreValReg,
TRI))
3129 if (
MI.modifiesRegister(StoreValReg,
TRI)) {
3142 TRI->getRegSizeInBits(*
TRI->getMinimalPhysRegClass(FPRReg)))
3151 if (It->modifiesRegister(VecReg,
TRI))
3153 if (!VecRegKilled && It->killsRegister(VecReg,
TRI))
3154 VecRegKilled =
true;
3161 It->clearRegisterKills(VecReg,
TRI);
3163 LLVM_DEBUG(
dbgs() <<
"Folding UMOV + store: " << *UMOVMI <<
" + "
3175 ++NumUMOVFoldedToFPRStore;
3179bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &
MBB,
3180 bool EnableNarrowZeroStOpt) {
3212 if (EnableNarrowZeroStOpt)
3236 DefinedInBB.
clear();
3245 if (
TII->isPairableLdStInst(*
MBBI) && tryToPairLdStInst(
MBBI))
3289 if (tryToReplaceUMOVStore(
MBBI))
3298bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
3311 bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
3312 for (
auto &
MBB : Fn) {
3330bool AArch64LoadStoreOptLegacy::runOnMachineFunction(MachineFunction &MF) {
3333 AArch64LoadStoreOpt Impl;
3334 Impl.AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
3335 return Impl.runOnMachineFunction(MF);
3341 return new AArch64LoadStoreOptLegacy();
3347 AArch64LoadStoreOpt Impl;
3351 bool Changed = Impl.runOnMachineFunction(MF);
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
static cl::opt< bool > EnableRenaming("aarch64-load-store-renaming", cl::init(true), cl::Hidden)
static MachineOperand & getLdStRegOp(MachineInstr &MI, unsigned PairedRegOp=0)
static bool isPromotableLoadFromStore(MachineInstr &MI)
static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale, int &MinOffset, int &MaxOffset)
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride)
static unsigned getMatchingPairOpcode(unsigned Opc)
static unsigned getGPRToFPRStoreOpcode(unsigned GPRStoreOpc)
static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI, LdStPairFlags &Flags, const AArch64InstrInfo *TII)
static std::optional< MCPhysReg > tryToFindRegisterToRename(const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static bool needsWinCFI(const MachineFunction *MF)
static bool canRenameUntilSecondLoad(MachineInstr &FirstLoad, MachineInstr &SecondLoad, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static std::optional< MCPhysReg > findRenameRegForSameLdStRegPair(std::optional< bool > MaybeCanRename, MachineInstr &FirstMI, MachineInstr &MI, Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
static cl::opt< unsigned > LdStLimit("aarch64-load-store-scan-limit", cl::init(20), cl::Hidden)
static bool canRenameMOP(const MachineOperand &MOP, const TargetRegisterInfo *TRI)
static bool isRewritableImplicitDef(const MachineOperand &MO)
static unsigned getPreIndexedOpcode(unsigned Opc)
#define AARCH64_LOAD_STORE_OPT_NAME
static void addDebugSubstitutionsToTable(MachineFunction *MF, unsigned InstrNumToSet, MachineInstr &OriginalInstr, MachineInstr &MergedInstr)
This function will add a new entry into the debugValueSubstitutions table when two instructions have b...
static cl::opt< unsigned > UpdateLimit("aarch64-update-scan-limit", cl::init(100), cl::Hidden)
static bool isPromotableZeroStoreInst(MachineInstr &MI)
static unsigned getMatchingWideOpcode(unsigned Opc)
static unsigned getMatchingNonSExtOpcode(unsigned Opc, bool *IsValidLdStrOpc=nullptr)
static MachineBasicBlock::iterator maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI)
static bool isTagStore(const MachineInstr &MI)
static unsigned isMatchingStore(MachineInstr &LoadInst, MachineInstr &StoreInst)
static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg, const TargetRegisterInfo *TRI, unsigned Limit, std::function< bool(MachineInstr &, bool)> &Fn)
static unsigned getPostIndexedOpcode(unsigned Opc)
static unsigned getUMOVSubRegIdx(unsigned UMOVOpc)
static bool isMergeableLdStUpdate(MachineInstr &MI, AArch64FunctionInfo &AFI)
static cl::opt< unsigned > LdStConstLimit("aarch64-load-store-const-scan-limit", cl::init(10), cl::Hidden)
static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst, MachineInstr &StoreInst, const AArch64InstrInfo *TII)
static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI)
static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale)
static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units, const TargetRegisterInfo *TRI)
static bool canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static cl::opt< unsigned > UMOVFoldLimit("aarch64-umov-fold-scan-limit", cl::init(16), cl::Hidden)
static unsigned getBaseAddressOpcode(unsigned Opc)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
const HexagonInstrInfo * TII
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
Returns the immediate offset operator of a load/store.
static const MachineOperand & getLdStAmountOp(const MachineInstr &MI)
Returns the shift amount operator of a load/store.
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
static bool isPairedLdSt(const MachineInstr &MI)
Returns whether the instruction is a paired load/store.
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
Returns the base register operator of a load/store.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
const AArch64RegisterInfo * getRegisterInfo() const override
const AArch64InstrInfo * getInstrInfo() const override
const AArch64TargetLowering * getTargetLowering() const override
bool isLittleEndian() const
unsigned getRedZoneSize(const Function &F) const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
Represents analyses that only rely on functions' control flow.
static bool shouldExecute(CounterInfo &Counter)
FunctionPass class - This class is used to implement most global optimizations.
bool needsUnwindTableEntry() const
True if this function needs an unwind table.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
A set of register units used to track register liveness.
static void accumulateUsedDefed(const MachineInstr &MI, LiveRegUnits &ModifiedRegUnits, LiveRegUnits &UsedRegUnits, const TargetRegisterInfo *TRI)
For a machine instruction MI, adds all register units used in UsedRegUnits and defined or clobbered i...
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void init(const TargetRegisterInfo &TRI)
Initialize and clear the set.
void addReg(MCRegister Reg)
Adds register units covered by physical register Reg.
void removeReg(MCRegister Reg)
Removes all register units covered by physical register Reg.
LLVM_ABI void addLiveIns(const MachineBasicBlock &MBB)
Adds registers living into block MBB.
void clear()
Clears the set.
LLVM_ABI void accumulate(const MachineInstr &MI)
Adds all register units used, defined or clobbered in MI.
An instruction for reading from memory.
bool usesWindowsCFI() const
OpType getOperation() const
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const std::vector< MCCFIInstruction > & getFrameInstructions() const
Returns a reference to a list of cfi instructions in the function's prologue.
void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair, unsigned SubReg=0)
Create a substitution between one <instr,operand> value to a different, new value.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
const MachineInstrBuilder & cloneMergedMemRefs(ArrayRef< const MachineInstr * > OtherMIs) const
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
LLVM_ABI bool mayAlias(BatchAAResults *AA, const MachineInstr &Other, bool UseTBAA) const
Returns true if this instruction's memory access aliases the memory access of Other.
unsigned peekDebugInstrNum() const
Examine the instruction number of this MachineInstr.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
LLVM_ABI void setMemRefs(MachineFunction &MF, ArrayRef< MachineMemOperand * > MemRefs)
Assign this MachineInstr's memory reference descriptor list.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool isPseudo(QueryType Type=IgnoreBundle) const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
LLVM_ABI void dump() const
LLVM_ABI unsigned getDebugInstrNum()
Fetch the instruction number of this MachineInstr.
LLVM_ABI void clearRegisterKills(Register Reg, const TargetRegisterInfo *RegInfo)
Clear all kill flags affecting Reg.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
MachineOperand class - Representation of each machine instruction operand.
void setImplicit(bool Val=true)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
LLVM_ABI bool isRenamable() const
isRenamable - Returns true if this register may be renamed, i.e.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
bool isEarlyClobber() const
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Wrapper class representing virtual and physical registers.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
An instruction for storing to memory.
Represent a constant reference to a string, i.e.
const MCAsmInfo & getMCAsmInfo() const
Return target specific asm information.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
self_iterator getIterator()
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount; shifter: 000 ==> lsl, 001 ==...
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
initializer< Ty > init(const Ty &Val)
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr RegState getKillRegState(bool B)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< filter_iterator< ConstMIBundleOperands, bool(*)(const MachineOperand &)> > phys_regs_and_masks(const MachineInstr &MI)
Returns an iterator range over all physical register and mask operands for MI and bundled instruction...
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
FunctionPass * createAArch64LoadStoreOptLegacyPass()
createAArch64LoadStoreOptimizationPass - returns an instance of the load / store optimization pass.
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Value
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Count
DWARFExpression::Operation Op
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
IterT prev_nodbg(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It, then continue decrementing it while it points to a debug instruction.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
MCRegisterClass TargetRegisterClass