// NOTE(review): this file is a lossy extraction of an LLVM AArch64
// load/store optimization pass ("aarch64-ldst-opt"). Original source line
// numbers are fused into the text (e.g. "55#define") and some lines are
// missing entirely. Comments describe only what is visible.
// Pass statistics counters (LLVM STATISTIC macro): incremented when the
// pass performs each kind of transformation.
55#define DEBUG_TYPE "aarch64-ldst-opt"
57STATISTIC(NumPairCreated,
"Number of load/store pair instructions generated");
58STATISTIC(NumPostFolded,
"Number of post-index updates folded");
59STATISTIC(NumPreFolded,
"Number of pre-index updates folded");
// NOTE(review): the STATISTIC counter name for the next message is missing
// from this extraction.
61 "Number of load/store from unscaled generated");
62STATISTIC(NumZeroStoresPromoted,
"Number of narrow zero stores promoted");
63STATISTIC(NumLoadsFromStoresPromoted,
"Number of loads from stores promoted");
64STATISTIC(NumFailedAlignmentCheck,
"Number of load/store pair transformation "
65 "not passed the alignment check");
// NOTE(review): declarations owning the two strings below (a counter and,
// presumably, a cl::opt controlling renaming) are missing — TODO confirm.
67 "Number of const offset of index address folded");
70 "Controls which pairs are considered for renaming");
90#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
// Flag bundle passed between the pair-candidate search and the merge step:
// carries the merge direction, a sign-extend operand index, and an optional
// register chosen for renaming.
94using LdStPairFlags =
struct LdStPairFlags {
// Merge direction flag; exact semantics not visible in this fragment.
98 bool MergeForward =
false;
// Physical register to rename a conflicting operand to, if any.
109 std::optional<MCPhysReg> RenameReg;
111 LdStPairFlags() =
default;
113 void setMergeForward(
bool V =
true) { MergeForward = V; }
114 bool getMergeForward()
const {
return MergeForward; }
// NOTE(review): the SExtIdx member declaration is missing from this
// extraction; the accessors imply an int index (callers elsewhere use -1
// for "none") — TODO confirm against the full source.
116 void setSExtIdx(
int V) { SExtIdx = V; }
117 int getSExtIdx()
const {
return SExtIdx; }
119 void setRenameReg(
MCPhysReg R) { RenameReg = R; }
120 void clearRenameReg() { RenameReg = std::nullopt; }
121 std::optional<MCPhysReg> getRenameReg()
const {
return RenameReg; }
// Main pass object (declaration fragment; most members and all signatures
// are truncated by the extraction).
124struct AArch64LoadStoreOpt {
138 LdStPairFlags &Flags,
140 bool FindNarrowMerge);
151 const LdStPairFlags &Flags);
157 const LdStPairFlags &Flags);
169 int UnscaledOffset,
unsigned Limit);
191 unsigned BaseReg,
int Offset);
194 unsigned IndexReg,
unsigned &
Offset);
197 std::optional<MachineBasicBlock::iterator>
200 bool IsPreIdx,
bool MergeEither);
// Legacy pass manager identifier storage.
246char AArch64LoadStoreOptLegacy::ID = 0;
// Returns true for narrow (byte/halfword) store opcodes, scaled and
// unscaled forms. NOTE(review): the switch header, default case and return
// statements are missing from this extraction.
253static
bool isNarrowStore(
unsigned Opc) {
257 case AArch64::STRBBui:
258 case AArch64::STURBBi:
259 case AArch64::STRHHui:
260 case AArch64::STURHHi:
// Fragment of a separate opcode switch that includes the memory-tagging
// store-zero STZ2G instruction.
268 switch (
MI.getOpcode()) {
274 case AArch64::STZ2Gi:
// Opcode classifier/mapper fragment: reports via the optional out-parameter
// whether an opcode is a recognized load/store with an immediate, returns
// unsigned max for unrecognized opcodes, and maps sign-extending word loads
// (LDRSW*) to their zero-extending equivalents. The recognized opcodes
// themselves map through unchanged (fall-through body not visible here).
280 bool *IsValidLdStrOpc =
nullptr) {
282 *IsValidLdStrOpc =
true;
// Unknown opcode: flag as invalid and return a sentinel.
286 *IsValidLdStrOpc =
false;
287 return std::numeric_limits<unsigned>::max();
288 case AArch64::STRDui:
289 case AArch64::STURDi:
290 case AArch64::STRDpre:
291 case AArch64::STRQui:
292 case AArch64::STURQi:
293 case AArch64::STRQpre:
294 case AArch64::STRBBui:
295 case AArch64::STURBBi:
296 case AArch64::STRHHui:
297 case AArch64::STURHHi:
298 case AArch64::STRWui:
299 case AArch64::STRWpre:
300 case AArch64::STURWi:
301 case AArch64::STRXui:
302 case AArch64::STRXpre:
303 case AArch64::STURXi:
304 case AArch64::STR_ZXI:
305 case AArch64::LDRDui:
306 case AArch64::LDURDi:
307 case AArch64::LDRDpre:
308 case AArch64::LDRQui:
309 case AArch64::LDURQi:
310 case AArch64::LDRQpre:
311 case AArch64::LDRWui:
312 case AArch64::LDURWi:
313 case AArch64::LDRWpre:
314 case AArch64::LDRXui:
315 case AArch64::LDURXi:
316 case AArch64::LDRXpre:
317 case AArch64::STRSui:
318 case AArch64::STURSi:
319 case AArch64::STRSpre:
320 case AArch64::LDRSui:
321 case AArch64::LDURSi:
322 case AArch64::LDRSpre:
323 case AArch64::LDR_ZXI:
// Sign-extending word loads map to the plain (zero-extending) word loads.
325 case AArch64::LDRSWui:
326 return AArch64::LDRWui;
327 case AArch64::LDURSWi:
328 return AArch64::LDURWi;
329 case AArch64::LDRSWpre:
330 return AArch64::LDRWpre;
// Maps a narrow store opcode to the store of twice the width; used when
// two adjacent narrow zero stores are merged into one wider store
// (byte -> halfword -> word -> doubleword, scaled and unscaled forms).
338 case AArch64::STRBBui:
339 return AArch64::STRHHui;
340 case AArch64::STRHHui:
341 return AArch64::STRWui;
342 case AArch64::STURBBi:
343 return AArch64::STURHHi;
344 case AArch64::STURHHi:
345 return AArch64::STURWi;
346 case AArch64::STURWi:
347 return AArch64::STURXi;
348 case AArch64::STRWui:
349 return AArch64::STRXui;
// Maps a single load/store opcode (scaled ui, unscaled i, or pre-indexed)
// to the corresponding load/store-pair opcode, e.g. STRSui/STURSi -> STPSi.
// SVE fill/spill (LDR_ZXI/STR_ZXI) pairs as Q-register LDP/STP.
357 case AArch64::STRSui:
358 case AArch64::STURSi:
359 return AArch64::STPSi;
360 case AArch64::STRSpre:
361 return AArch64::STPSpre;
362 case AArch64::STRDui:
363 case AArch64::STURDi:
364 return AArch64::STPDi;
365 case AArch64::STRDpre:
366 return AArch64::STPDpre;
367 case AArch64::STRQui:
368 case AArch64::STURQi:
369 case AArch64::STR_ZXI:
370 return AArch64::STPQi;
371 case AArch64::STRQpre:
372 return AArch64::STPQpre;
373 case AArch64::STRWui:
374 case AArch64::STURWi:
375 return AArch64::STPWi;
376 case AArch64::STRWpre:
377 return AArch64::STPWpre;
378 case AArch64::STRXui:
379 case AArch64::STURXi:
380 return AArch64::STPXi;
381 case AArch64::STRXpre:
382 return AArch64::STPXpre;
383 case AArch64::LDRSui:
384 case AArch64::LDURSi:
385 return AArch64::LDPSi;
386 case AArch64::LDRSpre:
387 return AArch64::LDPSpre;
388 case AArch64::LDRDui:
389 case AArch64::LDURDi:
390 return AArch64::LDPDi;
391 case AArch64::LDRDpre:
392 return AArch64::LDPDpre;
393 case AArch64::LDRQui:
394 case AArch64::LDURQi:
395 case AArch64::LDR_ZXI:
396 return AArch64::LDPQi;
397 case AArch64::LDRQpre:
398 return AArch64::LDPQpre;
399 case AArch64::LDRWui:
400 case AArch64::LDURWi:
401 return AArch64::LDPWi;
402 case AArch64::LDRWpre:
403 return AArch64::LDPWpre;
404 case AArch64::LDRXui:
405 case AArch64::LDURXi:
406 return AArch64::LDPXi;
407 case AArch64::LDRXpre:
408 return AArch64::LDPXpre;
409 case AArch64::LDRSWui:
410 case AArch64::LDURSWi:
411 return AArch64::LDPSWi;
412 case AArch64::LDRSWpre:
413 return AArch64::LDPSWpre;
// Given a load opcode, returns whether a prior store with opcode StOpc can
// forward its data to the load: the store must be at least as wide as the
// load and use the same addressing form (scaled ui vs. unscaled i).
424 case AArch64::LDRBBui:
425 return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
426 StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
427 case AArch64::LDURBBi:
428 return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
429 StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
430 case AArch64::LDRHHui:
431 return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
432 StOpc == AArch64::STRXui;
433 case AArch64::LDURHHi:
434 return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
435 StOpc == AArch64::STURXi;
436 case AArch64::LDRWui:
437 return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
438 case AArch64::LDURWi:
439 return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
440 case AArch64::LDRXui:
441 return StOpc == AArch64::STRXui;
442 case AArch64::LDURXi:
443 return StOpc == AArch64::STURXi;
// Maps an unsigned-immediate load/store (and pair / memory-tagging store)
// opcode to its pre-indexed writeback form, e.g. STRXui -> STRXpre.
455 case AArch64::STRBui:
456 return AArch64::STRBpre;
457 case AArch64::STRHui:
458 return AArch64::STRHpre;
459 case AArch64::STRSui:
460 return AArch64::STRSpre;
461 case AArch64::STRDui:
462 return AArch64::STRDpre;
463 case AArch64::STRQui:
464 return AArch64::STRQpre;
465 case AArch64::STRBBui:
466 return AArch64::STRBBpre;
467 case AArch64::STRHHui:
468 return AArch64::STRHHpre;
469 case AArch64::STRWui:
470 return AArch64::STRWpre;
471 case AArch64::STRXui:
472 return AArch64::STRXpre;
473 case AArch64::LDRBui:
474 return AArch64::LDRBpre;
475 case AArch64::LDRHui:
476 return AArch64::LDRHpre;
477 case AArch64::LDRSui:
478 return AArch64::LDRSpre;
479 case AArch64::LDRDui:
480 return AArch64::LDRDpre;
481 case AArch64::LDRQui:
482 return AArch64::LDRQpre;
483 case AArch64::LDRBBui:
484 return AArch64::LDRBBpre;
485 case AArch64::LDRHHui:
486 return AArch64::LDRHHpre;
487 case AArch64::LDRWui:
488 return AArch64::LDRWpre;
489 case AArch64::LDRXui:
490 return AArch64::LDRXpre;
491 case AArch64::LDRSWui:
492 return AArch64::LDRSWpre;
// Pair and tag-store cases (case labels partially missing in extraction).
494 return AArch64::LDPSpre;
495 case AArch64::LDPSWi:
496 return AArch64::LDPSWpre;
498 return AArch64::LDPDpre;
500 return AArch64::LDPQpre;
502 return AArch64::LDPWpre;
504 return AArch64::LDPXpre;
506 return AArch64::STPSpre;
508 return AArch64::STPDpre;
510 return AArch64::STPQpre;
512 return AArch64::STPWpre;
514 return AArch64::STPXpre;
516 return AArch64::STGPreIndex;
518 return AArch64::STZGPreIndex;
520 return AArch64::ST2GPreIndex;
521 case AArch64::STZ2Gi:
522 return AArch64::STZ2GPreIndex;
524 return AArch64::STGPpre;
// Maps register-offset (roX) loads to their unsigned-immediate (ui) forms;
// used when a constant index can be folded into an immediate offset.
533 case AArch64::LDRBroX:
534 return AArch64::LDRBui;
535 case AArch64::LDRBBroX:
536 return AArch64::LDRBBui;
537 case AArch64::LDRSBXroX:
538 return AArch64::LDRSBXui;
539 case AArch64::LDRSBWroX:
540 return AArch64::LDRSBWui;
541 case AArch64::LDRHroX:
542 return AArch64::LDRHui;
543 case AArch64::LDRHHroX:
544 return AArch64::LDRHHui;
545 case AArch64::LDRSHXroX:
546 return AArch64::LDRSHXui;
547 case AArch64::LDRSHWroX:
548 return AArch64::LDRSHWui;
549 case AArch64::LDRWroX:
550 return AArch64::LDRWui;
551 case AArch64::LDRSroX:
552 return AArch64::LDRSui;
553 case AArch64::LDRSWroX:
554 return AArch64::LDRSWui;
555 case AArch64::LDRDroX:
556 return AArch64::LDRDui;
557 case AArch64::LDRXroX:
558 return AArch64::LDRXui;
559 case AArch64::LDRQroX:
560 return AArch64::LDRQui;
// Maps an unsigned-immediate or unscaled load/store (and pair / tag-store)
// opcode to its post-indexed writeback form, e.g. STRXui -> STRXpost.
568 case AArch64::STRBui:
569 return AArch64::STRBpost;
570 case AArch64::STRHui:
571 return AArch64::STRHpost;
572 case AArch64::STRSui:
573 case AArch64::STURSi:
574 return AArch64::STRSpost;
575 case AArch64::STRDui:
576 case AArch64::STURDi:
577 return AArch64::STRDpost;
578 case AArch64::STRQui:
579 case AArch64::STURQi:
580 return AArch64::STRQpost;
581 case AArch64::STRBBui:
582 return AArch64::STRBBpost;
583 case AArch64::STRHHui:
584 return AArch64::STRHHpost;
585 case AArch64::STRWui:
586 case AArch64::STURWi:
587 return AArch64::STRWpost;
588 case AArch64::STRXui:
589 case AArch64::STURXi:
590 return AArch64::STRXpost;
591 case AArch64::LDRBui:
592 return AArch64::LDRBpost;
593 case AArch64::LDRHui:
594 return AArch64::LDRHpost;
595 case AArch64::LDRSui:
596 case AArch64::LDURSi:
597 return AArch64::LDRSpost;
598 case AArch64::LDRDui:
599 case AArch64::LDURDi:
600 return AArch64::LDRDpost;
601 case AArch64::LDRQui:
602 case AArch64::LDURQi:
603 return AArch64::LDRQpost;
604 case AArch64::LDRBBui:
605 return AArch64::LDRBBpost;
606 case AArch64::LDRHHui:
607 return AArch64::LDRHHpost;
608 case AArch64::LDRWui:
609 case AArch64::LDURWi:
610 return AArch64::LDRWpost;
611 case AArch64::LDRXui:
612 case AArch64::LDURXi:
613 return AArch64::LDRXpost;
614 case AArch64::LDRSWui:
615 return AArch64::LDRSWpost;
// Pair and tag-store cases (case labels partially missing in extraction).
617 return AArch64::LDPSpost;
618 case AArch64::LDPSWi:
619 return AArch64::LDPSWpost;
621 return AArch64::LDPDpost;
623 return AArch64::LDPQpost;
625 return AArch64::LDPWpost;
627 return AArch64::LDPXpost;
629 return AArch64::STPSpost;
631 return AArch64::STPDpost;
633 return AArch64::STPQpost;
635 return AArch64::STPWpost;
637 return AArch64::STPXpost;
639 return AArch64::STGPostIndex;
641 return AArch64::STZGPostIndex;
643 return AArch64::ST2GPostIndex;
644 case AArch64::STZ2Gi:
645 return AArch64::STZ2GPostIndex;
647 return AArch64::STGPpost;
// Given a pre-indexed instruction opcode (switch subject not visible) and a
// second instruction MI, returns whether MI's opcode is the matching scaled
// (ui) or unscaled (i) form that can be paired with it.
654 unsigned OpcB =
MI.getOpcode();
659 case AArch64::STRSpre:
660 return (OpcB == AArch64::STRSui) || (OpcB == AArch64::STURSi);
661 case AArch64::STRDpre:
662 return (OpcB == AArch64::STRDui) || (OpcB == AArch64::STURDi);
663 case AArch64::STRQpre:
664 return (OpcB == AArch64::STRQui) || (OpcB == AArch64::STURQi);
665 case AArch64::STRWpre:
666 return (OpcB == AArch64::STRWui) || (OpcB == AArch64::STURWi);
667 case AArch64::STRXpre:
668 return (OpcB == AArch64::STRXui) || (OpcB == AArch64::STURXi);
669 case AArch64::LDRSpre:
670 return (OpcB == AArch64::LDRSui) || (OpcB == AArch64::LDURSi);
671 case AArch64::LDRDpre:
672 return (OpcB == AArch64::LDRDui) || (OpcB == AArch64::LDURDi);
673 case AArch64::LDRQpre:
674 return (OpcB == AArch64::LDRQui) || (OpcB == AArch64::LDURQi);
675 case AArch64::LDRWpre:
676 return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi);
677 case AArch64::LDRXpre:
678 return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);
679 case AArch64::LDRSWpre:
680 return (OpcB == AArch64::LDRSWui) || (OpcB == AArch64::LDURSWi);
// Assorted helper fragments: an out-parameter offset-range query, an
// operand accessor asserting a valid paired-register index, and the
// containment check used by load-from-store promotion (the loaded bytes
// must lie entirely within the stored bytes).
686 int &MinOffset,
int &MaxOffset) {
704 unsigned PairedRegOp = 0) {
705 assert(PairedRegOp < 2 &&
"Unexpected register operand idx.");
711 return MI.getOperand(Idx);
720 int UnscaledStOffset =
724 int UnscaledLdOffset =
728 return (UnscaledStOffset <= UnscaledLdOffset) &&
729 (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
// Predicate fragments: a store is a promotable zero store when it is a
// 32-bit or narrow store (of the zero register — condition truncated);
// below it, the opcode list of loads eligible for store-forwarding.
733 unsigned Opc =
MI.getOpcode();
734 return (
Opc == AArch64::STRWui ||
Opc == AArch64::STURWi ||
735 isNarrowStore(
Opc)) &&
740 switch (
MI.getOpcode()) {
744 case AArch64::LDRBBui:
745 case AArch64::LDRHHui:
746 case AArch64::LDRWui:
747 case AArch64::LDRXui:
749 case AArch64::LDURBBi:
750 case AArch64::LDURHHi:
751 case AArch64::LDURWi:
752 case AArch64::LDURXi:
// Opcode list: loads/stores whose base-register add/sub update can be
// folded into a pre-/post-indexed form (scaled, unscaled, tag stores and
// pair instructions; list truncated by the extraction).
758 unsigned Opc =
MI.getOpcode();
763 case AArch64::STRBui:
764 case AArch64::STRHui:
765 case AArch64::STRSui:
766 case AArch64::STRDui:
767 case AArch64::STRQui:
768 case AArch64::STRXui:
769 case AArch64::STRWui:
770 case AArch64::STRHHui:
771 case AArch64::STRBBui:
772 case AArch64::LDRBui:
773 case AArch64::LDRHui:
774 case AArch64::LDRSui:
775 case AArch64::LDRDui:
776 case AArch64::LDRQui:
777 case AArch64::LDRXui:
778 case AArch64::LDRWui:
779 case AArch64::LDRHHui:
780 case AArch64::LDRBBui:
784 case AArch64::STZ2Gi:
787 case AArch64::STURSi:
788 case AArch64::STURDi:
789 case AArch64::STURQi:
790 case AArch64::STURWi:
791 case AArch64::STURXi:
792 case AArch64::LDURSi:
793 case AArch64::LDURDi:
794 case AArch64::LDURQi:
795 case AArch64::LDURWi:
796 case AArch64::LDURXi:
799 case AArch64::LDPSWi:
// Opcode list: register-offset (roX) loads grouped by access size, eligible
// for folding a constant index into an immediate offset; ORRWrs/ADDWri
// below are the immediate-materializing instructions recognized as sources.
828 unsigned Opc =
MI.getOpcode();
834 case AArch64::LDRBroX:
835 case AArch64::LDRBBroX:
836 case AArch64::LDRSBXroX:
837 case AArch64::LDRSBWroX:
840 case AArch64::LDRHroX:
841 case AArch64::LDRHHroX:
842 case AArch64::LDRSHXroX:
843 case AArch64::LDRSHWroX:
846 case AArch64::LDRWroX:
847 case AArch64::LDRSroX:
848 case AArch64::LDRSWroX:
851 case AArch64::LDRDroX:
852 case AArch64::LDRXroX:
855 case AArch64::LDRQroX:
865 case AArch64::ORRWrs:
866 case AArch64::ADDWri:
// Merges two adjacent narrow zero stores (I and MergeMI) into one store of
// twice the width storing WZR/XZR, then erases both originals.
// NOTE(review): heavily truncated — statements are split and interleaved;
// only the visible steps are annotated.
874 const LdStPairFlags &Flags) {
876 "Expected promotable zero stores.");
884 if (NextI == MergeMI)
887 unsigned Opc =
I->getOpcode();
888 unsigned MergeMIOpc = MergeMI->getOpcode();
// Scaled instructions address in units of the access size.
889 bool IsScaled = !
TII->hasUnscaledLdStOffset(
Opc);
890 bool IsMergedMIScaled = !
TII->hasUnscaledLdStOffset(MergeMIOpc);
891 int OffsetStride = IsScaled ?
TII->getMemScale(*
I) : 1;
892 int MergeMIOffsetStride = IsMergedMIScaled ?
TII->getMemScale(*MergeMI) : 1;
894 bool MergeForward =
Flags.getMergeForward();
900 const MachineOperand &BaseRegOp =
902 : AArch64InstrInfo::getLdStBaseOp(*
I);
// The merged store uses the lower of the two byte offsets.
905 int64_t IOffsetInBytes =
907 int64_t MIOffsetInBytes =
912 if (IOffsetInBytes > MIOffsetInBytes)
913 OffsetImm = MIOffsetInBytes;
915 OffsetImm = IOffsetInBytes;
// Rescale the byte offset for a scaled wide opcode.
920 if (!
TII->hasUnscaledLdStOffset(NewOpcode)) {
921 int NewOffsetStride =
TII->getMemScale(NewOpcode);
922 assert(((OffsetImm % NewOffsetStride) == 0) &&
923 "Offset should be a multiple of the store memory scale");
924 OffsetImm = OffsetImm / NewOffsetStride;
930 MachineInstrBuilder MIB;
932 .
addReg(isNarrowStore(
Opc) ? AArch64::WZR : AArch64::XZR)
936 .setMIFlags(
I->mergeFlagsWith(*MergeMI));
939 LLVM_DEBUG(
dbgs() <<
"Creating wider store. Replacing instructions:\n ");
948 I->eraseFromParent();
949 MergeMI->eraseFromParent();
// Helper fragments: a def-overlap predicate on machine operands, live
// register-unit bookkeeping (kill removes, non-kill adds), and recording a
// debug instruction number -> operand index mapping for the merged
// instruction (used to preserve debug info after merging).
959 auto MBB =
MI.getParent();
967 return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
968 TRI->regsOverlap(MOP.getReg(), DefReg);
982 if (MOP.isReg() && MOP.isKill())
986 if (MOP.isReg() && !MOP.isKill())
987 Units.
addReg(MOP.getReg());
994 unsigned InstrNumToSet,
1001 unsigned OperandNo = 0;
1002 bool RegFound =
false;
1003 for (
const auto Op : MergedInstr.
operands()) {
1004 if (
Op.getReg() ==
Reg) {
1013 {InstrNumToSet, OperandNo});
// Merges two single loads/stores (I and Paired) into one pair instruction,
// handling: optional register renaming (RenameReg), scaled/unscaled offset
// reconciliation, sign-extend rewrites via KILL + SBFMXri, SVE Z-register
// to Q-register operand rewriting for LDR_ZXI/STR_ZXI, debug instruction
// number preservation, and implicit-def copying; erases both originals.
// NOTE(review): heavily truncated — many statements are split mid-expression
// and interior lines are missing; comments mark only the visible phases.
1019 const LdStPairFlags &Flags) {
1026 if (NextI == Paired)
1029 int SExtIdx =
Flags.getSExtIdx();
1032 bool IsUnscaled =
TII->hasUnscaledLdStOffset(
Opc);
1033 int OffsetStride = IsUnscaled ?
TII->getMemScale(*
I) : 1;
1035 bool MergeForward =
Flags.getMergeForward();
1037 std::optional<MCPhysReg> RenameReg =
Flags.getRenameReg();
1040 DefinedInBB.
addReg(*RenameReg);
// Find a sub/super-register of the rename register in a given class.
1044 auto GetMatchingSubReg =
1045 [
this, RenameReg](
const TargetRegisterClass *
C) ->
MCPhysReg {
1047 TRI->sub_and_superregs_inclusive(*RenameReg)) {
1048 if (
C->contains(SubOrSuper))
// Rewrite all uses/defs of the register being renamed in the scanned MIs.
1054 std::function<bool(MachineInstr &,
bool)> UpdateMIs =
1055 [
this, RegToRename, GetMatchingSubReg, MergeForward](MachineInstr &
MI,
1058 bool SeenDef =
false;
1060 MachineOperand &MOP =
MI.getOperand(
OpIdx);
1064 (!MergeForward || !SeenDef ||
1066 TRI->regsOverlap(MOP.
getReg(), RegToRename)) {
1069 "Need renamable operands");
1071 if (
const TargetRegisterClass *RC =
1073 MatchingReg = GetMatchingSubReg(RC);
1077 MatchingReg = GetMatchingSubReg(
1078 TRI->getMinimalPhysRegClass(MOP.
getReg()));
1086 MachineOperand &MOP =
MI.getOperand(
OpIdx);
1088 TRI->regsOverlap(MOP.
getReg(), RegToRename)) {
1091 "Need renamable operands");
1093 if (
const TargetRegisterClass *RC =
1095 MatchingReg = GetMatchingSubReg(RC);
1097 MatchingReg = GetMatchingSubReg(
1098 TRI->getMinimalPhysRegClass(MOP.
getReg()));
1099 assert(MatchingReg != AArch64::NoRegister &&
1100 "Cannot find matching regs for renaming");
1109 TRI, UINT32_MAX, UpdateMIs);
// Sanity check: the rename register is not used between the paired insns.
1122 RegToCheck = RegToRename;
1125 MergeForward ? std::next(
I) :
I,
1126 MergeForward ? std::next(Paired) : Paired))
1128 [
this, RegToCheck](
const MachineOperand &MOP) {
1129 return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1131 !TRI->regsOverlap(MOP.getReg(), RegToCheck);
1133 "Rename register used between paired instruction, trashing the "
1143 const MachineOperand &BaseRegOp =
1145 : AArch64InstrInfo::getLdStBaseOp(*
I);
// Reconcile scaled vs. unscaled offsets between the two instructions.
1149 bool PairedIsUnscaled =
TII->hasUnscaledLdStOffset(Paired->getOpcode());
1150 if (IsUnscaled != PairedIsUnscaled) {
1154 int MemSize =
TII->getMemScale(*Paired);
1155 if (PairedIsUnscaled) {
1158 assert(!(PairedOffset %
TII->getMemScale(*Paired)) &&
1159 "Offset should be a multiple of the stride!");
1160 PairedOffset /= MemSize;
1162 PairedOffset *= MemSize;
// Decide operand order (which instruction supplies Rt vs. Rt2).
1169 MachineInstr *RtMI, *Rt2MI;
1170 if (
Offset == PairedOffset + OffsetStride &&
1178 SExtIdx = (SExtIdx + 1) % 2;
1186 assert(!(OffsetImm %
TII->getMemScale(*RtMI)) &&
1187 "Unscaled offset cannot be scaled.");
1188 OffsetImm /=
TII->getMemScale(*RtMI);
1192 MachineInstrBuilder MIB;
1197 MachineOperand &PairedRegOp = RtMI == &*Paired ? RegOp0 : RegOp1;
1199 if (RegOp0.
isUse()) {
1200 if (!MergeForward) {
// Clear stale kill flags on the moved register operand.
1211 for (
auto It = std::next(
I); It != Paired && PairedRegOp.
isKill(); ++It)
1212 if (It->readsRegister(PairedRegOp.
getReg(),
TRI))
1220 for (MachineInstr &
MI :
1221 make_range(std::next(
I->getIterator()), Paired->getIterator()))
1238 .setMIFlags(
I->mergeFlagsWith(*Paired));
1243 dbgs() <<
"Creating pair load/store. Replacing instructions:\n ");
// Sign-extended load in the pair: rewrite via KILL + SBFMXri (SXTW).
1248 if (SExtIdx != -1) {
1253 MachineOperand &DstMO = MIB->
getOperand(SExtIdx);
1258 Register DstRegW =
TRI->getSubReg(DstRegX, AArch64::sub_32);
1267 MachineInstrBuilder MIBKill =
1268 BuildMI(*
MBB, InsertionPoint,
DL,
TII->get(TargetOpcode::KILL), DstRegW)
1270 .
addReg(DstRegX, RegState::Define);
1273 MachineInstrBuilder MIBSXTW =
1274 BuildMI(*
MBB, InsertionPoint,
DL,
TII->get(AArch64::SBFMXri), DstRegX)
// Preserve debug instruction numbers across the rewrite.
1314 if (
I->peekDebugInstrNum()) {
1322 unsigned NewInstrNum;
1323 if (DstRegX ==
I->getOperand(0).getReg()) {
1332 if (Paired->peekDebugInstrNum()) {
1340 unsigned NewInstrNum;
1341 if (DstRegX == Paired->getOperand(0).getReg()) {
1354 }
else if (
Opc == AArch64::LDR_ZXI ||
Opc == AArch64::STR_ZXI) {
// SVE fill/spill pairing: rewrite Z-register operands as Q registers.
1360 AArch64::ZPRRegClass.contains(MOp1.
getReg()) &&
"Invalid register.");
1361 MOp0.
setReg(AArch64::Q0 + (MOp0.
getReg() - AArch64::Z0));
1362 MOp1.
setReg(AArch64::Q0 + (MOp1.
getReg() - AArch64::Z0));
1393 if (
I->peekDebugInstrNum()) {
1398 if (Paired->peekDebugInstrNum()) {
// Copy implicit defs from both originals onto the merged instruction.
1418 SmallSetVector<Register, 4>
Ops;
1419 for (
const MachineOperand &MO :
1421 if (MO.isReg() && MO.isImplicit() && MO.isDef())
1422 Ops.insert(MO.getReg());
1423 for (
const MachineOperand &MO :
1425 if (MO.isReg() && MO.isImplicit() && MO.isDef())
1426 Ops.insert(MO.getReg());
1428 MIB.addDef(
Op, RegState::Implicit);
1430 CopyImplicitOps(
I, Paired);
1433 I->eraseFromParent();
1434 Paired->eraseFromParent();
// Replaces a load (LoadI) that reads bytes previously written by a store
// (StoreI) with a register move or bit-extract from the stored register:
// same-size -> ORR (move); smaller load at same offset -> ANDri mask;
// otherwise -> UBFM extract. Erases the load.
// NOTE(review): truncated; only visible steps are annotated.
1443 next_nodbg(LoadI, LoadI->getParent()->end());
1445 int LoadSize =
TII->getMemScale(*LoadI);
1446 int StoreSize =
TII->getMemScale(*StoreI);
1450 bool IsStoreXReg =
TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);
1453 TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
1454 "Unexpected RegClass");
1456 MachineInstr *BitExtMI;
1457 if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
// Same register, full width: the load is a no-op; drop kill flags on the
// store register between the two instructions and erase the load.
1460 if (StRt == LdRt && LoadSize == 8) {
1461 for (MachineInstr &
MI :
make_range(StoreI->getIterator(),
1462 LoadI->getIterator())) {
1463 if (
MI.killsRegister(StRt,
TRI)) {
1464 MI.clearRegisterKills(StRt,
TRI);
1471 LoadI->eraseFromParent();
// Full-width, different register: emit ORR (register move) from StRt.
1476 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1477 TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
1478 .
addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
// Narrower load: extract the loaded bytes from the stored register.
1487 bool IsUnscaled =
TII->hasUnscaledLdStOffset(*LoadI);
1488 assert(IsUnscaled ==
TII->hasUnscaledLdStOffset(*StoreI) &&
1489 "Unsupported ld/st match");
1490 assert(LoadSize <= StoreSize &&
"Invalid load size");
1491 int UnscaledLdOffset =
1495 int UnscaledStOffset =
1499 int Width = LoadSize * 8;
1502 LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))
1505 assert((UnscaledLdOffset >= UnscaledStOffset &&
1506 (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
// Immr/Imms select the bit range of the load within the stored value.
1509 int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
1510 int Imms = Immr + Width - 1;
1511 if (UnscaledLdOffset == UnscaledStOffset) {
1512 uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12)
1518 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1519 TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
1524 }
else if (IsStoreXReg && Imms == 31) {
1527 assert(Immr <= Imms &&
"Expected LSR alias of UBFM");
1528 BitExtMI =
BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1529 TII->get(AArch64::UBFMWri),
1530 TRI->getSubReg(DestReg, AArch64::sub_32))
1531 .
addReg(
TRI->getSubReg(StRt, AArch64::sub_32))
1537 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1538 TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
// Clear kill flags on StRt between store and load, then erase the load.
1548 for (MachineInstr &
MI :
make_range(StoreI->getIterator(),
1550 if (
MI.killsRegister(StRt,
TRI)) {
1551 MI.clearRegisterKills(StRt,
TRI);
1566 LoadI->eraseFromParent();
// Scans backwards from a load for a store to the same address whose value
// can be forwarded; also includes an offset-stride alignment check
// fragment and a power-of-two round-up helper above the function header.
1576 if (
Offset % OffsetStride)
1588 return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
1605bool AArch64LoadStoreOpt::findMatchingStore(
1610 MachineInstr &LoadMI = *
I;
1620 ModifiedRegUnits.
clear();
1621 UsedRegUnits.
clear();
1626 MachineInstr &
MI = *
MBBI;
// Transient (debug/meta) instructions do not count toward the scan limit.
1630 if (!
MI.isTransient())
// Stop if the base register was modified since the store.
1656 if (!ModifiedRegUnits.
available(BaseReg))
// Decides whether FirstMI and MI are candidates to merge or pair: rejects
// ordered memory refs, suppressed pairs, SVE fill/spill mixes, invalid
// opcodes, and narrow stores; allows a sign-extend mismatch via SExtIdx.
// Below: canRenameMOP fragment rejecting operands whose register class has
// multiple disjunct subregisters, and implicit-kill overlap checks.
1674 LdStPairFlags &Flags,
1677 if (
MI.hasOrderedMemoryRef() ||
TII->isLdStPairSuppressed(
MI))
1682 !
TII->isLdStPairSuppressed(FirstMI) &&
1683 "FirstMI shouldn't get here if either of these checks are true.");
1690 unsigned OpcB =
MI.getOpcode();
// SVE fill/spill only pairs with another SVE fill/spill (checked earlier);
// a mix with anything else is rejected here.
1698 if (OpcA == AArch64::LDR_ZXI || OpcA == AArch64::STR_ZXI ||
1699 OpcB == AArch64::LDR_ZXI || OpcB == AArch64::STR_ZXI)
1707 bool IsValidLdStrOpc, PairIsValidLdStrOpc;
1709 assert(IsValidLdStrOpc &&
1710 "Given Opc should be a Load or Store with an immediate");
// Record which of the two instructions is the sign-extending one.
1713 Flags.setSExtIdx(NonSExtOpc == OpcA ? 1 : 0);
1719 if (!PairIsValidLdStrOpc)
1724 if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
1727 TII->getMemScale(FirstMI) ==
TII->getMemScale(
MI);
1736 return TII->hasUnscaledLdStOffset(OpcA) !=
TII->hasUnscaledLdStOffset(OpcB) &&
// canRenameMOP fragment: registers whose class decomposes into multiple
// disjunct subregisters (tuples) cannot be safely renamed.
1745 auto *RegClass =
TRI->getMinimalPhysRegClass(MOP.
getReg());
1752 if (RegClass->HasDisjunctSubRegs && RegClass->CoveredBySubRegs &&
1753 (
TRI->getSubRegisterClass(RegClass, AArch64::dsub0) ||
1754 TRI->getSubRegisterClass(RegClass, AArch64::qsub0) ||
1755 TRI->getSubRegisterClass(RegClass, AArch64::zsub0))) {
1758 <<
" Cannot rename operands with multiple disjunct subregisters ("
1769 return TRI->isSuperOrSubRegisterEq(
1792 return MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
1793 MOP.isImplicit() && MOP.isKill() &&
1794 TRI->regsOverlap(RegToRename, MOP.getReg());
// Rename feasibility scans: walk instructions to verify every use/def of
// the register to rename is itself renamable (no pseudos, no frame-setup),
// then search for a free physical register that is not reserved, not
// callee-saved (in any sub/super register), and usable for every required
// register class.
1800 bool FoundDef =
false;
1831 if (
MI.isPseudo()) {
1832 LLVM_DEBUG(
dbgs() <<
" Cannot rename pseudo/bundle instruction\n");
1836 for (
auto &MOP :
MI.operands()) {
1838 !
TRI->regsOverlap(MOP.
getReg(), RegToRename))
1848 for (
auto &MOP :
MI.operands()) {
1850 !
TRI->regsOverlap(MOP.
getReg(), RegToRename))
1867 LLVM_DEBUG(
dbgs() <<
" Did not find definition for register in BB\n");
1895 LLVM_DEBUG(dbgs() <<
"Checking " << MI);
// Frame-setup instructions must not be touched by renaming.
1897 if (MI.getFlag(MachineInstr::FrameSetup)) {
1898 LLVM_DEBUG(dbgs() <<
" Cannot rename framesetup instructions "
1903 for (
auto &MOP :
MI.operands()) {
1904 if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1905 !TRI->regsOverlap(MOP.getReg(), RegToRename))
1907 if (!canRenameMOP(MOP, TRI)) {
1908 LLVM_DEBUG(dbgs() <<
" Cannot rename " << MOP <<
" in " << MI);
// Candidate register filters for the rename search.
1934 auto AnySubOrSuperRegCalleePreserved = [&MF,
TRI](
MCPhysReg PR) {
1935 return any_of(
TRI->sub_and_superregs_inclusive(PR),
1937 return TRI->isCalleeSavedPhysReg(SubOrSuper, MF);
1943 auto CanBeUsedForAllClasses = [&RequiredClasses,
TRI](
MCPhysReg PR) {
1946 TRI->sub_and_superregs_inclusive(PR),
1947 [
C](
MCPhysReg SubOrSuper) { return C->contains(SubOrSuper); });
1951 auto *RegClass =
TRI->getMinimalPhysRegClass(
Reg);
1954 !
RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) &&
1955 CanBeUsedForAllClasses(PR)) {
1963 <<
TRI->getRegClassName(RegClass) <<
"\n");
1964 return std::nullopt;
// Finds a rename register when both instructions of a prospective pair use
// the same load/store register; caches the renamability answer in
// MaybeCanRename and delegates the actual search.
1975 std::optional<MCPhysReg> RenameReg;
1984 const bool IsLoad = FirstMI.
mayLoad();
1986 if (!MaybeCanRename) {
1989 RequiredClasses,
TRI)};
1995 if (*MaybeCanRename) {
1997 RequiredClasses,
TRI);
// Core pair-candidate search: scans forward up to Limit instructions from
// FirstMI for a mergeable load/store with the same base register and an
// adjacent offset, tracking modified/used register units and memory
// instructions for alias checks; decides merge direction and optional
// register renaming via Flags.
// NOTE(review): heavily truncated; comments mark only the visible phases.
2006 LdStPairFlags &Flags,
unsigned Limit,
2007 bool FindNarrowMerge) {
2011 MachineInstr &FirstMI = *
I;
2015 bool IsUnscaled =
TII->hasUnscaledLdStOffset(FirstMI);
2019 int OffsetStride = IsUnscaled ?
TII->getMemScale(FirstMI) : 1;
2022 std::optional<bool> MaybeCanRename;
2024 MaybeCanRename = {
false};
2026 SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;
2027 LiveRegUnits UsedInBetween;
2030 Flags.clearRenameReg();
2034 ModifiedRegUnits.
clear();
2035 UsedRegUnits.
clear();
// Memory instructions seen so far, for the alias check.
2038 SmallVector<MachineInstr *, 4> MemInsns;
2043 MachineInstr &
MI = *
MBBI;
2050 if (!
MI.isTransient())
2053 Flags.setSExtIdx(-1);
2056 assert(
MI.mayLoadOrStore() &&
"Expected memory operation.");
// Normalize the candidate's offset to FirstMI's scaling.
2065 bool MIIsUnscaled =
TII->hasUnscaledLdStOffset(
MI);
2066 if (IsUnscaled != MIIsUnscaled) {
2070 int MemSize =
TII->getMemScale(
MI);
2074 if (MIOffset % MemSize) {
2080 MIOffset /= MemSize;
2082 MIOffset *= MemSize;
2088 if (BaseReg == MIBaseReg) {
// Writeback (pre-index) candidate checks.
2094 bool IsOutOfBounds = MIOffset !=
TII->getMemScale(
MI);
2095 bool IsBaseRegUsed = !UsedRegUnits.
available(
2097 bool IsBaseRegModified = !ModifiedRegUnits.
available(
2102 bool IsMIRegTheSame =
2105 if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
// The two offsets must be exactly one stride apart.
2113 if ((
Offset != MIOffset + OffsetStride) &&
2114 (
Offset + OffsetStride != MIOffset)) {
2123 if (FindNarrowMerge) {
2128 if ((!IsUnscaled &&
alignTo(MinOffset, 2) != MinOffset) ||
2145 <<
"keep looking.\n");
// Unscaled pairs must still satisfy the pair instruction's alignment.
2151 if (IsUnscaled && (
alignTo(MinOffset, OffsetStride) != MinOffset)) {
2156 <<
"Offset doesn't fit due to alignment requirements, "
2157 <<
"keep looking.\n");
2168 if (!ModifiedRegUnits.
available(BaseReg))
2171 const bool SameLoadReg =
MayLoad &&
TRI->isSuperOrSubRegisterEq(
// Try merging the second instruction into the first.
2178 bool RtNotModified =
2180 bool RtNotUsed = !(
MI.mayLoad() && !SameLoadReg &&
2183 LLVM_DEBUG(
dbgs() <<
"Checking, can combine 2nd into 1st insn:\n"
2185 << (RtNotModified ?
"true" :
"false") <<
"\n"
2187 << (RtNotUsed ?
"true" :
"false") <<
"\n");
2189 if (RtNotModified && RtNotUsed && !
mayAlias(
MI, MemInsns, AA)) {
// Same destination register: try renaming to enable the pair.
2194 std::optional<MCPhysReg> RenameReg =
2196 Reg, DefinedInBB, UsedInBetween,
2197 RequiredClasses,
TRI);
2203 <<
"keep looking.\n");
2206 Flags.setRenameReg(*RenameReg);
2209 Flags.setMergeForward(
false);
2211 Flags.clearRenameReg();
// Try merging the first instruction into the second (forward merge).
2222 LLVM_DEBUG(
dbgs() <<
"Checking, can combine 1st into 2nd insn:\n"
2224 <<
"' not modified: "
2225 << (RtNotModified ?
"true" :
"false") <<
"\n");
2227 if (RtNotModified && !
mayAlias(FirstMI, MemInsns, AA)) {
2229 Flags.setMergeForward(
true);
2230 Flags.clearRenameReg();
2235 MaybeCanRename, FirstMI,
MI,
Reg, DefinedInBB, UsedInBetween,
2236 RequiredClasses,
TRI);
2238 Flags.setMergeForward(
true);
2239 Flags.setRenameReg(*RenameReg);
2240 MBBIWithRenameReg =
MBBI;
2243 LLVM_DEBUG(
dbgs() <<
"Unable to combine these instructions due to "
2244 <<
"interference in between, keep looking.\n");
2248 if (
Flags.getRenameReg())
2249 return MBBIWithRenameReg;
// Stop the scan when the base register is clobbered.
2263 if (!ModifiedRegUnits.
available(BaseReg)) {
2269 if (
MI.mayLoadOrStore())
// Fragment: locates the CFI instruction attached to an SP-adjusting
// SUBXri/ADDXri so it can be moved along with the update instruction.
// NOTE(review): the scan-termination lines above belong to the previous
// function; boundaries are blurred by the extraction.
2277 assert((
MI.getOpcode() == AArch64::SUBXri ||
2278 MI.getOpcode() == AArch64::ADDXri) &&
2279 "Expected a register update instruction");
2280 auto End =
MI.getParent()->end();
2281 if (MaybeCFI == End ||
2282 MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||
2285 MI.getOperand(0).getReg() != AArch64::SP)
2289 unsigned CFIIndex = MaybeCFI->getOperand(0).getCFIIndex();
// Folds a base-register ADDXri/SUBXri update into the memory instruction,
// producing the pre-/post-indexed form; bails out (std::nullopt) if a CFI
// instruction sits between the update and the insertion point. Erases both
// originals on success.
2300std::optional<MachineBasicBlock::iterator> AArch64LoadStoreOpt::mergeUpdateInsn(
2302 bool IsForward,
bool IsPreIdx,
bool MergeEither) {
2303 assert((Update->getOpcode() == AArch64::ADDXri ||
2304 Update->getOpcode() == AArch64::SUBXri) &&
2305 "Unexpected base register update instruction to merge!");
// Refuse to move across an intervening CFI instruction.
2321 if (std::any_of(std::next(CFI),
I, [](
const auto &Insn) {
2322 return Insn.getOpcode() == TargetOpcode::CFI_INSTRUCTION;
2324 return std::nullopt;
2326 MachineBasicBlock *
MBB = InsertPt->getParent();
2335 if (NextI == Update)
// SUB updates fold as a negated offset.
2338 int Value = Update->getOperand(2).getImm();
2340 "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
2341 if (Update->getOpcode() == AArch64::SUBXri)
2346 MachineInstrBuilder MIB;
2347 int Scale, MinOffset, MaxOffset;
2351 MIB =
BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
2353 .
add(Update->getOperand(0))
2361 MIB =
BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
2363 .
add(Update->getOperand(0))
2388 I->eraseFromParent();
2389 Update->eraseFromParent();
// Folds a constant index built with MOVZWi/MOVKWi into the memory
// instruction: emits an ADDXri for the high part and rewrites the access
// with an immediate-offset opcode; erases the three original instructions.
2397 unsigned Offset,
int Scale) {
2398 assert((Update->getOpcode() == AArch64::MOVKWi) &&
2399 "Unexpected const mov instruction to merge!");
2403 MachineInstr &MemMI = *
I;
// Low bits that fit the scaled unsigned-immediate field.
2404 unsigned Mask = (1 << 12) * Scale - 1;
2409 MachineInstrBuilder AddMIB, MemMIB;
2413 BuildMI(*
I->getParent(),
I,
I->getDebugLoc(),
TII->get(AArch64::ADDXri))
2421 MemMIB =
BuildMI(*
I->getParent(),
I,
I->getDebugLoc(),
TII->get(NewOpc))
2429 ++NumConstOffsetFolded;
2444 I->eraseFromParent();
2445 PrevI->eraseFromParent();
2446 Update->eraseFromParent();
// Returns whether MI is an ADDXri/SUBXri that updates MemMI's base register
// by an amount representable as a pre-/post-index immediate: same register
// as both source and destination, an immediate operand, a multiple of the
// access scale, and within the indexed-form offset range.
2451bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
2453 unsigned BaseReg,
int Offset) {
2454 switch (
MI.getOpcode()) {
2457 case AArch64::SUBXri:
2458 case AArch64::ADDXri:
// Must be an immediate-form add/sub (not shifted-register etc.).
2461 if (!
MI.getOperand(2).isImm())
2469 if (
MI.getOperand(0).getReg() != BaseReg ||
2470 MI.getOperand(1).getReg() != BaseReg)
2473 int UpdateOffset =
MI.getOperand(2).getImm();
2474 if (
MI.getOpcode() == AArch64::SUBXri)
2475 UpdateOffset = -UpdateOffset;
2479 int Scale, MinOffset, MaxOffset;
2481 if (UpdateOffset % Scale != 0)
// The scaled offset must fit the indexed addressing immediate range.
2485 int ScaledOffset = UpdateOffset / Scale;
2486 if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)
// Returns whether MI is a MOVKWi building the upper half of MemMI's index
// register, with a preceding MOVZWi providing the low half, and the
// combined constant small enough to fold (top byte clear).
2498bool AArch64LoadStoreOpt::isMatchingMovConstInsn(MachineInstr &MemMI,
2504 if (
MI.getOpcode() == AArch64::MOVKWi &&
2505 TRI->isSuperOrSubRegisterEq(IndexReg,
MI.getOperand(1).getReg())) {
2514 MachineInstr &MovzMI = *
MBBI;
2516 if (MovzMI.
getOpcode() == AArch64::MOVZWi &&
// Combine MOVK's shifted immediate with MOVZ's low bits.
2519 unsigned High =
MI.getOperand(2).getImm() <<
MI.getOperand(3).getImm();
2522 return Offset >> 24 == 0;
// Scans forward (optionally across single-predecessor successor blocks)
// for an add/sub that updates the memory instruction's base register and
// can be folded as a post-index update; gives up when the base register is
// modified/used or the scan limit is reached. SP-based accesses are also
// cut off at intervening memory operations.
2531 MachineInstr &MemMI = *
I;
2536 TII->getMemScale(MemMI);
2541 if (MIUnscaledOffset != UnscaledOffset)
// A load that writes the base register cannot take a writeback update.
2552 for (
unsigned i = 0, e = IsPairedInsn ? 2 : 1; i !=
e; ++i) {
2554 if (DestReg == BaseReg ||
TRI->isSubRegister(BaseReg, DestReg))
2561 ModifiedRegUnits.
clear();
2562 UsedRegUnits.
clear();
2568 const bool BaseRegSP =
BaseReg == AArch64::SP;
2577 MachineBasicBlock *CurMBB =
I->getParent();
2584 MachineInstr &
MI = *
MBBI;
2588 if (!
MI.isTransient())
2592 if (isMatchingUpdateInsn(*
I,
MI, BaseReg, UnscaledOffset))
2603 if (!ModifiedRegUnits.
available(BaseReg) ||
2605 (BaseRegSP &&
MBBI->mayLoadOrStore()))
2609 if (!VisitSucc || Limit <=
Count)
// Continue into a unique live-in successor if there is exactly one.
2614 MachineBasicBlock *SuccToVisit =
nullptr;
2615 unsigned LiveSuccCount = 0;
2616 for (MachineBasicBlock *Succ : CurMBB->
successors()) {
2617 for (MCRegAliasIterator AI(BaseReg,
TRI,
true); AI.isValid(); ++AI) {
2618 if (Succ->isLiveIn(*AI)) {
2619 if (LiveSuccCount++)
2621 if (Succ->pred_size() == 1)
2629 CurMBB = SuccToVisit;
// Scans backward for an add/sub updating the base register that can be
// folded as a pre-index update; respects the red zone for SP-based
// accesses (an SP pre-increment must not be folded past memory accesses
// that relied on the red zone) and tracks whether either instruction of
// a pair may be the merge point (MergeEither).
2640 MachineInstr &MemMI = *
I;
2642 MachineFunction &MF = *MemMI.
getMF();
2650 : AArch64::NoRegister};
// Loads that write the base register cannot take a writeback update.
2659 for (
unsigned i = 0, e = IsPairedInsn ? 2 : 1; i !=
e; ++i)
2660 if (DestReg[i] == BaseReg ||
TRI->isSubRegister(BaseReg, DestReg[i]))
2664 const bool BaseRegSP =
BaseReg == AArch64::SP;
2672 const AArch64Subtarget &Subtarget = MF.
getSubtarget<AArch64Subtarget>();
2673 unsigned RedZoneSize =
2678 ModifiedRegUnits.
clear();
2679 UsedRegUnits.
clear();
2681 bool MemAccessBeforeSPPreInc =
false;
2685 MachineInstr &
MI = *
MBBI;
2689 if (!
MI.isTransient())
2693 if (isMatchingUpdateInsn(*
I,
MI, BaseReg,
Offset)) {
// Folding would move the SP update above accesses that used the red zone.
2696 if (MemAccessBeforeSPPreInc &&
MBBI->getOperand(2).getImm() > RedZoneSize)
2706 if (!ModifiedRegUnits.
available(BaseReg) ||
// If the destination registers are used/modified in between, merging at
// either position is no longer possible.
2714 if (
MI.mayLoadOrStore() ||
MI.hasUnmodeledSideEffects() ||
2715 (DestReg[0] != AArch64::NoRegister &&
2716 !(ModifiedRegUnits.
available(DestReg[0]) &&
2718 (DestReg[1] != AArch64::NoRegister &&
2719 !(ModifiedRegUnits.
available(DestReg[1]) &&
2721 MergeEither =
false;
2726 if (BaseRegSP &&
MBBI->mayLoadOrStore())
2727 MemAccessBeforeSPPreInc =
true;
2733AArch64LoadStoreOpt::findMatchingConstOffsetBackward(
2737 MachineInstr &MemMI = *
I;
2756 ModifiedRegUnits.
clear();
2757 UsedRegUnits.
clear();
2761 MachineInstr &
MI = *
MBBI;
2765 if (!
MI.isTransient())
2769 if (isMatchingMovConstInsn(*
I,
MI, IndexReg,
Offset)) {
2778 if (!ModifiedRegUnits.
available(IndexReg) ||
2786bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
2788 MachineInstr &
MI = *
MBBI;
2790 if (
MI.hasOrderedMemoryRef())
2804 ++NumLoadsFromStoresPromoted;
2808 MBBI = promoteLoadFromStore(
MBBI, StoreI);
2815bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
2818 MachineInstr &
MI = *
MBBI;
2821 if (!
TII->isCandidateToMergeOrPair(
MI))
2825 LdStPairFlags
Flags;
2829 ++NumZeroStoresPromoted;
2833 MBBI = mergeNarrowZeroStores(
MBBI, MergeMI, Flags);
2842 MachineInstr &
MI = *
MBBI;
2845 if (!
TII->isCandidateToMergeOrPair(
MI))
2849 if (
MI.mayLoad() && Subtarget->hasDisableLdp())
2853 if (
MI.mayStore() && Subtarget->hasDisableStp())
2859 bool IsUnscaled =
TII->hasUnscaledLdStOffset(
MI);
2861 int OffsetStride = IsUnscaled ?
TII->getMemScale(
MI) : 1;
2869 LdStPairFlags
Flags;
2875 auto Prev = std::prev(
MBBI);
2879 MachineMemOperand *MemOp =
2880 MI.memoperands_empty() ? nullptr :
MI.memoperands().front();
2885 if ((
MI.mayLoad() && Subtarget->hasLdpAlignedOnly()) ||
2886 (
MI.mayStore() && Subtarget->hasStpAlignedOnly())) {
2888 if (!MemOp || !MemOp->getMemoryType().isValid()) {
2889 NumFailedAlignmentCheck++;
2895 uint64_t MemAlignment = MemOp->getAlign().value();
2896 uint64_t TypeAlignment =
2897 Align(MemOp->getSize().getValue().getKnownMinValue()).value();
2899 if (MemAlignment < 2 * TypeAlignment) {
2900 NumFailedAlignmentCheck++;
2906 if (
TII->hasUnscaledLdStOffset(
MI))
2907 ++NumUnscaledPairCreated;
2909 MBBI = mergePairedInsns(
MBBI, Paired, Flags);
2912 for (
auto I = std::next(Prev);
I !=
MBBI;
I++)
2920bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
2922 MachineInstr &
MI = *
MBBI;
2934 if (
auto NextI = mergeUpdateInsn(
MBBI, Update,
false,
2943 if (
TII->hasUnscaledLdStOffset(
MI.getOpcode()))
2952 Update = findMatchingUpdateInsnBackward(
MBBI,
UpdateLimit, MergeEither);
2955 if (
auto NextI = mergeUpdateInsn(
MBBI, Update,
true,
2956 true, MergeEither)) {
2965 int UnscaledOffset =
2973 Update = findMatchingUpdateInsnForward(
MBBI, UnscaledOffset,
UpdateLimit);
2976 if (
auto NextI = mergeUpdateInsn(
MBBI, Update,
false,
2989 MachineInstr &
MI = *
MBBI;
2994 if (
TII->hasUnscaledLdStOffset(
MI.getOpcode()))
3006 if (Update !=
E && (
Offset & (Scale - 1)) == 0) {
3015bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &
MBB,
3016 bool EnableNarrowZeroStOpt) {
3048 if (EnableNarrowZeroStOpt)
3072 DefinedInBB.
clear();
3081 if (
TII->isPairableLdStInst(*
MBBI) && tryToPairLdStInst(
MBBI))
3120bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
3133 bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
3134 for (
auto &
MBB : Fn) {
3152bool AArch64LoadStoreOptLegacy::runOnMachineFunction(MachineFunction &MF) {
3155 AArch64LoadStoreOpt Impl;
3156 Impl.AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
3157 return Impl.runOnMachineFunction(MF);
3163 return new AArch64LoadStoreOptLegacy();
3169 AArch64LoadStoreOpt Impl;
3173 bool Changed = Impl.runOnMachineFunction(MF);
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
static cl::opt< bool > EnableRenaming("aarch64-load-store-renaming", cl::init(true), cl::Hidden)
static MachineOperand & getLdStRegOp(MachineInstr &MI, unsigned PairedRegOp=0)
static bool isPromotableLoadFromStore(MachineInstr &MI)
static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale, int &MinOffset, int &MaxOffset)
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride)
static unsigned getMatchingPairOpcode(unsigned Opc)
static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI, LdStPairFlags &Flags, const AArch64InstrInfo *TII)
static std::optional< MCPhysReg > tryToFindRegisterToRename(const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static bool needsWinCFI(const MachineFunction *MF)
static bool canRenameUntilSecondLoad(MachineInstr &FirstLoad, MachineInstr &SecondLoad, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static std::optional< MCPhysReg > findRenameRegForSameLdStRegPair(std::optional< bool > MaybeCanRename, MachineInstr &FirstMI, MachineInstr &MI, Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
static cl::opt< unsigned > LdStLimit("aarch64-load-store-scan-limit", cl::init(20), cl::Hidden)
static bool canRenameMOP(const MachineOperand &MOP, const TargetRegisterInfo *TRI)
static bool isRewritableImplicitDef(const MachineOperand &MO)
static unsigned getPreIndexedOpcode(unsigned Opc)
#define AARCH64_LOAD_STORE_OPT_NAME
static void addDebugSubstitutionsToTable(MachineFunction *MF, unsigned InstrNumToSet, MachineInstr &OriginalInstr, MachineInstr &MergedInstr)
This function will add a new entry into the debugValueSubstitutions table when two instruction have b...
static cl::opt< unsigned > UpdateLimit("aarch64-update-scan-limit", cl::init(100), cl::Hidden)
static bool isPromotableZeroStoreInst(MachineInstr &MI)
static unsigned getMatchingWideOpcode(unsigned Opc)
static unsigned getMatchingNonSExtOpcode(unsigned Opc, bool *IsValidLdStrOpc=nullptr)
static MachineBasicBlock::iterator maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI)
static bool isTagStore(const MachineInstr &MI)
static unsigned isMatchingStore(MachineInstr &LoadInst, MachineInstr &StoreInst)
static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg, const TargetRegisterInfo *TRI, unsigned Limit, std::function< bool(MachineInstr &, bool)> &Fn)
static unsigned getPostIndexedOpcode(unsigned Opc)
static bool isMergeableLdStUpdate(MachineInstr &MI, AArch64FunctionInfo &AFI)
static cl::opt< unsigned > LdStConstLimit("aarch64-load-store-const-scan-limit", cl::init(10), cl::Hidden)
static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst, MachineInstr &StoreInst, const AArch64InstrInfo *TII)
static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI)
static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale)
static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units, const TargetRegisterInfo *TRI)
static bool canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static unsigned getBaseAddressOpcode(unsigned Opc)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
const HexagonInstrInfo * TII
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
Returns the immediate offset operator of a load/store.
static const MachineOperand & getLdStAmountOp(const MachineInstr &MI)
Returns the shift amount operator of a load/store.
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
static bool isPairedLdSt(const MachineInstr &MI)
Returns whether the instruction is a paired load/store.
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
Returns the base register operator of a load/store.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
const AArch64RegisterInfo * getRegisterInfo() const override
const AArch64InstrInfo * getInstrInfo() const override
const AArch64TargetLowering * getTargetLowering() const override
bool isLittleEndian() const
unsigned getRedZoneSize(const Function &F) const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
Represents analyses that only rely on functions' control flow.
static bool shouldExecute(CounterInfo &Counter)
FunctionPass class - This class is used to implement most global optimizations.
bool needsUnwindTableEntry() const
True if this function needs an unwind table.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
A set of register units used to track register liveness.
static void accumulateUsedDefed(const MachineInstr &MI, LiveRegUnits &ModifiedRegUnits, LiveRegUnits &UsedRegUnits, const TargetRegisterInfo *TRI)
For a machine instruction MI, adds all register units used in UsedRegUnits and defined or clobbered i...
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void init(const TargetRegisterInfo &TRI)
Initialize and clear the set.
void addReg(MCRegister Reg)
Adds register units covered by physical register Reg.
void removeReg(MCRegister Reg)
Removes all register units covered by physical register Reg.
LLVM_ABI void addLiveIns(const MachineBasicBlock &MBB)
Adds registers living into block MBB.
void clear()
Clears the set.
LLVM_ABI void accumulate(const MachineInstr &MI)
Adds all register units used, defined or clobbered in MI.
An instruction for reading from memory.
bool usesWindowsCFI() const
OpType getOperation() const
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const std::vector< MCCFIInstruction > & getFrameInstructions() const
Returns a reference to a list of cfi instructions in the function's prologue.
void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair, unsigned SubReg=0)
Create a substitution between one <instr,operand> value to a different, new value.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & cloneMergedMemRefs(ArrayRef< const MachineInstr * > OtherMIs) const
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
LLVM_ABI bool mayAlias(BatchAAResults *AA, const MachineInstr &Other, bool UseTBAA) const
Returns true if this instruction's memory access aliases the memory access of Other.
unsigned peekDebugInstrNum() const
Examine the instruction number of this MachineInstr.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
bool isPseudo(QueryType Type=IgnoreBundle) const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
LLVM_ABI void dump() const
LLVM_ABI unsigned getDebugInstrNum()
Fetch the instruction number of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
void setImplicit(bool Val=true)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
LLVM_ABI bool isRenamable() const
isRenamable - Returns true if this register may be renamed, i.e.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
bool isEarlyClobber() const
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Wrapper class representing virtual and physical registers.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
self_iterator getIterator()
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
initializer< Ty > init(const Ty &Val)
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
FunctionAddr VTableAddr Value
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< filter_iterator< ConstMIBundleOperands, bool(*)(const MachineOperand &)> > phys_regs_and_masks(const MachineInstr &MI)
Returns an iterator range over all physical register and mask operands for MI and bundled instruction...
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
FunctionPass * createAArch64LoadStoreOptLegacyPass()
createAArch64LoadStoreOptimizationPass - returns an instance of the load / store optimization pass.
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
DWARFExpression::Operation Op
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
IterT prev_nodbg(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It, then continue decrementing it while it points to a debug instruction.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.