#define DEBUG_TYPE "aarch64-ldst-opt"

STATISTIC(NumPairCreated,
          "Number of load/store pair instructions generated");
STATISTIC(NumPostFolded, "Number of post-index updates folded");
STATISTIC(NumPreFolded, "Number of pre-index updates folded");
STATISTIC(NumUnscaledPairCreated,
          "Number of load/store from unscaled generated");
STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
STATISTIC(NumFailedAlignmentCheck, "Number of load/store pair transformation "
                                   "not passed the alignment check");
STATISTIC(NumConstOffsetFolded,
          "Number of const offset of index address folded");

DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
              "Controls which pairs are considered for renaming");
#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
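// Editor's note (illustrative sketch, not verbatim from this file): the pass
// rewrites adjacent memory operations into paired and pre/post-indexed forms,
// conceptually:
//   ldr x0, [x2]    ; ldr x1, [x2, #8]   ==>  ldp x0, x1, [x2]
//   str x0, [x2]    ; add x2, x2, #8     ==>  str x0, [x2], #8   (post-index)
//   add x2, x2, #8  ; str x0, [x2]       ==>  str x0, [x2, #8]!  (pre-index)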
using LdStPairFlags = struct LdStPairFlags {
  bool MergeForward = false;

  // Implied by the accessors below; the member itself sits in an elided span.
  int SExtIdx = -1;

  std::optional<MCPhysReg> RenameReg;

  LdStPairFlags() = default;

  void setMergeForward(bool V = true) { MergeForward = V; }
  bool getMergeForward() const { return MergeForward; }

  void setSExtIdx(int V) { SExtIdx = V; }
  int getSExtIdx() const { return SExtIdx; }

  void setRenameReg(MCPhysReg R) { RenameReg = R; }
  void clearRenameReg() { RenameReg = std::nullopt; }
  std::optional<MCPhysReg> getRenameReg() const { return RenameReg; }
};
                    LdStPairFlags &Flags,
                    bool FindNarrowMerge);
                    const LdStPairFlags &Flags);
                    const LdStPairFlags &Flags);
                    int UnscaledOffset, unsigned Limit);
                    unsigned BaseReg, int Offset);
                    unsigned IndexReg, unsigned &Offset);
  std::optional<MachineBasicBlock::iterator>
                    bool IsPreIdx, bool MergeEither);
char AArch64LoadStoreOpt::ID = 0;

static bool isNarrowStore(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
    return true;
  }
}
  switch (MI.getOpcode()) {
  case AArch64::STZ2Gi:
static unsigned getMatchingNonSExtOpcode(unsigned Opc,
                                         bool *IsValidLdStrOpc = nullptr) {
  if (IsValidLdStrOpc)
    *IsValidLdStrOpc = true;
  switch (Opc) {
  default:
    if (IsValidLdStrOpc)
      *IsValidLdStrOpc = false;
    return std::numeric_limits<unsigned>::max();
  case AArch64::STRDui:
  case AArch64::STURDi:
  case AArch64::STRDpre:
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::STRQpre:
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
  case AArch64::STRWui:
  case AArch64::STRWpre:
  case AArch64::STURWi:
  case AArch64::STRXui:
  case AArch64::STRXpre:
  case AArch64::STURXi:
  case AArch64::STR_ZXI:
  case AArch64::LDRDui:
  case AArch64::LDURDi:
  case AArch64::LDRDpre:
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::LDRQpre:
  case AArch64::LDRWui:
  case AArch64::LDURWi:
  case AArch64::LDRWpre:
  case AArch64::LDRXui:
  case AArch64::LDURXi:
  case AArch64::LDRXpre:
  case AArch64::STRSui:
  case AArch64::STURSi:
  case AArch64::STRSpre:
  case AArch64::LDRSui:
  case AArch64::LDURSi:
  case AArch64::LDRSpre:
  case AArch64::LDR_ZXI:
    return Opc;
  case AArch64::LDRSWui:
    return AArch64::LDRWui;
  case AArch64::LDURSWi:
    return AArch64::LDURWi;
  case AArch64::LDRSWpre:
    return AArch64::LDRWpre;
  case AArch64::STRBBui:
    return AArch64::STRHHui;
  case AArch64::STRHHui:
    return AArch64::STRWui;
  case AArch64::STURBBi:
    return AArch64::STURHHi;
  case AArch64::STURHHi:
    return AArch64::STURWi;
  case AArch64::STURWi:
    return AArch64::STURXi;
  case AArch64::STRWui:
    return AArch64::STRXui;
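// Editor's sketch (assumed example): the widening table above is what allows
// two adjacent narrow zero stores to be merged into a single wider store:
//   strh wzr, [x0]  ; strh wzr, [x0, #2]  ==>  str wzr, [x0]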
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STPSi;
  case AArch64::STRSpre:
    return AArch64::STPSpre;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STPDi;
  case AArch64::STRDpre:
    return AArch64::STPDpre;
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::STR_ZXI:
    return AArch64::STPQi;
  case AArch64::STRQpre:
    return AArch64::STPQpre;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STPWi;
  case AArch64::STRWpre:
    return AArch64::STPWpre;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STPXi;
  case AArch64::STRXpre:
    return AArch64::STPXpre;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDPSi;
  case AArch64::LDRSpre:
    return AArch64::LDPSpre;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDPDi;
  case AArch64::LDRDpre:
    return AArch64::LDPDpre;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::LDR_ZXI:
    return AArch64::LDPQi;
  case AArch64::LDRQpre:
    return AArch64::LDPQpre;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDPWi;
  case AArch64::LDRWpre:
    return AArch64::LDPWpre;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDPXi;
  case AArch64::LDRXpre:
    return AArch64::LDPXpre;
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
    return AArch64::LDPSWi;
  case AArch64::LDRSWpre:
    return AArch64::LDPSWpre;
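// Editor's sketch (assumed example): the pair table above drives the core
// pairing rewrite, e.g. for two adjacent X-register loads:
//   ldr x0, [sp, #16] ; ldr x1, [sp, #24]  ==>  ldp x0, x1, [sp, #16]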
  case AArch64::LDRBBui:
    return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
           StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
  case AArch64::LDURBBi:
    return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
           StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
  case AArch64::LDRHHui:
    return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
           StOpc == AArch64::STRXui;
  case AArch64::LDURHHi:
    return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
           StOpc == AArch64::STURXi;
  case AArch64::LDRWui:
    return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
  case AArch64::LDURWi:
    return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
  case AArch64::LDRXui:
    return StOpc == AArch64::STRXui;
  case AArch64::LDURXi:
    return StOpc == AArch64::STURXi;
  case AArch64::STRBui:
    return AArch64::STRBpre;
  case AArch64::STRHui:
    return AArch64::STRHpre;
  case AArch64::STRSui:
    return AArch64::STRSpre;
  case AArch64::STRDui:
    return AArch64::STRDpre;
  case AArch64::STRQui:
    return AArch64::STRQpre;
  case AArch64::STRBBui:
    return AArch64::STRBBpre;
  case AArch64::STRHHui:
    return AArch64::STRHHpre;
  case AArch64::STRWui:
    return AArch64::STRWpre;
  case AArch64::STRXui:
    return AArch64::STRXpre;
  case AArch64::LDRBui:
    return AArch64::LDRBpre;
  case AArch64::LDRHui:
    return AArch64::LDRHpre;
  case AArch64::LDRSui:
    return AArch64::LDRSpre;
  case AArch64::LDRDui:
    return AArch64::LDRDpre;
  case AArch64::LDRQui:
    return AArch64::LDRQpre;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpre;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpre;
  case AArch64::LDRWui:
    return AArch64::LDRWpre;
  case AArch64::LDRXui:
    return AArch64::LDRXpre;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpre;
  case AArch64::LDPSi:
    return AArch64::LDPSpre;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpre;
  case AArch64::LDPDi:
    return AArch64::LDPDpre;
  case AArch64::LDPQi:
    return AArch64::LDPQpre;
  case AArch64::LDPWi:
    return AArch64::LDPWpre;
  case AArch64::LDPXi:
    return AArch64::LDPXpre;
  case AArch64::STPSi:
    return AArch64::STPSpre;
  case AArch64::STPDi:
    return AArch64::STPDpre;
  case AArch64::STPQi:
    return AArch64::STPQpre;
  case AArch64::STPWi:
    return AArch64::STPWpre;
  case AArch64::STPXi:
    return AArch64::STPXpre;
  case AArch64::STGi:
    return AArch64::STGPreIndex;
  case AArch64::STZGi:
    return AArch64::STZGPreIndex;
  case AArch64::ST2Gi:
    return AArch64::ST2GPreIndex;
  case AArch64::STZ2Gi:
    return AArch64::STZ2GPreIndex;
  case AArch64::STGPi:
    return AArch64::STGPpre;
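// Editor's sketch (assumed example): the pre-indexed mapping above is used
// when a base-register increment can be folded into the access itself:
//   add x1, x1, #16 ; str x0, [x1]  ==>  str x0, [x1, #16]!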
  case AArch64::LDRBroX:
    return AArch64::LDRBui;
  case AArch64::LDRBBroX:
    return AArch64::LDRBBui;
  case AArch64::LDRSBXroX:
    return AArch64::LDRSBXui;
  case AArch64::LDRSBWroX:
    return AArch64::LDRSBWui;
  case AArch64::LDRHroX:
    return AArch64::LDRHui;
  case AArch64::LDRHHroX:
    return AArch64::LDRHHui;
  case AArch64::LDRSHXroX:
    return AArch64::LDRSHXui;
  case AArch64::LDRSHWroX:
    return AArch64::LDRSHWui;
  case AArch64::LDRWroX:
    return AArch64::LDRWui;
  case AArch64::LDRSroX:
    return AArch64::LDRSui;
  case AArch64::LDRSWroX:
    return AArch64::LDRSWui;
  case AArch64::LDRDroX:
    return AArch64::LDRDui;
  case AArch64::LDRXroX:
    return AArch64::LDRXui;
  case AArch64::LDRQroX:
    return AArch64::LDRQui;
  case AArch64::STRBui:
    return AArch64::STRBpost;
  case AArch64::STRHui:
    return AArch64::STRHpost;
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STRSpost;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STRDpost;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STRQpost;
  case AArch64::STRBBui:
    return AArch64::STRBBpost;
  case AArch64::STRHHui:
    return AArch64::STRHHpost;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STRWpost;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STRXpost;
  case AArch64::LDRBui:
    return AArch64::LDRBpost;
  case AArch64::LDRHui:
    return AArch64::LDRHpost;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDRSpost;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDRDpost;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDRQpost;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpost;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpost;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDRWpost;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDRXpost;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpost;
  case AArch64::LDPSi:
    return AArch64::LDPSpost;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpost;
  case AArch64::LDPDi:
    return AArch64::LDPDpost;
  case AArch64::LDPQi:
    return AArch64::LDPQpost;
  case AArch64::LDPWi:
    return AArch64::LDPWpost;
  case AArch64::LDPXi:
    return AArch64::LDPXpost;
  case AArch64::STPSi:
    return AArch64::STPSpost;
  case AArch64::STPDi:
    return AArch64::STPDpost;
  case AArch64::STPQi:
    return AArch64::STPQpost;
  case AArch64::STPWi:
    return AArch64::STPWpost;
  case AArch64::STPXi:
    return AArch64::STPXpost;
  case AArch64::STGi:
    return AArch64::STGPostIndex;
  case AArch64::STZGi:
    return AArch64::STZGPostIndex;
  case AArch64::ST2Gi:
    return AArch64::ST2GPostIndex;
  case AArch64::STZ2Gi:
    return AArch64::STZ2GPostIndex;
  case AArch64::STGPi:
    return AArch64::STGPpost;
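// Editor's sketch (assumed example): the post-indexed mapping above is used
// when the base register is incremented after the access:
//   ldr x0, [x1] ; add x1, x1, #8  ==>  ldr x0, [x1], #8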
  unsigned OpcB = MI.getOpcode();

  case AArch64::STRSpre:
    return (OpcB == AArch64::STRSui) || (OpcB == AArch64::STURSi);
  case AArch64::STRDpre:
    return (OpcB == AArch64::STRDui) || (OpcB == AArch64::STURDi);
  case AArch64::STRQpre:
    return (OpcB == AArch64::STRQui) || (OpcB == AArch64::STURQi);
  case AArch64::STRWpre:
    return (OpcB == AArch64::STRWui) || (OpcB == AArch64::STURWi);
  case AArch64::STRXpre:
    return (OpcB == AArch64::STRXui) || (OpcB == AArch64::STURXi);
  case AArch64::LDRSpre:
    return (OpcB == AArch64::LDRSui) || (OpcB == AArch64::LDURSi);
  case AArch64::LDRDpre:
    return (OpcB == AArch64::LDRDui) || (OpcB == AArch64::LDURDi);
  case AArch64::LDRQpre:
    return (OpcB == AArch64::LDRQui) || (OpcB == AArch64::LDURQi);
  case AArch64::LDRWpre:
    return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi);
  case AArch64::LDRXpre:
    return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);
  case AArch64::LDRSWpre:
    return (OpcB == AArch64::LDRSWui) || (OpcB == AArch64::LDURSWi);

static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
                                       int &MinOffset, int &MaxOffset) {

static MachineOperand &getLdStRegOp(MachineInstr &MI,
                                    unsigned PairedRegOp = 0) {
  assert(PairedRegOp < 2 && "Unexpected register operand idx.");
  return MI.getOperand(Idx);

  int UnscaledStOffset =
  int UnscaledLdOffset =
  return (UnscaledStOffset <= UnscaledLdOffset) &&
         (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
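// Editor's note (illustrative): the check above asks whether the loaded bytes
// lie entirely within the stored bytes. For an 8-byte store at unscaled
// offset 0 and a 4-byte load at offset 4: 0 <= 4 and 4 + 4 <= 0 + 8, so the
// load can later be satisfied directly from the stored register.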
  unsigned Opc = MI.getOpcode();
  return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||
          isNarrowStore(Opc)) &&

  switch (MI.getOpcode()) {
  case AArch64::LDRBBui:
  case AArch64::LDRHHui:
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  case AArch64::LDURBBi:
  case AArch64::LDURHHi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  unsigned Opc = MI.getOpcode();
  case AArch64::STRBui:
  case AArch64::STRHui:
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
  case AArch64::STRXui:
  case AArch64::STRWui:
  case AArch64::STRHHui:
  case AArch64::STRBBui:
  case AArch64::LDRBui:
  case AArch64::LDRHui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
  case AArch64::LDRXui:
  case AArch64::LDRWui:
  case AArch64::LDRHHui:
  case AArch64::LDRBBui:
  case AArch64::STZ2Gi:
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURWi:
  case AArch64::STURXi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  case AArch64::LDPSWi:
  unsigned Opc = MI.getOpcode();
  case AArch64::LDRBroX:
  case AArch64::LDRBBroX:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSBWroX:
  case AArch64::LDRHroX:
  case AArch64::LDRHHroX:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSHWroX:
  case AArch64::LDRWroX:
  case AArch64::LDRSroX:
  case AArch64::LDRSWroX:
  case AArch64::LDRDroX:
  case AArch64::LDRXroX:
  case AArch64::LDRQroX:
  case AArch64::ORRWrs:
  case AArch64::ADDWri:
                                            const LdStPairFlags &Flags) {
         "Expected promotable zero stores.");
  if (NextI == MergeMI)
  unsigned Opc = I->getOpcode();
  unsigned MergeMIOpc = MergeMI->getOpcode();
  bool IsScaled = !TII->hasUnscaledLdStOffset(Opc);
  bool IsMergedMIScaled = !TII->hasUnscaledLdStOffset(MergeMIOpc);
  int OffsetStride = IsScaled ? TII->getMemScale(*I) : 1;
  int MergeMIOffsetStride = IsMergedMIScaled ? TII->getMemScale(*MergeMI) : 1;

  bool MergeForward = Flags.getMergeForward();

  const MachineOperand &BaseRegOp =
                   : AArch64InstrInfo::getLdStBaseOp(*I);

  int64_t IOffsetInBytes =
  int64_t MIOffsetInBytes =

  if (IOffsetInBytes > MIOffsetInBytes)
    OffsetImm = MIOffsetInBytes;
  else
    OffsetImm = IOffsetInBytes;

  if (!TII->hasUnscaledLdStOffset(NewOpcode)) {
    int NewOffsetStride = TII->getMemScale(NewOpcode);
    assert(((OffsetImm % NewOffsetStride) == 0) &&
           "Offset should be a multiple of the store memory scale");
    OffsetImm = OffsetImm / NewOffsetStride;
  }

  MachineInstrBuilder MIB;
            .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
            .setMIFlags(I->mergeFlagsWith(*MergeMI));

  LLVM_DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n ");

  I->eraseFromParent();
  MergeMI->eraseFromParent();
  auto MBB = MI.getParent();
    return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
           TRI->regsOverlap(MOP.getReg(), DefReg);
    if (MOP.isReg() && MOP.isKill())
    if (MOP.isReg() && !MOP.isKill())
      Units.addReg(MOP.getReg());

                                            unsigned InstrNumToSet,
  unsigned OperandNo = 0;
  bool RegFound = false;
  for (const auto Op : MergedInstr.operands()) {
    if (Op.getReg() == Reg) {
        {InstrNumToSet, OperandNo});
                                     const LdStPairFlags &Flags) {
  if (NextI == Paired)
  int SExtIdx = Flags.getSExtIdx();
  bool IsUnscaled = TII->hasUnscaledLdStOffset(Opc);
  int OffsetStride = IsUnscaled ? TII->getMemScale(*I) : 1;

  bool MergeForward = Flags.getMergeForward();

  std::optional<MCPhysReg> RenameReg = Flags.getRenameReg();
    DefinedInBB.addReg(*RenameReg);

    auto GetMatchingSubReg =
        [this, RenameReg](const TargetRegisterClass *C) -> MCPhysReg {
               TRI->sub_and_superregs_inclusive(*RenameReg)) {
        if (C->contains(SubOrSuper))

    std::function<bool(MachineInstr &, bool)> UpdateMIs =
        [this, RegToRename, GetMatchingSubReg, MergeForward](MachineInstr &MI,
          bool SeenDef = false;
            MachineOperand &MOP = MI.getOperand(OpIdx);
                (!MergeForward || !SeenDef ||
                TRI->regsOverlap(MOP.getReg(), RegToRename)) {
                     "Need renamable operands");
              if (const TargetRegisterClass *RC =
                MatchingReg = GetMatchingSubReg(RC);
                MatchingReg = GetMatchingSubReg(
                    TRI->getMinimalPhysRegClass(MOP.getReg()));
            MachineOperand &MOP = MI.getOperand(OpIdx);
                TRI->regsOverlap(MOP.getReg(), RegToRename)) {
                     "Need renamable operands");
              if (const TargetRegisterClass *RC =
                MatchingReg = GetMatchingSubReg(RC);
                MatchingReg = GetMatchingSubReg(
                    TRI->getMinimalPhysRegClass(MOP.getReg()));
              assert(MatchingReg != AArch64::NoRegister &&
                     "Cannot find matching regs for renaming");
                       TRI, UINT32_MAX, UpdateMIs);

      RegToCheck = RegToRename;
                             MergeForward ? std::next(I) : I,
                             MergeForward ? std::next(Paired) : Paired))
                      [this, RegToCheck](const MachineOperand &MOP) {
                        return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
                               !TRI->regsOverlap(MOP.getReg(), RegToCheck);
           "Rename register used between paired instruction, trashing the "

  const MachineOperand &BaseRegOp =
                   : AArch64InstrInfo::getLdStBaseOp(*I);

  bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode());
  if (IsUnscaled != PairedIsUnscaled) {
    int MemSize = TII->getMemScale(*Paired);
    if (PairedIsUnscaled) {
      assert(!(PairedOffset % TII->getMemScale(*Paired)) &&
             "Offset should be a multiple of the stride!");
      PairedOffset /= MemSize;
    } else {
      PairedOffset *= MemSize;

  MachineInstr *RtMI, *Rt2MI;
  if (Offset == PairedOffset + OffsetStride &&
    SExtIdx = (SExtIdx + 1) % 2;
  assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
         "Unscaled offset cannot be scaled.");
  OffsetImm /= TII->getMemScale(*RtMI);

  MachineInstrBuilder MIB;
  MachineOperand &PairedRegOp = RtMI == &*Paired ? RegOp0 : RegOp1;
  if (RegOp0.isUse()) {
    if (!MergeForward) {
      for (auto It = std::next(I); It != Paired && PairedRegOp.isKill(); ++It)
        if (It->readsRegister(PairedRegOp.getReg(), TRI))
    for (MachineInstr &MI :
         make_range(std::next(I->getIterator()), Paired->getIterator()))
          .setMIFlags(I->mergeFlagsWith(*Paired));

      dbgs() << "Creating pair load/store. Replacing instructions:\n ");

  if (SExtIdx != -1) {
    MachineOperand &DstMO = MIB->getOperand(SExtIdx);
    Register DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
    MachineInstrBuilder MIBKill =
        BuildMI(*MBB, InsertionPoint, DL, TII->get(TargetOpcode::KILL), DstRegW)
            .addReg(DstRegX, RegState::Define);
    MachineInstrBuilder MIBSXTW =
        BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::SBFMXri), DstRegX)
    if (I->peekDebugInstrNum()) {
      unsigned NewInstrNum;
      if (DstRegX == I->getOperand(0).getReg()) {
    if (Paired->peekDebugInstrNum()) {
      unsigned NewInstrNum;
      if (DstRegX == Paired->getOperand(0).getReg()) {
  } else if (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI) {
           AArch64::ZPRRegClass.contains(MOp1.getReg()) && "Invalid register.");
    MOp0.setReg(AArch64::Q0 + (MOp0.getReg() - AArch64::Z0));
    MOp1.setReg(AArch64::Q0 + (MOp1.getReg() - AArch64::Z0));

  if (I->peekDebugInstrNum()) {
  if (Paired->peekDebugInstrNum()) {

    SmallSetVector<Register, 4> Ops;
    for (const MachineOperand &MO :
      if (MO.isReg() && MO.isImplicit() && MO.isDef())
        Ops.insert(MO.getReg());
    for (const MachineOperand &MO :
      if (MO.isReg() && MO.isImplicit() && MO.isDef())
        Ops.insert(MO.getReg());
      MIB.addDef(Op, RegState::Implicit);
    CopyImplicitOps(I, Paired);

  I->eraseFromParent();
  Paired->eraseFromParent();
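// Editor's sketch (assumed example): when one of the paired loads was
// sign-extending (SExtIdx != -1), the pair is emitted in the narrower form
// and the extension is re-materialised afterwards via SBFMXri (sxtw):
//   ldrsw x0, [x2] ; ldr w1, [x2, #4]  ==>  ldp w0, w1, [x2]
//                                           sxtw x0, w0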
      next_nodbg(LoadI, LoadI->getParent()->end());

  int LoadSize = TII->getMemScale(*LoadI);
  int StoreSize = TII->getMemScale(*StoreI);
  bool IsStoreXReg =
      TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);
          TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
         "Unexpected RegClass");

  MachineInstr *BitExtMI;
  if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
    if (StRt == LdRt && LoadSize == 8) {
      for (MachineInstr &MI : make_range(StoreI->getIterator(),
                                         LoadI->getIterator())) {
        if (MI.killsRegister(StRt, TRI)) {
          MI.clearRegisterKills(StRt, TRI);
      LoadI->eraseFromParent();
        BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
            .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
    bool IsUnscaled = TII->hasUnscaledLdStOffset(*LoadI);
    assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) &&
           "Unsupported ld/st match");
    assert(LoadSize <= StoreSize && "Invalid load size");
    int UnscaledLdOffset =
    int UnscaledStOffset =
    int Width = LoadSize * 8;
            LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))
    assert((UnscaledLdOffset >= UnscaledStOffset &&
            (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
    int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
    int Imms = Immr + Width - 1;
    if (UnscaledLdOffset == UnscaledStOffset) {
      uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12)
          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                  TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
    } else if (IsStoreXReg && Imms == 31) {
      assert(Immr <= Imms && "Expected LSR alias of UBFM");
      BitExtMI = BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                         TII->get(AArch64::UBFMWri),
                         TRI->getSubReg(DestReg, AArch64::sub_32))
                     .addReg(TRI->getSubReg(StRt, AArch64::sub_32))
          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                  TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),

  for (MachineInstr &MI : make_range(StoreI->getIterator(),
    if (MI.killsRegister(StRt, TRI)) {
      MI.clearRegisterKills(StRt, TRI);

  LoadI->eraseFromParent();
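// Editor's sketch (assumed example): when a load is fully covered by an
// earlier store to the same address, it is replaced by a move (ORR with the
// zero register) or a bitfield extract of the stored value:
//   str w1, [x0, #4] ; ldr w2, [x0, #4]  ==>  str w1, [x0, #4]
//                                             mov w2, w1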
  if (Offset % OffsetStride)

  return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
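// Editor's note (illustrative): the expression above rounds Num up to the
// next multiple of PowOf2, e.g. with Num = 7 and PowOf2 = 4:
//   (7 + 4 - 1) & ~(4 - 1) == 10 & ~3 == 8.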
bool AArch64LoadStoreOpt::findMatchingStore(
  MachineInstr &LoadMI = *I;
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
    MachineInstr &MI = *MBBI;
    if (!MI.isTransient())
    if (!ModifiedRegUnits.available(BaseReg))

static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
                                       LdStPairFlags &Flags,
                                       const AArch64InstrInfo *TII) {
  if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
         !TII->isLdStPairSuppressed(FirstMI) &&
         "FirstMI shouldn't get here if either of these checks are true.");
  unsigned OpcB = MI.getOpcode();
  if (OpcA == AArch64::LDR_ZXI || OpcA == AArch64::STR_ZXI ||
      OpcB == AArch64::LDR_ZXI || OpcB == AArch64::STR_ZXI)
  bool IsValidLdStrOpc, PairIsValidLdStrOpc;
  assert(IsValidLdStrOpc &&
         "Given Opc should be a Load or Store with an immediate");
  Flags.setSExtIdx(NonSExtOpc == OpcA ? 1 : 0);
  if (!PairIsValidLdStrOpc)
  if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
           TII->getMemScale(FirstMI) == TII->getMemScale(MI);
  return TII->hasUnscaledLdStOffset(OpcA) != TII->hasUnscaledLdStOffset(OpcB) &&

  auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg());
  if (RegClass->HasDisjunctSubRegs && RegClass->CoveredBySubRegs &&
      (TRI->getSubRegisterClass(RegClass, AArch64::dsub0) ||
       TRI->getSubRegisterClass(RegClass, AArch64::qsub0) ||
       TRI->getSubRegisterClass(RegClass, AArch64::zsub0))) {
        << " Cannot rename operands with multiple disjunct subregisters ("
  return TRI->isSuperOrSubRegisterEq(

    return MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
           MOP.isImplicit() && MOP.isKill() &&
           TRI->regsOverlap(RegToRename, MOP.getReg());

  bool FoundDef = false;
    if (MI.isPseudo()) {
      LLVM_DEBUG(dbgs() << " Cannot rename pseudo/bundle instruction\n");
    for (auto &MOP : MI.operands()) {
          !TRI->regsOverlap(MOP.getReg(), RegToRename))
    for (auto &MOP : MI.operands()) {
          !TRI->regsOverlap(MOP.getReg(), RegToRename))
    LLVM_DEBUG(dbgs() << " Did not find definition for register in BB\n");

    LLVM_DEBUG(dbgs() << "Checking " << MI);
    if (MI.getFlag(MachineInstr::FrameSetup)) {
      LLVM_DEBUG(dbgs() << " Cannot rename framesetup instructions "
    for (auto &MOP : MI.operands()) {
      if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
          !TRI->regsOverlap(MOP.getReg(), RegToRename))
      if (!canRenameMOP(MOP, TRI)) {
        LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI);

  auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) {
    return any_of(TRI->sub_and_superregs_inclusive(PR),
                    return TRI->isCalleeSavedPhysReg(SubOrSuper, MF);

  auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) {
                  TRI->sub_and_superregs_inclusive(PR),
                  [C](MCPhysReg SubOrSuper) { return C->contains(SubOrSuper); });

  auto *RegClass = TRI->getMinimalPhysRegClass(Reg);
        !RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) &&
        CanBeUsedForAllClasses(PR)) {
                    << TRI->getRegClassName(RegClass) << "\n");
  return std::nullopt;

  std::optional<MCPhysReg> RenameReg;
  const bool IsLoad = FirstMI.mayLoad();
  if (!MaybeCanRename) {
                                           RequiredClasses, TRI)};
  if (*MaybeCanRename) {
                                                  RequiredClasses, TRI);

                                      LdStPairFlags &Flags, unsigned Limit,
                                      bool FindNarrowMerge) {
  MachineInstr &FirstMI = *I;
  bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI);
  int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;
  std::optional<bool> MaybeCanRename;
    MaybeCanRename = {false};
  SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;
  LiveRegUnits UsedInBetween;

  Flags.clearRenameReg();
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  SmallVector<MachineInstr *, 4> MemInsns;
    MachineInstr &MI = *MBBI;
    if (!MI.isTransient())
    Flags.setSExtIdx(-1);
      assert(MI.mayLoadOrStore() && "Expected memory operation.");
      bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI);
      if (IsUnscaled != MIIsUnscaled) {
        int MemSize = TII->getMemScale(MI);
          if (MIOffset % MemSize) {
          MIOffset /= MemSize;
        } else {
          MIOffset *= MemSize;
      if (BaseReg == MIBaseReg) {
          bool IsOutOfBounds = MIOffset != TII->getMemScale(MI);
          bool IsBaseRegUsed = !UsedRegUnits.available(
          bool IsBaseRegModified = !ModifiedRegUnits.available(
          bool IsMIRegTheSame =
          if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
        if ((Offset != MIOffset + OffsetStride) &&
            (Offset + OffsetStride != MIOffset)) {
        if (FindNarrowMerge) {
          if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
                              << "keep looking.\n");
          if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
                       << "Offset doesn't fit due to alignment requirements, "
                       << "keep looking.\n");
      if (!ModifiedRegUnits.available(BaseReg))
      const bool SameLoadReg = MayLoad && TRI->isSuperOrSubRegisterEq(
      bool RtNotModified =
      bool RtNotUsed = !(MI.mayLoad() && !SameLoadReg &&
      LLVM_DEBUG(dbgs() << "Checking, can combine 2nd into 1st insn:\n"
                        << (RtNotModified ? "true" : "false") << "\n"
                        << (RtNotUsed ? "true" : "false") << "\n");
      if (RtNotModified && RtNotUsed && !mayAlias(MI, MemInsns, AA)) {
          std::optional<MCPhysReg> RenameReg =
                  Reg, DefinedInBB, UsedInBetween,
                  RequiredClasses, TRI);
                              << "keep looking.\n");
            Flags.setRenameReg(*RenameReg);
        Flags.setMergeForward(false);
          Flags.clearRenameReg();
      LLVM_DEBUG(dbgs() << "Checking, can combine 1st into 2nd insn:\n"
                        << "' not modified: "
                        << (RtNotModified ? "true" : "false") << "\n");
      if (RtNotModified && !mayAlias(FirstMI, MemInsns, AA)) {
          Flags.setMergeForward(true);
          Flags.clearRenameReg();
              MaybeCanRename, FirstMI, MI, Reg, DefinedInBB, UsedInBetween,
              RequiredClasses, TRI);
          Flags.setMergeForward(true);
          Flags.setRenameReg(*RenameReg);
          MBBIWithRenameReg = MBBI;
      LLVM_DEBUG(dbgs() << "Unable to combine these instructions due to "
                        << "interference in between, keep looking.\n");
  if (Flags.getRenameReg())
    return MBBIWithRenameReg;

    if (!ModifiedRegUnits.available(BaseReg)) {
    if (MI.mayLoadOrStore())

  assert((MI.getOpcode() == AArch64::SUBXri ||
          MI.getOpcode() == AArch64::ADDXri) &&
         "Expected a register update instruction");
  auto End = MI.getParent()->end();
  if (MaybeCFI == End ||
      MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||
      MI.getOperand(0).getReg() != AArch64::SP)
  unsigned CFIIndex = MaybeCFI->getOperand(0).getCFIIndex();
std::optional<MachineBasicBlock::iterator> AArch64LoadStoreOpt::mergeUpdateInsn(
    bool IsForward, bool IsPreIdx, bool MergeEither) {
  assert((Update->getOpcode() == AArch64::ADDXri ||
          Update->getOpcode() == AArch64::SUBXri) &&
         "Unexpected base register update instruction to merge!");
    if (std::any_of(std::next(CFI), I, [](const auto &Insn) {
          return Insn.getOpcode() == TargetOpcode::CFI_INSTRUCTION;
      return std::nullopt;

  MachineBasicBlock *MBB = InsertPt->getParent();
  if (NextI == Update)
  int Value = Update->getOperand(2).getImm();
         "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
  if (Update->getOpcode() == AArch64::SUBXri)

  MachineInstrBuilder MIB;
  int Scale, MinOffset, MaxOffset;
    MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
              .add(Update->getOperand(0))
    MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
              .add(Update->getOperand(0))

  I->eraseFromParent();
  Update->eraseFromParent();
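// Editor's sketch (assumed example): mergeUpdateInsn folds an ADDXri/SUBXri
// on the base register into the memory access; for a SUBXri the immediate is
// negated first, conceptually:
//   ldr x0, [sp] ; sub sp, sp, #16  ==>  ldr x0, [sp], #-16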
                                                   unsigned Offset, int Scale) {
  assert((Update->getOpcode() == AArch64::MOVKWi) &&
         "Unexpected const mov instruction to merge!");
  MachineInstr &MemMI = *I;
  unsigned Mask = (1 << 12) * Scale - 1;

  MachineInstrBuilder AddMIB, MemMIB;
      BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(AArch64::ADDXri))
  MemMIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
  ++NumConstOffsetFolded;

  I->eraseFromParent();
  PrevI->eraseFromParent();
  Update->eraseFromParent();

bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
                                               unsigned BaseReg, int Offset) {
  switch (MI.getOpcode()) {
  case AArch64::SUBXri:
  case AArch64::ADDXri:
    if (!MI.getOperand(2).isImm())
    if (MI.getOperand(0).getReg() != BaseReg ||
        MI.getOperand(1).getReg() != BaseReg)
    int UpdateOffset = MI.getOperand(2).getImm();
    if (MI.getOpcode() == AArch64::SUBXri)
      UpdateOffset = -UpdateOffset;
    int Scale, MinOffset, MaxOffset;
    if (UpdateOffset % Scale != 0)
    int ScaledOffset = UpdateOffset / Scale;
    if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)

bool AArch64LoadStoreOpt::isMatchingMovConstInsn(MachineInstr &MemMI,
  if (MI.getOpcode() == AArch64::MOVKWi &&
      TRI->isSuperOrSubRegisterEq(IndexReg, MI.getOperand(1).getReg())) {
    MachineInstr &MovzMI = *MBBI;
    if (MovzMI.getOpcode() == AArch64::MOVZWi &&
      unsigned High = MI.getOperand(2).getImm() << MI.getOperand(3).getImm();
      return Offset >> 24 == 0;

  MachineInstr &MemMI = *I;
      TII->getMemScale(MemMI);
  if (MIUnscaledOffset != UnscaledOffset)
  for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
    if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))

  ModifiedRegUnits.clear();
  UsedRegUnits.clear();

  const bool BaseRegSP = BaseReg == AArch64::SP;
  MachineBasicBlock *CurMBB = I->getParent();
    MachineInstr &MI = *MBBI;
    if (!MI.isTransient())
    if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
    if (!ModifiedRegUnits.available(BaseReg) ||
        (BaseRegSP && MBBI->mayLoadOrStore()))
    if (!VisitSucc || Limit <= Count)
    MachineBasicBlock *SuccToVisit = nullptr;
    unsigned LiveSuccCount = 0;
    for (MachineBasicBlock *Succ : CurMBB->successors()) {
      for (MCRegAliasIterator AI(BaseReg, TRI, true); AI.isValid(); ++AI) {
        if (Succ->isLiveIn(*AI)) {
          if (LiveSuccCount++)
      if (Succ->pred_size() == 1)
    CurMBB = SuccToVisit;

  MachineInstr &MemMI = *I;
  MachineFunction &MF = *MemMI.getMF();
                                 : AArch64::NoRegister};
  for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i)
    if (DestReg[i] == BaseReg || TRI->isSubRegister(BaseReg, DestReg[i]))

  const bool BaseRegSP = BaseReg == AArch64::SP;
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  unsigned RedZoneSize =
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  bool MemAccessBeforeSPPreInc = false;
    MachineInstr &MI = *MBBI;
    if (!MI.isTransient())
    if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset)) {
      if (MemAccessBeforeSPPreInc &&
          MBBI->getOperand(2).getImm() > RedZoneSize)
    if (!ModifiedRegUnits.available(BaseReg) ||
    if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects() ||
        (DestReg[0] != AArch64::NoRegister &&
         !(ModifiedRegUnits.available(DestReg[0]) &&
        (DestReg[1] != AArch64::NoRegister &&
         !(ModifiedRegUnits.available(DestReg[1]) &&
      MergeEither = false;
    if (BaseRegSP && MBBI->mayLoadOrStore())
      MemAccessBeforeSPPreInc = true;

AArch64LoadStoreOpt::findMatchingConstOffsetBackward(
  MachineInstr &MemMI = *I;
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
    MachineInstr &MI = *MBBI;
    if (!MI.isTransient())
    if (isMatchingMovConstInsn(*I, MI, IndexReg, Offset)) {
    if (!ModifiedRegUnits.available(IndexReg) ||

bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
  MachineInstr &MI = *MBBI;
  if (MI.hasOrderedMemoryRef())
  ++NumLoadsFromStoresPromoted;
  MBBI = promoteLoadFromStore(MBBI, StoreI);

bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
  MachineInstr &MI = *MBBI;
  if (!TII->isCandidateToMergeOrPair(MI))
  LdStPairFlags Flags;
  ++NumZeroStoresPromoted;
  MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags);

  MachineInstr &MI = *MBBI;
  if (!TII->isCandidateToMergeOrPair(MI))
  if (MI.mayLoad() && Subtarget->hasDisableLdp())
  if (MI.mayStore() && Subtarget->hasDisableStp())
  bool IsUnscaled = TII->hasUnscaledLdStOffset(MI);
  int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
  LdStPairFlags Flags;
  auto Prev = std::prev(MBBI);
  MachineMemOperand *MemOp =
      MI.memoperands_empty() ? nullptr : MI.memoperands().front();
  if ((MI.mayLoad() && Subtarget->hasLdpAlignedOnly()) ||
      (MI.mayStore() && Subtarget->hasStpAlignedOnly())) {
    if (!MemOp || !MemOp->getMemoryType().isValid()) {
      NumFailedAlignmentCheck++;
    uint64_t MemAlignment = MemOp->getAlign().value();
    uint64_t TypeAlignment =
        Align(MemOp->getSize().getValue().getKnownMinValue()).value();
    if (MemAlignment < 2 * TypeAlignment) {
      NumFailedAlignmentCheck++;
    if (TII->hasUnscaledLdStOffset(MI))
      ++NumUnscaledPairCreated;
    MBBI = mergePairedInsns(MBBI, Paired, Flags);
    for (auto I = std::next(Prev); I != MBBI; I++)

bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
  MachineInstr &MI = *MBBI;
  if (auto NextI = mergeUpdateInsn(MBBI, Update, false,
  if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
  Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit, MergeEither);
  if (auto NextI = mergeUpdateInsn(MBBI, Update, true,
                                   true, MergeEither)) {
  int UnscaledOffset =
  Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
  if (auto NextI = mergeUpdateInsn(MBBI, Update, false,

  MachineInstr &MI = *MBBI;
  if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
  if (Update != E && (Offset & (Scale - 1)) == 0) {

bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
                                        bool EnableNarrowZeroStOpt) {
  if (EnableNarrowZeroStOpt)
  DefinedInBB.clear();
    if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))

bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
  for (auto &MBB : Fn) {

  return new AArch64LoadStoreOpt();
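// Editor's note (sketch, location assumed): the factory above is what the
// AArch64 pass pipeline invokes when scheduling this pass, roughly:
//   addPass(createAArch64LoadStoreOptimizationPass());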