#define DEBUG_TYPE "aarch64-ldst-opt"
STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
STATISTIC(NumPostFolded, "Number of post-index updates folded");
STATISTIC(NumPreFolded, "Number of pre-index updates folded");
STATISTIC(NumUnscaledPairCreated,
          "Number of load/store from unscaled generated");
STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
STATISTIC(NumFailedAlignmentCheck,
          "Number of load/store pair transformations that did not pass the "
          "alignment check");
STATISTIC(NumConstOffsetFolded,
          "Number of const offset of index address folded");

DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
              "Controls which pairs are considered for renaming");

// The LdStLimit limits how far we search for load/store pairs.
static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
                                   cl::init(20), cl::Hidden);

// The UpdateLimit limits how far we search for update instructions when we
// form pre-/post-index instructions.
static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
                                     cl::Hidden);

// The LdStConstLimit limits how far we search for const offset instructions
// when we form index address load/store instructions.
static cl::opt<unsigned> LdStConstLimit("aarch64-load-store-const-scan-limit",
                                        cl::init(10), cl::Hidden);

// Enable register renaming to find additional store pairing opportunities.
static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming",
                                    cl::init(true), cl::Hidden);
#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
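// Illustrative sketch of the two core rewrites this pass performs:
//
//   Pairing adjacent accesses:        Folding base-register updates:
//     ldr x0, [x2]                      add x1, x1, #16
//     ldr x3, [x2, #8]                  str x0, [x1]
//   becomes                           becomes
//     ldp x0, x3, [x2]                  str x0, [x1, #16]!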
using LdStPairFlags = struct LdStPairFlags {
  // If a matching instruction is found, MergeForward is set to true if the
  // merge is to remove the first instruction and replace the second with a
  // pair-wise insn, and false if the reverse is true.
  bool MergeForward = false;

  // SExtIdx gives the index of the result of the load pair that must be
  // extended. -1 means none, 0 means the first result, 1 the second.
  int SExtIdx = -1;

  // If not std::nullopt, RenameReg can be used to rename the result register
  // of the load/store being paired.
  std::optional<MCPhysReg> RenameReg;

  LdStPairFlags() = default;

  void setMergeForward(bool V = true) { MergeForward = V; }
  bool getMergeForward() const { return MergeForward; }

  void setSExtIdx(int V) { SExtIdx = V; }
  int getSExtIdx() const { return SExtIdx; }

  void setRenameReg(MCPhysReg R) { RenameReg = R; }
  void clearRenameReg() { RenameReg = std::nullopt; }
  std::optional<MCPhysReg> getRenameReg() const { return RenameReg; }
};
struct AArch64LoadStoreOpt : public MachineFunctionPass {
  // ...
  // Find a load/store that can be combined with the current instruction into
  // a pair; returns the matching instruction or MBB->end().
  MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
                                               LdStPairFlags &Flags,
                                               unsigned Limit,
                                               bool FindNarrowMerge);

  MachineBasicBlock::iterator
  mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                        MachineBasicBlock::iterator MergeMI,
                        const LdStPairFlags &Flags);

  MachineBasicBlock::iterator
  mergePairedInsns(MachineBasicBlock::iterator I,
                   MachineBasicBlock::iterator Paired,
                   const LdStPairFlags &Flags);

  MachineBasicBlock::iterator
  findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
                                int UnscaledOffset, unsigned Limit);

  bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI,
                            unsigned BaseReg, int Offset);

  bool isMatchingMovConstInsn(MachineInstr &MemMI, MachineInstr &MI,
                              unsigned IndexReg, unsigned &Offset);

  std::optional<MachineBasicBlock::iterator>
  mergeUpdateInsn(MachineBasicBlock::iterator I,
                  MachineBasicBlock::iterator Update, bool IsForward,
                  bool IsPreIdx, bool MergeEither);
  // ...

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }
};
char AArch64LoadStoreOpt::ID = 0;

INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt",
                AARCH64_LOAD_STORE_OPT_NAME, false, false)
static bool isNarrowStore(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
    return true;
  }
}
// These instructions set a memory tag and either keep the memory contents
// unchanged or set it to zero, ignoring the address part of the source
// register.
static bool isTagStore(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::STGi:
  case AArch64::STZGi:
  case AArch64::ST2Gi:
  case AArch64::STZ2Gi:
    return true;
  }
}
static unsigned getMatchingNonSExtOpcode(unsigned Opc,
                                         bool *IsValidLdStrOpc = nullptr) {
  if (IsValidLdStrOpc)
    *IsValidLdStrOpc = true;
  switch (Opc) {
  default:
    if (IsValidLdStrOpc)
      *IsValidLdStrOpc = false;
    return std::numeric_limits<unsigned>::max();
  case AArch64::STRDui:
  case AArch64::STURDi:
  case AArch64::STRDpre:
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::STRQpre:
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
  case AArch64::STRWui:
  case AArch64::STRWpre:
  case AArch64::STURWi:
  case AArch64::STRXui:
  case AArch64::STRXpre:
  case AArch64::STURXi:
  case AArch64::LDRDui:
  case AArch64::LDURDi:
  case AArch64::LDRDpre:
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::LDRQpre:
  case AArch64::LDRWui:
  case AArch64::LDURWi:
  case AArch64::LDRWpre:
  case AArch64::LDRXui:
  case AArch64::LDURXi:
  case AArch64::LDRXpre:
  case AArch64::STRSui:
  case AArch64::STURSi:
  case AArch64::STRSpre:
  case AArch64::LDRSui:
  case AArch64::LDURSi:
  case AArch64::LDRSpre:
    return Opc;
  case AArch64::LDRSWui:
    return AArch64::LDRWui;
  case AArch64::LDURSWi:
    return AArch64::LDURWi;
  case AArch64::LDRSWpre:
    return AArch64::LDRWpre;
  }
}
static unsigned getMatchingWideOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no wide equivalent!");
  case AArch64::STRBBui:
    return AArch64::STRHHui;
  case AArch64::STRHHui:
    return AArch64::STRWui;
  case AArch64::STURBBi:
    return AArch64::STURHHi;
  case AArch64::STURHHi:
    return AArch64::STURWi;
  case AArch64::STURWi:
    return AArch64::STURXi;
  case AArch64::STRWui:
    return AArch64::STRXui;
  }
}
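// For the widening map above: e.g. two adjacent "strb wzr" zero stores widen
// to a single "strh wzr", and two adjacent "str wzr" widen to "str xzr".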
static unsigned getMatchingPairOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pairwise equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STPSi;
  case AArch64::STRSpre:
    return AArch64::STPSpre;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STPDi;
  case AArch64::STRDpre:
    return AArch64::STPDpre;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STPQi;
  case AArch64::STRQpre:
    return AArch64::STPQpre;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STPWi;
  case AArch64::STRWpre:
    return AArch64::STPWpre;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STPXi;
  case AArch64::STRXpre:
    return AArch64::STPXpre;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDPSi;
  case AArch64::LDRSpre:
    return AArch64::LDPSpre;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDPDi;
  case AArch64::LDRDpre:
    return AArch64::LDPDpre;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDPQi;
  case AArch64::LDRQpre:
    return AArch64::LDPQpre;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDPWi;
  case AArch64::LDRWpre:
    return AArch64::LDPWpre;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDPXi;
  case AArch64::LDRXpre:
    return AArch64::LDPXpre;
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
    return AArch64::LDPSWi;
  case AArch64::LDRSWpre:
    return AArch64::LDPSWpre;
  }
}
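// Note that in the mapping above, scaled (e.g. LDRXui) and unscaled (LDURXi)
// forms deliberately map to the same pair opcode; only the immediate scaling
// of the resulting pair differs.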
static unsigned isMatchingStore(MachineInstr &LoadInst,
                                MachineInstr &StoreInst) {
  unsigned LdOpc = LoadInst.getOpcode();
  unsigned StOpc = StoreInst.getOpcode();
  switch (LdOpc) {
  default:
    llvm_unreachable("Unsupported load instruction!");
  case AArch64::LDRBBui:
    return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
           StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
  case AArch64::LDURBBi:
    return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
           StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
  case AArch64::LDRHHui:
    return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
           StOpc == AArch64::STRXui;
  case AArch64::LDURHHi:
    return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
           StOpc == AArch64::STURXi;
  case AArch64::LDRWui:
    return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
  case AArch64::LDURWi:
    return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
  case AArch64::LDRXui:
    return StOpc == AArch64::STRXui;
  case AArch64::LDURXi:
    return StOpc == AArch64::STURXi;
  }
}
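// The table above accepts any store at least as wide as the load: e.g. an
// "ldrh" can be satisfied by a preceding "strh", "str w", or "str x" to the
// same address, since each stores at least two bytes.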
static unsigned getPreIndexedOpcode(unsigned Opc) {
  // FIXME: We don't currently support creating pre-indexed loads/stores when
  // the load or store is the unscaled version.
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pre-indexed equivalent!");
  case AArch64::STRSui:
    return AArch64::STRSpre;
  case AArch64::STRDui:
    return AArch64::STRDpre;
  case AArch64::STRQui:
    return AArch64::STRQpre;
  case AArch64::STRBBui:
    return AArch64::STRBBpre;
  case AArch64::STRHHui:
    return AArch64::STRHHpre;
  case AArch64::STRWui:
    return AArch64::STRWpre;
  case AArch64::STRXui:
    return AArch64::STRXpre;
  case AArch64::LDRSui:
    return AArch64::LDRSpre;
  case AArch64::LDRDui:
    return AArch64::LDRDpre;
  case AArch64::LDRQui:
    return AArch64::LDRQpre;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpre;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpre;
  case AArch64::LDRWui:
    return AArch64::LDRWpre;
  case AArch64::LDRXui:
    return AArch64::LDRXpre;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpre;
  case AArch64::LDPSi:
    return AArch64::LDPSpre;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpre;
  case AArch64::LDPDi:
    return AArch64::LDPDpre;
  case AArch64::LDPQi:
    return AArch64::LDPQpre;
  case AArch64::LDPWi:
    return AArch64::LDPWpre;
  case AArch64::LDPXi:
    return AArch64::LDPXpre;
  case AArch64::STPSi:
    return AArch64::STPSpre;
  case AArch64::STPDi:
    return AArch64::STPDpre;
  case AArch64::STPQi:
    return AArch64::STPQpre;
  case AArch64::STPWi:
    return AArch64::STPWpre;
  case AArch64::STPXi:
    return AArch64::STPXpre;
  case AArch64::STGi:
    return AArch64::STGPreIndex;
  case AArch64::STZGi:
    return AArch64::STZGPreIndex;
  case AArch64::ST2Gi:
    return AArch64::ST2GPreIndex;
  case AArch64::STZ2Gi:
    return AArch64::STZ2GPreIndex;
  case AArch64::STGPi:
    return AArch64::STGPpre;
  }
}
static unsigned getBaseAddressOpcode(unsigned Opc) {
  // TODO: Add more index address loads/stores.
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no base address equivalent!");
  case AArch64::LDRBroX:
    return AArch64::LDRBui;
  case AArch64::LDRBBroX:
    return AArch64::LDRBBui;
  case AArch64::LDRSBXroX:
    return AArch64::LDRSBXui;
  case AArch64::LDRSBWroX:
    return AArch64::LDRSBWui;
  case AArch64::LDRHroX:
    return AArch64::LDRHui;
  case AArch64::LDRHHroX:
    return AArch64::LDRHHui;
  case AArch64::LDRSHXroX:
    return AArch64::LDRSHXui;
  case AArch64::LDRSHWroX:
    return AArch64::LDRSHWui;
  case AArch64::LDRWroX:
    return AArch64::LDRWui;
  case AArch64::LDRSroX:
    return AArch64::LDRSui;
  case AArch64::LDRSWroX:
    return AArch64::LDRSWui;
  case AArch64::LDRDroX:
    return AArch64::LDRDui;
  case AArch64::LDRXroX:
    return AArch64::LDRXui;
  case AArch64::LDRQroX:
    return AArch64::LDRQui;
  }
}
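// The mappings above are used when a register-offset (roX) load's index
// register turns out to hold a foldable constant: the access is rewritten to
// the unsigned-immediate (ui) form off a freshly computed base address (see
// mergeConstOffsetInsn below).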
static unsigned getPostIndexedOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no post-indexed equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STRSpost;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STRDpost;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STRQpost;
  case AArch64::STRBBui:
    return AArch64::STRBBpost;
  case AArch64::STRHHui:
    return AArch64::STRHHpost;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STRWpost;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STRXpost;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDRSpost;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDRDpost;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDRQpost;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpost;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpost;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDRWpost;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDRXpost;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpost;
  case AArch64::LDPSi:
    return AArch64::LDPSpost;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpost;
  case AArch64::LDPDi:
    return AArch64::LDPDpost;
  case AArch64::LDPQi:
    return AArch64::LDPQpost;
  case AArch64::LDPWi:
    return AArch64::LDPWpost;
  case AArch64::LDPXi:
    return AArch64::LDPXpost;
  case AArch64::STPSi:
    return AArch64::STPSpost;
  case AArch64::STPDi:
    return AArch64::STPDpost;
  case AArch64::STPQi:
    return AArch64::STPQpost;
  case AArch64::STPWi:
    return AArch64::STPWpost;
  case AArch64::STPXi:
    return AArch64::STPXpost;
  case AArch64::STGi:
    return AArch64::STGPostIndex;
  case AArch64::STZGi:
    return AArch64::STZGPostIndex;
  case AArch64::ST2Gi:
    return AArch64::ST2GPostIndex;
  case AArch64::STZ2Gi:
    return AArch64::STZ2GPostIndex;
  case AArch64::STGPi:
    return AArch64::STGPpost;
  }
}
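// Unlike the pre-indexed forms, the post-indexed forms above access memory at
// the current base address and write the updated address back afterwards:
//   str x0, [x1]
//   add x1, x1, #8     =>    str x0, [x1], #8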
// Returns whether FirstMI (a pre-indexed load/store) and MI (its non-writeback
// form) are candidates to be merged.
static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) {
  unsigned OpcA = FirstMI.getOpcode();
  unsigned OpcB = MI.getOpcode();

  switch (OpcA) {
  default:
    return false;
  case AArch64::STRSpre:
    return (OpcB == AArch64::STRSui) || (OpcB == AArch64::STURSi);
  case AArch64::STRDpre:
    return (OpcB == AArch64::STRDui) || (OpcB == AArch64::STURDi);
  case AArch64::STRQpre:
    return (OpcB == AArch64::STRQui) || (OpcB == AArch64::STURQi);
  case AArch64::STRWpre:
    return (OpcB == AArch64::STRWui) || (OpcB == AArch64::STURWi);
  case AArch64::STRXpre:
    return (OpcB == AArch64::STRXui) || (OpcB == AArch64::STURXi);
  case AArch64::LDRSpre:
    return (OpcB == AArch64::LDRSui) || (OpcB == AArch64::LDURSi);
  case AArch64::LDRDpre:
    return (OpcB == AArch64::LDRDui) || (OpcB == AArch64::LDURDi);
  case AArch64::LDRQpre:
    return (OpcB == AArch64::LDRQui) || (OpcB == AArch64::LDURQi);
  case AArch64::LDRWpre:
    return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi);
  case AArch64::LDRXpre:
    return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);
  case AArch64::LDRSWpre:
    return (OpcB == AArch64::LDRSWui) || (OpcB == AArch64::LDURSWi);
  }
}
// Returns the scale and offset range of pre/post indexed variants of MI.
static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
                                       int &MinOffset, int &MaxOffset) {
  bool IsPaired = AArch64InstrInfo::isPairedLdSt(MI);
  bool IsTagStore = isTagStore(MI);
  // ST*G and all paired ldst have the same scale in pre/post-indexed variants
  // as in the "unsigned offset" variant. All other pre/post indexed ldst
  // instructions are unscaled.
  Scale = (IsTagStore || IsPaired) ? AArch64InstrInfo::getMemScale(MI) : 1;

  if (IsPaired) {
    MinOffset = -64;
    MaxOffset = 63;
  } else {
    MinOffset = -256;
    MaxOffset = 255;
  }
}
static MachineOperand &getLdStRegOp(MachineInstr &MI,
                                    unsigned PairedRegOp = 0) {
  assert(PairedRegOp < 2 && "Unexpected register operand idx.");
  bool IsPreLdSt = AArch64InstrInfo::isPreLdSt(MI);
  if (IsPreLdSt)
    PairedRegOp += 1;
  unsigned Idx =
      AArch64InstrInfo::isPairedLdSt(MI) || IsPreLdSt ? PairedRegOp : 0;
  return MI.getOperand(Idx);
}
static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
                                  MachineInstr &StoreInst,
                                  const AArch64InstrInfo *TII) {
  assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
  int LoadSize = TII->getMemScale(LoadInst);
  int StoreSize = TII->getMemScale(StoreInst);
  int UnscaledStOffset =
      TII->hasUnscaledLdStOffset(StoreInst)
          ? AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm()
          : AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm() * StoreSize;
  int UnscaledLdOffset =
      TII->hasUnscaledLdStOffset(LoadInst)
          ? AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm()
          : AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm() * LoadSize;
  return (UnscaledStOffset <= UnscaledLdOffset) &&
         (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
}
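// Worked example for the containment check above: a 4-byte load at byte
// offset 4 is fully covered by an 8-byte store at byte offset 0, since
// 0 <= 4 and 4 + 4 <= 0 + 8.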
static bool isPromotableZeroStoreInst(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||
          isNarrowStore(Opc)) &&
         getLdStRegOp(MI).getReg() == AArch64::WZR;
}
static bool isPromotableLoadFromStore(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::LDRBBui:
  case AArch64::LDRHHui:
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  // Unscaled instructions.
  case AArch64::LDURBBi:
  case AArch64::LDURHHi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
    return true;
  }
}
static bool isMergeableLdStUpdate(MachineInstr &MI, AArch64FunctionInfo &AFI) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
  case AArch64::STRXui:
  case AArch64::STRWui:
  case AArch64::STRHHui:
  case AArch64::STRBBui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
  case AArch64::LDRXui:
  case AArch64::LDRWui:
  case AArch64::LDRHHui:
  case AArch64::LDRBBui:
  case AArch64::STGi:
  case AArch64::STZGi:
  case AArch64::ST2Gi:
  case AArch64::STZ2Gi:
  // Unscaled instructions.
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURWi:
  case AArch64::STURXi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  // Paired instructions.
  case AArch64::LDPSi:
  case AArch64::LDPSWi:
    // ... (remaining paired opcodes elided)
    // Make sure this is a reg+imm (as opposed to an address reloc).
    return AArch64InstrInfo::getLdStOffsetOp(MI).isImm();
  }
}
static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    return false;
  // Scaled instructions. TODO: Add more index address stores.
  case AArch64::LDRBroX:
  case AArch64::LDRBBroX:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSBWroX:
    Scale = 1;
    return true;
  case AArch64::LDRHroX:
  case AArch64::LDRHHroX:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSHWroX:
    Scale = 2;
    return true;
  case AArch64::LDRWroX:
  case AArch64::LDRSroX:
  case AArch64::LDRSWroX:
    Scale = 4;
    return true;
  case AArch64::LDRDroX:
  case AArch64::LDRXroX:
    Scale = 8;
    return true;
  case AArch64::LDRQroX:
    Scale = 16;
    return true;
  }
}
static bool isRewritableImplicitDef(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case AArch64::ORRWrs:
  case AArch64::ADDWri:
    return true;
  }
}
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                                           MachineBasicBlock::iterator MergeMI,
                                           const LdStPairFlags &Flags) {
  assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
         "Expected promotable zero stores.");

  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
  // If NextI is the second of the two instructions to be merged, we need
  // to skip one further. Either way we merge will invalidate the iterator,
  // and we don't need to scan the new instruction, as it's a pairwise
  // instruction, which we're not considering for further action anyway.
  if (NextI == MergeMI)
    NextI = next_nodbg(NextI, E);

  unsigned Opc = I->getOpcode();
  unsigned MergeMIOpc = MergeMI->getOpcode();
  bool IsScaled = !TII->hasUnscaledLdStOffset(Opc);
  bool IsMergedMIScaled = !TII->hasUnscaledLdStOffset(MergeMIOpc);
  int OffsetStride = IsScaled ? TII->getMemScale(*I) : 1;
  int MergeMIOffsetStride = IsMergedMIScaled ? TII->getMemScale(*MergeMI) : 1;

  bool MergeForward = Flags.getMergeForward();
  // Insert our new paired instruction after whichever of the paired
  // instructions MergeForward indicates.
  MachineBasicBlock::iterator InsertionPoint = MergeForward ? MergeMI : I;
  // Also based on MergeForward is from where we copy the base register operand
  // so we get the flags compatible with the input code.
  const MachineOperand &BaseRegOp =
      MergeForward ? AArch64InstrInfo::getLdStBaseOp(*MergeMI)
                   : AArch64InstrInfo::getLdStBaseOp(*I);

  // Which register is Rt and which is Rt2 depends on the offset order.
  int64_t IOffsetInBytes =
      AArch64InstrInfo::getLdStOffsetOp(*I).getImm() * OffsetStride;
  int64_t MIOffsetInBytes =
      AArch64InstrInfo::getLdStOffsetOp(*MergeMI).getImm() *
      MergeMIOffsetStride;
  // Select final offset based on the offset order.
  int64_t OffsetImm;
  if (IOffsetInBytes > MIOffsetInBytes)
    OffsetImm = MIOffsetInBytes;
  else
    OffsetImm = IOffsetInBytes;

  int NewOpcode = getMatchingWideOpcode(Opc);
  bool FinalIsScaled = !TII->hasUnscaledLdStOffset(NewOpcode);

  // Adjust final offset if the result opcode is a scaled store.
  if (FinalIsScaled) {
    int NewOffsetStride = FinalIsScaled ? TII->getMemScale(NewOpcode) : 1;
    assert(((OffsetImm % NewOffsetStride) == 0) &&
           "Offset should be a multiple of the store memory scale");
    OffsetImm = OffsetImm / NewOffsetStride;
  }

  // Construct the new instruction.
  DebugLoc DL = I->getDebugLoc();
  MachineBasicBlock *MBB = I->getParent();
  MachineInstrBuilder MIB;
  MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(NewOpcode))
            .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
            .add(BaseRegOp)
            .addImm(OffsetImm)
            .cloneMergedMemRefs({&*I, &*MergeMI})
            .setMIFlags(I->mergeFlagsWith(*MergeMI));
  (void)MIB;

  LLVM_DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n    ");
  // ...

  // Erase the old instructions.
  I->eraseFromParent();
  MergeMI->eraseFromParent();
  return NextI;
}
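// For illustration: "str wzr, [x0]" followed by "str wzr, [x0, #4]" becomes a
// single "str xzr, [x0]", clearing all eight bytes in one instruction.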
// Apply Fn to all instructions starting from MI until a def of DefReg is
// encountered (or Limit instructions have been scanned).
static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg,
                              const TargetRegisterInfo *TRI, unsigned Limit,
                              std::function<bool(MachineInstr &, bool)> &Fn) {
  auto MBB = MI.getParent();
  for (MachineInstr &I :
       instructionsWithoutDebug(MI.getIterator(), MBB->instr_end())) {
    if (!Limit)
      return false;
    --Limit;

    bool isDef = any_of(I.operands(), [DefReg, TRI](MachineOperand &MOP) {
      return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
             TRI->regsOverlap(MOP.getReg(), DefReg);
    });
    if (!Fn(I, isDef))
      return false;
    if (isDef)
      break;
  }
  return true;
}
static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units,
                                   const TargetRegisterInfo *TRI) {
  // First remove all killed uses, then add all defs: within one instruction a
  // register can be both killed as a use and redefined.
  for (const MachineOperand &MOP : phys_regs_and_masks(MI))
    if (MOP.isReg() && MOP.isKill())
      Units.removeReg(MOP.getReg());

  for (const MachineOperand &MOP : phys_regs_and_masks(MI))
    if (MOP.isReg() && !MOP.isKill())
      Units.addReg(MOP.getReg());
}
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator Paired,
                                      const LdStPairFlags &Flags) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
  if (NextI == Paired)
    NextI = next_nodbg(NextI, E);

  int SExtIdx = Flags.getSExtIdx();
  unsigned Opc =
      SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
  bool IsUnscaled = TII->hasUnscaledLdStOffset(Opc);
  int OffsetStride = IsUnscaled ? TII->getMemScale(*I) : 1;

  bool MergeForward = Flags.getMergeForward();

  std::optional<MCPhysReg> RenameReg = Flags.getRenameReg();
  if (RenameReg) {
    MCRegister RegToRename = getLdStRegOp(*I).getReg();
    DefinedInBB.addReg(*RenameReg);

    // Return the sub/super register for RenameReg, matching the size of
    // the original register.
    auto GetMatchingSubReg =
        [this, RenameReg](const TargetRegisterClass *C) -> MCPhysReg {
      for (MCPhysReg SubOrSuper :
           TRI->sub_and_superregs_inclusive(*RenameReg))
        if (C->contains(SubOrSuper))
          return SubOrSuper;
      llvm_unreachable("Should have found matching sub or super register!");
    };

    std::function<bool(MachineInstr &, bool)> UpdateMIs =
        [this, RegToRename, GetMatchingSubReg, MergeForward](MachineInstr &MI,
                                                             bool IsDef) {
          if (IsDef) {
            bool SeenDef = false;
            for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {
              MachineOperand &MOP = MI.getOperand(OpIdx);
              // Rename the first explicit definition and all implicit
              // definitions matching RegToRename.
              if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                  (!MergeForward || !SeenDef ||
                   (MOP.isDef() && MOP.isImplicit())) &&
                  TRI->regsOverlap(MOP.getReg(), RegToRename)) {
                assert((MOP.isImplicit() ||
                        (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
                       "Need renamable operands");
                Register MatchingReg;
                if (const TargetRegisterClass *RC =
                        MI.getRegClassConstraint(OpIdx, TII, TRI))
                  MatchingReg = GetMatchingSubReg(RC);
                else
                  MatchingReg = GetMatchingSubReg(
                      TRI->getMinimalPhysRegClass(MOP.getReg()));
                MOP.setReg(MatchingReg);
                SeenDef = true;
              }
            }
          } else {
            for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {
              MachineOperand &MOP = MI.getOperand(OpIdx);
              if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                  TRI->regsOverlap(MOP.getReg(), RegToRename)) {
                assert((MOP.isImplicit() ||
                        (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
                       "Need renamable operands");
                Register MatchingReg;
                if (const TargetRegisterClass *RC =
                        MI.getRegClassConstraint(OpIdx, TII, TRI))
                  MatchingReg = GetMatchingSubReg(RC);
                else
                  MatchingReg = GetMatchingSubReg(
                      TRI->getMinimalPhysRegClass(MOP.getReg()));
                assert(MatchingReg != AArch64::NoRegister &&
                       "Cannot find matching regs for renaming");
                MOP.setReg(MatchingReg);
              }
            }
          }
          return true;
        };
    forAllMIsUntilDef(MergeForward ? *I : *std::prev(Paired), RegToRename, TRI,
                      UINT32_MAX, UpdateMIs);

#if !defined(NDEBUG)
    // For forward merging stores, make sure the rename register was not used
    // between the paired instructions: that would trash the content before
    // the new paired instruction. For backward merging loads, check the
    // renamed register instead.
    MCPhysReg RegToCheck = *RenameReg;
    if (!MergeForward)
      RegToCheck = RegToRename;
    for (auto &MI : iterator_range<MachineBasicBlock::iterator>(
             MergeForward ? std::next(I) : I,
             MergeForward ? std::next(Paired) : Paired))
      assert(all_of(MI.operands(),
                    [this, RegToCheck](const MachineOperand &MOP) {
                      return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
                             MOP.isUndef() ||
                             !TRI->regsOverlap(MOP.getReg(), RegToCheck);
                    }) &&
             "Rename register used between paired instruction, trashing the "
             "content");
#endif
  }

  // Insert our new paired instruction after whichever of the paired
  // instructions MergeForward indicates.
  MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
  const MachineOperand &BaseRegOp =
      MergeForward ? AArch64InstrInfo::getLdStBaseOp(*Paired)
                   : AArch64InstrInfo::getLdStBaseOp(*I);

  int Offset = AArch64InstrInfo::getLdStOffsetOp(*I).getImm();
  int PairedOffset = AArch64InstrInfo::getLdStOffsetOp(*Paired).getImm();
  bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode());
  if (IsUnscaled != PairedIsUnscaled) {
    // We're trying to pair instructions that differ in how they are scaled.
    // If FirstMI is scaled then scale the offset of MI accordingly.
    // Otherwise, do the opposite (i.e., make MI's offset unscaled).
    int MemSize = TII->getMemScale(*Paired);
    if (PairedIsUnscaled) {
      // If the unscaled offset isn't a multiple of the MemSize, we can't
      // pair the operations together.
      assert(!(PairedOffset % TII->getMemScale(*Paired)) &&
             "Offset should be a multiple of the stride!");
      PairedOffset /= MemSize;
    } else {
      PairedOffset *= MemSize;
    }
  }

  // Which register is Rt and which is Rt2 depends on the offset order.
  // However, for pre load/stores the Rt should be the one of the pre
  // load/store.
  MachineInstr *RtMI, *Rt2MI;
  if (Offset == PairedOffset + OffsetStride &&
      !AArch64InstrInfo::isPreLdSt(*I)) {
    RtMI = &*Paired;
    Rt2MI = &*I;
    // Here we swapped the assumption made for SExtIdx: we turn
    // ldp I, Paired into ldp Paired, I. Update the index accordingly.
    if (SExtIdx != -1)
      SExtIdx = (SExtIdx + 1) % 2;
  } else {
    RtMI = &*I;
    Rt2MI = &*Paired;
  }
  int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm();
  // Scale the immediate offset, if necessary.
  if (TII->hasUnscaledLdStOffset(RtMI->getOpcode())) {
    assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
           "Unscaled offset cannot be scaled.");
    OffsetImm /= TII->getMemScale(*RtMI);
  }

  // Construct the new instruction.
  MachineInstrBuilder MIB;
  DebugLoc DL = I->getDebugLoc();
  MachineBasicBlock *MBB = I->getParent();
  MachineOperand RegOp0 = getLdStRegOp(*RtMI);
  MachineOperand RegOp1 = getLdStRegOp(*Rt2MI);
  MachineOperand &PairedRegOp = RtMI == &*Paired ? RegOp0 : RegOp1;
  // Kill flags may become invalid when moving stores for pairing.
  if (RegOp0.isUse()) {
    if (!MergeForward) {
      // Clear kill flags on store if moving upwards.
      for (auto It = std::next(I); It != Paired && PairedRegOp.isKill(); ++It)
        if (It->readsRegister(PairedRegOp.getReg(), TRI))
          PairedRegOp.setIsKill(false);
    } else {
      // Clear kill flags of the first store's register between I and Paired.
      Register Reg = getLdStRegOp(*I).getReg();
      for (MachineInstr &MI : make_range(std::next(I), Paired))
        MI.clearRegisterKills(Reg, TRI);
    }
  }

  // ...
  MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingPairOpcode(Opc)))
            .add(RegOp0)
            .add(RegOp1)
            .add(BaseRegOp)
            .addImm(OffsetImm)
            .cloneMergedMemRefs({&*I, &*Paired})
            .setMIFlags(I->mergeFlagsWith(*Paired));

  LLVM_DEBUG(dbgs() << "Creating pair load/store. Replacing instructions:\n ");

  if (SExtIdx != -1) {
    // Generate the sign extension for the proper result of the ldp: rewrite
    // the pair to define the W sub-register and sign-extend it back into the
    // X register.
    MachineOperand &DstMO = MIB->getOperand(SExtIdx);
    Register DstRegX = DstMO.getReg();
    // Get the W variant of that register.
    Register DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
    // ...
  }

  if (MergeForward)
    for (const MachineOperand &MOP : phys_regs_and_masks(*I))
      if (MOP.isReg() && MOP.isKill())
        DefinedInBB.addReg(MOP.getReg());

  // Erase the old instructions.
  I->eraseFromParent();
  Paired->eraseFromParent();

  return NextI;
}
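// Illustrative result of the SExtIdx != -1 path above (a sketch): pairing a
// sign-extending load with a plain load yields a 32-bit pair plus an explicit
// sign extension of one result, e.g.
//   ldrsw x0, [x2]              ldp  w0, w1, [x2]
//   ldr   w1, [x2, #4]    =>    sbfm x0, x0, #0, #31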
MachineBasicBlock::iterator
AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
                                          MachineBasicBlock::iterator StoreI) {
  MachineBasicBlock::iterator NextI =
      next_nodbg(LoadI, LoadI->getParent()->end());

  int LoadSize = TII->getMemScale(*LoadI);
  int StoreSize = TII->getMemScale(*StoreI);
  Register LdRt = getLdStRegOp(*LoadI).getReg();
  const MachineOperand &StMO = getLdStRegOp(*StoreI);
  Register StRt = getLdStRegOp(*StoreI).getReg();
  bool IsStoreXReg =
      TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);

  assert((IsStoreXReg ||
          TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
         "Unexpected RegClass");

  MachineInstr *BitExtMI;
  if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
    // Remove the load if its destination register is the same register as the
    // stored value.
    if (StRt == LdRt && LoadSize == 8) {
      for (MachineInstr &MI : make_range(StoreI->getIterator(),
                                         LoadI->getIterator())) {
        if (MI.killsRegister(StRt, TRI)) {
          MI.clearRegisterKills(StRt, TRI);
          break;
        }
      }
      LLVM_DEBUG(dbgs() << "Remove load instruction:\n    ");
      LoadI->eraseFromParent();
      return NextI;
    }
    // Replace the load with a mov if load and store are the same size.
    BitExtMI =
        BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
            .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
            .add(StMO)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
            .setMIFlags(LoadI->getFlags());
  } else {
    // FIXME: Currently we disable this transformation in big-endian targets.
    if (!Subtarget->isLittleEndian())
      return NextI;
    bool IsUnscaled = TII->hasUnscaledLdStOffset(*LoadI);
    assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) &&
           "Unsupported ld/st match");
    assert(LoadSize <= StoreSize && "Invalid load size");
    int UnscaledLdOffset =
        IsUnscaled
            ? AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm()
            : AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm() * LoadSize;
    int UnscaledStOffset =
        IsUnscaled
            ? AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm()
            : AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm() * StoreSize;
    int Width = LoadSize * 8;
    Register DestReg =
        IsStoreXReg ? Register(TRI->getMatchingSuperReg(
                          LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))
                    : LdRt;

    assert((UnscaledLdOffset >= UnscaledStOffset &&
            (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
           "Invalid offset");

    int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
    int Imms = Immr + Width - 1;
    if (UnscaledLdOffset == UnscaledStOffset) {
      uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N
                                | (Immr << 6)                 // immr
                                | (Imms << 0);                // imms

      BitExtMI =
          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                  TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
                  DestReg)
              .add(StMO)
              .addImm(AndMaskEncoded)
              .setMIFlags(LoadI->getFlags());
    } else if (IsStoreXReg && Imms == 31) {
      // Use the 32 bit variant of UBFM if it's the LSR alias of the
      // instruction.
      assert(Immr <= Imms && "Expected LSR alias of UBFM");
      BitExtMI = BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                         TII->get(AArch64::UBFMWri),
                         TRI->getSubReg(DestReg, AArch64::sub_32))
                     .addReg(TRI->getSubReg(StRt, AArch64::sub_32))
                     .addImm(Immr)
                     .addImm(Imms)
                     .setMIFlags(LoadI->getFlags());
    } else {
      BitExtMI =
          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                  TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
                  DestReg)
              .add(StMO)
              .addImm(Immr)
              .addImm(Imms)
              .setMIFlags(LoadI->getFlags());
    }
  }

  // Clear kill flags between store and load.
  for (MachineInstr &MI : make_range(StoreI->getIterator(),
                                     BitExtMI->getIterator()))
    if (MI.killsRegister(StRt, TRI)) {
      MI.clearRegisterKills(StRt, TRI);
      break;
    }

  LLVM_DEBUG(dbgs() << "Promoting load by replacing :\n    ");
  // ...

  // Erase the old load instruction.
  LoadI->eraseFromParent();
  return NextI;
}
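// Illustrative results of the promotion above (a sketch, little-endian,
// same-offset cases):
//   str  w1, [x0]               str w1, [x0]
//   ldr  w2, [x0]         =>    mov w2, w1            (same size, ORRWrs)
//
//   str  w1, [x0]               str w1, [x0]
//   ldrh w2, [x0]         =>    and w2, w1, #0xffff   (contained load, ANDWri)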
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
  // Convert the byte-offset used by unscaled into an "element" offset used
  // by the scaled pair load/store instructions.
  if (IsUnscaled) {
    // If the byte offset isn't a multiple of the stride, there's no point
    // trying to match it.
    if (Offset % OffsetStride)
      return false;
    Offset /= OffsetStride;
  }
  return Offset <= 63 && Offset >= -64;
}

// Do alignment, specialized to power of 2 and for signed ints, avoiding a
// C-style cast from uint64_t to int when using the alignTo from
// include/llvm/Support/MathExtras.h.
static int alignTo(int Num, int PowOf2) {
  return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
}
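// Worked example: alignTo(5, 4) == 8 and alignTo(8, 4) == 8. Adding
// PowOf2 - 1 rounds up past the next boundary, and the mask clears the low
// bits back down to it.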
static bool mayAlias(MachineInstr &MIa,
                     SmallVectorImpl<MachineInstr *> &MemInsns,
                     AliasAnalysis *AA) {
  for (MachineInstr *MIb : MemInsns) {
    if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false)) {
      LLVM_DEBUG(dbgs() << "Aliasing with: "; MIb->dump());
      return true;
    }
  }

  LLVM_DEBUG(dbgs() << "No aliases found\n");
  return false;
}
bool AArch64LoadStoreOpt::findMatchingStore(
    MachineBasicBlock::iterator I, unsigned Limit,
    MachineBasicBlock::iterator &StoreI) {
  MachineBasicBlock::iterator B = I->getParent()->begin();
  MachineBasicBlock::iterator MBBI = I;
  MachineInstr &LoadMI = *I;
  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(LoadMI).getReg();

  // If the load is the first instruction in the block, there's obviously
  // not any matching store.
  if (MBBI == B)
    return false;

  // Track which register units have been modified and used between the first
  // insn and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();

  unsigned Count = 0;
  do {
    MBBI = prev_nodbg(MBBI, B);
    MachineInstr &MI = *MBBI;

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (!MI.isTransient())
      ++Count;

    // ...
    // Update modified / used register units.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is modified, we have no match, so
    // return early.
    if (!ModifiedRegUnits.available(BaseReg))
      return false;
    // ...
  } while (MBBI != B && Count < Limit);
  return false;
}
static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
                                       LdStPairFlags &Flags,
                                       const AArch64InstrInfo *TII) {
  // If this is volatile or if pairing is suppressed, not a candidate.
  if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
    return false;

  // We should have already checked FirstMI for pair suppression and
  // volatility.
  assert(!FirstMI.hasOrderedMemoryRef() &&
         !TII->isLdStPairSuppressed(FirstMI) &&
         "FirstMI shouldn't get here if either of these checks are true.");

  unsigned OpcA = FirstMI.getOpcode();
  unsigned OpcB = MI.getOpcode();

  // Opcodes match: nothing more to check.
  if (OpcA == OpcB)
    return true;

  // Try to match a sign-extended load/store with a zero-extended load/store.
  bool IsValidLdStrOpc, PairIsValidLdStrOpc;
  unsigned NonSExtOpc = getMatchingNonSExtOpcode(OpcA, &IsValidLdStrOpc);
  assert(IsValidLdStrOpc &&
         "Given Opc should be a Load or Store with an immediate");
  // OpcA will be the first instruction in the pair.
  if (NonSExtOpc == getMatchingNonSExtOpcode(OpcB, &PairIsValidLdStrOpc)) {
    Flags.setSExtIdx(NonSExtOpc == (unsigned)OpcA ? 1 : 0);
    return true;
  }

  // If the second instruction isn't even a mergable/pairable load/store, bail
  // out.
  if (!PairIsValidLdStrOpc)
    return false;

  // FIXME: We don't support merging narrow stores with mixed scaled/unscaled
  // offsets.
  if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
    return false;

  // The STR<S,D,Q,W,X>pre - STR<S,D,Q,W,X>ui and
  // LDR<S,D,Q,W,X,SW>pre - LDR<S,D,Q,W,X,SW>ui pairs are also candidates.
  if (isPreLdStPairCandidate(FirstMI, MI))
    return true;

  // Try to match an unscaled load/store with a scaled load/store.
  return TII->hasUnscaledLdStOffset(OpcA) !=
             TII->hasUnscaledLdStOffset(OpcB) &&
         getMatchingPairOpcode(OpcA) == getMatchingPairOpcode(OpcB);
}
static bool canRenameMOP(const MachineOperand &MOP,
                         const TargetRegisterInfo *TRI) {
  if (MOP.isReg()) {
    auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg());
    // Renaming registers with multiple disjunct sub-registers (e.g. the
    // result of a LD3) means that all sub-registers are renamed, potentially
    // impacting other instructions we did not check. Bail out.
    if (RegClass->HasDisjunctSubRegs && RegClass->CoveredBySubRegs &&
        (TRI->getSubRegisterClass(RegClass, AArch64::dsub0) ||
         TRI->getSubRegisterClass(RegClass, AArch64::qsub0) ||
         TRI->getSubRegisterClass(RegClass, AArch64::zsub0))) {
      LLVM_DEBUG(
          dbgs()
          << "  Cannot rename operands with multiple disjunct subregisters ("
          << MOP << ")\n");
      return false;
    }

    // We cannot rename arbitrary implicit-defs: the specific rule to rewrite
    // them must be known, e.g. for ORRWrs the implicit-def corresponds to the
    // result register.
    if (MOP.isImplicit() && MOP.isDef()) {
      if (!isRewritableImplicitDef(MOP.getParent()->getOpcode()))
        return false;
      return TRI->isSuperOrSubRegisterEq(
          MOP.getParent()->getOperand(0).getReg(), MOP.getReg());
    }
  }
  return MOP.isImplicit() ||
         (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied());
}
static bool canRenameUpToDef(MachineInstr &FirstMI,
                             LiveRegUnits &UsedInBetween,
                             SmallPtrSetImpl<const TargetRegisterClass *>
                                 &RequiredClasses,
                             const TargetRegisterInfo *TRI) {
  const auto RegToRename = getLdStRegOp(FirstMI).getReg();
  // For now, we only rename if the store operand gets killed at the store.
  if (!getLdStRegOp(FirstMI).isKill() &&
      !any_of(FirstMI.operands(),
              [TRI, RegToRename](const MachineOperand &MOP) {
                return MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                       MOP.isImplicit() && MOP.isKill() &&
                       TRI->regsOverlap(RegToRename, MOP.getReg());
              })) {
    LLVM_DEBUG(dbgs() << "  Operand not killed at " << FirstMI << "\n");
    return false;
  }

  bool FoundDef = false;
  // For each instruction between FirstMI and the previous def of RegToRename,
  // check that RegToRename can be renamed in the instruction and collect the
  // register classes required for the rename register.
  std::function<bool(MachineInstr &, bool)> CheckMIs = [&](MachineInstr &MI,
                                                           bool IsDef) {
    // ...
    FoundDef = IsDef;
    if (FoundDef) {
      // Pseudo instructions might not generate any code in the end; we would
      // be left without a correct def for the rename register.
      if (MI.isPseudo()) {
        LLVM_DEBUG(dbgs() << "  Cannot rename pseudo/bundle instruction\n");
        return false;
      }
      for (auto &MOP : MI.operands()) {
        if (!MOP.isReg() || !MOP.isDef() || MOP.isDebug() || !MOP.getReg() ||
            !TRI->regsOverlap(MOP.getReg(), RegToRename))
          continue;
        if (!canRenameMOP(MOP, TRI)) {
          LLVM_DEBUG(dbgs() << "  Cannot rename " << MOP << " in " << MI);
          return false;
        }
        RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
      }
      return true;
    } else {
      for (auto &MOP : MI.operands()) {
        if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
            !TRI->regsOverlap(MOP.getReg(), RegToRename))
          continue;
        if (!canRenameMOP(MOP, TRI)) {
          LLVM_DEBUG(dbgs() << "  Cannot rename " << MOP << " in " << MI);
          return false;
        }
        RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
      }
    }
    return true;
  };

  if (!forAllMIsUntilDef(FirstMI, RegToRename, TRI, LdStLimit, CheckMIs))
    return false;

  if (!FoundDef) {
    LLVM_DEBUG(dbgs() << "  Did not find definition for register in BB\n");
    return false;
  }
  return true;
}
static bool canRenameUntilSecondLoad(
    MachineInstr &FirstLoad, MachineInstr &SecondLoad,
    LiveRegUnits &UsedInBetween,
    SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
    const TargetRegisterInfo *TRI) {
  if (FirstLoad.isPseudo())
    return false;

  UsedInBetween.accumulate(FirstLoad);
  auto RegToRename = getLdStRegOp(FirstLoad).getReg();
  bool Success = std::all_of(
      FirstLoad.getIterator(), SecondLoad.getIterator(),
      [&](MachineInstr &MI) {
        LLVM_DEBUG(dbgs() << "Checking " << MI);
        // Currently we do not try to rename across frame-setup instructions.
        if (MI.getFlag(MachineInstr::FrameSetup)) {
          LLVM_DEBUG(dbgs() << "  Cannot rename framesetup instructions "
                            << "currently\n");
          return false;
        }

        for (auto &MOP : MI.operands()) {
          if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
              !TRI->regsOverlap(MOP.getReg(), RegToRename))
            continue;
          if (!canRenameMOP(MOP, TRI)) {
            LLVM_DEBUG(dbgs() << "  Cannot rename " << MOP << " in " << MI);
            return false;
          }
          RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
        }

        return true;
      });
  return Success;
}
static std::optional<MCPhysReg> tryToFindRegisterToRename(
    const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB,
    LiveRegUnits &UsedInBetween,
    SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
    const TargetRegisterInfo *TRI) {
  const MachineRegisterInfo &RegInfo = MF.getRegInfo();

  // Checks if any sub- or super-register of PR is callee saved.
  auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) {
    return any_of(TRI->sub_and_superregs_inclusive(PR),
                  [&MF, TRI](MCPhysReg SubOrSuper) {
                    return TRI->isCalleeSavedPhysReg(SubOrSuper, MF);
                  });
  };

  // Check if PR or one of its sub- or super-registers can be used for all
  // required register classes.
  auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) {
    return all_of(RequiredClasses, [PR, TRI](const TargetRegisterClass *C) {
      return any_of(
          TRI->sub_and_superregs_inclusive(PR),
          [C](MCPhysReg SubOrSuper) { return C->contains(SubOrSuper); });
    });
  };

  auto *RegClass = TRI->getMinimalPhysRegClass(Reg);
  for (const MCPhysReg &PR : *RegClass) {
    if (DefinedInBB.available(PR) && UsedInBetween.available(PR) &&
        !RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) &&
        CanBeUsedForAllClasses(PR)) {
      DefinedInBB.addReg(PR);
      LLVM_DEBUG(dbgs() << "Found rename register " << printReg(PR, TRI)
                        << "\n");
      return {PR};
    }
  }
  LLVM_DEBUG(dbgs() << "No rename register found from "
                    << TRI->getRegClassName(RegClass) << "\n");
  return std::nullopt;
}
static std::optional<MCPhysReg> findRenameRegForSameLdStRegPair(
    std::optional<bool> MaybeCanRename, MachineInstr &FirstMI,
    MachineInstr &MI, Register Reg, LiveRegUnits &DefinedInBB,
    LiveRegUnits &UsedInBetween,
    SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
    const TargetRegisterInfo *TRI) {
  std::optional<MCPhysReg> RenameReg;
  if (!DebugCounter::shouldExecute(RegRenamingCounter))
    return RenameReg;
  // ...
  const bool IsLoad = FirstMI.mayLoad();

  if (!MaybeCanRename) {
    if (IsLoad)
      MaybeCanRename = {canRenameUntilSecondLoad(FirstMI, MI, UsedInBetween,
                                                 RequiredClasses, TRI)};
    else
      MaybeCanRename = {
          canRenameUpToDef(FirstMI, UsedInBetween, RequiredClasses, TRI)};
  }

  if (*MaybeCanRename) {
    RenameReg = tryToFindRegisterToRename(*FirstMI.getMF(), Reg, DefinedInBB,
                                          UsedInBetween, RequiredClasses, TRI);
  }
  return RenameReg;
}
MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
                                      LdStPairFlags &Flags, unsigned Limit,
                                      bool FindNarrowMerge) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator MBBI = I;
  MachineBasicBlock::iterator MBBIWithRenameReg;
  MachineInstr &FirstMI = *I;
  MBBI = next_nodbg(MBBI, E);

  bool MayLoad = FirstMI.mayLoad();
  bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI);
  Register Reg = getLdStRegOp(FirstMI).getReg();
  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(FirstMI).getReg();
  int Offset = AArch64InstrInfo::getLdStOffsetOp(FirstMI).getImm();
  int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;
  bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);

  std::optional<bool> MaybeCanRename;
  if (!EnableRenaming)
    MaybeCanRename = {false};

  SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;
  LiveRegUnits UsedInBetween;
  UsedInBetween.init(*TRI);

  Flags.clearRenameReg();

  // Track which register units have been modified and used between the first
  // insn (inclusive) and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();

  // Remember any instructions that read/write memory between FirstMI and MI.
  SmallVector<MachineInstr *, 4> MemInsns;

  for (unsigned Count = 0; MBBI != E && Count < Limit;
       MBBI = next_nodbg(MBBI, E)) {
    MachineInstr &MI = *MBBI;

    UsedInBetween.accumulate(MI);

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (!MI.isTransient())
      ++Count;

    Flags.setSExtIdx(-1);
    if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&
        AArch64InstrInfo::getLdStOffsetOp(MI).isImm()) {
      assert(MI.mayLoadOrStore() && "Expected memory operation.");
      // If we've found another instruction with the same opcode, check to see
      // if the base and offset are compatible with our starting instruction.
      Register MIBaseReg = AArch64InstrInfo::getLdStBaseOp(MI).getReg();
      int MIOffset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
      bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI);
      if (IsUnscaled != MIIsUnscaled) {
        // We're trying to pair instructions that differ in how they are
        // scaled. If FirstMI is scaled then scale the offset of MI
        // accordingly. Otherwise, do the opposite (i.e., make MI's offset
        // unscaled).
        int MemSize = TII->getMemScale(MI);
        if (MIIsUnscaled) {
          // If the unscaled offset isn't a multiple of the MemSize, we can't
          // pair the operations together: bail and keep looking.
          if (MIOffset % MemSize) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
          MIOffset /= MemSize;
        } else {
          MIOffset *= MemSize;
        }
      }

      bool IsPreLdSt = isPreLdStPairCandidate(FirstMI, MI);

      if (BaseReg == MIBaseReg) {
        // If the offset of the second ld/st is the base register of the
        // first, we have a candidate for a pre-indexed pair only if the
        // offset equals the mem scale and the base register is not used
        // or modified in between.
        if (IsPreLdSt) {
          bool IsOutOfBounds = MIOffset != TII->getMemScale(MI);
          bool IsBaseRegUsed = !UsedRegUnits.available(
              AArch64InstrInfo::getLdStBaseOp(MI).getReg());
          bool IsBaseRegModified = !ModifiedRegUnits.available(
              AArch64InstrInfo::getLdStBaseOp(MI).getReg());
          // If the stored value and the address of the second instruction is
          // the same, it needs to be using the updated register and therefore
          // it must not be folded.
          bool IsMIRegTheSame =
              TRI->regsOverlap(getLdStRegOp(MI).getReg(),
                               AArch64InstrInfo::getLdStBaseOp(MI).getReg());
          if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
              IsMIRegTheSame) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
        } else {
          if ((Offset != MIOffset + OffsetStride) &&
              (Offset + OffsetStride != MIOffset)) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
        }

        int MinOffset = Offset < MIOffset ? Offset : MIOffset;
        if (FindNarrowMerge) {
          // If the alignment requirements of the scaled wide load/store
          // instruction can't express the offset, or this isn't a zero store
          // of the same register, bail and keep looking.
          if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
              (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            LLVM_DEBUG(dbgs() << "Offset doesn't fit in immediate, "
                              << "keep looking.\n");
            continue;
          }
        } else {
          // Pairwise instructions have a 7-bit signed offset field. If the
          // resultant immediate offset is out of range, bail and keep looking.
          if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            LLVM_DEBUG(dbgs() << "Offset doesn't fit in immediate, "
                              << "keep looking.\n");
            continue;
          }
          // If the alignment requirements of the paired (scaled) instruction
          // can't express the offset, bail and keep looking.
          if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            LLVM_DEBUG(dbgs()
                       << "Offset doesn't fit due to alignment requirements, "
                       << "keep looking.\n");
            continue;
          }
        }

        // If the BaseReg has been modified, then we cannot do the
        // optimization. For example, in the following pattern
        //   ldr x1 [x2]
        //   ldr x2 [x3]
        //   ldr x4 [x2, #8],
        // the first and third ldr cannot be converted to ldp x1, x4, [x2].
        if (!ModifiedRegUnits.available(BaseReg))
          return E;

        const bool SameLoadReg =
            MayLoad &&
            TRI->isSuperOrSubRegisterEq(Reg, getLdStRegOp(MI).getReg());

        // If the Rt of the second instruction was not modified or used between
        // the two instructions and none of the instructions between the second
        // and first alias with the second, we can combine the second into the
        // first.
        bool RtNotModified =
            ModifiedRegUnits.available(getLdStRegOp(MI).getReg());
        bool RtNotUsed = !(MI.mayLoad() && !SameLoadReg &&
                           !UsedRegUnits.available(getLdStRegOp(MI).getReg()));

        LLVM_DEBUG(dbgs() << "Checking, can combine 2nd into 1st insn:\n"
                          << "Reg '" << getLdStRegOp(MI) << "' not modified: "
                          << (RtNotModified ? "true" : "false") << "\n"
                          << "Reg '" << getLdStRegOp(MI) << "' not used: "
                          << (RtNotUsed ? "true" : "false") << "\n");

        if (RtNotModified && RtNotUsed && !mayAlias(MI, MemInsns, AA)) {
          // For pairs loading into the same reg, try to find a renaming
          // opportunity to allow the merge; otherwise bail and keep looking.
          if (SameLoadReg) {
            std::optional<MCPhysReg> RenameReg =
                findRenameRegForSameLdStRegPair(MaybeCanRename, FirstMI, MI,
                                                Reg, DefinedInBB, UsedInBetween,
                                                RequiredClasses, TRI);
            if (!RenameReg) {
              LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                                UsedRegUnits, TRI);
              MemInsns.push_back(&MI);
              LLVM_DEBUG(dbgs() << "Can't find reg for renaming, "
                                << "keep looking.\n");
              continue;
            }
            Flags.setRenameReg(*RenameReg);
          }

          Flags.setMergeForward(false);
          if (!SameLoadReg)
            Flags.clearRenameReg();
          return MBBI;
        }

        // Likewise, if the Rt of the first instruction is not modified or used
        // between the two instructions and none of the instructions between
        // the first and the second alias with the first, we can combine the
        // first into the second.
        RtNotModified = !(MayLoad && !UsedRegUnits.available(
                                         getLdStRegOp(FirstMI).getReg()));

        LLVM_DEBUG(dbgs() << "Checking, can combine 1st into 2nd insn:\n"
                          << "Reg '" << getLdStRegOp(FirstMI)
                          << "' not modified: "
                          << (RtNotModified ? "true" : "false") << "\n");

        if (RtNotModified && !mayAlias(FirstMI, MemInsns, AA)) {
          if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) {
            Flags.setMergeForward(true);
            Flags.clearRenameReg();
            return MBBI;
          }

          std::optional<MCPhysReg> RenameReg = findRenameRegForSameLdStRegPair(
              MaybeCanRename, FirstMI, MI, Reg, DefinedInBB, UsedInBetween,
              RequiredClasses, TRI);
          if (RenameReg) {
            Flags.setMergeForward(true);
            Flags.setRenameReg(*RenameReg);
            MBBIWithRenameReg = MBBI;
          }
        }
        LLVM_DEBUG(dbgs() << "Unable to combine these instructions due to "
                          << "interference in between, keep looking.\n");
      }
    }

    if (Flags.getRenameReg())
      return MBBIWithRenameReg;

    // If the instruction wasn't a matching load or store, stop searching if we
    // encounter a call instruction that might modify memory.
    if (MI.isCall())
      return E;

    // Update modified / used register units.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is modified, we have no match, so
    // return early.
    if (!ModifiedRegUnits.available(BaseReg))
      return E;

    // Update the list of instructions that read/write memory.
    if (MI.mayLoadOrStore())
      MemInsns.push_back(&MI);
  }
  return E;
}
static MachineBasicBlock::iterator
maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI) {
  assert((MI.getOpcode() == AArch64::SUBXri ||
          MI.getOpcode() == AArch64::ADDXri) &&
         "Expected a register update instruction");
  auto End = MI.getParent()->end();
  if (MaybeCFI == End ||
      MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||
      !(MI.getFlag(MachineInstr::FrameSetup) ||
        MI.getFlag(MachineInstr::FrameDestroy)) ||
      MI.getOperand(0).getReg() != AArch64::SP)
    return End;

  const MachineFunction &MF = *MI.getParent()->getParent();
  unsigned CFIIndex = MaybeCFI->getOperand(0).getCFIIndex();
  const MCCFIInstruction &CFI = MF.getFrameInstructions()[CFIIndex];
  switch (CFI.getOperation()) {
  case MCCFIInstruction::OpDefCfa:
  case MCCFIInstruction::OpDefCfaOffset:
    return MaybeCFI;
  default:
    return End;
  }
}
std::optional<MachineBasicBlock::iterator> AArch64LoadStoreOpt::mergeUpdateInsn(
    MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update,
    bool IsForward, bool IsPreIdx, bool MergeEither) {
  assert((Update->getOpcode() == AArch64::ADDXri ||
          Update->getOpcode() == AArch64::SUBXri) &&
         "Unexpected base register update instruction to merge!");
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);

  // If updating the SP and the following instruction is CFA-offset-related
  // CFI, move it after the merged instruction; bail if other CFI instructions
  // sit between the update and the memory access.
  MachineBasicBlock::iterator CFI =
      IsForward ? maybeMoveCFI(*Update, next_nodbg(Update, E)) : E;
  if (CFI != E) {
    if (std::any_of(std::next(CFI), I, [](const auto &Insn) {
          return Insn.getOpcode() == TargetOpcode::CFI_INSTRUCTION;
        }))
      return std::nullopt;
  }

  // Return the instruction following the merged instruction, which is
  // the instruction following the update instruction if it was merged forward.
  if (NextI == Update)
    NextI = next_nodbg(NextI, E);

  int Value = Update->getOperand(2).getImm();
  assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
         "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
  if (Update->getOpcode() == AArch64::SUBXri)
    Value = -Value;

  unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
                             : getPostIndexedOpcode(I->getOpcode());
  MachineBasicBlock::iterator InsertPt = I;
  // ...
  MachineInstrBuilder MIB;
  int Scale, MinOffset, MaxOffset;
  getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
  if (!AArch64InstrInfo::isPairedLdSt(*I)) {
    // Non-paired instruction.
    MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
                  TII->get(NewOpc))
              .add(Update->getOperand(0))
              .add(getLdStRegOp(*I))
              .add(AArch64InstrInfo::getLdStBaseOp(*I))
              .addImm(Value / Scale)
              .setMemRefs(I->memoperands())
              .setMIFlags(I->mergeFlagsWith(*Update));
  } else {
    // Paired instruction.
    MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
                  TII->get(NewOpc))
              .add(Update->getOperand(0))
              .add(getLdStRegOp(*I, 0))
              .add(getLdStRegOp(*I, 1))
              .add(AArch64InstrInfo::getLdStBaseOp(*I))
              .addImm(Value / Scale)
              .setMemRefs(I->memoperands())
              .setMIFlags(I->mergeFlagsWith(*Update));
  }
  (void)MIB;
  // ...

  // Erase the old instructions for the block.
  I->eraseFromParent();
  Update->eraseFromParent();

  return NextI;
}
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeConstOffsetInsn(MachineBasicBlock::iterator I,
                                          MachineBasicBlock::iterator Update,
                                          unsigned Offset, int Scale) {
  assert((Update->getOpcode() == AArch64::MOVKWi) &&
         "Unexpected const mov instruction to merge!");
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
  MachineBasicBlock::iterator PrevI = prev_nodbg(Update, E);
  MachineInstr &MemMI = *I;
  unsigned Mask = (1 << 12) * Scale - 1;
  unsigned Low = Offset & Mask;
  unsigned High = Offset - Low;
  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
  Register IndexReg = AArch64InstrInfo::getLdStOffsetOp(MemMI).getReg();
  MachineInstrBuilder AddMIB, MemMIB;

  // Add IndexReg, BaseReg, High (the BaseReg may be SP).
  AddMIB =
      BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(AArch64::ADDXri))
          .addDef(IndexReg)
          .addUse(BaseReg)
          .addImm(High >> 12) // shifted value
          .addImm(12);        // shift 12
  (void)AddMIB;
  // Ld/St DestReg, IndexReg, Imm12.
  unsigned NewOpc = getBaseAddressOpcode(I->getOpcode());
  MemMIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
               .add(getLdStRegOp(MemMI))
               .add(AArch64InstrInfo::getLdStOffsetOp(MemMI))
               .addImm(Low / Scale)
               .setMemRefs(I->memoperands())
               .setMIFlags(I->mergeFlagsWith(*Update));
  (void)MemMIB;

  ++NumConstOffsetFolded;
  LLVM_DEBUG(dbgs() << "Creating base address load/store.\n");
  // ...

  // Erase the old instructions for the block.
  I->eraseFromParent();
  PrevI->eraseFromParent();
  Update->eraseFromParent();

  return NextI;
}
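// Illustrative shape of the folded sequence above (a sketch): the movz/movk
// pair materializing a large index collapses into a shifted ADD plus an
// immediate-offset access, e.g. for Offset = 0x9008 and an 8-byte load:
//   movz w8, #0x9008            add x8, x1, #8, lsl #12
//   ldr  x0, [x1, x8]     =>    ldr x0, [x8, #4104]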
bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
                                               MachineInstr &MI,
                                               unsigned BaseReg, int Offset) {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::SUBXri:
  case AArch64::ADDXri: {
    // Make sure it's a vanilla immediate operand, not a relocation or
    // anything else we can't handle.
    if (!MI.getOperand(2).isImm())
      break;
    // Watch out for 1 << 12 shifted values.
    if (AArch64_AM::getShiftValue(MI.getOperand(3).getImm()))
      break;

    // The update instruction source and destination register must be the
    // same as the load/store base register.
    if (MI.getOperand(0).getReg() != BaseReg ||
        MI.getOperand(1).getReg() != BaseReg)
      break;

    int UpdateOffset = MI.getOperand(2).getImm();
    if (MI.getOpcode() == AArch64::SUBXri)
      UpdateOffset = -UpdateOffset;

    // The immediate must be a multiple of the scaling factor of the pre/post
    // indexed instruction.
    int Scale, MinOffset, MaxOffset;
    getPrePostIndexedMemOpInfo(MemMI, Scale, MinOffset, MaxOffset);
    if (UpdateOffset % Scale != 0)
      break;

    // The scaled offset must fit in the instruction immediate.
    int ScaledOffset = UpdateOffset / Scale;
    if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)
      break;

    // If we have a non-zero Offset, check that it matches the amount we're
    // adding to the register.
    if (!Offset || Offset == UpdateOffset)
      return true;
    break;
  }
  }
  return false;
}
bool AArch64LoadStoreOpt::isMatchingMovConstInsn(MachineInstr &MemMI,
                                                 MachineInstr &MI,
                                                 unsigned IndexReg,
                                                 unsigned &Offset) {
  // The update instruction source and destination register must be the
  // same as the load/store index register.
  if (MI.getOpcode() == AArch64::MOVKWi &&
      TRI->isSuperOrSubRegisterEq(IndexReg, MI.getOperand(1).getReg())) {
    // A MOVZWi/MOVKWi pair holds a large offset of a load/store instruction.
    MachineBasicBlock::iterator B = MI.getParent()->begin();
    MachineBasicBlock::iterator MBBI = &MI;
    // Skip the case when MI is the first instruction of a block.
    if (MBBI == B)
      return false;
    MBBI = prev_nodbg(MBBI, B);
    MachineInstr &MovzMI = *MBBI;
    if (MovzMI.getOpcode() == AArch64::MOVZWi &&
        MovzMI.getOperand(0).getReg() == MI.getOperand(0).getReg()) {
      unsigned Low = MovzMI.getOperand(1).getImm();
      unsigned High = MI.getOperand(2).getImm() << MI.getOperand(3).getImm();
      Offset = High + Low;
      // 12-bit optionally shifted immediates are legal for adds.
      return Offset >> 24 == 0;
    }
  }
  return false;
}
MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
    MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineInstr &MemMI = *I;
  MachineBasicBlock::iterator MBBI = I;

  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
  int MIUnscaledOffset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm() *
                         TII->getMemScale(MemMI);

  // Scan forward looking for post-index opportunities. Updating instructions
  // can't be formed if the memory instruction doesn't have the offset we're
  // looking for.
  if (MIUnscaledOffset != UnscaledOffset)
    return E;

  // If the base register overlaps a source/destination register, we can't
  // merge the update. This does not apply to tag store instructions, which
  // ignore the address part of the source register.
  if (!isTagStore(MemMI)) {
    bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
    for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
      Register DestReg = getLdStRegOp(MemMI, i).getReg();
      if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
        return E;
    }
  }

  // Track which register units have been modified and used between the first
  // insn (inclusive) and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  MBBI = next_nodbg(MBBI, E);

  // We can't post-increment the stack pointer if any instruction between
  // the memory access (I) and the increment (MBBI) can access the memory
  // region defined by [SP, MBBI].
  const bool BaseRegSP = BaseReg == AArch64::SP;
  // ...

  for (unsigned Count = 0; MBBI != E && Count < Limit;
       MBBI = next_nodbg(MBBI, E)) {
    MachineInstr &MI = *MBBI;

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (!MI.isTransient())
      ++Count;

    // If we found a match, return it.
    if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
      return MBBI;

    // Update the status of what the instruction clobbered and used.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is used or modified, we have no match,
    // so return early. If we are optimizing SP, do not allow instructions
    // that may load or store in between.
    if (!ModifiedRegUnits.available(BaseReg) ||
        !UsedRegUnits.available(BaseReg) ||
        (BaseRegSP && MBBI->mayLoadOrStore()))
      return E;
  }
  return E;
}
MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
    MachineBasicBlock::iterator I, unsigned Limit, bool &MergeEither) {
  MachineBasicBlock::iterator B = I->getParent()->begin();
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineInstr &MemMI = *I;
  MachineBasicBlock::iterator MBBI = I;
  MachineFunction &MF = *MemMI.getMF();

  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
  int Offset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm();

  bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
  Register DestReg[] = {getLdStRegOp(MemMI, 0).getReg(),
                        IsPairedInsn ? getLdStRegOp(MemMI, 1).getReg()
                                     : AArch64::NoRegister};

  // If the load/store is the first instruction in the block, there's obviously
  // not any matching update. Ditto if the memory offset isn't zero.
  if (MBBI == B || Offset != 0)
    return E;
  // If the base register overlaps a destination register, we can't
  // merge the update.
  if (!isTagStore(MemMI)) {
    for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i)
      if (DestReg[i] == BaseReg || TRI->isSubRegister(BaseReg, DestReg[i]))
        return E;
  }

  const bool BaseRegSP = BaseReg == AArch64::SP;
  // ...
  unsigned RedZoneSize =
      Subtarget->getTargetLowering()->getRedZoneSize(MF.getFunction());

  // Track which register units have been modified and used between the first
  // insn (inclusive) and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  unsigned Count = 0;
  bool MemAcessBeforeSPPreInc = false;
  MergeEither = true;
  do {
    MBBI = prev_nodbg(MBBI, B);
    MachineInstr &MI = *MBBI;

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (!MI.isTransient())
      ++Count;

    // If we found a match, return it.
    if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset)) {
      // Check that the update value is within our red zone limit (which may
      // be zero).
      if (MemAcessBeforeSPPreInc && MBBI->getOperand(2).getImm() > RedZoneSize)
        return E;
      return MBBI;
    }

    // Update the status of what the instruction clobbered and used.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is used or modified, we have no match,
    // so return early.
    if (!ModifiedRegUnits.available(BaseReg) ||
        !UsedRegUnits.available(BaseReg))
      return E;

    // If we have a destination register (i.e. a load instruction) and a
    // destination register is used or modified, then we can only merge forward
    // (i.e. the update instruction is merged with the load/store, not the
    // other way round).
    if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects() ||
        (DestReg[0] != AArch64::NoRegister &&
         !(ModifiedRegUnits.available(DestReg[0]) &&
           UsedRegUnits.available(DestReg[0]))) ||
        (DestReg[1] != AArch64::NoRegister &&
         !(ModifiedRegUnits.available(DestReg[1]) &&
           UsedRegUnits.available(DestReg[1]))))
      MergeEither = false;

    // Keep track of whether we have a memory access before an SP
    // pre-increment; in that case we need to validate later that the update
    // amount respects the red zone.
    if (BaseRegSP && MBBI->mayLoadOrStore())
      MemAcessBeforeSPPreInc = true;
  } while (MBBI != B && Count < Limit);
  return E;
}
MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingConstOffsetBackward(
    MachineBasicBlock::iterator I, unsigned Limit, unsigned &Offset) {
  MachineBasicBlock::iterator B = I->getParent()->begin();
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineInstr &MemMI = *I;
  MachineBasicBlock::iterator MBBI = I;

  // If the load is the first instruction in the block, there's obviously
  // not any matching update.
  if (MBBI == B)
    return E;

  // ...
  Register IndexReg = AArch64InstrInfo::getLdStOffsetOp(MemMI).getReg();

  // Track which register units have been modified and used between the first
  // insn (inclusive) and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  unsigned Count = 0;
  do {
    MBBI = prev_nodbg(MBBI, B);
    MachineInstr &MI = *MBBI;

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (!MI.isTransient())
      ++Count;

    // If we found a match, return it.
    if (isMatchingMovConstInsn(*I, MI, IndexReg, Offset)) {
      return MBBI;
    }

    // Update the status of what the instruction clobbered and used.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the index register is used or modified, we have no match,
    // so return early.
    if (!ModifiedRegUnits.available(IndexReg) ||
        !UsedRegUnits.available(IndexReg))
      return E;

  } while (MBBI != B && Count < Limit);
  return E;
}
bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
    MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  // If this is a volatile load, don't mess with it.
  if (MI.hasOrderedMemoryRef())
    return false;
  // ...

  // Look backward up to LdStLimit instructions.
  MachineBasicBlock::iterator StoreI;
  if (findMatchingStore(MBBI, LdStLimit, StoreI)) {
    ++NumLoadsFromStoresPromoted;
    // Promote the load. Keeping the iterator straight is a pain, so we let
    // the merge routine tell us what the next instruction is after it's done
    // mucking about.
    MBBI = promoteLoadFromStore(MBBI, StoreI);
    return true;
  }
  return false;
}
// Merge adjacent zero stores into a wider store.
bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
    MachineBasicBlock::iterator &MBBI) {
  assert(isPromotableZeroStoreInst(*MBBI) && "Expected narrow store.");
  MachineInstr &MI = *MBBI;
  MachineBasicBlock::iterator E = MI.getParent()->end();

  if (!TII->isCandidateToMergeOrPair(MI))
    return false;

  // Look ahead up to LdStLimit instructions for a mergeable instruction.
  LdStPairFlags Flags;
  MachineBasicBlock::iterator MergeMI =
      findMatchingInsn(MBBI, Flags, LdStLimit, /*FindNarrowMerge=*/true);
  if (MergeMI != E) {
    ++NumZeroStoresPromoted;

    // Keeping the iterator straight is a pain, so we let the merge routine
    // tell us what the next instruction is after it's done mucking about.
    MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags);
    return true;
  }
  return false;
}
// Find loads and stores that can be merged into a single load or store pair
// instruction.
bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock::iterator E = MI.getParent()->end();

  if (!TII->isCandidateToMergeOrPair(MI))
    return false;

  // If the disable-ldp feature is opted, do not emit ldp.
  if (MI.mayLoad() && Subtarget->hasDisableLdp())
    return false;

  // If the disable-stp feature is opted, do not emit stp.
  if (MI.mayStore() && Subtarget->hasDisableStp())
    return false;

  // Early exit if the offset is not possible to match. (6 bits of positive
  // range, plus allow an extra one in case we find a later insn that matches
  // with Offset-1.)
  bool IsUnscaled = TII->hasUnscaledLdStOffset(MI);
  int Offset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
  int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
  // Allow one more for offset.
  if (Offset > 0)
    Offset -= OffsetStride;
  if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
    return false;

  // Look ahead up to LdStLimit instructions for a pairable instruction.
  LdStPairFlags Flags;
  MachineBasicBlock::iterator Paired =
      findMatchingInsn(MBBI, Flags, LdStLimit, /*FindNarrowMerge=*/false);
  if (Paired != E) {
    // Keeping the iterator straight is a pain, so we let the merge routine
    // tell us what the next instruction is after it's done mucking about.
    auto Prev = std::prev(MBBI);

    // Fetch the memoperand of the load/store that is a candidate for
    // combination.
    MachineMemOperand *MemOp =
        MI.memoperands_empty() ? nullptr : MI.memoperands().front();

    // If a load/store arrives and the ldp/stp-aligned-only feature is opted,
    // check that the alignment of the source pointer is at least double the
    // alignment of the type.
    if ((MI.mayLoad() && Subtarget->hasLdpAlignedOnly()) ||
        (MI.mayStore() && Subtarget->hasStpAlignedOnly())) {
      // If there is no size/align information, cancel the transformation.
      if (!MemOp || !MemOp->getMemoryType().isValid()) {
        NumFailedAlignmentCheck++;
        return false;
      }

      uint64_t MemAlignment = MemOp->getAlign().value();
      uint64_t TypeAlignment = Align(MemOp->getSize().getValue()).value();

      if (MemAlignment < 2 * TypeAlignment) {
        NumFailedAlignmentCheck++;
        return false;
      }
    }

    ++NumPairCreated;
    if (TII->hasUnscaledLdStOffset(MI))
      ++NumUnscaledPairCreated;

    MBBI = mergePairedInsns(MBBI, Paired, Flags);
    // Collect liveness info for instructions between Prev and the new
    // position MBBI.
    for (auto I = std::next(Prev); I != MBBI; I++)
      updateDefinedRegisters(*I, DefinedInBB, TRI);

    return true;
  }
  return false;
}
bool AArch64LoadStoreOpt::tryToMergeLdStUpdate(
    MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock::iterator E = MI.getParent()->end();
  MachineBasicBlock::iterator Update;

  // Look forward to try to form a post-index instruction. For example,
  //   ldr x0, [x20]
  //   add x20, x20, #32
  // merged into:
  //   ldr x0, [x20], #32
  Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
  if (Update != E) {
    // Merge the update into the ld/st.
    if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/false,
                                     /*IsPreIdx=*/false,
                                     /*MergeEither=*/false)) {
      MBBI = *NextI;
      return true;
    }
  }

  // Don't know how to handle unscaled pre/post-index versions below, so bail.
  if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
    return false;

  // Look back to try to find a pre-index instruction. For example,
  //   add x0, x0, #8
  //   ldr x1, [x0]
  // merged into:
  //   ldr x1, [x0, #8]!
  bool MergeEither;
  Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit, MergeEither);
  if (Update != E) {
    if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/true,
                                     /*IsPreIdx=*/true, MergeEither)) {
      MBBI = *NextI;
      return true;
    }
  }

  // The immediate in the load/store is scaled by the size of the memory
  // operation. The immediate in the add we're looking for, however, is not,
  // so adjust here.
  int UnscaledOffset =
      AArch64InstrInfo::getLdStOffsetOp(MI).getImm() * TII->getMemScale(MI);

  // Look forward to try to find a pre-index instruction. For example,
  //   ldr x1, [x0, #64]
  //   add x0, x0, #64
  // merged into:
  //   ldr x1, [x0, #64]!
  Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
  if (Update != E) {
    if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/false,
                                     /*IsPreIdx=*/true,
                                     /*MergeEither=*/false)) {
      MBBI = *NextI;
      return true;
    }
  }

  return false;
}

bool AArch64LoadStoreOpt::tryToMergeIndexLdSt(
    MachineBasicBlock::iterator &MBBI, int Scale) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock::iterator E = MI.getParent()->end();
  MachineBasicBlock::iterator Update;

  // Don't know how to handle unscaled pre/post-index versions below, so bail.
  if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
    return false;

  // Look back to try to find a const offset for an index load/store. For
  // example, we can merge:
  //   mov x8, #LargeImm    ; = a * (1 << 12) + imm12
  //   ldr x1, [x0, x8]
  // into:
  //   add x8, x0, a * (1 << 12)
  //   ldr x1, [x8, imm12]
  unsigned Offset;
  Update = findMatchingConstOffsetBackward(MBBI, LdStConstLimit, Offset);
  if (Update != E && (Offset & (Scale - 1)) == 0) {
    // Merge the imm12 into the ld/st.
    MBBI = mergeConstOffsetInsn(MBBI, Update, Offset, Scale);
    return true;
  }

  return false;
}
bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
                                        bool EnableNarrowZeroStOpt) {
  bool Modified = false;
  // 1) Find loads that directly read from stores and promote them by
  //    replacing them with mov instructions.
  // ...
  // 2) Merge adjacent zero stores into a wider store.
  if (EnableNarrowZeroStOpt)
    for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
         MBBI != E;) {
      if (isPromotableZeroStoreInst(*MBBI) && tryToMergeZeroStInst(MBBI))
        Modified = true;
      else
        ++MBBI;
    }

  // 3) Find loads and stores that can be merged into a single load or store
  //    pair instruction.
  if (MBB.getParent()->getRegInfo().tracksLiveness()) {
    DefinedInBB.clear();
    DefinedInBB.addLiveIns(MBB);
  }

  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    // Track currently live registers up to this point, to help with
    // searching for a rename register on demand.
    updateDefinedRegisters(*MBBI, DefinedInBB, TRI);
    if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
      Modified = true;
    else
      ++MBBI;
  }

  // 4) Find base register updates that can be merged into the load or store
  //    as a base-reg writeback.
  // ...
  return Modified;
}
bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  if (skipFunction(Fn.getFunction()))
    return false;

  Subtarget = &Fn.getSubtarget<AArch64Subtarget>();
  TII = Subtarget->getInstrInfo();
  TRI = Subtarget->getRegisterInfo();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  // Track which register units have been modified and used.
  ModifiedRegUnits.init(*TRI);
  UsedRegUnits.init(*TRI);
  DefinedInBB.init(*TRI);

  bool Modified = false;
  bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
  for (auto &MBB : Fn) {
    auto M = optimizeBlock(MBB, enableNarrowZeroStOpt);
    Modified |= M;
  }

  return Modified;
}
// createAArch64LoadStoreOptimizationPass - returns an instance of the
// load / store optimization pass.
FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() {
  return new AArch64LoadStoreOpt();
}