36#define DEBUG_TYPE "si-memory-legalizer"
37#define PASS_NAME "SI Memory Legalizer"
41 cl::desc(
"Use this to skip inserting cache invalidating instructions."));
63enum class SIAtomicScope {
75enum class SIAtomicAddrSpace {
84 FLAT = GLOBAL |
LDS | SCRATCH,
87 ATOMIC = GLOBAL |
LDS | SCRATCH | GDS,
90 ALL = GLOBAL |
LDS | SCRATCH | GDS | OTHER,
98 case SIAtomicScope::NONE:
100 case SIAtomicScope::SINGLETHREAD:
101 return "singlethread";
102 case SIAtomicScope::WAVEFRONT:
104 case SIAtomicScope::WORKGROUP:
106 case SIAtomicScope::CLUSTER:
108 case SIAtomicScope::AGENT:
110 case SIAtomicScope::SYSTEM:
117 if (AS == SIAtomicAddrSpace::NONE) {
122 if ((AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE)
123 OS <<
LS <<
"global";
124 if ((AS & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE)
126 if ((AS & SIAtomicAddrSpace::SCRATCH) != SIAtomicAddrSpace::NONE)
127 OS <<
LS <<
"scratch";
128 if ((AS & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE)
130 if ((AS & SIAtomicAddrSpace::OTHER) != SIAtomicAddrSpace::NONE)
136class SIMemOpInfo final {
139 friend class SIMemOpAccess;
143 SIAtomicScope Scope = SIAtomicScope::SYSTEM;
144 SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
145 SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
146 bool IsCrossAddressSpaceOrdering =
false;
147 bool IsVolatile =
false;
148 bool IsNonTemporal =
false;
149 bool IsLastUse =
false;
150 bool IsCooperative =
false;
154 const GCNSubtarget &ST,
156 SIAtomicScope Scope = SIAtomicScope::SYSTEM,
157 SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC,
158 SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::ALL,
159 bool IsCrossAddressSpaceOrdering =
true,
160 AtomicOrdering FailureOrdering = AtomicOrdering::SequentiallyConsistent,
161 bool IsVolatile =
false,
bool IsNonTemporal =
false,
162 bool IsLastUse =
false,
bool IsCooperative =
false,
163 bool CanDemoteWorkgroupToWavefront =
false)
164 : Ordering(Ordering), FailureOrdering(FailureOrdering), Scope(Scope),
165 OrderingAddrSpace(OrderingAddrSpace), InstrAddrSpace(InstrAddrSpace),
166 IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
167 IsVolatile(IsVolatile), IsNonTemporal(IsNonTemporal),
168 IsLastUse(IsLastUse), IsCooperative(IsCooperative) {
170 if (Ordering == AtomicOrdering::NotAtomic) {
171 assert(!IsCooperative &&
"Cannot be cooperative & non-atomic!");
172 assert(Scope == SIAtomicScope::NONE &&
173 OrderingAddrSpace == SIAtomicAddrSpace::NONE &&
174 !IsCrossAddressSpaceOrdering &&
175 FailureOrdering == AtomicOrdering::NotAtomic);
179 assert(Scope != SIAtomicScope::NONE &&
180 (OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
181 SIAtomicAddrSpace::NONE &&
182 (InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
183 SIAtomicAddrSpace::NONE);
188 if ((OrderingAddrSpace == InstrAddrSpace) &&
190 this->IsCrossAddressSpaceOrdering =
false;
194 if ((InstrAddrSpace & ~SIAtomicAddrSpace::SCRATCH) ==
195 SIAtomicAddrSpace::NONE) {
196 this->Scope = std::min(Scope, SIAtomicScope::SINGLETHREAD);
197 }
else if ((InstrAddrSpace &
198 ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS)) ==
199 SIAtomicAddrSpace::NONE) {
200 this->Scope = std::min(Scope, SIAtomicScope::WORKGROUP);
201 }
else if ((InstrAddrSpace &
202 ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS |
203 SIAtomicAddrSpace::GDS)) == SIAtomicAddrSpace::NONE) {
204 this->Scope = std::min(Scope, SIAtomicScope::AGENT);
209 if (this->Scope == SIAtomicScope::CLUSTER && !
ST.hasClusters())
210 this->Scope = SIAtomicScope::AGENT;
216 if (CanDemoteWorkgroupToWavefront &&
217 this->Scope == SIAtomicScope::WORKGROUP &&
220 AtomicOrdering::Monotonic)))
221 this->Scope = SIAtomicScope::WAVEFRONT;
227 SIAtomicScope getScope()
const {
240 return FailureOrdering;
245 SIAtomicAddrSpace getInstrAddrSpace()
const {
246 return InstrAddrSpace;
251 SIAtomicAddrSpace getOrderingAddrSpace()
const {
252 return OrderingAddrSpace;
257 bool getIsCrossAddressSpaceOrdering()
const {
258 return IsCrossAddressSpaceOrdering;
263 bool isVolatile()
const {
269 bool isNonTemporal()
const {
270 return IsNonTemporal;
275 bool isLastUse()
const {
return IsLastUse; }
278 bool isCooperative()
const {
return IsCooperative; }
282 bool isAtomic()
const {
283 return Ordering != AtomicOrdering::NotAtomic;
288class SIMemOpAccess final {
290 const AMDGPUMachineModuleInfo *MMI =
nullptr;
291 const GCNSubtarget &ST;
292 const bool CanDemoteWorkgroupToWavefront;
296 const char *Msg)
const;
302 std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
303 toSIAtomicScope(
SyncScope::ID SSID, SIAtomicAddrSpace InstrAddrSpace)
const;
306 SIAtomicAddrSpace toSIAtomicAddrSpace(
unsigned AS)
const;
310 std::optional<SIMemOpInfo>
316 SIMemOpAccess(
const AMDGPUMachineModuleInfo &MMI,
const GCNSubtarget &ST,
320 std::optional<SIMemOpInfo>
325 std::optional<SIMemOpInfo>
330 std::optional<SIMemOpInfo>
335 std::optional<SIMemOpInfo>
341 std::optional<SIMemOpInfo>
345class SICacheControl {
349 const GCNSubtarget &ST;
352 const SIInstrInfo *TII =
nullptr;
359 SICacheControl(
const GCNSubtarget &ST);
364 unsigned Bits)
const;
368 bool canAffectGlobalAddrSpace(SIAtomicAddrSpace AS)
const;
374 static std::unique_ptr<SICacheControl> create(
const GCNSubtarget &ST);
381 SIAtomicAddrSpace AddrSpace)
const = 0;
388 SIAtomicAddrSpace AddrSpace)
const = 0;
395 SIAtomicAddrSpace AddrSpace)
const = 0;
401 SIAtomicAddrSpace AddrSpace,
402 SIMemOp
Op,
bool IsVolatile,
404 bool IsLastUse =
false)
const = 0;
411 virtual bool finalizeStore(MachineInstr &
MI,
bool Atomic)
const {
416 virtual bool handleCooperativeAtomic(MachineInstr &
MI)
const {
418 "cooperative atomics are not available on this architecture");
431 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
432 bool IsCrossAddrSpaceOrdering, Position Pos,
442 SIAtomicAddrSpace AddrSpace,
443 Position Pos)
const = 0;
453 SIAtomicAddrSpace AddrSpace,
454 bool IsCrossAddrSpaceOrdering,
455 Position Pos)
const = 0;
459 virtual bool handleNonVolatile(MachineInstr &
MI)
const {
return false; }
462 virtual ~SICacheControl() =
default;
467class SIGfx6CacheControl final :
public SICacheControl {
470 SIGfx6CacheControl(
const GCNSubtarget &ST) : SICacheControl(
ST) {}
474 SIAtomicAddrSpace AddrSpace)
const override;
478 SIAtomicAddrSpace AddrSpace)
const override;
482 SIAtomicAddrSpace AddrSpace)
const override;
485 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
486 bool IsVolatile,
bool IsNonTemporal,
487 bool IsLastUse)
const override;
490 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
491 bool IsCrossAddrSpaceOrdering, Position Pos,
496 SIAtomicAddrSpace AddrSpace,
497 Position Pos)
const override;
501 SIAtomicAddrSpace AddrSpace,
502 bool IsCrossAddrSpaceOrdering,
503 Position Pos)
const override;
507class SIGfx10CacheControl final :
public SICacheControl {
509 SIGfx10CacheControl(
const GCNSubtarget &ST) : SICacheControl(
ST) {}
513 SIAtomicAddrSpace AddrSpace)
const override;
517 SIAtomicAddrSpace AddrSpace)
const override {
523 SIAtomicAddrSpace AddrSpace)
const override {
528 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
529 bool IsVolatile,
bool IsNonTemporal,
530 bool IsLastUse)
const override;
533 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
534 bool IsCrossAddrSpaceOrdering, Position Pos,
538 SIAtomicAddrSpace AddrSpace, Position Pos)
const override;
541 SIAtomicAddrSpace AddrSpace,
bool IsCrossAddrSpaceOrdering,
542 Position Pos)
const override {
543 return insertWait(
MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
544 IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release,
549class SIGfx12CacheControl final :
public SICacheControl {
571 SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace)
const;
574 SIGfx12CacheControl(
const GCNSubtarget &ST) : SICacheControl(
ST) {
577 assert(!
ST.hasGFX1250Insts() ||
ST.hasGFX13Insts() ||
ST.isCuModeEnabled());
581 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
582 bool IsCrossAddrSpaceOrdering, Position Pos,
586 SIAtomicAddrSpace AddrSpace, Position Pos)
const override;
589 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
590 bool IsVolatile,
bool IsNonTemporal,
591 bool IsLastUse)
const override;
593 bool finalizeStore(MachineInstr &
MI,
bool Atomic)
const override;
595 bool handleCooperativeAtomic(MachineInstr &
MI)
const override;
598 SIAtomicAddrSpace AddrSpace,
bool IsCrossAddrSpaceOrdering,
599 Position Pos)
const override;
603 SIAtomicAddrSpace AddrSpace)
const override {
604 return setAtomicScope(
MI, Scope, AddrSpace);
609 SIAtomicAddrSpace AddrSpace)
const override {
610 return setAtomicScope(
MI, Scope, AddrSpace);
615 SIAtomicAddrSpace AddrSpace)
const override {
616 return setAtomicScope(
MI, Scope, AddrSpace);
619 bool handleNonVolatile(MachineInstr &
MI)
const override;
622class SIMemoryLegalizer final {
624 const MachineModuleInfo &MMI;
626 std::unique_ptr<SICacheControl> CC =
nullptr;
629 std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;
633 bool isAtomicRet(
const MachineInstr &
MI)
const {
639 bool removeAtomicPseudoMIs();
643 bool expandLoad(
const SIMemOpInfo &MOI,
647 bool expandStore(
const SIMemOpInfo &MOI,
651 bool expandAtomicFence(
const SIMemOpInfo &MOI,
655 bool expandAtomicCmpxchgOrRmw(
const SIMemOpInfo &MOI,
662 SIMemoryLegalizer(
const MachineModuleInfo &MMI) : MMI(MMI) {};
663 bool run(MachineFunction &MF);
670 SIMemoryLegalizerLegacy() : MachineFunctionPass(ID) {}
672 void getAnalysisUsage(AnalysisUsage &AU)
const override {
677 StringRef getPassName()
const override {
681 bool runOnMachineFunction(MachineFunction &MF)
override;
685 {
"global", SIAtomicAddrSpace::GLOBAL},
686 {
"local", SIAtomicAddrSpace::LDS},
694 OS <<
"unknown address space '" << AS <<
"'; expected one of ";
696 for (
const auto &[Name, Val] : ASNames)
697 OS <<
LS <<
'\'' <<
Name <<
'\'';
705static std::optional<SIAtomicAddrSpace>
707 static constexpr StringLiteral FenceASPrefix =
"amdgpu-synchronize-as";
713 SIAtomicAddrSpace
Result = SIAtomicAddrSpace::NONE;
714 for (
const auto &[Prefix, Suffix] : MMRA) {
715 if (Prefix != FenceASPrefix)
718 if (
auto It = ASNames.find(Suffix); It != ASNames.end())
721 diagnoseUnknownMMRAASName(
MI, Suffix);
724 if (Result == SIAtomicAddrSpace::NONE)
733 const char *Msg)
const {
735 Func.getContext().diagnose(
736 DiagnosticInfoUnsupported(Func, Msg,
MI->getDebugLoc()));
739std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
741 SIAtomicAddrSpace InstrAddrSpace)
const {
743 return std::tuple(SIAtomicScope::SYSTEM, SIAtomicAddrSpace::ATOMIC,
true);
745 return std::tuple(SIAtomicScope::AGENT, SIAtomicAddrSpace::ATOMIC,
true);
747 return std::tuple(SIAtomicScope::CLUSTER, SIAtomicAddrSpace::ATOMIC,
true);
749 return std::tuple(SIAtomicScope::WORKGROUP, SIAtomicAddrSpace::ATOMIC,
752 return std::tuple(SIAtomicScope::WAVEFRONT, SIAtomicAddrSpace::ATOMIC,
755 return std::tuple(SIAtomicScope::SINGLETHREAD, SIAtomicAddrSpace::ATOMIC,
758 return std::tuple(SIAtomicScope::SYSTEM,
759 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
761 return std::tuple(SIAtomicScope::AGENT,
762 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
764 return std::tuple(SIAtomicScope::CLUSTER,
765 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
767 return std::tuple(SIAtomicScope::WORKGROUP,
768 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
770 return std::tuple(SIAtomicScope::WAVEFRONT,
771 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
773 return std::tuple(SIAtomicScope::SINGLETHREAD,
774 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
778SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(
unsigned AS)
const {
780 return SIAtomicAddrSpace::FLAT;
782 return SIAtomicAddrSpace::GLOBAL;
784 return SIAtomicAddrSpace::LDS;
786 return SIAtomicAddrSpace::SCRATCH;
788 return SIAtomicAddrSpace::GDS;
791 return SIAtomicAddrSpace::GLOBAL;
793 return SIAtomicAddrSpace::OTHER;
// Construct the analyzer that derives SIMemOpInfo from machine instructions'
// memory operands. CanDemoteWorkgroupToWavefront records whether F is known
// to run with workgroups no wider than a single wavefront (per
// ST.isSingleWavefrontWorkgroup), in which case workgroup-scope atomics may
// later be demoted to wavefront scope.
799SIMemOpAccess::SIMemOpAccess(
const AMDGPUMachineModuleInfo &MMI_,
800 const GCNSubtarget &ST,
const Function &
F)
801 : MMI(&MMI_),
ST(
ST),
802 CanDemoteWorkgroupToWavefront(
ST.isSingleWavefrontWorkgroup(
F)) {}
804std::optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
806 assert(
MI->getNumMemOperands() > 0);
811 SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
812 bool IsNonTemporal =
true;
814 bool IsLastUse =
false;
815 bool IsCooperative =
false;
819 for (
const auto &MMO :
MI->memoperands()) {
820 IsNonTemporal &= MMO->isNonTemporal();
822 IsLastUse |= MMO->getFlags() &
MOLastUse;
825 toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace());
827 if (OpOrdering != AtomicOrdering::NotAtomic) {
828 const auto &IsSyncScopeInclusion =
830 if (!IsSyncScopeInclusion) {
831 reportUnsupported(
MI,
832 "Unsupported non-inclusive atomic synchronization scope");
836 SSID = *IsSyncScopeInclusion ? SSID : MMO->getSyncScopeID();
838 assert(MMO->getFailureOrdering() != AtomicOrdering::Release &&
839 MMO->getFailureOrdering() != AtomicOrdering::AcquireRelease);
850 Ordering = AtomicOrdering::Monotonic;
852 SIAtomicScope
Scope = SIAtomicScope::NONE;
853 SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
854 bool IsCrossAddressSpaceOrdering =
false;
855 if (Ordering != AtomicOrdering::NotAtomic) {
856 auto ScopeOrNone = toSIAtomicScope(SSID, InstrAddrSpace);
858 reportUnsupported(
MI,
"Unsupported atomic synchronization scope");
861 std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
863 if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
864 ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace) ||
865 ((InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) == SIAtomicAddrSpace::NONE)) {
866 reportUnsupported(
MI,
"Unsupported atomic address space");
870 return SIMemOpInfo(ST, Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
871 IsCrossAddressSpaceOrdering, FailureOrdering, IsVolatile,
872 IsNonTemporal, IsLastUse, IsCooperative,
873 CanDemoteWorkgroupToWavefront);
876std::optional<SIMemOpInfo>
880 if (!(
MI->mayLoad() && !
MI->mayStore()))
884 if (
MI->getNumMemOperands() == 0)
885 return SIMemOpInfo(ST);
887 return constructFromMIWithMMO(
MI);
890std::optional<SIMemOpInfo>
894 if (!(!
MI->mayLoad() &&
MI->mayStore()))
898 if (
MI->getNumMemOperands() == 0)
899 return SIMemOpInfo(ST);
901 return constructFromMIWithMMO(
MI);
904std::optional<SIMemOpInfo>
908 if (
MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
915 auto ScopeOrNone = toSIAtomicScope(SSID, SIAtomicAddrSpace::ATOMIC);
917 reportUnsupported(
MI,
"Unsupported atomic synchronization scope");
921 SIAtomicScope
Scope = SIAtomicScope::NONE;
922 SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
923 bool IsCrossAddressSpaceOrdering =
false;
924 std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
927 if (OrderingAddrSpace != SIAtomicAddrSpace::ATOMIC) {
932 reportUnsupported(
MI,
"Unsupported atomic address space");
936 auto SynchronizeAS = getSynchronizeAddrSpaceMD(*
MI);
938 OrderingAddrSpace = *SynchronizeAS;
940 return SIMemOpInfo(ST, Ordering, Scope, OrderingAddrSpace,
941 SIAtomicAddrSpace::ATOMIC, IsCrossAddressSpaceOrdering,
942 AtomicOrdering::NotAtomic,
false,
false,
false,
false,
943 CanDemoteWorkgroupToWavefront);
946std::optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
950 if (!(
MI->mayLoad() &&
MI->mayStore()))
954 if (
MI->getNumMemOperands() == 0)
955 return SIMemOpInfo(ST);
957 return constructFromMIWithMMO(
MI);
960std::optional<SIMemOpInfo>
967 return constructFromMIWithMMO(
MI);
975 if (
MI.getNumMemOperands() == 0)
978 return MMO->getFlags() & (MOThreadPrivate | MachineMemOperand::MOInvariant);
982SICacheControl::SICacheControl(
const GCNSubtarget &ST) :
ST(
ST) {
983 TII =
ST.getInstrInfo();
989 unsigned Bits)
const {
990 MachineOperand *CPol =
TII->getNamedOperand(*
MI, AMDGPU::OpName::cpol);
994 CPol->setImm(
CPol->getImm() | Bits);
998bool SICacheControl::canAffectGlobalAddrSpace(SIAtomicAddrSpace AS)
const {
999 assert((!
ST.hasGloballyAddressableScratch() ||
1000 (AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ||
1001 (AS & SIAtomicAddrSpace::SCRATCH) == SIAtomicAddrSpace::NONE) &&
1002 "scratch instructions should already be replaced by flat "
1003 "instructions if GloballyAddressableScratch is enabled");
1004 return (AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE;
1008std::unique_ptr<SICacheControl> SICacheControl::create(
const GCNSubtarget &ST) {
1009 GCNSubtarget::Generation Generation =
ST.getGeneration();
1010 if (Generation < AMDGPUSubtarget::GFX10)
1011 return std::make_unique<SIGfx6CacheControl>(ST);
1012 if (Generation < AMDGPUSubtarget::GFX12)
1013 return std::make_unique<SIGfx10CacheControl>(ST);
1014 return std::make_unique<SIGfx12CacheControl>(ST);
1017bool SIGfx6CacheControl::enableLoadCacheBypass(
1019 SIAtomicScope Scope,
1020 SIAtomicAddrSpace AddrSpace)
const {
1023 if (!canAffectGlobalAddrSpace(AddrSpace)) {
1035 case SIAtomicScope::SYSTEM:
1036 if (
ST.hasGFX940Insts()) {
1042 case SIAtomicScope::AGENT:
1043 if (
ST.hasGFX940Insts()) {
1052 case SIAtomicScope::WORKGROUP:
1053 if (
ST.hasGFX940Insts()) {
1060 }
else if (
ST.hasGFX90AInsts()) {
1065 if (
ST.isTgSplitEnabled())
1069 case SIAtomicScope::WAVEFRONT:
1070 case SIAtomicScope::SINGLETHREAD:
1080bool SIGfx6CacheControl::enableStoreCacheBypass(
1082 SIAtomicScope Scope,
1083 SIAtomicAddrSpace AddrSpace)
const {
1091 if (
ST.hasGFX940Insts() && canAffectGlobalAddrSpace(AddrSpace)) {
1093 case SIAtomicScope::SYSTEM:
1097 case SIAtomicScope::AGENT:
1101 case SIAtomicScope::WORKGROUP:
1105 case SIAtomicScope::WAVEFRONT:
1106 case SIAtomicScope::SINGLETHREAD:
1124bool SIGfx6CacheControl::enableRMWCacheBypass(
1126 SIAtomicScope Scope,
1127 SIAtomicAddrSpace AddrSpace)
const {
1137 if (
ST.hasGFX940Insts() && canAffectGlobalAddrSpace(AddrSpace)) {
1139 case SIAtomicScope::SYSTEM:
1143 case SIAtomicScope::AGENT:
1144 case SIAtomicScope::WORKGROUP:
1145 case SIAtomicScope::WAVEFRONT:
1146 case SIAtomicScope::SINGLETHREAD:
1160bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal(
1162 bool IsVolatile,
bool IsNonTemporal,
bool IsLastUse =
false)
const {
1172 assert(
Op == SIMemOp::LOAD ||
Op == SIMemOp::STORE);
1177 if (
ST.hasGFX940Insts()) {
1180 }
else if (
Op == SIMemOp::LOAD) {
1192 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
1193 Position::AFTER, AtomicOrdering::Unordered,
1199 if (IsNonTemporal) {
1200 if (
ST.hasGFX940Insts()) {
1214 SIAtomicScope Scope,
1215 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
1216 bool IsCrossAddrSpaceOrdering, Position Pos,
1218 bool AtomicsOnly)
const {
1221 MachineBasicBlock &
MBB = *
MI->getParent();
1224 if (Pos == Position::AFTER)
1228 if (
ST.hasGFX90AInsts() &&
ST.isTgSplitEnabled()) {
1236 if (((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH |
1237 SIAtomicAddrSpace::GDS)) != SIAtomicAddrSpace::NONE) &&
1238 (Scope == SIAtomicScope::WORKGROUP)) {
1240 Scope = SIAtomicScope::AGENT;
1244 AddrSpace &= ~SIAtomicAddrSpace
::LDS;
1248 bool LGKMCnt =
false;
1250 if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
1251 SIAtomicAddrSpace::NONE) {
1253 case SIAtomicScope::SYSTEM:
1254 case SIAtomicScope::AGENT:
1257 case SIAtomicScope::WORKGROUP:
1258 case SIAtomicScope::WAVEFRONT:
1259 case SIAtomicScope::SINGLETHREAD:
1268 if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
1270 case SIAtomicScope::SYSTEM:
1271 case SIAtomicScope::AGENT:
1272 case SIAtomicScope::WORKGROUP:
1279 LGKMCnt |= IsCrossAddrSpaceOrdering;
1281 case SIAtomicScope::WAVEFRONT:
1282 case SIAtomicScope::SINGLETHREAD:
1291 if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
1293 case SIAtomicScope::SYSTEM:
1294 case SIAtomicScope::AGENT:
1301 LGKMCnt |= IsCrossAddrSpaceOrdering;
1303 case SIAtomicScope::WORKGROUP:
1304 case SIAtomicScope::WAVEFRONT:
1305 case SIAtomicScope::SINGLETHREAD:
1314 if (VMCnt || LGKMCnt) {
1315 unsigned WaitCntImmediate =
1321 .
addImm(WaitCntImmediate);
1329 Scope == SIAtomicScope::WORKGROUP &&
1330 (AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
1335 if (Pos == Position::AFTER)
1344 return !ST.isAmdPalOS() && !ST.isMesa3DOS();
1348 SIAtomicScope Scope,
1349 SIAtomicAddrSpace AddrSpace,
1350 Position Pos)
const {
1351 if (!InsertCacheInv)
1356 MachineBasicBlock &
MBB = *
MI->getParent();
1359 if (Pos == Position::AFTER)
1363 ? AMDGPU::BUFFER_WBINVL1_VOL
1364 : AMDGPU::BUFFER_WBINVL1;
1366 if (canAffectGlobalAddrSpace(AddrSpace)) {
1368 case SIAtomicScope::SYSTEM:
1369 if (
ST.hasGFX940Insts()) {
1385 if (
ST.hasGFX90AInsts()) {
1400 case SIAtomicScope::AGENT:
1401 if (
ST.hasGFX940Insts()) {
1416 case SIAtomicScope::WORKGROUP:
1417 if (
ST.isTgSplitEnabled()) {
1418 if (
ST.hasGFX940Insts()) {
1437 }
else if (
ST.hasGFX90AInsts()) {
1443 case SIAtomicScope::WAVEFRONT:
1444 case SIAtomicScope::SINGLETHREAD:
1461 if (Pos == Position::AFTER)
1468 SIAtomicScope Scope,
1469 SIAtomicAddrSpace AddrSpace,
1470 bool IsCrossAddrSpaceOrdering,
1471 Position Pos)
const {
1474 if (
ST.hasGFX90AInsts()) {
1475 MachineBasicBlock &
MBB = *
MI->getParent();
1478 if (Pos == Position::AFTER)
1481 if (canAffectGlobalAddrSpace(AddrSpace)) {
1483 case SIAtomicScope::SYSTEM:
1495 case SIAtomicScope::AGENT:
1496 if (
ST.hasGFX940Insts()) {
1507 case SIAtomicScope::WORKGROUP:
1508 case SIAtomicScope::WAVEFRONT:
1509 case SIAtomicScope::SINGLETHREAD:
1519 if (Pos == Position::AFTER)
1525 Changed |= insertWait(
MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
1526 IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release,
1532bool SIGfx10CacheControl::enableLoadCacheBypass(
1534 SIAtomicAddrSpace AddrSpace)
const {
1538 if (canAffectGlobalAddrSpace(AddrSpace)) {
1540 case SIAtomicScope::SYSTEM:
1541 case SIAtomicScope::AGENT:
1548 case SIAtomicScope::WORKGROUP:
1553 if (!
ST.isCuModeEnabled())
1556 case SIAtomicScope::WAVEFRONT:
1557 case SIAtomicScope::SINGLETHREAD:
1575bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal(
1577 bool IsVolatile,
bool IsNonTemporal,
bool IsLastUse =
false)
const {
1588 assert(
Op == SIMemOp::LOAD ||
Op == SIMemOp::STORE);
1596 if (
Op == SIMemOp::LOAD) {
1609 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
1610 Position::AFTER, AtomicOrdering::Unordered,
1615 if (IsNonTemporal) {
1620 if (
Op == SIMemOp::STORE)
1635 SIAtomicScope Scope,
1636 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
1637 bool IsCrossAddrSpaceOrdering,
1639 bool AtomicsOnly)
const {
1642 MachineBasicBlock &
MBB = *
MI->getParent();
1645 if (Pos == Position::AFTER)
1650 bool LGKMCnt =
false;
1652 if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
1653 SIAtomicAddrSpace::NONE) {
1655 case SIAtomicScope::SYSTEM:
1656 case SIAtomicScope::AGENT:
1657 if ((
Op & SIMemOp::LOAD) != SIMemOp::NONE)
1659 if ((
Op & SIMemOp::STORE) != SIMemOp::NONE)
1662 case SIAtomicScope::WORKGROUP:
1672 if ((
Op & SIMemOp::LOAD) != SIMemOp::NONE)
1674 if ((
Op & SIMemOp::STORE) != SIMemOp::NONE)
1678 case SIAtomicScope::WAVEFRONT:
1679 case SIAtomicScope::SINGLETHREAD:
1688 if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
1690 case SIAtomicScope::SYSTEM:
1691 case SIAtomicScope::AGENT:
1692 case SIAtomicScope::WORKGROUP:
1699 LGKMCnt |= IsCrossAddrSpaceOrdering;
1701 case SIAtomicScope::WAVEFRONT:
1702 case SIAtomicScope::SINGLETHREAD:
1711 if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
1713 case SIAtomicScope::SYSTEM:
1714 case SIAtomicScope::AGENT:
1721 LGKMCnt |= IsCrossAddrSpaceOrdering;
1723 case SIAtomicScope::WORKGROUP:
1724 case SIAtomicScope::WAVEFRONT:
1725 case SIAtomicScope::SINGLETHREAD:
1734 if (VMCnt || LGKMCnt) {
1735 unsigned WaitCntImmediate =
1741 .
addImm(WaitCntImmediate);
1749 Scope == SIAtomicScope::WORKGROUP &&
1750 (AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
1757 .
addReg(AMDGPU::SGPR_NULL, RegState::Undef)
1762 if (Pos == Position::AFTER)
1769 SIAtomicScope Scope,
1770 SIAtomicAddrSpace AddrSpace,
1771 Position Pos)
const {
1772 if (!InsertCacheInv)
1777 MachineBasicBlock &
MBB = *
MI->getParent();
1780 if (Pos == Position::AFTER)
1783 if (canAffectGlobalAddrSpace(AddrSpace)) {
1785 case SIAtomicScope::SYSTEM:
1786 case SIAtomicScope::AGENT:
1794 case SIAtomicScope::WORKGROUP:
1799 if (!
ST.isCuModeEnabled()) {
1804 case SIAtomicScope::WAVEFRONT:
1805 case SIAtomicScope::SINGLETHREAD:
1820 if (Pos == Position::AFTER)
1828 MachineOperand *
CPol =
TII->getNamedOperand(*
MI, OpName::cpol);
1843 MachineOperand *
CPol =
TII->getNamedOperand(*
MI, OpName::cpol);
1856bool SIGfx12CacheControl::insertWaitsBeforeSystemScopeStore(
1860 MachineBasicBlock &
MBB = *
MI->getParent();
1864 if (
ST.hasImageInsts()) {
1875 SIAtomicScope Scope,
1876 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
1877 bool IsCrossAddrSpaceOrdering,
1879 bool AtomicsOnly)
const {
1882 MachineBasicBlock &
MBB = *
MI->getParent();
1885 bool LOADCnt =
false;
1887 bool STORECnt =
false;
1889 if (Pos == Position::AFTER)
1892 if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
1893 SIAtomicAddrSpace::NONE) {
1895 case SIAtomicScope::SYSTEM:
1896 case SIAtomicScope::AGENT:
1897 case SIAtomicScope::CLUSTER:
1898 if ((
Op & SIMemOp::LOAD) != SIMemOp::NONE)
1900 if ((
Op & SIMemOp::STORE) != SIMemOp::NONE)
1903 case SIAtomicScope::WORKGROUP:
1920 if (!
ST.isCuModeEnabled() ||
ST.hasGFX1250Insts() ||
1922 if ((
Op & SIMemOp::LOAD) != SIMemOp::NONE)
1924 if ((
Op & SIMemOp::STORE) != SIMemOp::NONE)
1928 case SIAtomicScope::WAVEFRONT:
1929 case SIAtomicScope::SINGLETHREAD:
1938 if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
1940 case SIAtomicScope::SYSTEM:
1941 case SIAtomicScope::AGENT:
1942 case SIAtomicScope::CLUSTER:
1943 case SIAtomicScope::WORKGROUP:
1950 DSCnt |= IsCrossAddrSpaceOrdering;
1952 case SIAtomicScope::WAVEFRONT:
1953 case SIAtomicScope::SINGLETHREAD:
1974 if (!AtomicsOnly &&
ST.hasImageInsts()) {
1992 if (Pos == Position::AFTER)
1999 SIAtomicScope Scope,
2000 SIAtomicAddrSpace AddrSpace,
2001 Position Pos)
const {
2002 if (!InsertCacheInv)
2005 MachineBasicBlock &
MBB = *
MI->getParent();
2014 if (!canAffectGlobalAddrSpace(AddrSpace))
2019 case SIAtomicScope::SYSTEM:
2022 case SIAtomicScope::AGENT:
2025 case SIAtomicScope::CLUSTER:
2028 case SIAtomicScope::WORKGROUP:
2036 if (
ST.isCuModeEnabled())
2041 case SIAtomicScope::WAVEFRONT:
2042 case SIAtomicScope::SINGLETHREAD:
2049 if (Pos == Position::AFTER)
2054 if (Pos == Position::AFTER)
2059 if (
ST.hasINVWBL2WaitCntRequirement() && Scope > SIAtomicScope::CLUSTER) {
2060 insertWait(
MI, Scope, AddrSpace, SIMemOp::LOAD,
2061 false, Pos, AtomicOrdering::Acquire,
2064 if (Pos == Position::AFTER)
2072 SIAtomicScope Scope,
2073 SIAtomicAddrSpace AddrSpace,
2074 bool IsCrossAddrSpaceOrdering,
2075 Position Pos)
const {
2078 MachineBasicBlock &
MBB = *
MI->getParent();
2085 if (canAffectGlobalAddrSpace(AddrSpace)) {
2086 if (Pos == Position::AFTER)
2095 std::optional<AMDGPU::CPol::CPol> NeedsWB;
2097 case SIAtomicScope::SYSTEM:
2100 case SIAtomicScope::AGENT:
2102 if (
ST.hasGFX1250Insts())
2105 case SIAtomicScope::CLUSTER:
2106 case SIAtomicScope::WORKGROUP:
2108 case SIAtomicScope::WAVEFRONT:
2109 case SIAtomicScope::SINGLETHREAD:
2120 if (
ST.hasINVWBL2WaitCntRequirement())
2121 insertWait(
MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
2123 AtomicOrdering::Release,
2130 if (Pos == Position::AFTER)
2137 Changed |= insertWait(
MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
2138 IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release,
2144bool SIGfx12CacheControl::handleNonVolatile(MachineInstr &
MI)
const {
2146 if (!
ST.hasGFX1250Insts())
2148 MachineOperand *
CPol =
TII->getNamedOperand(
MI, OpName::cpol);
2155bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
2157 bool IsVolatile,
bool IsNonTemporal,
bool IsLastUse =
false)
const {
2166 assert(
Op == SIMemOp::LOAD ||
Op == SIMemOp::STORE);
2173 }
else if (IsNonTemporal) {
2181 if (
ST.requiresWaitXCntForSingleAccessInstructions() &&
2183 MachineBasicBlock &
MBB = *
MI->getParent();
2193 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
2194 Position::AFTER, AtomicOrdering::Unordered,
2201bool SIGfx12CacheControl::finalizeStore(MachineInstr &
MI,
bool Atomic)
const {
2202 assert(
MI.mayStore() &&
"Not a Store inst");
2203 const bool IsRMW = (
MI.mayLoad() &&
MI.mayStore());
2206 if (Atomic &&
ST.requiresWaitXCntForSingleAccessInstructions() &&
2208 MachineBasicBlock &
MBB = *
MI.getParent();
2217 MachineOperand *
CPol =
TII->getNamedOperand(
MI, OpName::cpol);
2223 if (
ST.requiresWaitsBeforeSystemScopeStores() && !Atomic &&
2225 Changed |= insertWaitsBeforeSystemScopeStore(
MI.getIterator());
2230bool SIGfx12CacheControl::handleCooperativeAtomic(MachineInstr &
MI)
const {
2231 if (!
ST.hasGFX1250Insts())
2235 MachineOperand *
CPol =
TII->getNamedOperand(
MI, OpName::cpol);
2236 assert(CPol &&
"No CPol operand?");
2244 SIAtomicScope Scope,
2245 SIAtomicAddrSpace AddrSpace)
const {
2248 if (canAffectGlobalAddrSpace(AddrSpace)) {
2250 case SIAtomicScope::SYSTEM:
2253 case SIAtomicScope::AGENT:
2256 case SIAtomicScope::CLUSTER:
2259 case SIAtomicScope::WORKGROUP:
2262 if (!
ST.isCuModeEnabled())
2265 case SIAtomicScope::WAVEFRONT:
2266 case SIAtomicScope::SINGLETHREAD:
2284bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
2285 if (AtomicPseudoMIs.empty())
2288 for (
auto &
MI : AtomicPseudoMIs)
2289 MI->eraseFromParent();
2291 AtomicPseudoMIs.clear();
2295bool SIMemoryLegalizer::expandLoad(
const SIMemOpInfo &MOI,
2303 if (MOI.isAtomic()) {
2305 <<
", scope=" <<
toString(MOI.getScope())
2306 <<
", ordering-AS=" << MOI.getOrderingAddrSpace()
2307 <<
", instr-AS=" << MOI.getInstrAddrSpace() <<
"\n");
2309 if (Order == AtomicOrdering::Monotonic ||
2310 Order == AtomicOrdering::Acquire ||
2311 Order == AtomicOrdering::SequentiallyConsistent) {
2312 Changed |= CC->enableLoadCacheBypass(
MI, MOI.getScope(),
2313 MOI.getOrderingAddrSpace());
2318 if (MOI.isCooperative())
2319 Changed |= CC->handleCooperativeAtomic(*
MI);
2321 if (Order == AtomicOrdering::SequentiallyConsistent)
2322 Changed |= CC->insertWait(
MI, MOI.getScope(), MOI.getOrderingAddrSpace(),
2323 SIMemOp::LOAD | SIMemOp::STORE,
2324 MOI.getIsCrossAddressSpaceOrdering(),
2325 Position::BEFORE, Order,
false);
2327 if (Order == AtomicOrdering::Acquire ||
2328 Order == AtomicOrdering::SequentiallyConsistent) {
2331 CC->insertWait(
MI, MOI.getScope(), MOI.getInstrAddrSpace(),
2332 SIMemOp::LOAD, MOI.getIsCrossAddressSpaceOrdering(),
2333 Position::AFTER, Order,
true);
2334 Changed |= CC->insertAcquire(
MI, MOI.getScope(),
2335 MOI.getOrderingAddrSpace(),
2345 Changed |= CC->enableVolatileAndOrNonTemporal(
2346 MI, MOI.getInstrAddrSpace(), SIMemOp::LOAD, MOI.isVolatile(),
2347 MOI.isNonTemporal(), MOI.isLastUse());
2352bool SIMemoryLegalizer::expandStore(
const SIMemOpInfo &MOI,
2360 MachineInstr &StoreMI = *
MI;
2362 if (MOI.isAtomic()) {
2364 <<
", scope=" <<
toString(MOI.getScope())
2365 <<
", ordering-AS=" << MOI.getOrderingAddrSpace()
2366 <<
", instr-AS=" << MOI.getInstrAddrSpace() <<
"\n");
2367 if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
2368 MOI.getOrdering() == AtomicOrdering::Release ||
2369 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
2370 Changed |= CC->enableStoreCacheBypass(
MI, MOI.getScope(),
2371 MOI.getOrderingAddrSpace());
2376 if (MOI.isCooperative())
2377 Changed |= CC->handleCooperativeAtomic(*
MI);
2379 if (MOI.getOrdering() == AtomicOrdering::Release ||
2380 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
2381 Changed |= CC->insertRelease(
MI, MOI.getScope(),
2382 MOI.getOrderingAddrSpace(),
2383 MOI.getIsCrossAddressSpaceOrdering(),
2386 Changed |= CC->finalizeStore(StoreMI,
true);
2393 Changed |= CC->enableVolatileAndOrNonTemporal(
2394 MI, MOI.getInstrAddrSpace(), SIMemOp::STORE, MOI.isVolatile(),
2395 MOI.isNonTemporal());
2399 Changed |= CC->finalizeStore(StoreMI,
false);
2403bool SIMemoryLegalizer::expandAtomicFence(
const SIMemOpInfo &MOI,
2405 assert(
MI->getOpcode() == AMDGPU::ATOMIC_FENCE);
2409 AtomicPseudoMIs.push_back(
MI);
2412 const SIAtomicAddrSpace OrderingAddrSpace = MOI.getOrderingAddrSpace();
2414 if (MOI.isAtomic()) {
2416 <<
", scope=" <<
toString(MOI.getScope())
2417 <<
", ordering-AS=" << OrderingAddrSpace <<
"\n");
2419 if (Order == AtomicOrdering::Acquire) {
2421 Changed |= CC->insertWait(
MI, MOI.getScope(), OrderingAddrSpace,
2422 SIMemOp::LOAD | SIMemOp::STORE,
2423 MOI.getIsCrossAddressSpaceOrdering(),
2424 Position::BEFORE, Order,
true);
2427 if (Order == AtomicOrdering::Release ||
2428 Order == AtomicOrdering::AcquireRelease ||
2429 Order == AtomicOrdering::SequentiallyConsistent)
2437 Changed |= CC->insertRelease(
MI, MOI.getScope(), OrderingAddrSpace,
2438 MOI.getIsCrossAddressSpaceOrdering(),
2446 if (Order == AtomicOrdering::Acquire ||
2447 Order == AtomicOrdering::AcquireRelease ||
2448 Order == AtomicOrdering::SequentiallyConsistent)
2449 Changed |= CC->insertAcquire(
MI, MOI.getScope(), OrderingAddrSpace,
2458bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(
const SIMemOpInfo &MOI,
2465 MachineInstr &RMWMI = *
MI;
2467 if (MOI.isAtomic()) {
2469 <<
", failure-ordering="
2471 <<
", scope=" <<
toString(MOI.getScope())
2472 <<
", ordering-AS=" << MOI.getOrderingAddrSpace()
2473 <<
", instr-AS=" << MOI.getInstrAddrSpace() <<
"\n");
2475 if (Order == AtomicOrdering::Monotonic ||
2476 Order == AtomicOrdering::Acquire || Order == AtomicOrdering::Release ||
2477 Order == AtomicOrdering::AcquireRelease ||
2478 Order == AtomicOrdering::SequentiallyConsistent) {
2479 Changed |= CC->enableRMWCacheBypass(
MI, MOI.getScope(),
2480 MOI.getInstrAddrSpace());
2483 if (Order == AtomicOrdering::Release ||
2484 Order == AtomicOrdering::AcquireRelease ||
2485 Order == AtomicOrdering::SequentiallyConsistent ||
2486 MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
2487 Changed |= CC->insertRelease(
MI, MOI.getScope(),
2488 MOI.getOrderingAddrSpace(),
2489 MOI.getIsCrossAddressSpaceOrdering(),
2492 if (Order == AtomicOrdering::Acquire ||
2493 Order == AtomicOrdering::AcquireRelease ||
2494 Order == AtomicOrdering::SequentiallyConsistent ||
2495 MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
2496 MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
2499 CC->insertWait(
MI, MOI.getScope(), MOI.getInstrAddrSpace(),
2500 isAtomicRet(*
MI) ? SIMemOp::LOAD : SIMemOp::STORE,
2501 MOI.getIsCrossAddressSpaceOrdering(), Position::AFTER,
2503 Changed |= CC->insertAcquire(
MI, MOI.getScope(),
2504 MOI.getOrderingAddrSpace(),
2508 Changed |= CC->finalizeStore(RMWMI,
true);
2515bool SIMemoryLegalizer::expandLDSDMA(
const SIMemOpInfo &MOI,
2529 return CC->enableVolatileAndOrNonTemporal(
2530 MI, MOI.getInstrAddrSpace(), OpKind, MOI.isVolatile(),
2531 MOI.isNonTemporal(), MOI.isLastUse());
2534bool SIMemoryLegalizerLegacy::runOnMachineFunction(MachineFunction &MF) {
2535 const MachineModuleInfo &MMI =
2536 getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
2537 return SIMemoryLegalizer(MMI).run(MF);
2544 .getCachedResult<MachineModuleAnalysis>(
2546 assert(MMI &&
"MachineModuleAnalysis must be available");
2547 if (!SIMemoryLegalizer(MMI->getMMI()).run(MF))
2558 CC = SICacheControl::create(ST);
2560 for (
auto &
MBB : MF) {
2564 if (
MI->isBundle() &&
MI->mayLoadOrStore()) {
2567 I != E &&
I->isBundledWithPred(); ++
I) {
2568 I->unbundleFromPred();
2571 MO.setIsInternalRead(
false);
2574 MI =
MI->eraseFromParent();
2578 if (
const auto &MOI = MOA.getLoadInfo(
MI))
2580 else if (
const auto &MOI = MOA.getStoreInfo(
MI))
2582 else if (
const auto &MOI = MOA.getLDSDMAInfo(
MI))
2584 else if (
const auto &MOI = MOA.getAtomicFenceInfo(
MI))
2586 else if (
const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(
MI))
2587 Changed |= expandAtomicCmpxchgOrRmw(*MOI,
MI);
2595 Changed |= removeAtomicPseudoMIs();
2601char SIMemoryLegalizerLegacy::
ID = 0;
2605 return new SIMemoryLegalizerLegacy();
static std::optional< LoadInfo > getLoadInfo(const MachineInstr &MI)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
Provides AMDGPU specific target descriptions.
AMDGPU Machine Module Info.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Atomic ordering constants.
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
This header defines various interfaces for pass management in LLVM.
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static cl::opt< bool > AmdgcnSkipCacheInvalidations("amdgcn-skip-cache-invalidations", cl::init(false), cl::Hidden, cl::desc("Use this to skip inserting cache invalidating instructions."))
static bool isNonVolatileMemoryAccess(const MachineInstr &MI)
static bool canUseBUFFER_WBINVL1_VOL(const GCNSubtarget &ST)
static const uint32_t IV[8]
SyncScope::ID getWorkgroupSSID() const
SyncScope::ID getWavefrontSSID() const
SyncScope::ID getAgentSSID() const
SyncScope::ID getClusterOneAddressSpaceSSID() const
SyncScope::ID getClusterSSID() const
std::optional< bool > isSyncScopeInclusion(SyncScope::ID A, SyncScope::ID B) const
In AMDGPU target synchronization scopes are inclusive, meaning a larger synchronization scope is incl...
SyncScope::ID getAgentOneAddressSpaceSSID() const
SyncScope::ID getSingleThreadOneAddressSpaceSSID() const
SyncScope::ID getWavefrontOneAddressSpaceSSID() const
SyncScope::ID getSystemOneAddressSpaceSSID() const
SyncScope::ID getWorkgroupOneAddressSpaceSSID() const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Represents analyses that only rely on functions' control flow.
Diagnostic information for unsupported feature in backend.
FunctionPass class - This class is used to implement most global optimizations.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
A helper class to return the specified delimiter string after the first invocation of operator String...
Instructions::iterator instr_iterator
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Representation of each machine instruction.
A description of a memory reference used in the backend.
Ty & getObjFileInfo()
Keep track of various per-module pieces of information for backends that would like to do so.
MachineOperand class - Representation of each machine instruction operand.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
static bool isVMEM(const MachineInstr &MI)
static bool mayWriteLDSThroughDMA(const MachineInstr &MI)
static bool isBUF(const MachineInstr &MI)
static bool isAtomicRet(const MachineInstr &MI)
static bool isAtomic(const MachineInstr &MI)
static bool isLDSDMA(const MachineInstr &MI)
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
StringRef - Represent a constant reference to a string, i.e.
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an SmallVector or SmallString.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ BUFFER_STRIDED_POINTER
Address space for 192-bit fat buffer pointers with an additional index.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ BUFFER_FAT_POINTER
Address space for 160-bit buffer fat pointers.
@ PRIVATE_ADDRESS
Address space for private memory.
@ BUFFER_RESOURCE
Address space for 128-bit buffer resources.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
bool isGFX10(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded)
unsigned getVmcntBitMask(const IsaVersion &Version)
unsigned getLgkmcntBitMask(const IsaVersion &Version)
unsigned getExpcntBitMask(const IsaVersion &Version)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
@ System
Synchronized with respect to all concurrently executing threads.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
Scope
Defines the scope in which this symbol should be visible: Default – Visible in the public interface o...
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
OuterAnalysisManagerProxy< ModuleAnalysisManager, MachineFunction > ModuleAnalysisManagerMachineFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
char & SIMemoryLegalizerID
@ LLVM_MARK_AS_BITMASK_ENUM
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
bool isReleaseOrStronger(AtomicOrdering AO)
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
const char * toIRString(AtomicOrdering ao)
String used by LLVM IR to represent atomic ordering.
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
AtomicOrdering getMergedAtomicOrdering(AtomicOrdering AO, AtomicOrdering Other)
Return a single atomic ordering that is at least as strong as both the AO and Other orderings for an ...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static const MachineMemOperand::Flags MOCooperative
Mark the MMO of cooperative load/store atomics.
AtomicOrdering
Atomic ordering for LLVM's memory model.
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
FunctionPass * createSIMemoryLegalizerPass()
bool isStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
Returns true if ao is stronger than other as defined by the AtomicOrdering lattice,...