36#define DEBUG_TYPE "si-memory-legalizer"
37#define PASS_NAME "SI Memory Legalizer"
41 cl::desc(
"Use this to skip inserting cache invalidating instructions."));
63enum class SIAtomicScope {
75enum class SIAtomicAddrSpace {
87 ATOMIC = GLOBAL |
LDS | SCRATCH | GDS,
90 ALL = GLOBAL |
LDS | SCRATCH | GDS | OTHER,
98 case SIAtomicScope::NONE:
100 case SIAtomicScope::SINGLETHREAD:
101 return "singlethread";
102 case SIAtomicScope::WAVEFRONT:
104 case SIAtomicScope::WORKGROUP:
106 case SIAtomicScope::CLUSTER:
108 case SIAtomicScope::AGENT:
110 case SIAtomicScope::SYSTEM:
117 if (AS == SIAtomicAddrSpace::NONE) {
122 if ((AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE)
123 OS <<
LS <<
"global";
124 if ((AS & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE)
126 if ((AS & SIAtomicAddrSpace::SCRATCH) != SIAtomicAddrSpace::NONE)
127 OS <<
LS <<
"scratch";
128 if ((AS & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE)
130 if ((AS & SIAtomicAddrSpace::OTHER) != SIAtomicAddrSpace::NONE)
136class SIMemOpInfo final {
139 friend class SIMemOpAccess;
143 SIAtomicScope Scope = SIAtomicScope::SYSTEM;
144 SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
145 SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
146 bool IsCrossAddressSpaceOrdering =
false;
147 bool IsVolatile =
false;
148 bool IsNonTemporal =
false;
149 bool IsLastUse =
false;
150 bool IsCooperative =
false;
151 bool IsAVNone =
false;
155 const GCNSubtarget &ST,
157 SIAtomicScope Scope = SIAtomicScope::SYSTEM,
158 SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC,
159 SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::ALL,
160 bool IsCrossAddressSpaceOrdering =
true,
161 AtomicOrdering FailureOrdering = AtomicOrdering::SequentiallyConsistent,
162 bool IsVolatile =
false,
bool IsNonTemporal =
false,
163 bool IsLastUse =
false,
bool IsCooperative =
false,
164 bool CanDemoteWorkgroupToWavefront =
false,
bool IsAVNone =
false)
165 : Ordering(Ordering), FailureOrdering(FailureOrdering), Scope(Scope),
166 OrderingAddrSpace(OrderingAddrSpace), InstrAddrSpace(InstrAddrSpace),
167 IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
168 IsVolatile(IsVolatile), IsNonTemporal(IsNonTemporal),
169 IsLastUse(IsLastUse), IsCooperative(IsCooperative), IsAVNone(IsAVNone) {
171 if (Ordering == AtomicOrdering::NotAtomic) {
172 assert(!IsCooperative &&
"Cannot be cooperative & non-atomic!");
173 assert(Scope == SIAtomicScope::NONE &&
174 OrderingAddrSpace == SIAtomicAddrSpace::NONE &&
175 !IsCrossAddressSpaceOrdering &&
176 FailureOrdering == AtomicOrdering::NotAtomic);
180 assert(Scope != SIAtomicScope::NONE &&
181 (OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
182 SIAtomicAddrSpace::NONE &&
183 (InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
184 SIAtomicAddrSpace::NONE);
189 if ((OrderingAddrSpace == InstrAddrSpace) &&
191 this->IsCrossAddressSpaceOrdering =
false;
195 if ((InstrAddrSpace & ~SIAtomicAddrSpace::SCRATCH) ==
196 SIAtomicAddrSpace::NONE) {
197 this->Scope = std::min(Scope, SIAtomicScope::SINGLETHREAD);
198 }
else if ((InstrAddrSpace &
199 ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS)) ==
200 SIAtomicAddrSpace::NONE) {
201 this->Scope = std::min(Scope, SIAtomicScope::WORKGROUP);
202 }
else if ((InstrAddrSpace &
203 ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS |
204 SIAtomicAddrSpace::GDS)) == SIAtomicAddrSpace::NONE) {
205 this->Scope = std::min(Scope, SIAtomicScope::AGENT);
210 if (this->Scope == SIAtomicScope::CLUSTER && !
ST.hasClusters())
211 this->Scope = SIAtomicScope::AGENT;
217 if (CanDemoteWorkgroupToWavefront &&
218 this->Scope == SIAtomicScope::WORKGROUP &&
221 AtomicOrdering::Monotonic)))
222 this->Scope = SIAtomicScope::WAVEFRONT;
228 SIAtomicScope getScope()
const {
241 return FailureOrdering;
246 SIAtomicAddrSpace getInstrAddrSpace()
const {
247 return InstrAddrSpace;
252 SIAtomicAddrSpace getOrderingAddrSpace()
const {
253 return OrderingAddrSpace;
258 bool getIsCrossAddressSpaceOrdering()
const {
259 return IsCrossAddressSpaceOrdering;
264 bool isVolatile()
const {
270 bool isNonTemporal()
const {
271 return IsNonTemporal;
276 bool isLastUse()
const {
return IsLastUse; }
279 bool isCooperative()
const {
return IsCooperative; }
282 bool isAVNone()
const {
return IsAVNone; }
286 bool isAtomic()
const {
287 return Ordering != AtomicOrdering::NotAtomic;
292class SIMemOpAccess final {
294 const AMDGPUMachineModuleInfo *MMI =
nullptr;
295 const GCNSubtarget &ST;
296 const bool CanDemoteWorkgroupToWavefront;
300 const char *Msg)
const;
306 std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
307 toSIAtomicScope(
SyncScope::ID SSID, SIAtomicAddrSpace InstrAddrSpace)
const;
310 SIAtomicAddrSpace toSIAtomicAddrSpace(
unsigned AS)
const;
314 std::optional<SIMemOpInfo>
320 SIMemOpAccess(
const AMDGPUMachineModuleInfo &MMI,
const GCNSubtarget &ST,
324 std::optional<SIMemOpInfo>
329 std::optional<SIMemOpInfo>
334 std::optional<SIMemOpInfo>
339 std::optional<SIMemOpInfo>
345 std::optional<SIMemOpInfo>
349class SICacheControl {
353 const GCNSubtarget &ST;
356 const SIInstrInfo *TII =
nullptr;
363 SICacheControl(
const GCNSubtarget &ST);
368 unsigned Bits)
const;
372 bool canAffectGlobalAddrSpace(SIAtomicAddrSpace AS)
const;
378 static std::unique_ptr<SICacheControl> create(
const GCNSubtarget &ST);
385 SIAtomicAddrSpace AddrSpace)
const = 0;
392 SIAtomicAddrSpace AddrSpace)
const = 0;
399 SIAtomicAddrSpace AddrSpace)
const = 0;
405 SIAtomicAddrSpace AddrSpace,
406 SIMemOp
Op,
bool IsVolatile,
408 bool IsLastUse =
false)
const = 0;
415 virtual bool finalizeStore(MachineInstr &
MI,
bool Atomic)
const {
425 virtual bool handleCooperativeAtomic(MachineInstr &
MI)
const {
427 "cooperative atomics are not available on this architecture");
440 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
441 bool IsCrossAddrSpaceOrdering, Position Pos,
451 SIAtomicAddrSpace AddrSpace,
452 Position Pos)
const = 0;
460 SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace,
461 Position Pos)
const = 0;
465 SIAtomicAddrSpace AddrSpace,
bool IsCrossAddrSpaceOrdering,
466 Position Pos,
bool IsAVNone)
const {
467 bool Changed = !IsAVNone && insertWriteback(
MI, Scope, AddrSpace, Pos);
468 Changed |= insertWait(
MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
469 IsCrossAddrSpaceOrdering, Pos,
470 AtomicOrdering::Release,
false);
476 virtual bool handleNonVolatile(MachineInstr &
MI)
const {
return false; }
479 virtual ~SICacheControl() =
default;
484class SIGfx6CacheControl final :
public SICacheControl {
487 SIGfx6CacheControl(
const GCNSubtarget &ST) : SICacheControl(
ST) {}
491 SIAtomicAddrSpace AddrSpace)
const override;
495 SIAtomicAddrSpace AddrSpace)
const override;
499 SIAtomicAddrSpace AddrSpace)
const override;
502 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
503 bool IsVolatile,
bool IsNonTemporal,
504 bool IsLastUse)
const override;
507 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
508 bool IsCrossAddrSpaceOrdering, Position Pos,
513 SIAtomicAddrSpace AddrSpace,
514 Position Pos)
const override;
517 SIAtomicAddrSpace AddrSpace,
518 Position Pos)
const override;
522class SIGfx10CacheControl final :
public SICacheControl {
524 SIGfx10CacheControl(
const GCNSubtarget &ST) : SICacheControl(
ST) {}
528 SIAtomicAddrSpace AddrSpace)
const override;
532 SIAtomicAddrSpace AddrSpace)
const override {
538 SIAtomicAddrSpace AddrSpace)
const override {
543 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
544 bool IsVolatile,
bool IsNonTemporal,
545 bool IsLastUse)
const override;
548 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
549 bool IsCrossAddrSpaceOrdering, Position Pos,
553 SIAtomicAddrSpace AddrSpace, Position Pos)
const override;
556 SIAtomicAddrSpace AddrSpace,
557 Position Pos)
const override {
562class SIGfx12CacheControl final :
public SICacheControl {
584 SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace)
const;
587 SIGfx12CacheControl(
const GCNSubtarget &ST) : SICacheControl(
ST) {
590 assert(!
ST.hasGFX1250Insts() ||
ST.hasGFX13Insts() ||
ST.isCuModeEnabled());
594 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
595 bool IsCrossAddrSpaceOrdering, Position Pos,
599 SIAtomicAddrSpace AddrSpace, Position Pos)
const override;
602 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
603 bool IsVolatile,
bool IsNonTemporal,
604 bool IsLastUse)
const override;
606 bool finalizeStore(MachineInstr &
MI,
bool Atomic)
const override;
610 bool handleCooperativeAtomic(MachineInstr &
MI)
const override;
613 SIAtomicAddrSpace AddrSpace,
614 Position Pos)
const override;
618 SIAtomicAddrSpace AddrSpace)
const override {
619 return setAtomicScope(
MI, Scope, AddrSpace);
624 SIAtomicAddrSpace AddrSpace)
const override {
625 return setAtomicScope(
MI, Scope, AddrSpace);
630 SIAtomicAddrSpace AddrSpace)
const override {
631 return setAtomicScope(
MI, Scope, AddrSpace);
634 bool handleNonVolatile(MachineInstr &
MI)
const override;
637class SIMemoryLegalizer final {
639 const MachineModuleInfo &MMI;
641 std::unique_ptr<SICacheControl> CC =
nullptr;
644 std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;
648 bool isAtomicRet(
const MachineInstr &
MI)
const {
654 bool removeAtomicPseudoMIs();
658 bool expandLoad(
const SIMemOpInfo &MOI,
662 bool expandStore(
const SIMemOpInfo &MOI,
666 bool expandAtomicFence(
const SIMemOpInfo &MOI,
670 bool expandAtomicCmpxchgOrRmw(
const SIMemOpInfo &MOI,
677 SIMemoryLegalizer(
const MachineModuleInfo &MMI) : MMI(MMI) {};
678 bool run(MachineFunction &MF);
685 SIMemoryLegalizerLegacy() : MachineFunctionPass(ID) {}
687 void getAnalysisUsage(AnalysisUsage &AU)
const override {
692 StringRef getPassName()
const override {
696 bool runOnMachineFunction(MachineFunction &MF)
override;
700 {
"global", SIAtomicAddrSpace::GLOBAL},
701 {
"local", SIAtomicAddrSpace::LDS},
709 OS <<
"unknown address space '" << AS <<
"'; expected one of ";
711 for (
const auto &[Name, Val] : ASNames)
712 OS <<
LS <<
'\'' <<
Name <<
'\'';
720static std::optional<SIAtomicAddrSpace>
722 static constexpr StringLiteral FenceASPrefix =
"amdgpu-synchronize-as";
728 SIAtomicAddrSpace
Result = SIAtomicAddrSpace::NONE;
729 for (
const auto &[Prefix, Suffix] : MMRA) {
730 if (Prefix != FenceASPrefix)
733 if (
auto It = ASNames.find(Suffix); It != ASNames.end())
736 diagnoseUnknownMMRAASName(
MI, Suffix);
739 if (Result == SIAtomicAddrSpace::NONE)
750 Fn,
Twine(
"unknown amdgcn-av metadata '") + Suffix +
Twine(
'\''),
758 bool TagFound =
false;
759 for (
const auto &[Prefix, Suffix] : MMRA) {
760 if (Prefix !=
"amdgcn-av")
762 if (Suffix ==
"none")
765 diagnoseUnknownAVMetadata(
MI, Suffix);
773 const char *Msg)
const {
775 Func.getContext().diagnose(
776 DiagnosticInfoUnsupported(Func, Msg,
MI->getDebugLoc()));
779std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
781 SIAtomicAddrSpace InstrAddrSpace)
const {
783 return std::tuple(SIAtomicScope::SYSTEM, SIAtomicAddrSpace::ATOMIC,
true);
785 return std::tuple(SIAtomicScope::AGENT, SIAtomicAddrSpace::ATOMIC,
true);
787 return std::tuple(SIAtomicScope::CLUSTER, SIAtomicAddrSpace::ATOMIC,
true);
789 return std::tuple(SIAtomicScope::WORKGROUP, SIAtomicAddrSpace::ATOMIC,
792 return std::tuple(SIAtomicScope::WAVEFRONT, SIAtomicAddrSpace::ATOMIC,
795 return std::tuple(SIAtomicScope::SINGLETHREAD, SIAtomicAddrSpace::ATOMIC,
798 return std::tuple(SIAtomicScope::SYSTEM,
799 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
801 return std::tuple(SIAtomicScope::AGENT,
802 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
804 return std::tuple(SIAtomicScope::CLUSTER,
805 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
807 return std::tuple(SIAtomicScope::WORKGROUP,
808 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
810 return std::tuple(SIAtomicScope::WAVEFRONT,
811 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
813 return std::tuple(SIAtomicScope::SINGLETHREAD,
814 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
818SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(
unsigned AS)
const {
820 return SIAtomicAddrSpace::FLAT;
822 return SIAtomicAddrSpace::GLOBAL;
824 return SIAtomicAddrSpace::LDS;
826 return SIAtomicAddrSpace::SCRATCH;
828 return SIAtomicAddrSpace::GDS;
831 return SIAtomicAddrSpace::GLOBAL;
833 return SIAtomicAddrSpace::OTHER;
839SIMemOpAccess::SIMemOpAccess(
const AMDGPUMachineModuleInfo &MMI_,
840 const GCNSubtarget &ST,
const Function &
F)
841 : MMI(&MMI_),
ST(
ST),
842 CanDemoteWorkgroupToWavefront(
ST.isSingleWavefrontWorkgroup(
F)) {}
844std::optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
846 assert(
MI->getNumMemOperands() > 0);
851 SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
852 bool IsNonTemporal =
true;
854 bool IsLastUse =
false;
855 bool IsCooperative =
false;
859 for (
const auto &MMO :
MI->memoperands()) {
860 IsNonTemporal &= MMO->isNonTemporal();
862 IsLastUse |= MMO->getFlags() &
MOLastUse;
865 toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace());
867 if (OpOrdering != AtomicOrdering::NotAtomic) {
868 const auto &IsSyncScopeInclusion =
870 if (!IsSyncScopeInclusion) {
871 reportUnsupported(
MI,
872 "Unsupported non-inclusive atomic synchronization scope");
876 SSID = *IsSyncScopeInclusion ? SSID : MMO->getSyncScopeID();
878 assert(MMO->getFailureOrdering() != AtomicOrdering::Release &&
879 MMO->getFailureOrdering() != AtomicOrdering::AcquireRelease);
890 Ordering = AtomicOrdering::Monotonic;
892 SIAtomicScope
Scope = SIAtomicScope::NONE;
893 SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
894 bool IsCrossAddressSpaceOrdering =
false;
895 if (Ordering != AtomicOrdering::NotAtomic) {
896 auto ScopeOrNone = toSIAtomicScope(SSID, InstrAddrSpace);
898 reportUnsupported(
MI,
"Unsupported atomic synchronization scope");
901 std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
903 if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
904 ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace) ||
905 ((InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) == SIAtomicAddrSpace::NONE)) {
906 reportUnsupported(
MI,
"Unsupported atomic address space");
910 return SIMemOpInfo(ST, Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
911 IsCrossAddressSpaceOrdering, FailureOrdering, IsVolatile,
912 IsNonTemporal, IsLastUse, IsCooperative,
913 CanDemoteWorkgroupToWavefront, hasAVNoneMMRA(*
MI));
916std::optional<SIMemOpInfo>
920 if (!(
MI->mayLoad() && !
MI->mayStore()))
924 if (
MI->getNumMemOperands() == 0)
925 return SIMemOpInfo(ST);
927 return constructFromMIWithMMO(
MI);
930std::optional<SIMemOpInfo>
934 if (!(!
MI->mayLoad() &&
MI->mayStore()))
938 if (
MI->getNumMemOperands() == 0)
939 return SIMemOpInfo(ST);
941 return constructFromMIWithMMO(
MI);
944std::optional<SIMemOpInfo>
948 if (
MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
955 auto ScopeOrNone = toSIAtomicScope(SSID, SIAtomicAddrSpace::ATOMIC);
957 reportUnsupported(
MI,
"Unsupported atomic synchronization scope");
961 SIAtomicScope
Scope = SIAtomicScope::NONE;
962 SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
963 bool IsCrossAddressSpaceOrdering =
false;
964 std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
967 if (OrderingAddrSpace != SIAtomicAddrSpace::ATOMIC) {
972 reportUnsupported(
MI,
"Unsupported atomic address space");
976 auto SynchronizeAS = getSynchronizeAddrSpaceMD(*
MI);
978 OrderingAddrSpace = *SynchronizeAS;
980 return SIMemOpInfo(ST, Ordering, Scope, OrderingAddrSpace,
981 SIAtomicAddrSpace::ATOMIC, IsCrossAddressSpaceOrdering,
982 AtomicOrdering::NotAtomic,
false,
false,
false,
false,
983 CanDemoteWorkgroupToWavefront, hasAVNoneMMRA(*
MI));
986std::optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
990 if (!(
MI->mayLoad() &&
MI->mayStore()))
994 if (
MI->getNumMemOperands() == 0)
995 return SIMemOpInfo(ST);
997 return constructFromMIWithMMO(
MI);
1000std::optional<SIMemOpInfo>
1005 return std::nullopt;
1007 return constructFromMIWithMMO(
MI);
1015 if (
MI.getNumMemOperands() == 0)
1018 return MMO->getFlags() & (MOThreadPrivate | MachineMemOperand::MOInvariant);
1022SICacheControl::SICacheControl(
const GCNSubtarget &ST) :
ST(
ST) {
1023 TII =
ST.getInstrInfo();
1029 unsigned Bits)
const {
1030 MachineOperand *CPol =
TII->getNamedOperand(*
MI, AMDGPU::OpName::cpol);
1034 CPol->setImm(
CPol->getImm() | Bits);
1038bool SICacheControl::canAffectGlobalAddrSpace(SIAtomicAddrSpace AS)
const {
1039 assert((!
ST.hasGloballyAddressableScratch() ||
1040 (AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ||
1041 (AS & SIAtomicAddrSpace::SCRATCH) == SIAtomicAddrSpace::NONE) &&
1042 "scratch instructions should already be replaced by flat "
1043 "instructions if GloballyAddressableScratch is enabled");
1044 return (AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE;
1048std::unique_ptr<SICacheControl> SICacheControl::create(
const GCNSubtarget &ST) {
1049 GCNSubtarget::Generation Generation =
ST.getGeneration();
1050 if (Generation < AMDGPUSubtarget::GFX10)
1051 return std::make_unique<SIGfx6CacheControl>(ST);
1052 if (Generation < AMDGPUSubtarget::GFX12)
1053 return std::make_unique<SIGfx10CacheControl>(ST);
1054 return std::make_unique<SIGfx12CacheControl>(ST);
1057bool SIGfx6CacheControl::enableLoadCacheBypass(
1059 SIAtomicScope Scope,
1060 SIAtomicAddrSpace AddrSpace)
const {
1063 if (!canAffectGlobalAddrSpace(AddrSpace)) {
1075 case SIAtomicScope::SYSTEM:
1076 if (
ST.hasGFX940Insts()) {
1082 case SIAtomicScope::AGENT:
1083 if (
ST.hasGFX940Insts()) {
1092 case SIAtomicScope::WORKGROUP:
1093 if (
ST.hasGFX940Insts()) {
1100 }
else if (
ST.hasGFX90AInsts()) {
1105 if (
ST.isTgSplitEnabled())
1109 case SIAtomicScope::WAVEFRONT:
1110 case SIAtomicScope::SINGLETHREAD:
1120bool SIGfx6CacheControl::enableStoreCacheBypass(
1122 SIAtomicScope Scope,
1123 SIAtomicAddrSpace AddrSpace)
const {
1131 if (
ST.hasGFX940Insts() && canAffectGlobalAddrSpace(AddrSpace)) {
1133 case SIAtomicScope::SYSTEM:
1137 case SIAtomicScope::AGENT:
1141 case SIAtomicScope::WORKGROUP:
1145 case SIAtomicScope::WAVEFRONT:
1146 case SIAtomicScope::SINGLETHREAD:
1164bool SIGfx6CacheControl::enableRMWCacheBypass(
1166 SIAtomicScope Scope,
1167 SIAtomicAddrSpace AddrSpace)
const {
1177 if (
ST.hasGFX940Insts() && canAffectGlobalAddrSpace(AddrSpace)) {
1179 case SIAtomicScope::SYSTEM:
1183 case SIAtomicScope::AGENT:
1184 case SIAtomicScope::WORKGROUP:
1185 case SIAtomicScope::WAVEFRONT:
1186 case SIAtomicScope::SINGLETHREAD:
1200bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal(
1202 bool IsVolatile,
bool IsNonTemporal,
bool IsLastUse =
false)
const {
1212 assert(
Op == SIMemOp::LOAD ||
Op == SIMemOp::STORE);
1217 if (
ST.hasGFX940Insts()) {
1220 }
else if (
Op == SIMemOp::LOAD) {
1232 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
1233 Position::AFTER, AtomicOrdering::Unordered,
1239 if (IsNonTemporal) {
1240 if (
ST.hasGFX940Insts()) {
1254 SIAtomicScope Scope,
1255 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
1256 bool IsCrossAddrSpaceOrdering, Position Pos,
1258 bool AtomicsOnly)
const {
1261 MachineBasicBlock &
MBB = *
MI->getParent();
1264 if (Pos == Position::AFTER)
1268 if (
ST.hasGFX90AInsts() &&
ST.isTgSplitEnabled()) {
1276 if (((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH |
1277 SIAtomicAddrSpace::GDS)) != SIAtomicAddrSpace::NONE) &&
1278 (Scope == SIAtomicScope::WORKGROUP)) {
1280 Scope = SIAtomicScope::AGENT;
1284 AddrSpace &= ~SIAtomicAddrSpace
::LDS;
1288 bool LGKMCnt =
false;
1290 if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
1291 SIAtomicAddrSpace::NONE) {
1293 case SIAtomicScope::SYSTEM:
1294 case SIAtomicScope::AGENT:
1297 case SIAtomicScope::WORKGROUP:
1298 case SIAtomicScope::WAVEFRONT:
1299 case SIAtomicScope::SINGLETHREAD:
1308 if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
1310 case SIAtomicScope::SYSTEM:
1311 case SIAtomicScope::AGENT:
1312 case SIAtomicScope::WORKGROUP:
1319 LGKMCnt |= IsCrossAddrSpaceOrdering;
1321 case SIAtomicScope::WAVEFRONT:
1322 case SIAtomicScope::SINGLETHREAD:
1331 if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
1333 case SIAtomicScope::SYSTEM:
1334 case SIAtomicScope::AGENT:
1341 LGKMCnt |= IsCrossAddrSpaceOrdering;
1343 case SIAtomicScope::WORKGROUP:
1344 case SIAtomicScope::WAVEFRONT:
1345 case SIAtomicScope::SINGLETHREAD:
1354 if (VMCnt || LGKMCnt) {
1355 unsigned WaitCntImmediate =
1361 .
addImm(WaitCntImmediate);
1369 Scope == SIAtomicScope::WORKGROUP &&
1370 (AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
1375 if (Pos == Position::AFTER)
1384 return !ST.isAmdPalOS() && !ST.isMesa3DOS();
1388 SIAtomicScope Scope,
1389 SIAtomicAddrSpace AddrSpace,
1390 Position Pos)
const {
1391 if (!InsertCacheInv)
1396 MachineBasicBlock &
MBB = *
MI->getParent();
1399 if (Pos == Position::AFTER)
1403 ? AMDGPU::BUFFER_WBINVL1_VOL
1404 : AMDGPU::BUFFER_WBINVL1;
1406 if (canAffectGlobalAddrSpace(AddrSpace)) {
1408 case SIAtomicScope::SYSTEM:
1409 if (
ST.hasGFX940Insts()) {
1425 if (
ST.hasGFX90AInsts()) {
1440 case SIAtomicScope::AGENT:
1441 if (
ST.hasGFX940Insts()) {
1456 case SIAtomicScope::WORKGROUP:
1457 if (
ST.isTgSplitEnabled()) {
1458 if (
ST.hasGFX940Insts()) {
1477 }
else if (
ST.hasGFX90AInsts()) {
1483 case SIAtomicScope::WAVEFRONT:
1484 case SIAtomicScope::SINGLETHREAD:
1501 if (Pos == Position::AFTER)
1508 SIAtomicScope Scope,
1509 SIAtomicAddrSpace AddrSpace,
1510 Position Pos)
const {
1511 if (!
ST.hasGFX90AInsts())
1515 MachineBasicBlock &
MBB = *
MI->getParent();
1518 if (Pos == Position::AFTER)
1521 if (canAffectGlobalAddrSpace(AddrSpace)) {
1523 case SIAtomicScope::SYSTEM:
1535 case SIAtomicScope::AGENT:
1536 if (
ST.hasGFX940Insts()) {
1543 case SIAtomicScope::WORKGROUP:
1544 case SIAtomicScope::WAVEFRONT:
1545 case SIAtomicScope::SINGLETHREAD:
1555 if (Pos == Position::AFTER)
1561bool SIGfx10CacheControl::enableLoadCacheBypass(
1563 SIAtomicAddrSpace AddrSpace)
const {
1567 if (canAffectGlobalAddrSpace(AddrSpace)) {
1569 case SIAtomicScope::SYSTEM:
1570 case SIAtomicScope::AGENT:
1577 case SIAtomicScope::WORKGROUP:
1582 if (!
ST.isCuModeEnabled())
1585 case SIAtomicScope::WAVEFRONT:
1586 case SIAtomicScope::SINGLETHREAD:
1604bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal(
1606 bool IsVolatile,
bool IsNonTemporal,
bool IsLastUse =
false)
const {
1617 assert(
Op == SIMemOp::LOAD ||
Op == SIMemOp::STORE);
1625 if (
Op == SIMemOp::LOAD) {
1638 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
1639 Position::AFTER, AtomicOrdering::Unordered,
1644 if (IsNonTemporal) {
1649 if (
Op == SIMemOp::STORE)
1664 SIAtomicScope Scope,
1665 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
1666 bool IsCrossAddrSpaceOrdering,
1668 bool AtomicsOnly)
const {
1671 MachineBasicBlock &
MBB = *
MI->getParent();
1674 if (Pos == Position::AFTER)
1679 bool LGKMCnt =
false;
1681 if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
1682 SIAtomicAddrSpace::NONE) {
1684 case SIAtomicScope::SYSTEM:
1685 case SIAtomicScope::AGENT:
1686 if ((
Op & SIMemOp::LOAD) != SIMemOp::NONE)
1688 if ((
Op & SIMemOp::STORE) != SIMemOp::NONE)
1691 case SIAtomicScope::WORKGROUP:
1701 if ((
Op & SIMemOp::LOAD) != SIMemOp::NONE)
1703 if ((
Op & SIMemOp::STORE) != SIMemOp::NONE)
1707 case SIAtomicScope::WAVEFRONT:
1708 case SIAtomicScope::SINGLETHREAD:
1717 if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
1719 case SIAtomicScope::SYSTEM:
1720 case SIAtomicScope::AGENT:
1721 case SIAtomicScope::WORKGROUP:
1728 LGKMCnt |= IsCrossAddrSpaceOrdering;
1730 case SIAtomicScope::WAVEFRONT:
1731 case SIAtomicScope::SINGLETHREAD:
1740 if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
1742 case SIAtomicScope::SYSTEM:
1743 case SIAtomicScope::AGENT:
1750 LGKMCnt |= IsCrossAddrSpaceOrdering;
1752 case SIAtomicScope::WORKGROUP:
1753 case SIAtomicScope::WAVEFRONT:
1754 case SIAtomicScope::SINGLETHREAD:
1763 if (VMCnt || LGKMCnt) {
1764 unsigned WaitCntImmediate =
1770 .
addImm(WaitCntImmediate);
1778 Scope == SIAtomicScope::WORKGROUP &&
1779 (AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
1786 .
addReg(AMDGPU::SGPR_NULL, RegState::Undef)
1791 if (Pos == Position::AFTER)
1798 SIAtomicScope Scope,
1799 SIAtomicAddrSpace AddrSpace,
1800 Position Pos)
const {
1801 if (!InsertCacheInv)
1806 MachineBasicBlock &
MBB = *
MI->getParent();
1809 if (Pos == Position::AFTER)
1812 if (canAffectGlobalAddrSpace(AddrSpace)) {
1814 case SIAtomicScope::SYSTEM:
1815 case SIAtomicScope::AGENT:
1823 case SIAtomicScope::WORKGROUP:
1828 if (!
ST.isCuModeEnabled()) {
1833 case SIAtomicScope::WAVEFRONT:
1834 case SIAtomicScope::SINGLETHREAD:
1849 if (Pos == Position::AFTER)
1857 MachineOperand *
CPol =
TII->getNamedOperand(*
MI, OpName::cpol);
1872 MachineOperand *
CPol =
TII->getNamedOperand(*
MI, OpName::cpol);
1885bool SIGfx12CacheControl::insertWaitsBeforeSystemScopeStore(
1889 MachineBasicBlock &
MBB = *
MI->getParent();
1893 if (
ST.hasImageInsts()) {
1904 SIAtomicScope Scope,
1905 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
1906 bool IsCrossAddrSpaceOrdering,
1908 bool AtomicsOnly)
const {
1911 MachineBasicBlock &
MBB = *
MI->getParent();
1914 bool LOADCnt =
false;
1916 bool STORECnt =
false;
1918 if (Pos == Position::AFTER)
1921 if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
1922 SIAtomicAddrSpace::NONE) {
1924 case SIAtomicScope::SYSTEM:
1925 case SIAtomicScope::AGENT:
1926 case SIAtomicScope::CLUSTER:
1927 if ((
Op & SIMemOp::LOAD) != SIMemOp::NONE)
1929 if ((
Op & SIMemOp::STORE) != SIMemOp::NONE)
1932 case SIAtomicScope::WORKGROUP:
1949 if (!
ST.isCuModeEnabled() ||
ST.hasGFX1250Insts() ||
1951 if ((
Op & SIMemOp::LOAD) != SIMemOp::NONE)
1953 if ((
Op & SIMemOp::STORE) != SIMemOp::NONE)
1957 case SIAtomicScope::WAVEFRONT:
1958 case SIAtomicScope::SINGLETHREAD:
1967 if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
1969 case SIAtomicScope::SYSTEM:
1970 case SIAtomicScope::AGENT:
1971 case SIAtomicScope::CLUSTER:
1972 case SIAtomicScope::WORKGROUP:
1979 DSCnt |= IsCrossAddrSpaceOrdering;
1981 case SIAtomicScope::WAVEFRONT:
1982 case SIAtomicScope::SINGLETHREAD:
2003 if (!AtomicsOnly &&
ST.hasImageInsts()) {
2021 if (Pos == Position::AFTER)
2028 SIAtomicScope Scope,
2029 SIAtomicAddrSpace AddrSpace,
2030 Position Pos)
const {
2031 if (!InsertCacheInv)
2034 MachineBasicBlock &
MBB = *
MI->getParent();
2043 if (!canAffectGlobalAddrSpace(AddrSpace))
2048 case SIAtomicScope::SYSTEM:
2051 case SIAtomicScope::AGENT:
2054 case SIAtomicScope::CLUSTER:
2057 case SIAtomicScope::WORKGROUP:
2065 if (
ST.isCuModeEnabled())
2070 case SIAtomicScope::WAVEFRONT:
2071 case SIAtomicScope::SINGLETHREAD:
2078 if (Pos == Position::AFTER)
2083 if (Pos == Position::AFTER)
2088 if (
ST.hasINVWBL2WaitCntRequirement() && Scope > SIAtomicScope::CLUSTER) {
2089 insertWait(
MI, Scope, AddrSpace, SIMemOp::LOAD,
2090 false, Pos, AtomicOrdering::Acquire,
2093 if (Pos == Position::AFTER)
2101 SIAtomicScope Scope,
2102 SIAtomicAddrSpace AddrSpace,
2103 Position Pos)
const {
2108 if (!canAffectGlobalAddrSpace(AddrSpace))
2112 MachineBasicBlock &
MBB = *
MI->getParent();
2115 if (Pos == Position::AFTER)
2124 std::optional<AMDGPU::CPol::CPol> NeedsWB;
2126 case SIAtomicScope::SYSTEM:
2129 case SIAtomicScope::AGENT:
2131 if (
ST.hasGFX1250Insts())
2134 case SIAtomicScope::CLUSTER:
2135 case SIAtomicScope::WORKGROUP:
2136 case SIAtomicScope::WAVEFRONT:
2137 case SIAtomicScope::SINGLETHREAD:
2139 case SIAtomicScope::NONE:
2148 if (
ST.hasINVWBL2WaitCntRequirement()) {
2149 insertWait(
MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
2151 AtomicOrdering::Release,
2159 if (Pos == Position::AFTER)
2165bool SIGfx12CacheControl::handleNonVolatile(MachineInstr &
MI)
const {
2167 if (!
ST.hasGFX1250Insts())
2169 MachineOperand *
CPol =
TII->getNamedOperand(
MI, OpName::cpol);
2176bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
2178 bool IsVolatile,
bool IsNonTemporal,
bool IsLastUse =
false)
const {
2187 assert(
Op == SIMemOp::LOAD ||
Op == SIMemOp::STORE);
2194 }
else if (IsNonTemporal) {
2202 if (
ST.requiresWaitXCntForSingleAccessInstructions() &&
2204 MachineBasicBlock &
MBB = *
MI->getParent();
2214 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
2215 Position::AFTER, AtomicOrdering::Unordered,
2222bool SIGfx12CacheControl::finalizeStore(MachineInstr &
MI,
bool Atomic)
const {
2223 assert(
MI.mayStore() &&
"Not a Store inst");
2224 const bool IsRMW = (
MI.mayLoad() &&
MI.mayStore());
2227 if (Atomic &&
ST.requiresWaitXCntForSingleAccessInstructions() &&
2229 MachineBasicBlock &
MBB = *
MI.getParent();
2238 MachineOperand *
CPol =
TII->getNamedOperand(
MI, OpName::cpol);
2244 if (
ST.requiresWaitsBeforeSystemScopeStores() && !Atomic &&
2246 Changed |= insertWaitsBeforeSystemScopeStore(
MI.getIterator());
2256 MachineOperand *
CPol =
TII->getNamedOperand(*
MI, AMDGPU::OpName::cpol);
2257 assert(CPol &&
"load_monitor must have a cpol operand");
2263bool SIGfx12CacheControl::handleCooperativeAtomic(MachineInstr &
MI)
const {
2264 if (!
ST.hasGFX1250Insts())
2268 MachineOperand *
CPol =
TII->getNamedOperand(
MI, OpName::cpol);
2269 assert(CPol &&
"No CPol operand?");
2277 SIAtomicScope Scope,
2278 SIAtomicAddrSpace AddrSpace)
const {
2281 if (canAffectGlobalAddrSpace(AddrSpace)) {
2283 case SIAtomicScope::SYSTEM:
2286 case SIAtomicScope::AGENT:
2289 case SIAtomicScope::CLUSTER:
2292 case SIAtomicScope::WORKGROUP:
2295 if (!
ST.isCuModeEnabled())
2298 case SIAtomicScope::WAVEFRONT:
2299 case SIAtomicScope::SINGLETHREAD:
2317bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
2318 if (AtomicPseudoMIs.empty())
2321 for (
auto &
MI : AtomicPseudoMIs)
2322 MI->eraseFromParent();
2324 AtomicPseudoMIs.clear();
2328bool SIMemoryLegalizer::expandLoad(
const SIMemOpInfo &MOI,
2336 if (MOI.isAtomic()) {
2338 <<
", scope=" <<
toString(MOI.getScope())
2339 <<
", ordering-AS=" << MOI.getOrderingAddrSpace()
2340 <<
", instr-AS=" << MOI.getInstrAddrSpace() <<
"\n");
2342 if (Order == AtomicOrdering::Monotonic ||
2343 Order == AtomicOrdering::Acquire ||
2344 Order == AtomicOrdering::SequentiallyConsistent) {
2345 Changed |= CC->enableLoadCacheBypass(
MI, MOI.getScope(),
2346 MOI.getOrderingAddrSpace());
2351 if (MOI.isCooperative())
2352 Changed |= CC->handleCooperativeAtomic(*
MI);
2354 if (Order == AtomicOrdering::SequentiallyConsistent)
2355 Changed |= CC->insertWait(
MI, MOI.getScope(), MOI.getOrderingAddrSpace(),
2356 SIMemOp::LOAD | SIMemOp::STORE,
2357 MOI.getIsCrossAddressSpaceOrdering(),
2358 Position::BEFORE, Order,
false);
2360 if (Order == AtomicOrdering::Acquire ||
2361 Order == AtomicOrdering::SequentiallyConsistent) {
2364 CC->insertWait(
MI, MOI.getScope(), MOI.getInstrAddrSpace(),
2365 SIMemOp::LOAD, MOI.getIsCrossAddressSpaceOrdering(),
2366 Position::AFTER, Order,
true);
2367 if (!MOI.isAVNone()) {
2369 MI, MOI.getScope(), MOI.getOrderingAddrSpace(), Position::AFTER);
2380 Changed |= CC->enableVolatileAndOrNonTemporal(
2381 MI, MOI.getInstrAddrSpace(), SIMemOp::LOAD, MOI.isVolatile(),
2382 MOI.isNonTemporal(), MOI.isLastUse());
2388bool SIMemoryLegalizer::expandStore(
const SIMemOpInfo &MOI,
2396 MachineInstr &StoreMI = *
MI;
2398 if (MOI.isAtomic()) {
2400 <<
", scope=" <<
toString(MOI.getScope())
2401 <<
", ordering-AS=" << MOI.getOrderingAddrSpace()
2402 <<
", instr-AS=" << MOI.getInstrAddrSpace() <<
"\n");
2403 if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
2404 MOI.getOrdering() == AtomicOrdering::Release ||
2405 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
2406 Changed |= CC->enableStoreCacheBypass(
MI, MOI.getScope(),
2407 MOI.getOrderingAddrSpace());
2412 if (MOI.isCooperative())
2413 Changed |= CC->handleCooperativeAtomic(*
MI);
2415 if (MOI.getOrdering() == AtomicOrdering::Release ||
2416 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
2418 CC->insertRelease(
MI, MOI.getScope(), MOI.getOrderingAddrSpace(),
2419 MOI.getIsCrossAddressSpaceOrdering(),
2420 Position::BEFORE, MOI.isAVNone());
2423 Changed |= CC->finalizeStore(StoreMI,
true);
2430 Changed |= CC->enableVolatileAndOrNonTemporal(
2431 MI, MOI.getInstrAddrSpace(), SIMemOp::STORE, MOI.isVolatile(),
2432 MOI.isNonTemporal());
2436 Changed |= CC->finalizeStore(StoreMI,
false);
2440bool SIMemoryLegalizer::expandAtomicFence(
const SIMemOpInfo &MOI,
2442 assert(
MI->getOpcode() == AMDGPU::ATOMIC_FENCE);
2446 AtomicPseudoMIs.push_back(
MI);
2449 const SIAtomicAddrSpace OrderingAddrSpace = MOI.getOrderingAddrSpace();
2451 if (MOI.isAtomic()) {
2453 <<
", scope=" <<
toString(MOI.getScope())
2454 <<
", ordering-AS=" << OrderingAddrSpace <<
"\n");
2456 if (Order == AtomicOrdering::Acquire) {
2458 Changed |= CC->insertWait(
MI, MOI.getScope(), OrderingAddrSpace,
2459 SIMemOp::LOAD | SIMemOp::STORE,
2460 MOI.getIsCrossAddressSpaceOrdering(),
2461 Position::BEFORE, Order,
true);
2464 if (Order == AtomicOrdering::Release ||
2465 Order == AtomicOrdering::AcquireRelease ||
2466 Order == AtomicOrdering::SequentiallyConsistent) {
2474 Changed |= CC->insertRelease(
MI, MOI.getScope(), OrderingAddrSpace,
2475 MOI.getIsCrossAddressSpaceOrdering(),
2476 Position::BEFORE, MOI.isAVNone());
2484 if ((Order == AtomicOrdering::Acquire ||
2485 Order == AtomicOrdering::AcquireRelease ||
2486 Order == AtomicOrdering::SequentiallyConsistent) &&
2488 Changed |= CC->insertAcquire(
MI, MOI.getScope(), OrderingAddrSpace,
2498bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(
const SIMemOpInfo &MOI,
2505 MachineInstr &RMWMI = *
MI;
2507 if (MOI.isAtomic()) {
2509 <<
", failure-ordering="
2511 <<
", scope=" <<
toString(MOI.getScope())
2512 <<
", ordering-AS=" << MOI.getOrderingAddrSpace()
2513 <<
", instr-AS=" << MOI.getInstrAddrSpace() <<
"\n");
2515 if (Order == AtomicOrdering::Monotonic ||
2516 Order == AtomicOrdering::Acquire || Order == AtomicOrdering::Release ||
2517 Order == AtomicOrdering::AcquireRelease ||
2518 Order == AtomicOrdering::SequentiallyConsistent) {
2519 Changed |= CC->enableRMWCacheBypass(
MI, MOI.getScope(),
2520 MOI.getInstrAddrSpace());
2523 if (Order == AtomicOrdering::Release ||
2524 Order == AtomicOrdering::AcquireRelease ||
2525 Order == AtomicOrdering::SequentiallyConsistent ||
2526 MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
2528 CC->insertRelease(
MI, MOI.getScope(), MOI.getOrderingAddrSpace(),
2529 MOI.getIsCrossAddressSpaceOrdering(),
2530 Position::BEFORE, MOI.isAVNone());
2533 if (Order == AtomicOrdering::Acquire ||
2534 Order == AtomicOrdering::AcquireRelease ||
2535 Order == AtomicOrdering::SequentiallyConsistent ||
2536 MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
2537 MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
2540 CC->insertWait(
MI, MOI.getScope(), MOI.getInstrAddrSpace(),
2541 isAtomicRet(*
MI) ? SIMemOp::LOAD : SIMemOp::STORE,
2542 MOI.getIsCrossAddressSpaceOrdering(), Position::AFTER,
2544 if (!MOI.isAVNone()) {
2546 MI, MOI.getScope(), MOI.getOrderingAddrSpace(), Position::AFTER);
2550 Changed |= CC->finalizeStore(RMWMI,
true);
2557bool SIMemoryLegalizer::expandLDSDMA(
const SIMemOpInfo &MOI,
2571 return CC->enableVolatileAndOrNonTemporal(
2572 MI, MOI.getInstrAddrSpace(), OpKind, MOI.isVolatile(),
2573 MOI.isNonTemporal(), MOI.isLastUse());
2576bool SIMemoryLegalizerLegacy::runOnMachineFunction(MachineFunction &MF) {
2577 const MachineModuleInfo &MMI =
2578 getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
2579 return SIMemoryLegalizer(MMI).run(MF);
2586 .getCachedResult<MachineModuleAnalysis>(
2588 assert(MMI &&
"MachineModuleAnalysis must be available");
2589 if (!SIMemoryLegalizer(MMI->getMMI()).run(MF))
2600 CC = SICacheControl::create(ST);
2602 for (
auto &
MBB : MF) {
2606 if (
MI->isBundle() &&
MI->mayLoadOrStore()) {
2609 I != E &&
I->isBundledWithPred(); ++
I) {
2610 I->unbundleFromPred();
2613 MO.setIsInternalRead(
false);
2616 MI =
MI->eraseFromParent();
2620 if (
const auto &MOI = MOA.getLoadInfo(
MI))
2622 else if (
const auto &MOI = MOA.getStoreInfo(
MI))
2624 else if (
const auto &MOI = MOA.getLDSDMAInfo(
MI))
2626 else if (
const auto &MOI = MOA.getAtomicFenceInfo(
MI))
2628 else if (
const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(
MI))
2629 Changed |= expandAtomicCmpxchgOrRmw(*MOI,
MI);
2637 Changed |= removeAtomicPseudoMIs();
2643char SIMemoryLegalizerLegacy::
ID = 0;
2647 return new SIMemoryLegalizerLegacy();
static std::optional< LoadInfo > getLoadInfo(const MachineInstr &MI)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
Provides AMDGPU specific target descriptions.
AMDGPU Machine Module Info.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Atomic ordering constants.
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
This header defines various interfaces for pass management in LLVM.
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static cl::opt< bool > AmdgcnSkipCacheInvalidations("amdgcn-skip-cache-invalidations", cl::init(false), cl::Hidden, cl::desc("Use this to skip inserting cache invalidating instructions."))
static bool isNonVolatileMemoryAccess(const MachineInstr &MI)
static bool canUseBUFFER_WBINVL1_VOL(const GCNSubtarget &ST)
static const uint32_t IV[8]
SyncScope::ID getWorkgroupSSID() const
SyncScope::ID getWavefrontSSID() const
SyncScope::ID getAgentSSID() const
SyncScope::ID getClusterOneAddressSpaceSSID() const
SyncScope::ID getClusterSSID() const
std::optional< bool > isSyncScopeInclusion(SyncScope::ID A, SyncScope::ID B) const
In AMDGPU target synchronization scopes are inclusive, meaning a larger synchronization scope is inclusive of a smaller synchronization scope.
SyncScope::ID getAgentOneAddressSpaceSSID() const
SyncScope::ID getSingleThreadOneAddressSpaceSSID() const
SyncScope::ID getWavefrontOneAddressSpaceSSID() const
SyncScope::ID getSystemOneAddressSpaceSSID() const
SyncScope::ID getWorkgroupOneAddressSpaceSSID() const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Represents analyses that only rely on functions' control flow.
Diagnostic information for unsupported feature in backend.
FunctionPass class - This class is used to implement most global optimizations.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
A helper class to return the specified delimiter string after the first invocation of operator StringRef().
Instructions::iterator instr_iterator
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Representation of each machine instruction.
A description of a memory reference used in the backend.
Ty & getObjFileInfo()
Keep track of various per-module pieces of information for backends that would like to do so.
MachineOperand class - Representation of each machine instruction operand.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
static bool isVMEM(const MachineInstr &MI)
static bool mayWriteLDSThroughDMA(const MachineInstr &MI)
static bool isBUF(const MachineInstr &MI)
static bool isAtomicRet(const MachineInstr &MI)
static bool isAtomic(const MachineInstr &MI)
static bool isLoadMonitor(unsigned Opc)
static bool isLDSDMA(const MachineInstr &MI)
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better as a string (e.g. operator+ etc).
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs with the length computed at compile time.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings", which are basically ranges of bytes.
Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to a SmallVector or SmallString.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ BUFFER_STRIDED_POINTER
Address space for 192-bit fat buffer pointers with an additional index.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ BUFFER_FAT_POINTER
Address space for 160-bit buffer fat pointers.
@ PRIVATE_ADDRESS
Address space for private memory.
@ BUFFER_RESOURCE
Address space for 128-bit buffer resources.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
bool isGFX10(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded)
unsigned getVmcntBitMask(const IsaVersion &Version)
unsigned getLgkmcntBitMask(const IsaVersion &Version)
unsigned getExpcntBitMask(const IsaVersion &Version)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
@ System
Synchronized with respect to all concurrently executing threads.
initializer< Ty > init(const Ty &Val)
DXILDebugInfoMap run(Module &M)
Scope
Defines the scope in which this symbol should be visible: Default – Visible in the public interface o...
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
OuterAnalysisManagerProxy< ModuleAnalysisManager, MachineFunction > ModuleAnalysisManagerMachineFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
char & SIMemoryLegalizerID
@ LLVM_MARK_AS_BITMASK_ENUM
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
bool isReleaseOrStronger(AtomicOrdering AO)
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
const char * toIRString(AtomicOrdering ao)
String used by LLVM IR to represent atomic ordering.
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
AtomicOrdering getMergedAtomicOrdering(AtomicOrdering AO, AtomicOrdering Other)
Return a single atomic ordering that is at least as strong as both the AO and Other orderings for an atomic operation.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static const MachineMemOperand::Flags MOCooperative
Mark the MMO of cooperative load/store atomics.
AtomicOrdering
Atomic ordering for LLVM's memory model.
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
FunctionPass * createSIMemoryLegalizerPass()
bool isStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
Returns true if ao is stronger than other as defined by the AtomicOrdering lattice, which is based on C++'s definition.