32#define DEBUG_TYPE "si-memory-legalizer"
33#define PASS_NAME "SI Memory Legalizer"
37 cl::desc(
"Use this to skip inserting cache invalidating instructions."));
59enum class SIAtomicScope {
70enum class SIAtomicAddrSpace {
// Summary record for one memory operation: its atomic ordering, scope, the
// address spaces it orders / accesses, and volatile/nontemporal/last-use
// hints.  Built exclusively by SIMemOpAccess (hence the friend declaration).
// NOTE(review): this extraction is missing interior lines of the class
// (several members and braces are not visible); comments below only describe
// what is shown.
90class SIMemOpInfo final {
93 friend class SIMemOpAccess;
// Synchronization scope; defaults to the widest (SYSTEM) scope.
97 SIAtomicScope
Scope = SIAtomicScope::SYSTEM;
// Address spaces the operation orders vs. the address spaces it accesses.
98 SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
99 SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
100 bool IsCrossAddressSpaceOrdering =
false;
102 bool IsNonTemporal =
false;
103 bool IsLastUse =
false;
// Constructor (name line not visible in this extraction).  Defaults describe
// a sequentially-consistent, system-scope operation over all address spaces.
107 SIAtomicScope Scope = SIAtomicScope::SYSTEM,
108 SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC,
109 SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::ALL,
110 bool IsCrossAddressSpaceOrdering =
true,
111 AtomicOrdering FailureOrdering = AtomicOrdering::SequentiallyConsistent,
112 bool IsVolatile =
false,
bool IsNonTemporal =
false,
113 bool IsLastUse =
false)
114 : Ordering(Ordering), FailureOrdering(FailureOrdering),
Scope(
Scope),
115 OrderingAddrSpace(OrderingAddrSpace), InstrAddrSpace(InstrAddrSpace),
116 IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
118 IsLastUse(IsLastUse) {
// Non-atomic operations must carry no scope/ordering state at all...
120 if (Ordering == AtomicOrdering::NotAtomic) {
121 assert(Scope == SIAtomicScope::NONE &&
122 OrderingAddrSpace == SIAtomicAddrSpace::NONE &&
123 !IsCrossAddressSpaceOrdering &&
124 FailureOrdering == AtomicOrdering::NotAtomic);
// ...while atomic operations must have a scope and order/access at least one
// atomic-capable address space.
128 assert(Scope != SIAtomicScope::NONE &&
129 (OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
130 SIAtomicAddrSpace::NONE &&
131 (InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
132 SIAtomicAddrSpace::NONE);
// If the ordered and accessed address spaces coincide there is nothing
// "cross address space" about the operation (second condition on the
// missing line presumably refines this — TODO confirm against upstream).
137 if ((OrderingAddrSpace == InstrAddrSpace) &&
139 this->IsCrossAddressSpaceOrdering =
false;
// Narrow the scope to the smallest one that can still order the accessed
// address spaces: scratch-only => single thread; scratch+LDS => workgroup;
// scratch+LDS+GDS => agent.
143 if ((InstrAddrSpace & ~SIAtomicAddrSpace::SCRATCH) ==
144 SIAtomicAddrSpace::NONE) {
145 this->Scope = std::min(Scope, SIAtomicScope::SINGLETHREAD);
146 }
else if ((InstrAddrSpace &
147 ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS)) ==
148 SIAtomicAddrSpace::NONE) {
149 this->Scope = std::min(Scope, SIAtomicScope::WORKGROUP);
150 }
else if ((InstrAddrSpace &
151 ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS |
152 SIAtomicAddrSpace::GDS)) == SIAtomicAddrSpace::NONE) {
153 this->Scope = std::min(Scope, SIAtomicScope::AGENT);
// Trivial accessors for the fields above (some getter signatures are not
// visible in this extraction).
173 return FailureOrdering;
178 SIAtomicAddrSpace getInstrAddrSpace()
const {
179 return InstrAddrSpace;
184 SIAtomicAddrSpace getOrderingAddrSpace()
const {
185 return OrderingAddrSpace;
190 bool getIsCrossAddressSpaceOrdering()
const {
191 return IsCrossAddressSpaceOrdering;
196 bool isVolatile()
const {
202 bool isNonTemporal()
const {
203 return IsNonTemporal;
208 bool isLastUse()
const {
return IsLastUse; }
// Whether this operation participates in atomic ordering at all.
212 bool isAtomic()
const {
213 return Ordering != AtomicOrdering::NotAtomic;
218class SIMemOpAccess final {
224 const char *Msg)
const;
230 std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
231 toSIAtomicScope(
SyncScope::ID SSID, SIAtomicAddrSpace InstrAddrSpace)
const;
234 SIAtomicAddrSpace toSIAtomicAddrSpace(
unsigned AS)
const;
238 std::optional<SIMemOpInfo>
247 std::optional<SIMemOpInfo>
252 std::optional<SIMemOpInfo>
257 std::optional<SIMemOpInfo>
262 std::optional<SIMemOpInfo>
266class SICacheControl {
290 static std::unique_ptr<SICacheControl> create(
const GCNSubtarget &ST);
297 SIAtomicAddrSpace AddrSpace)
const = 0;
304 SIAtomicAddrSpace AddrSpace)
const = 0;
311 SIAtomicAddrSpace AddrSpace)
const = 0;
317 SIAtomicAddrSpace AddrSpace,
318 SIMemOp
Op,
bool IsVolatile,
320 bool IsLastUse =
false)
const = 0;
335 SIAtomicAddrSpace AddrSpace,
337 bool IsCrossAddrSpaceOrdering,
338 Position Pos)
const = 0;
347 SIAtomicAddrSpace AddrSpace,
348 Position Pos)
const = 0;
358 SIAtomicAddrSpace AddrSpace,
359 bool IsCrossAddrSpaceOrdering,
360 Position Pos)
const = 0;
363 virtual ~SICacheControl() =
default;
365 virtual bool tryForceStoreSC0SC1(
const SIMemOpInfo &MOI,
371class SIGfx6CacheControl :
public SICacheControl {
388 SIGfx6CacheControl(
const GCNSubtarget &ST) : SICacheControl(
ST) {}
392 SIAtomicAddrSpace AddrSpace)
const override;
396 SIAtomicAddrSpace AddrSpace)
const override;
400 SIAtomicAddrSpace AddrSpace)
const override;
403 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
404 bool IsVolatile,
bool IsNonTemporal,
405 bool IsLastUse)
const override;
409 SIAtomicAddrSpace AddrSpace,
411 bool IsCrossAddrSpaceOrdering,
412 Position Pos)
const override;
416 SIAtomicAddrSpace AddrSpace,
417 Position Pos)
const override;
421 SIAtomicAddrSpace AddrSpace,
422 bool IsCrossAddrSpaceOrdering,
423 Position Pos)
const override;
426class SIGfx7CacheControl :
public SIGfx6CacheControl {
429 SIGfx7CacheControl(
const GCNSubtarget &ST) : SIGfx6CacheControl(
ST) {}
433 SIAtomicAddrSpace AddrSpace,
434 Position Pos)
const override;
438class SIGfx90ACacheControl :
public SIGfx7CacheControl {
441 SIGfx90ACacheControl(
const GCNSubtarget &ST) : SIGfx7CacheControl(
ST) {}
445 SIAtomicAddrSpace AddrSpace)
const override;
449 SIAtomicAddrSpace AddrSpace)
const override;
453 SIAtomicAddrSpace AddrSpace)
const override;
456 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
457 bool IsVolatile,
bool IsNonTemporal,
458 bool IsLastUse)
const override;
462 SIAtomicAddrSpace AddrSpace,
464 bool IsCrossAddrSpaceOrdering,
465 Position Pos)
const override;
469 SIAtomicAddrSpace AddrSpace,
470 Position Pos)
const override;
474 SIAtomicAddrSpace AddrSpace,
475 bool IsCrossAddrSpaceOrdering,
476 Position Pos)
const override;
479class SIGfx940CacheControl :
public SIGfx90ACacheControl {
502 SIGfx940CacheControl(
const GCNSubtarget &ST) : SIGfx90ACacheControl(
ST) {};
506 SIAtomicAddrSpace AddrSpace)
const override;
510 SIAtomicAddrSpace AddrSpace)
const override;
514 SIAtomicAddrSpace AddrSpace)
const override;
517 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
518 bool IsVolatile,
bool IsNonTemporal,
519 bool IsLastUse)
const override;
522 SIAtomicAddrSpace AddrSpace, Position Pos)
const override;
525 SIAtomicAddrSpace AddrSpace,
bool IsCrossAddrSpaceOrdering,
526 Position Pos)
const override;
528 bool tryForceStoreSC0SC1(
const SIMemOpInfo &MOI,
530 bool Changed =
false;
531 if (
ST.hasForceStoreSC0SC1() &&
532 (MOI.getInstrAddrSpace() & (SIAtomicAddrSpace::SCRATCH |
533 SIAtomicAddrSpace::GLOBAL |
534 SIAtomicAddrSpace::OTHER)) !=
535 SIAtomicAddrSpace::NONE) {
536 Changed |= enableSC0Bit(
MI);
537 Changed |= enableSC1Bit(
MI);
543class SIGfx10CacheControl :
public SIGfx7CacheControl {
554 SIGfx10CacheControl(
const GCNSubtarget &ST) : SIGfx7CacheControl(
ST) {}
558 SIAtomicAddrSpace AddrSpace)
const override;
561 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
562 bool IsVolatile,
bool IsNonTemporal,
563 bool IsLastUse)
const override;
567 SIAtomicAddrSpace AddrSpace,
569 bool IsCrossAddrSpaceOrdering,
570 Position Pos)
const override;
574 SIAtomicAddrSpace AddrSpace,
575 Position Pos)
const override;
578class SIGfx11CacheControl :
public SIGfx10CacheControl {
580 SIGfx11CacheControl(
const GCNSubtarget &ST) : SIGfx10CacheControl(
ST) {}
584 SIAtomicAddrSpace AddrSpace)
const override;
587 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
588 bool IsVolatile,
bool IsNonTemporal,
589 bool IsLastUse)
const override;
592class SIGfx12CacheControl :
public SIGfx11CacheControl {
613 SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace)
const;
616 SIGfx12CacheControl(
const GCNSubtarget &ST) : SIGfx11CacheControl(
ST) {}
619 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
620 bool IsCrossAddrSpaceOrdering, Position Pos)
const override;
623 SIAtomicAddrSpace AddrSpace, Position Pos)
const override;
626 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
627 bool IsVolatile,
bool IsNonTemporal,
628 bool IsLastUse)
const override;
633 SIAtomicAddrSpace AddrSpace,
bool IsCrossAddrSpaceOrdering,
634 Position Pos)
const override;
638 SIAtomicAddrSpace AddrSpace)
const override {
639 return setAtomicScope(
MI, Scope, AddrSpace);
644 SIAtomicAddrSpace AddrSpace)
const override {
645 return setAtomicScope(
MI, Scope, AddrSpace);
650 SIAtomicAddrSpace AddrSpace)
const override {
651 return setAtomicScope(
MI, Scope, AddrSpace);
659 std::unique_ptr<SICacheControl>
CC =
nullptr;
662 std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;
672 bool removeAtomicPseudoMIs();
676 bool expandLoad(
const SIMemOpInfo &MOI,
680 bool expandStore(
const SIMemOpInfo &MOI,
684 bool expandAtomicFence(
const SIMemOpInfo &MOI,
688 bool expandAtomicCmpxchgOrRmw(
const SIMemOpInfo &MOI,
709 {
"global", SIAtomicAddrSpace::GLOBAL},
710 {
"local", SIAtomicAddrSpace::LDS},
718 OS <<
"unknown address space '" << AS <<
"'; expected one of ";
720 for (
const auto &[
Name, Val] : ASNames)
729static SIAtomicAddrSpace getFenceAddrSpaceMMRA(
const MachineInstr &
MI,
737 SIAtomicAddrSpace
Result = SIAtomicAddrSpace::NONE;
738 for (
const auto &[Prefix, Suffix] : MMRA) {
739 if (Prefix != FenceASPrefix)
742 if (
auto It = ASNames.find(Suffix); It != ASNames.end())
745 diagnoseUnknownMMRAASName(
MI, Suffix);
748 return (Result != SIAtomicAddrSpace::NONE) ?
Result :
Default;
// Tail of SIMemOpAccess::reportUnsupported (the signature line is missing
// from this extraction): emits a diagnostic against the function containing
// the offending machine instruction.
754 const char *Msg)
const {
755 const Function &
Func =
MI->getParent()->getParent()->getFunction();
// Diag is constructed on a missing line; it is reported via the LLVMContext.
757 Func.getContext().diagnose(Diag);
// Map an LLVM SyncScope::ID onto (SIAtomicScope, ordered address spaces,
// IsCrossAddressSpaceOrdering).  Plain scopes order all atomic address
// spaces (cross-AS ordering = true); the "one address space" variants order
// only the address spaces the instruction itself touches (cross-AS = false).
// Returns nullopt for unknown scopes (fall-through not visible here).
760std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
762 SIAtomicAddrSpace InstrAddrSpace)
const {
// NOTE(review): guard lines for the first case (system scope) and some
// 'true' arguments are missing from this extraction.
764 return std::tuple(SIAtomicScope::SYSTEM, SIAtomicAddrSpace::ATOMIC,
true);
765 if (SSID == MMI->getAgentSSID())
766 return std::tuple(SIAtomicScope::AGENT, SIAtomicAddrSpace::ATOMIC,
true);
767 if (SSID == MMI->getWorkgroupSSID())
768 return std::tuple(SIAtomicScope::WORKGROUP, SIAtomicAddrSpace::ATOMIC,
770 if (SSID == MMI->getWavefrontSSID())
771 return std::tuple(SIAtomicScope::WAVEFRONT, SIAtomicAddrSpace::ATOMIC,
774 return std::tuple(SIAtomicScope::SINGLETHREAD, SIAtomicAddrSpace::ATOMIC,
// One-address-space variants: restrict ordering to the address spaces the
// instruction accesses, and mark the op as NOT cross-address-space ordered.
776 if (SSID == MMI->getSystemOneAddressSpaceSSID())
777 return std::tuple(SIAtomicScope::SYSTEM,
778 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
779 if (SSID == MMI->getAgentOneAddressSpaceSSID())
780 return std::tuple(SIAtomicScope::AGENT,
781 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
782 if (SSID == MMI->getWorkgroupOneAddressSpaceSSID())
783 return std::tuple(SIAtomicScope::WORKGROUP,
784 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
785 if (SSID == MMI->getWavefrontOneAddressSpaceSSID())
786 return std::tuple(SIAtomicScope::WAVEFRONT,
787 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
788 if (SSID == MMI->getSingleThreadOneAddressSpaceSSID())
789 return std::tuple(SIAtomicScope::SINGLETHREAD,
790 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
// Translate a target address-space number into the pass's SIAtomicAddrSpace
// bitmask.  The per-case 'if (AS == ...)' guard lines are missing from this
// extraction; each return presumably corresponds to one AMDGPU address
// space, with OTHER as the catch-all — TODO confirm against upstream.
794SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(
unsigned AS)
const {
796 return SIAtomicAddrSpace::FLAT;
798 return SIAtomicAddrSpace::GLOBAL;
800 return SIAtomicAddrSpace::LDS;
802 return SIAtomicAddrSpace::SCRATCH;
804 return SIAtomicAddrSpace::GDS;
806 return SIAtomicAddrSpace::OTHER;
// Build a SIMemOpInfo by merging every memory operand (MMO) attached to the
// instruction: address spaces are unioned, nontemporal is ANDed (all MMOs
// must be nontemporal), last-use is ORed, and atomic scopes are widened to
// the most inclusive one.  Returns nullopt (via reportUnsupported paths) on
// unsupported scope/address-space combinations.
813std::optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
815 assert(
MI->getNumMemOperands() > 0);
820 SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
// Starts true so a single non-nontemporal MMO clears it below.
821 bool IsNonTemporal =
true;
823 bool IsLastUse =
false;
827 for (
const auto &MMO :
MI->memoperands()) {
828 IsNonTemporal &= MMO->isNonTemporal();
830 IsLastUse |= MMO->getFlags() &
MOLastUse;
832 toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace());
834 if (OpOrdering != AtomicOrdering::NotAtomic) {
// Keep whichever sync scope is the more inclusive of the two; bail out if
// neither includes the other.
835 const auto &IsSyncScopeInclusion =
836 MMI->isSyncScopeInclusion(SSID, MMO->getSyncScopeID());
837 if (!IsSyncScopeInclusion) {
838 reportUnsupported(
MI,
839 "Unsupported non-inclusive atomic synchronization scope");
843 SSID = *IsSyncScopeInclusion ? SSID : MMO->getSyncScopeID();
// Failure orderings can never carry release semantics (LangRef cmpxchg).
845 assert(MMO->getFailureOrdering() != AtomicOrdering::Release &&
846 MMO->getFailureOrdering() != AtomicOrdering::AcquireRelease);
852 SIAtomicScope
Scope = SIAtomicScope::NONE;
853 SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
854 bool IsCrossAddressSpaceOrdering =
false;
855 if (Ordering != AtomicOrdering::NotAtomic) {
856 auto ScopeOrNone = toSIAtomicScope(SSID, InstrAddrSpace);
858 reportUnsupported(
MI,
"Unsupported atomic synchronization scope");
861 std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
// Validate: ordering spaces must be non-empty, atomic-capable, and the
// instruction must touch at least one atomic-capable space.
863 if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
864 ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace) ||
865 ((InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) == SIAtomicAddrSpace::NONE)) {
866 reportUnsupported(
MI,
"Unsupported atomic address space");
870 return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
871 IsCrossAddressSpaceOrdering, FailureOrdering, IsVolatile,
872 IsNonTemporal, IsLastUse);
// Classify MI as a pure load (mayLoad && !mayStore); otherwise bail (the
// early-return line is missing from this extraction).  A load with no
// memory operands gets a conservative default SIMemOpInfo.
875 std::optional<SIMemOpInfo>
879 if (!(
MI->mayLoad() && !
MI->mayStore()))
883 if (
MI->getNumMemOperands() == 0)
884 return SIMemOpInfo();
886 return constructFromMIWithMMO(
MI);
// Classify MI as a pure store (!mayLoad && mayStore); otherwise bail (the
// early-return line is missing from this extraction).  A store with no
// memory operands gets a conservative default SIMemOpInfo.
889 std::optional<SIMemOpInfo>
893 if (!(!
MI->mayLoad() &&
MI->mayStore()))
897 if (
MI->getNumMemOperands() == 0)
898 return SIMemOpInfo();
900 return constructFromMIWithMMO(
MI);
// Build a SIMemOpInfo for an ATOMIC_FENCE pseudo.  Fences have no MMOs, so
// the scope comes straight from the fence's SSID over all atomic address
// spaces, and there is no failure ordering (NotAtomic is passed).
903 std::optional<SIMemOpInfo>
907 if (
MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
914 auto ScopeOrNone = toSIAtomicScope(SSID, SIAtomicAddrSpace::ATOMIC);
916 reportUnsupported(
MI,
"Unsupported atomic synchronization scope");
920 SIAtomicScope
Scope = SIAtomicScope::NONE;
921 SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
922 bool IsCrossAddressSpaceOrdering =
false;
923 std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
// Same validity conditions as constructFromMIWithMMO, minus the
// instruction-address-space check (a fence accesses nothing itself).
926 if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
927 ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
928 reportUnsupported(
MI,
"Unsupported atomic address space");
932 return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, SIAtomicAddrSpace::ATOMIC,
933 IsCrossAddressSpaceOrdering, AtomicOrdering::NotAtomic);
// Classify MI as an atomic read-modify-write / cmpxchg (mayLoad && mayStore);
// otherwise bail (the early-return line is missing from this extraction).
936std::optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
940 if (!(
MI->mayLoad() &&
MI->mayStore()))
944 if (
MI->getNumMemOperands() == 0)
945 return SIMemOpInfo();
947 return constructFromMIWithMMO(
MI);
951 TII =
ST.getInstrInfo();
// Factory: pick the cache-control implementation matching the subtarget,
// from the most specific (GFX940/GFX90A feature checks) down to per-
// generation defaults.  NOTE(review): the generation guard lines between the
// later returns are missing from this extraction — as shown, everything past
// the SIGfx6 return would be unreachable; confirm against upstream.
967std::unique_ptr<SICacheControl> SICacheControl::create(
const GCNSubtarget &ST) {
969 if (
ST.hasGFX940Insts())
970 return std::make_unique<SIGfx940CacheControl>(ST);
971 if (
ST.hasGFX90AInsts())
972 return std::make_unique<SIGfx90ACacheControl>(ST);
974 return std::make_unique<SIGfx6CacheControl>(ST);
976 return std::make_unique<SIGfx7CacheControl>(ST);
978 return std::make_unique<SIGfx10CacheControl>(ST);
980 return std::make_unique<SIGfx11CacheControl>(ST);
981 return std::make_unique<SIGfx12CacheControl>(ST);
984bool SIGfx6CacheControl::enableLoadCacheBypass(
987 SIAtomicAddrSpace AddrSpace)
const {
989 bool Changed =
false;
991 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
993 case SIAtomicScope::SYSTEM:
994 case SIAtomicScope::AGENT:
997 Changed |= enableGLCBit(
MI);
999 case SIAtomicScope::WORKGROUP:
1000 case SIAtomicScope::WAVEFRONT:
1001 case SIAtomicScope::SINGLETHREAD:
1019bool SIGfx6CacheControl::enableStoreCacheBypass(
1021 SIAtomicScope Scope,
1022 SIAtomicAddrSpace AddrSpace)
const {
1024 bool Changed =
false;
1032bool SIGfx6CacheControl::enableRMWCacheBypass(
1034 SIAtomicScope Scope,
1035 SIAtomicAddrSpace AddrSpace)
const {
1037 bool Changed =
false;
1047bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal(
1049 bool IsVolatile,
bool IsNonTemporal,
bool IsLastUse =
false)
const {
1059 assert(
Op == SIMemOp::LOAD ||
Op == SIMemOp::STORE);
1061 bool Changed =
false;
1067 if (
Op == SIMemOp::LOAD)
1068 Changed |= enableGLCBit(
MI);
1075 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
1081 if (IsNonTemporal) {
1084 Changed |= enableGLCBit(
MI);
1085 Changed |= enableSLCBit(
MI);
// Body of SIGfx6CacheControl::insertWait (the signature line is missing from
// this extraction): decides which hardware wait counters (vmcnt / lgkmcnt)
// must be zero to order the given address spaces at the given scope, then
// emits a single S_WAITCNT before or after MI per Pos.  NOTE(review): the
// switch headers and several case bodies are missing; the visible case
// labels only outline the scope handling.
1093 SIAtomicScope Scope,
1094 SIAtomicAddrSpace AddrSpace,
1096 bool IsCrossAddrSpaceOrdering,
1097 Position Pos)
const {
1098 bool Changed =
false;
1103 if (Pos == Position::AFTER)
1107 bool LGKMCnt =
false;
// Global/scratch accesses: system and agent scope need the vector memory
// counter; narrower scopes are ordered by other means (bodies not visible).
1109 if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
1110 SIAtomicAddrSpace::NONE) {
1112 case SIAtomicScope::SYSTEM:
1113 case SIAtomicScope::AGENT:
1116 case SIAtomicScope::WORKGROUP:
1117 case SIAtomicScope::WAVEFRONT:
1118 case SIAtomicScope::SINGLETHREAD:
// LDS accesses: lgkmcnt is only needed when ordering crosses address
// spaces at workgroup scope or wider.
1127 if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
1129 case SIAtomicScope::SYSTEM:
1130 case SIAtomicScope::AGENT:
1131 case SIAtomicScope::WORKGROUP:
1138 LGKMCnt |= IsCrossAddrSpaceOrdering;
1140 case SIAtomicScope::WAVEFRONT:
1141 case SIAtomicScope::SINGLETHREAD:
// GDS accesses: analogous, but only system/agent scope can require it.
1150 if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
1152 case SIAtomicScope::SYSTEM:
1153 case SIAtomicScope::AGENT:
1160 LGKMCnt |= IsCrossAddrSpaceOrdering;
1162 case SIAtomicScope::WORKGROUP:
1163 case SIAtomicScope::WAVEFRONT:
1164 case SIAtomicScope::SINGLETHREAD:
// Fold the selected counters into one S_WAITCNT immediate (encoding lines
// not visible) and emit it.
1173 if (VMCnt || LGKMCnt) {
1174 unsigned WaitCntImmediate =
1180 .
addImm(WaitCntImmediate);
1184 if (Pos == Position::AFTER)
1191 SIAtomicScope Scope,
1192 SIAtomicAddrSpace AddrSpace,
1193 Position Pos)
const {
1194 if (!InsertCacheInv)
1197 bool Changed =
false;
1202 if (Pos == Position::AFTER)
1205 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1207 case SIAtomicScope::SYSTEM:
1208 case SIAtomicScope::AGENT:
1212 case SIAtomicScope::WORKGROUP:
1213 case SIAtomicScope::WAVEFRONT:
1214 case SIAtomicScope::SINGLETHREAD:
1229 if (Pos == Position::AFTER)
1236 SIAtomicScope Scope,
1237 SIAtomicAddrSpace AddrSpace,
1238 bool IsCrossAddrSpaceOrdering,
1239 Position Pos)
const {
1240 return insertWait(
MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
1241 IsCrossAddrSpaceOrdering, Pos);
1245 SIAtomicScope Scope,
1246 SIAtomicAddrSpace AddrSpace,
1247 Position Pos)
const {
1248 if (!InsertCacheInv)
1251 bool Changed =
false;
1259 ? AMDGPU::BUFFER_WBINVL1
1260 : AMDGPU::BUFFER_WBINVL1_VOL;
1262 if (Pos == Position::AFTER)
1265 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1267 case SIAtomicScope::SYSTEM:
1268 case SIAtomicScope::AGENT:
1272 case SIAtomicScope::WORKGROUP:
1273 case SIAtomicScope::WAVEFRONT:
1274 case SIAtomicScope::SINGLETHREAD:
1289 if (Pos == Position::AFTER)
1295bool SIGfx90ACacheControl::enableLoadCacheBypass(
1297 SIAtomicScope Scope,
1298 SIAtomicAddrSpace AddrSpace)
const {
1300 bool Changed =
false;
1302 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1304 case SIAtomicScope::SYSTEM:
1305 case SIAtomicScope::AGENT:
1308 Changed |= enableGLCBit(
MI);
1310 case SIAtomicScope::WORKGROUP:
1315 if (
ST.isTgSplitEnabled())
1316 Changed |= enableGLCBit(
MI);
1318 case SIAtomicScope::WAVEFRONT:
1319 case SIAtomicScope::SINGLETHREAD:
1337bool SIGfx90ACacheControl::enableStoreCacheBypass(
1339 SIAtomicScope Scope,
1340 SIAtomicAddrSpace AddrSpace)
const {
1342 bool Changed =
false;
1344 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1346 case SIAtomicScope::SYSTEM:
1347 case SIAtomicScope::AGENT:
1351 case SIAtomicScope::WORKGROUP:
1352 case SIAtomicScope::WAVEFRONT:
1353 case SIAtomicScope::SINGLETHREAD:
1372bool SIGfx90ACacheControl::enableRMWCacheBypass(
1374 SIAtomicScope Scope,
1375 SIAtomicAddrSpace AddrSpace)
const {
1377 bool Changed =
false;
1379 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1381 case SIAtomicScope::SYSTEM:
1382 case SIAtomicScope::AGENT:
1387 case SIAtomicScope::WORKGROUP:
1388 case SIAtomicScope::WAVEFRONT:
1389 case SIAtomicScope::SINGLETHREAD:
1400bool SIGfx90ACacheControl::enableVolatileAndOrNonTemporal(
1402 bool IsVolatile,
bool IsNonTemporal,
bool IsLastUse =
false)
const {
1412 assert(
Op == SIMemOp::LOAD ||
Op == SIMemOp::STORE);
1414 bool Changed =
false;
1420 if (
Op == SIMemOp::LOAD)
1421 Changed |= enableGLCBit(
MI);
1428 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
1434 if (IsNonTemporal) {
1437 Changed |= enableGLCBit(
MI);
1438 Changed |= enableSLCBit(
MI);
1446 SIAtomicScope Scope,
1447 SIAtomicAddrSpace AddrSpace,
1449 bool IsCrossAddrSpaceOrdering,
1450 Position Pos)
const {
1451 if (
ST.isTgSplitEnabled()) {
1459 if (((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH |
1460 SIAtomicAddrSpace::GDS)) != SIAtomicAddrSpace::NONE) &&
1461 (Scope == SIAtomicScope::WORKGROUP)) {
1463 Scope = SIAtomicScope::AGENT;
1467 AddrSpace &= ~SIAtomicAddrSpace
::LDS;
1469 return SIGfx7CacheControl::insertWait(
MI, Scope, AddrSpace,
Op,
1470 IsCrossAddrSpaceOrdering, Pos);
1474 SIAtomicScope Scope,
1475 SIAtomicAddrSpace AddrSpace,
1476 Position Pos)
const {
1477 if (!InsertCacheInv)
1480 bool Changed =
false;
1485 if (Pos == Position::AFTER)
1488 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1490 case SIAtomicScope::SYSTEM:
1502 case SIAtomicScope::AGENT:
1505 case SIAtomicScope::WORKGROUP:
1510 if (
ST.isTgSplitEnabled()) {
1512 Scope = SIAtomicScope::AGENT;
1515 case SIAtomicScope::WAVEFRONT:
1516 case SIAtomicScope::SINGLETHREAD:
1531 if (Pos == Position::AFTER)
1534 Changed |= SIGfx7CacheControl::insertAcquire(
MI, Scope, AddrSpace, Pos);
1540 SIAtomicScope Scope,
1541 SIAtomicAddrSpace AddrSpace,
1542 bool IsCrossAddrSpaceOrdering,
1543 Position Pos)
const {
1544 bool Changed =
false;
1549 if (Pos == Position::AFTER)
1552 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1554 case SIAtomicScope::SYSTEM:
1568 case SIAtomicScope::AGENT:
1569 case SIAtomicScope::WORKGROUP:
1570 case SIAtomicScope::WAVEFRONT:
1571 case SIAtomicScope::SINGLETHREAD:
1579 if (Pos == Position::AFTER)
1583 SIGfx7CacheControl::insertRelease(
MI, Scope, AddrSpace,
1584 IsCrossAddrSpaceOrdering, Pos);
1589bool SIGfx940CacheControl::enableLoadCacheBypass(
1591 SIAtomicAddrSpace AddrSpace)
const {
1593 bool Changed =
false;
1595 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1597 case SIAtomicScope::SYSTEM:
1599 Changed |= enableSC0Bit(
MI);
1600 Changed |= enableSC1Bit(
MI);
1602 case SIAtomicScope::AGENT:
1604 Changed |= enableSC1Bit(
MI);
1606 case SIAtomicScope::WORKGROUP:
1612 Changed |= enableSC0Bit(
MI);
1614 case SIAtomicScope::WAVEFRONT:
1615 case SIAtomicScope::SINGLETHREAD:
1633bool SIGfx940CacheControl::enableStoreCacheBypass(
1635 SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace)
const {
1637 bool Changed =
false;
1639 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1641 case SIAtomicScope::SYSTEM:
1643 Changed |= enableSC0Bit(
MI);
1644 Changed |= enableSC1Bit(
MI);
1646 case SIAtomicScope::AGENT:
1648 Changed |= enableSC1Bit(
MI);
1650 case SIAtomicScope::WORKGROUP:
1652 Changed |= enableSC0Bit(
MI);
1654 case SIAtomicScope::WAVEFRONT:
1655 case SIAtomicScope::SINGLETHREAD:
1673bool SIGfx940CacheControl::enableRMWCacheBypass(
1675 SIAtomicAddrSpace AddrSpace)
const {
1677 bool Changed =
false;
1679 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1681 case SIAtomicScope::SYSTEM:
1683 Changed |= enableSC1Bit(
MI);
1685 case SIAtomicScope::AGENT:
1686 case SIAtomicScope::WORKGROUP:
1687 case SIAtomicScope::WAVEFRONT:
1688 case SIAtomicScope::SINGLETHREAD:
1702bool SIGfx940CacheControl::enableVolatileAndOrNonTemporal(
1704 bool IsVolatile,
bool IsNonTemporal,
bool IsLastUse =
false)
const {
1714 assert(
Op == SIMemOp::LOAD ||
Op == SIMemOp::STORE);
1716 bool Changed =
false;
1720 Changed |= enableSC0Bit(
MI);
1721 Changed |= enableSC1Bit(
MI);
1728 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
1734 if (IsNonTemporal) {
1735 Changed |= enableNTBit(
MI);
1743 SIAtomicScope Scope,
1744 SIAtomicAddrSpace AddrSpace,
1745 Position Pos)
const {
1746 if (!InsertCacheInv)
1749 bool Changed =
false;
1754 if (Pos == Position::AFTER)
1757 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1759 case SIAtomicScope::SYSTEM:
1773 case SIAtomicScope::AGENT:
1786 case SIAtomicScope::WORKGROUP:
1791 if (
ST.isTgSplitEnabled()) {
1805 case SIAtomicScope::WAVEFRONT:
1806 case SIAtomicScope::SINGLETHREAD:
1822 if (Pos == Position::AFTER)
1829 SIAtomicScope Scope,
1830 SIAtomicAddrSpace AddrSpace,
1831 bool IsCrossAddrSpaceOrdering,
1832 Position Pos)
const {
1833 bool Changed =
false;
1838 if (Pos == Position::AFTER)
1841 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1843 case SIAtomicScope::SYSTEM:
1858 case SIAtomicScope::AGENT:
1868 case SIAtomicScope::WORKGROUP:
1869 case SIAtomicScope::WAVEFRONT:
1870 case SIAtomicScope::SINGLETHREAD:
1880 if (Pos == Position::AFTER)
1885 Changed |= insertWait(
MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
1886 IsCrossAddrSpaceOrdering, Pos);
1891bool SIGfx10CacheControl::enableLoadCacheBypass(
1893 SIAtomicScope Scope,
1894 SIAtomicAddrSpace AddrSpace)
const {
1896 bool Changed =
false;
1898 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1900 case SIAtomicScope::SYSTEM:
1901 case SIAtomicScope::AGENT:
1904 Changed |= enableGLCBit(
MI);
1905 Changed |= enableDLCBit(
MI);
1907 case SIAtomicScope::WORKGROUP:
1912 if (!
ST.isCuModeEnabled())
1913 Changed |= enableGLCBit(
MI);
1915 case SIAtomicScope::WAVEFRONT:
1916 case SIAtomicScope::SINGLETHREAD:
1934bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal(
1936 bool IsVolatile,
bool IsNonTemporal,
bool IsLastUse =
false)
const {
1947 assert(
Op == SIMemOp::LOAD ||
Op == SIMemOp::STORE);
1949 bool Changed =
false;
1955 if (
Op == SIMemOp::LOAD) {
1956 Changed |= enableGLCBit(
MI);
1957 Changed |= enableDLCBit(
MI);
1965 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
1970 if (IsNonTemporal) {
1975 if (
Op == SIMemOp::STORE)
1976 Changed |= enableGLCBit(
MI);
1977 Changed |= enableSLCBit(
MI);
1986 SIAtomicScope Scope,
1987 SIAtomicAddrSpace AddrSpace,
1989 bool IsCrossAddrSpaceOrdering,
1990 Position Pos)
const {
1991 bool Changed =
false;
1996 if (Pos == Position::AFTER)
2001 bool LGKMCnt =
false;
2003 if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
2004 SIAtomicAddrSpace::NONE) {
2006 case SIAtomicScope::SYSTEM:
2007 case SIAtomicScope::AGENT:
2008 if ((
Op & SIMemOp::LOAD) != SIMemOp::NONE)
2010 if ((
Op & SIMemOp::STORE) != SIMemOp::NONE)
2013 case SIAtomicScope::WORKGROUP:
2019 if (!
ST.isCuModeEnabled()) {
2020 if ((
Op & SIMemOp::LOAD) != SIMemOp::NONE)
2022 if ((
Op & SIMemOp::STORE) != SIMemOp::NONE)
2026 case SIAtomicScope::WAVEFRONT:
2027 case SIAtomicScope::SINGLETHREAD:
2036 if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
2038 case SIAtomicScope::SYSTEM:
2039 case SIAtomicScope::AGENT:
2040 case SIAtomicScope::WORKGROUP:
2047 LGKMCnt |= IsCrossAddrSpaceOrdering;
2049 case SIAtomicScope::WAVEFRONT:
2050 case SIAtomicScope::SINGLETHREAD:
2059 if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
2061 case SIAtomicScope::SYSTEM:
2062 case SIAtomicScope::AGENT:
2069 LGKMCnt |= IsCrossAddrSpaceOrdering;
2071 case SIAtomicScope::WORKGROUP:
2072 case SIAtomicScope::WAVEFRONT:
2073 case SIAtomicScope::SINGLETHREAD:
2082 if (VMCnt || LGKMCnt) {
2083 unsigned WaitCntImmediate =
2089 .
addImm(WaitCntImmediate);
2100 if (Pos == Position::AFTER)
2107 SIAtomicScope Scope,
2108 SIAtomicAddrSpace AddrSpace,
2109 Position Pos)
const {
2110 if (!InsertCacheInv)
2113 bool Changed =
false;
2118 if (Pos == Position::AFTER)
2121 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
2123 case SIAtomicScope::SYSTEM:
2124 case SIAtomicScope::AGENT:
2132 case SIAtomicScope::WORKGROUP:
2137 if (!
ST.isCuModeEnabled()) {
2142 case SIAtomicScope::WAVEFRONT:
2143 case SIAtomicScope::SINGLETHREAD:
2158 if (Pos == Position::AFTER)
2164bool SIGfx11CacheControl::enableLoadCacheBypass(
2166 SIAtomicAddrSpace AddrSpace)
const {
2168 bool Changed =
false;
2170 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
2172 case SIAtomicScope::SYSTEM:
2173 case SIAtomicScope::AGENT:
2176 Changed |= enableGLCBit(
MI);
2178 case SIAtomicScope::WORKGROUP:
2183 if (!
ST.isCuModeEnabled())
2184 Changed |= enableGLCBit(
MI);
2186 case SIAtomicScope::WAVEFRONT:
2187 case SIAtomicScope::SINGLETHREAD:
2205bool SIGfx11CacheControl::enableVolatileAndOrNonTemporal(
2207 bool IsVolatile,
bool IsNonTemporal,
bool IsLastUse =
false)
const {
2218 assert(
Op == SIMemOp::LOAD ||
Op == SIMemOp::STORE);
2220 bool Changed =
false;
2226 if (
Op == SIMemOp::LOAD)
2227 Changed |= enableGLCBit(
MI);
2230 Changed |= enableDLCBit(
MI);
2237 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
2242 if (IsNonTemporal) {
2247 if (
Op == SIMemOp::STORE)
2248 Changed |= enableGLCBit(
MI);
2249 Changed |= enableSLCBit(
MI);
2252 Changed |= enableDLCBit(
MI);
2289bool SIGfx12CacheControl::insertWaitsBeforeSystemScopeStore(
2306 SIAtomicScope Scope,
2307 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
2308 bool IsCrossAddrSpaceOrdering,
2309 Position Pos)
const {
2310 bool Changed =
false;
2315 bool LOADCnt =
false;
2317 bool STORECnt =
false;
2319 if (Pos == Position::AFTER)
2322 if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
2323 SIAtomicAddrSpace::NONE) {
2325 case SIAtomicScope::SYSTEM:
2326 case SIAtomicScope::AGENT:
2327 if ((
Op & SIMemOp::LOAD) != SIMemOp::NONE)
2329 if ((
Op & SIMemOp::STORE) != SIMemOp::NONE)
2332 case SIAtomicScope::WORKGROUP:
2338 if (!
ST.isCuModeEnabled()) {
2339 if ((
Op & SIMemOp::LOAD) != SIMemOp::NONE)
2341 if ((
Op & SIMemOp::STORE) != SIMemOp::NONE)
2345 case SIAtomicScope::WAVEFRONT:
2346 case SIAtomicScope::SINGLETHREAD:
2355 if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
2357 case SIAtomicScope::SYSTEM:
2358 case SIAtomicScope::AGENT:
2359 case SIAtomicScope::WORKGROUP:
2366 DSCnt |= IsCrossAddrSpaceOrdering;
2368 case SIAtomicScope::WAVEFRONT:
2369 case SIAtomicScope::SINGLETHREAD:
2395 if (Pos == Position::AFTER)
2402 SIAtomicScope Scope,
2403 SIAtomicAddrSpace AddrSpace,
2404 Position Pos)
const {
2405 if (!InsertCacheInv)
2417 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) == SIAtomicAddrSpace::NONE)
2422 case SIAtomicScope::SYSTEM:
2425 case SIAtomicScope::AGENT:
2428 case SIAtomicScope::WORKGROUP:
2433 if (
ST.isCuModeEnabled())
2438 case SIAtomicScope::WAVEFRONT:
2439 case SIAtomicScope::SINGLETHREAD:
2446 if (Pos == Position::AFTER)
2451 if (Pos == Position::AFTER)
2458 SIAtomicScope Scope,
2459 SIAtomicAddrSpace AddrSpace,
2460 bool IsCrossAddrSpaceOrdering,
2461 Position Pos)
const {
2471 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) == SIAtomicAddrSpace::NONE)
2474 if (Pos == Position::AFTER)
2479 bool SkipWB =
false;
2482 case SIAtomicScope::SYSTEM:
2485 case SIAtomicScope::AGENT:
2488 case SIAtomicScope::WORKGROUP:
2495 if (
ST.isCuModeEnabled())
2500 case SIAtomicScope::WAVEFRONT:
2501 case SIAtomicScope::SINGLETHREAD:
2511 if (Pos == Position::AFTER)
2517 insertWait(
MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
2518 IsCrossAddrSpaceOrdering, Pos);
2523bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
2525 bool IsVolatile,
bool IsNonTemporal,
bool IsLastUse =
false)
const {
2534 assert(
Op == SIMemOp::LOAD ||
Op == SIMemOp::STORE);
2536 bool Changed =
false;
2541 }
else if (IsNonTemporal) {
2549 if (
Op == SIMemOp::STORE)
2550 Changed |= insertWaitsBeforeSystemScopeStore(
MI);
2557 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
2564bool SIGfx12CacheControl::expandSystemScopeStore(
2568 return insertWaitsBeforeSystemScopeStore(
MI);
2574 SIAtomicScope Scope,
2575 SIAtomicAddrSpace AddrSpace)
const {
2576 bool Changed =
false;
2578 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
2580 case SIAtomicScope::SYSTEM:
2583 case SIAtomicScope::AGENT:
2586 case SIAtomicScope::WORKGROUP:
2589 if (!
ST.isCuModeEnabled())
2592 case SIAtomicScope::WAVEFRONT:
2593 case SIAtomicScope::SINGLETHREAD:
// Erase all queued atomic pseudo-instructions and clear the work list.
// Returns whether anything changed (the 'return false'/'return true' lines
// are missing from this extraction).
2611bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
2612 if (AtomicPseudoMIs.empty())
2615 for (
auto &
MI : AtomicPseudoMIs)
2616 MI->eraseFromParent();
2618 AtomicPseudoMIs.clear();
// Legalize one load: for atomic loads, bypass caches per scope, then insert
// the waits/invalidates the ordering demands; for non-atomic loads, apply
// volatile / nontemporal / last-use cache hints.  Returns true if MI or its
// surroundings were modified.
2622bool SIMemoryLegalizer::expandLoad(
const SIMemOpInfo &MOI,
2626 bool Changed =
false;
2628 if (MOI.isAtomic()) {
// Monotonic-or-stronger atomic loads must not hit stale data in caches
// narrower than their scope.
2629 if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
2630 MOI.getOrdering() == AtomicOrdering::Acquire ||
2631 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
2632 Changed |=
CC->enableLoadCacheBypass(
MI, MOI.getScope(),
2633 MOI.getOrderingAddrSpace());
// seq_cst: drain prior loads AND stores before the load executes.
2636 if (MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
2637 Changed |=
CC->insertWait(
MI, MOI.getScope(),
2638 MOI.getOrderingAddrSpace(),
2639 SIMemOp::LOAD | SIMemOp::STORE,
2640 MOI.getIsCrossAddressSpaceOrdering(),
// acquire (and seq_cst): wait for the load itself, then invalidate caches
// so subsequent accesses observe other agents' writes.
2643 if (MOI.getOrdering() == AtomicOrdering::Acquire ||
2644 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
2645 Changed |=
CC->insertWait(
MI, MOI.getScope(),
2646 MOI.getInstrAddrSpace(),
2648 MOI.getIsCrossAddressSpaceOrdering(),
2650 Changed |=
CC->insertAcquire(
MI, MOI.getScope(),
2651 MOI.getOrderingAddrSpace(),
// Non-atomic path: only the cache-policy hints apply.
2661 Changed |=
CC->enableVolatileAndOrNonTemporal(
2662 MI, MOI.getInstrAddrSpace(), SIMemOp::LOAD, MOI.isVolatile(),
2663 MOI.isNonTemporal(), MOI.isLastUse());
2668bool SIMemoryLegalizer::expandStore(
const SIMemOpInfo &MOI,
2672 bool Changed =
false;
2674 if (MOI.isAtomic()) {
2675 if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
2676 MOI.getOrdering() == AtomicOrdering::Release ||
2677 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
2678 Changed |=
CC->enableStoreCacheBypass(
MI, MOI.getScope(),
2679 MOI.getOrderingAddrSpace());
2682 if (MOI.getOrdering() == AtomicOrdering::Release ||
2683 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
2684 Changed |=
CC->insertRelease(
MI, MOI.getScope(),
2685 MOI.getOrderingAddrSpace(),
2686 MOI.getIsCrossAddressSpaceOrdering(),
2695 Changed |=
CC->enableVolatileAndOrNonTemporal(
2696 MI, MOI.getInstrAddrSpace(), SIMemOp::STORE, MOI.isVolatile(),
2697 MOI.isNonTemporal());
2701 Changed |=
CC->expandSystemScopeStore(
MI);
2705bool SIMemoryLegalizer::expandAtomicFence(
const SIMemOpInfo &MOI,
2707 assert(
MI->getOpcode() == AMDGPU::ATOMIC_FENCE);
2709 AtomicPseudoMIs.push_back(
MI);
2710 bool Changed =
false;
2715 auto OrderingAddrSpace =
2716 getFenceAddrSpaceMMRA(*
MI, MOI.getOrderingAddrSpace());
2718 if (MOI.isAtomic()) {
2719 if (MOI.getOrdering() == AtomicOrdering::Acquire)
2720 Changed |=
CC->insertWait(
2721 MI, MOI.getScope(), OrderingAddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
2722 MOI.getIsCrossAddressSpaceOrdering(), Position::BEFORE);
2724 if (MOI.getOrdering() == AtomicOrdering::Release ||
2725 MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
2726 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
2734 Changed |=
CC->insertRelease(
MI, MOI.getScope(), OrderingAddrSpace,
2735 MOI.getIsCrossAddressSpaceOrdering(),
2743 if (MOI.getOrdering() == AtomicOrdering::Acquire ||
2744 MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
2745 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
2746 Changed |=
CC->insertAcquire(
MI, MOI.getScope(), OrderingAddrSpace,
2755bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(
const SIMemOpInfo &MOI,
2759 bool Changed =
false;
2761 if (MOI.isAtomic()) {
2762 if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
2763 MOI.getOrdering() == AtomicOrdering::Acquire ||
2764 MOI.getOrdering() == AtomicOrdering::Release ||
2765 MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
2766 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
2767 Changed |=
CC->enableRMWCacheBypass(
MI, MOI.getScope(),
2768 MOI.getInstrAddrSpace());
2771 if (MOI.getOrdering() == AtomicOrdering::Release ||
2772 MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
2773 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
2774 MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
2775 Changed |=
CC->insertRelease(
MI, MOI.getScope(),
2776 MOI.getOrderingAddrSpace(),
2777 MOI.getIsCrossAddressSpaceOrdering(),
2780 if (MOI.getOrdering() == AtomicOrdering::Acquire ||
2781 MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
2782 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
2783 MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
2784 MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
2785 Changed |=
CC->insertWait(
MI, MOI.getScope(),
2786 MOI.getInstrAddrSpace(),
2787 isAtomicRet(*
MI) ? SIMemOp::LOAD :
2789 MOI.getIsCrossAddressSpaceOrdering(),
2791 Changed |=
CC->insertAcquire(
MI, MOI.getScope(),
2792 MOI.getOrderingAddrSpace(),
2803 bool Changed =
false;
2805 SIMemOpAccess MOA(MF);
2808 for (
auto &
MBB : MF) {
2812 if (
MI->isBundle() &&
MI->mayLoadOrStore()) {
2815 I != E &&
I->isBundledWithPred(); ++
I) {
2816 I->unbundleFromPred();
2819 MO.setIsInternalRead(
false);
2822 MI->eraseFromParent();
2823 MI =
II->getIterator();
2829 if (
const auto &MOI = MOA.getLoadInfo(
MI))
2830 Changed |= expandLoad(*MOI,
MI);
2831 else if (
const auto &MOI = MOA.getStoreInfo(
MI)) {
2832 Changed |= expandStore(*MOI,
MI);
2833 Changed |=
CC->tryForceStoreSC0SC1(*MOI,
MI);
2834 }
else if (
const auto &MOI = MOA.getAtomicFenceInfo(
MI))
2835 Changed |= expandAtomicFence(*MOI,
MI);
2836 else if (
const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(
MI))
2837 Changed |= expandAtomicCmpxchgOrRmw(*MOI,
MI);
2841 Changed |= removeAtomicPseudoMIs();
2847char SIMemoryLegalizer::
ID = 0;
2851 return new SIMemoryLegalizer();
static std::optional< LoadInfo > getLoadInfo(const MachineInstr &MI)
Provides AMDGPU specific target descriptions.
AMDGPU Machine Module Info.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Atomic ordering constants.
#define LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE() pulls the operator overloads used by LLVM_MARK_AS_BITMASK_EN...
#define LLVM_MARK_AS_BITMASK_ENUM(LargestValue)
LLVM_MARK_AS_BITMASK_ENUM lets you opt in an individual enum type so you can perform bitwise operatio...
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static cl::opt< bool > AmdgcnSkipCacheInvalidations("amdgcn-skip-cache-invalidations", cl::init(false), cl::Hidden, cl::desc("Use this to skip inserting cache invalidating instructions."))
static SPIRV::Scope::Scope getScope(SyncScope::ID Ord, SPIRVMachineModuleInfo *MMI)
static const uint32_t IV[8]
Represent the analysis usage information of a pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
This class represents an Operation in the Expression.
Diagnostic information for unsupported feature in backend.
FunctionPass class - This class is used to implement most global optimizations.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Instructions::iterator instr_iterator
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineModuleInfo & getMMI() const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
Ty & getObjFileInfo()
Keep track of various per-module pieces of information for backends that would like to do so.
MachineOperand class - Representation of each machine instruction operand.
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
static bool isAtomicRet(const MachineInstr &MI)
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
StringRef - Represent a constant reference to a string, i.e.
LLVM Value Representation.
A raw_ostream that writes to an SmallVector or SmallString.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
IsaVersion getIsaVersion(StringRef GPU)
unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt)
Encodes Vmcnt, Expcnt and Lgkmcnt into Waitcnt for given isa Version.
unsigned getVmcntBitMask(const IsaVersion &Version)
unsigned getLgkmcntBitMask(const IsaVersion &Version)
unsigned getExpcntBitMask(const IsaVersion &Version)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ Undef
Value of the register doesn't matter.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
@ System
Synchronized with respect to all concurrently executing threads.
initializer< Ty > init(const Ty &Val)
Scope
Defines the scope in which this symbol should be visible: Default – Visible in the public interface o...
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
char & SIMemoryLegalizerID
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
AtomicOrdering getMergedAtomicOrdering(AtomicOrdering AO, AtomicOrdering Other)
Return a single atomic ordering that is at least as strong as both the AO and Other orderings for an ...
AtomicOrdering
Atomic ordering for LLVM's memory model.
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
FunctionPass * createSIMemoryLegalizerPass()
Instruction set architecture version.