#define DEBUG_TYPE "si-memory-legalizer"
#define PASS_NAME "SI Memory Legalizer"
static cl::opt<bool> AmdgcnSkipCacheInvalidations(
    "amdgcn-skip-cache-invalidations", cl::init(false), cl::Hidden,
    cl::desc("Use this to skip inserting cache invalidating instructions."));
enum class SIAtomicScope { NONE, SINGLETHREAD, WAVEFRONT, WORKGROUP, AGENT, SYSTEM };

enum class SIAtomicAddrSpace {

class SIMemOpInfo final {
private:
  friend class SIMemOpAccess;

  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
  SIAtomicScope Scope = SIAtomicScope::SYSTEM;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  bool IsVolatile = false;
  bool IsNonTemporal = false;
  bool IsLastUse = false;

  SIMemOpInfo(
      AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
      SIAtomicScope Scope = SIAtomicScope::SYSTEM,
      SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC,
      SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::ALL,
      bool IsCrossAddressSpaceOrdering = true,
      AtomicOrdering FailureOrdering = AtomicOrdering::SequentiallyConsistent,
      bool IsVolatile = false, bool IsNonTemporal = false,
      bool IsLastUse = false)
      : Ordering(Ordering), FailureOrdering(FailureOrdering), Scope(Scope),
        OrderingAddrSpace(OrderingAddrSpace), InstrAddrSpace(InstrAddrSpace),
        IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
        IsVolatile(IsVolatile), IsNonTemporal(IsNonTemporal),
        IsLastUse(IsLastUse) {
    if (Ordering == AtomicOrdering::NotAtomic) {
      assert(Scope == SIAtomicScope::NONE &&
             OrderingAddrSpace == SIAtomicAddrSpace::NONE &&
             !IsCrossAddressSpaceOrdering &&
             FailureOrdering == AtomicOrdering::NotAtomic);
      return;
    }

    assert(Scope != SIAtomicScope::NONE &&
           (OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
               SIAtomicAddrSpace::NONE &&
           (InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
               SIAtomicAddrSpace::NONE);

    // There is no cross address space ordering if the ordering address space
    // is the same as the instruction's address space and contains only a
    // single address space.
    if ((OrderingAddrSpace == InstrAddrSpace) &&
        isPowerOf2_32(uint32_t(InstrAddrSpace)))
      this->IsCrossAddressSpaceOrdering = false;

    // Limit the scope to the maximum supported by the instruction's address
    // spaces.
    if ((InstrAddrSpace & ~SIAtomicAddrSpace::SCRATCH) ==
        SIAtomicAddrSpace::NONE) {
      this->Scope = std::min(Scope, SIAtomicScope::SINGLETHREAD);
    } else if ((InstrAddrSpace &
                ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS)) ==
               SIAtomicAddrSpace::NONE) {
      this->Scope = std::min(Scope, SIAtomicScope::WORKGROUP);
    } else if ((InstrAddrSpace &
                ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS |
                  SIAtomicAddrSpace::GDS)) == SIAtomicAddrSpace::NONE) {
      this->Scope = std::min(Scope, SIAtomicScope::AGENT);
    }
  }
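  // Note (added for this excerpt, not in the original source): the narrowing
  // above means, for example, that an atomic whose only address space is LDS
  // is limited to WORKGROUP scope, since LDS is only shared within a
  // work-group, while a scratch-only access is limited to SINGLETHREAD,
  // since scratch memory is private to a thread.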
  AtomicOrdering getFailureOrdering() const { return FailureOrdering; }

  SIAtomicAddrSpace getInstrAddrSpace() const { return InstrAddrSpace; }

  SIAtomicAddrSpace getOrderingAddrSpace() const { return OrderingAddrSpace; }

  bool getIsCrossAddressSpaceOrdering() const {
    return IsCrossAddressSpaceOrdering;
  }

  bool isVolatile() const { return IsVolatile; }

  bool isNonTemporal() const { return IsNonTemporal; }

  bool isLastUse() const { return IsLastUse; }

  bool isAtomic() const { return Ordering != AtomicOrdering::NotAtomic; }
};
class SIMemOpAccess final {
private:
  void reportUnsupported(const MachineBasicBlock::iterator &MI,
                         const char *Msg) const;

  std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
  toSIAtomicScope(SyncScope::ID SSID, SIAtomicAddrSpace InstrAddrSpace) const;

  SIAtomicAddrSpace toSIAtomicAddrSpace(unsigned AS) const;

  std::optional<SIMemOpInfo>
  constructFromMIWithMMO(const MachineBasicBlock::iterator &MI) const;

public:
  std::optional<SIMemOpInfo> getLoadInfo(const MachineBasicBlock::iterator &MI) const;
  std::optional<SIMemOpInfo> getStoreInfo(const MachineBasicBlock::iterator &MI) const;
  std::optional<SIMemOpInfo> getAtomicFenceInfo(const MachineBasicBlock::iterator &MI) const;
  std::optional<SIMemOpInfo> getAtomicCmpxchgOrRmwInfo(const MachineBasicBlock::iterator &MI) const;
};
class SICacheControl {
protected:
  /// AMDGPU subtarget info.
  const GCNSubtarget &ST;

  /// Instruction info.
  const SIInstrInfo *TII = nullptr;

  IsaVersion IV;

  /// Whether to insert cache invalidating instructions.
  bool InsertCacheInv;

  SICacheControl(const GCNSubtarget &ST);

public:
  /// Create a cache control for the given subtarget.
  static std::unique_ptr<SICacheControl> create(const GCNSubtarget &ST);

  virtual bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                                     SIAtomicScope Scope,
                                     SIAtomicAddrSpace AddrSpace) const = 0;

  virtual bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                                      SIAtomicScope Scope,
                                      SIAtomicAddrSpace AddrSpace) const = 0;

  virtual bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                                    SIAtomicScope Scope,
                                    SIAtomicAddrSpace AddrSpace) const = 0;

  virtual bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                              SIAtomicAddrSpace AddrSpace,
                                              SIMemOp Op, bool IsVolatile,
                                              bool IsNonTemporal,
                                              bool IsLastUse = false) const = 0;

  virtual bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                          SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                          bool IsCrossAddrSpaceOrdering,
                          Position Pos) const = 0;

  virtual bool insertAcquire(MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace,
                             Position Pos) const = 0;

  virtual bool insertRelease(MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace,
                             bool IsCrossAddrSpaceOrdering,
                             Position Pos) const = 0;

  virtual ~SICacheControl() = default;

  virtual bool tryForceStoreSC0SC1(const SIMemOpInfo &MOI,
                                   MachineBasicBlock::iterator &MI) const {
    return false;
  }
};
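// Note (added for this excerpt, not in the original source): the
// generation-specific subclasses below implement these hooks by toggling
// cache-policy bits on the memory instruction (GLC/SLC/DLC on gfx6-gfx11,
// SC0/SC1/NT on gfx940) and by inserting the wait, cache-invalidate, and
// cache-writeback instructions that the target's memory model requires.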
class SIGfx6CacheControl : public SICacheControl {
public:
  SIGfx6CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                              SIAtomicScope Scope,
                              SIAtomicAddrSpace AddrSpace) const override;

  bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                            SIAtomicScope Scope,
                            SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering, Position Pos) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;

  bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering,
                     Position Pos) const override;
};
class SIGfx7CacheControl : public SIGfx6CacheControl {
public:
  SIGfx7CacheControl(const GCNSubtarget &ST) : SIGfx6CacheControl(ST) {}

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;
};
class SIGfx90ACacheControl : public SIGfx7CacheControl {
public:
  SIGfx90ACacheControl(const GCNSubtarget &ST) : SIGfx7CacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                              SIAtomicScope Scope,
                              SIAtomicAddrSpace AddrSpace) const override;

  bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                            SIAtomicScope Scope,
                            SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering, Position Pos) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;

  bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering,
                     Position Pos) const override;
};
class SIGfx940CacheControl : public SIGfx90ACacheControl {
public:
  SIGfx940CacheControl(const GCNSubtarget &ST) : SIGfx90ACacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                              SIAtomicScope Scope,
                              SIAtomicAddrSpace AddrSpace) const override;

  bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                            SIAtomicScope Scope,
                            SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;

  bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering,
                     Position Pos) const override;

  bool tryForceStoreSC0SC1(const SIMemOpInfo &MOI,
                           MachineBasicBlock::iterator &MI) const override {
    bool Changed = false;
    if (ST.hasForceStoreSC0SC1() &&
        (MOI.getInstrAddrSpace() & (SIAtomicAddrSpace::SCRATCH |
                                    SIAtomicAddrSpace::GLOBAL |
                                    SIAtomicAddrSpace::OTHER)) !=
            SIAtomicAddrSpace::NONE) {
      Changed |= enableSC0Bit(MI);
      Changed |= enableSC1Bit(MI);
    }
    return Changed;
  }
};
class SIGfx10CacheControl : public SIGfx7CacheControl {
public:
  SIGfx10CacheControl(const GCNSubtarget &ST) : SIGfx7CacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering, Position Pos) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;
};
class SIGfx11CacheControl : public SIGfx10CacheControl {
public:
  SIGfx11CacheControl(const GCNSubtarget &ST) : SIGfx10CacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;
};
class SIGfx12CacheControl : public SIGfx11CacheControl {
  bool setAtomicScope(const MachineBasicBlock::iterator &MI,
                      SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace) const;

public:
  SIGfx12CacheControl(const GCNSubtarget &ST) : SIGfx11CacheControl(ST) {}

  bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering, Position Pos) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering,
                     Position Pos) const override;

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override {
    return setAtomicScope(MI, Scope, AddrSpace);
  }

  bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                              SIAtomicScope Scope,
                              SIAtomicAddrSpace AddrSpace) const override {
    return setAtomicScope(MI, Scope, AddrSpace);
  }

  bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                            SIAtomicScope Scope,
                            SIAtomicAddrSpace AddrSpace) const override {
    return setAtomicScope(MI, Scope, AddrSpace);
  }
};
class SIMemoryLegalizer final : public MachineFunctionPass {
private:
  /// Cache control.
  std::unique_ptr<SICacheControl> CC = nullptr;

  /// List of atomic pseudo instructions.
  std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;

  /// Removes all processed atomic pseudo instructions from the current
  /// function.
  bool removeAtomicPseudoMIs();

  bool expandLoad(const SIMemOpInfo &MOI, MachineBasicBlock::iterator &MI);

  bool expandStore(const SIMemOpInfo &MOI, MachineBasicBlock::iterator &MI);

  bool expandAtomicFence(const SIMemOpInfo &MOI,
                         MachineBasicBlock::iterator &MI);

  bool expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
                                MachineBasicBlock::iterator &MI);

public:
  static char ID;

  SIMemoryLegalizer() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;
};
static const StringMap<SIAtomicAddrSpace> ASNames = {{
    {"global", SIAtomicAddrSpace::GLOBAL},
    {"local", SIAtomicAddrSpace::LDS},
}};

void diagnoseUnknownMMRAASName(const MachineInstr &MI, StringRef AS) {
  SmallString<128> Str;
  raw_svector_ostream OS(Str);
  OS << "unknown address space '" << AS << "'; expected one of ";
  for (const auto &[Name, Val] : ASNames)
    OS << '\'' << Name << '\'';
  // The assembled message is reported as an unsupported-feature diagnostic on
  // the function containing MI.
}

// Reads the instruction's "amdgpu-as" memory model relaxation annotations and
// returns the address spaces they name, or Default if none are present.
static SIAtomicAddrSpace getFenceAddrSpaceMMRA(const MachineInstr &MI,
                                               SIAtomicAddrSpace Default) {
  SIAtomicAddrSpace Result = SIAtomicAddrSpace::NONE;
  for (const auto &[Prefix, Suffix] : MMRA) {
    if (Prefix != FenceASPrefix)
      continue;

    if (auto It = ASNames.find(Suffix); It != ASNames.end())
      Result |= It->second;
    else
      diagnoseUnknownMMRAASName(MI, Suffix);
  }

  return (Result != SIAtomicAddrSpace::NONE) ? Result : Default;
}
void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI,
                                      const char *Msg) const {
  const Function &Func = MI->getParent()->getParent()->getFunction();
  DiagnosticInfoUnsupported Diag(Func, Msg, MI->getDebugLoc());
  Func.getContext().diagnose(Diag);
}
std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
                               SIAtomicAddrSpace InstrAddrSpace) const {
  if (SSID == SyncScope::System)
    return std::tuple(SIAtomicScope::SYSTEM, SIAtomicAddrSpace::ATOMIC, true);
  if (SSID == MMI->getAgentSSID())
    return std::tuple(SIAtomicScope::AGENT, SIAtomicAddrSpace::ATOMIC, true);
  if (SSID == MMI->getWorkgroupSSID())
    return std::tuple(SIAtomicScope::WORKGROUP, SIAtomicAddrSpace::ATOMIC,
                      true);
  if (SSID == MMI->getWavefrontSSID())
    return std::tuple(SIAtomicScope::WAVEFRONT, SIAtomicAddrSpace::ATOMIC,
                      true);
  if (SSID == SyncScope::SingleThread)
    return std::tuple(SIAtomicScope::SINGLETHREAD, SIAtomicAddrSpace::ATOMIC,
                      true);
  if (SSID == MMI->getSystemOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::SYSTEM,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getAgentOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::AGENT,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getWorkgroupOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::WORKGROUP,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getWavefrontOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::WAVEFRONT,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getSingleThreadOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::SINGLETHREAD,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  return std::nullopt;
}
SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(unsigned AS) const {
  if (AS == AMDGPUAS::FLAT_ADDRESS)
    return SIAtomicAddrSpace::FLAT;
  if (AS == AMDGPUAS::GLOBAL_ADDRESS)
    return SIAtomicAddrSpace::GLOBAL;
  if (AS == AMDGPUAS::LOCAL_ADDRESS)
    return SIAtomicAddrSpace::LDS;
  if (AS == AMDGPUAS::PRIVATE_ADDRESS)
    return SIAtomicAddrSpace::SCRATCH;
  if (AS == AMDGPUAS::REGION_ADDRESS)
    return SIAtomicAddrSpace::GDS;

  return SIAtomicAddrSpace::OTHER;
}
std::optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getNumMemOperands() > 0);

  SyncScope::ID SSID = SyncScope::SingleThread;
  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsNonTemporal = true;
  bool IsVolatile = false;
  bool IsLastUse = false;

  for (const auto &MMO : MI->memoperands()) {
    IsNonTemporal &= MMO->isNonTemporal();
    IsVolatile |= MMO->isVolatile();
    IsLastUse |= MMO->getFlags() & MOLastUse;
    InstrAddrSpace |=
        toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace());
    AtomicOrdering OpOrdering = MMO->getSuccessOrdering();
    if (OpOrdering != AtomicOrdering::NotAtomic) {
      const auto &IsSyncScopeInclusion =
          MMI->isSyncScopeInclusion(SSID, MMO->getSyncScopeID());
      if (!IsSyncScopeInclusion) {
        reportUnsupported(
            MI, "Unsupported non-inclusive atomic synchronization scope");
        return std::nullopt;
      }

      SSID = *IsSyncScopeInclusion ? SSID : MMO->getSyncScopeID();
      Ordering = getMergedAtomicOrdering(Ordering, OpOrdering);
      assert(MMO->getFailureOrdering() != AtomicOrdering::Release &&
             MMO->getFailureOrdering() != AtomicOrdering::AcquireRelease);
      FailureOrdering =
          getMergedAtomicOrdering(FailureOrdering, MMO->getFailureOrdering());
    }
  }

  SIAtomicScope Scope = SIAtomicScope::NONE;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  if (Ordering != AtomicOrdering::NotAtomic) {
    auto ScopeOrNone = toSIAtomicScope(SSID, InstrAddrSpace);
    if (!ScopeOrNone) {
      reportUnsupported(MI, "Unsupported atomic synchronization scope");
      return std::nullopt;
    }
    std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
        *ScopeOrNone;
    if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
        ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace) ||
        ((InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) == SIAtomicAddrSpace::NONE)) {
      reportUnsupported(MI, "Unsupported atomic address space");
      return std::nullopt;
    }
  }
  return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
                     IsCrossAddressSpaceOrdering, FailureOrdering, IsVolatile,
                     IsNonTemporal, IsLastUse);
}
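// Note (added for this excerpt, not in the original source): when an
// instruction carries several memory operands, the loop above merges them
// conservatively. For example, a bundle covering one global and one LDS
// access yields InstrAddrSpace == GLOBAL | LDS, the stronger of the two
// atomic orderings via getMergedAtomicOrdering, and IsNonTemporal only if
// every operand was marked non-temporal.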
std::optional<SIMemOpInfo>
SIMemOpAccess::getLoadInfo(const MachineBasicBlock::iterator &MI) const {
  if (!(MI->mayLoad() && !MI->mayStore()))
    return std::nullopt;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo();

  return constructFromMIWithMMO(MI);
}

std::optional<SIMemOpInfo>
SIMemOpAccess::getStoreInfo(const MachineBasicBlock::iterator &MI) const {
  if (!(!MI->mayLoad() && MI->mayStore()))
    return std::nullopt;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo();

  return constructFromMIWithMMO(MI);
}

std::optional<SIMemOpInfo>
SIMemOpAccess::getAtomicFenceInfo(const MachineBasicBlock::iterator &MI) const {
  if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
    return std::nullopt;

  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI->getOperand(0).getImm());
  SyncScope::ID SSID = static_cast<SyncScope::ID>(MI->getOperand(1).getImm());

  auto ScopeOrNone = toSIAtomicScope(SSID, SIAtomicAddrSpace::ATOMIC);
  if (!ScopeOrNone) {
    reportUnsupported(MI, "Unsupported atomic synchronization scope");
    return std::nullopt;
  }

  SIAtomicScope Scope = SIAtomicScope::NONE;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
      *ScopeOrNone;

  if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
      ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
    reportUnsupported(MI, "Unsupported atomic address space");
    return std::nullopt;
  }

  return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace,
                     SIAtomicAddrSpace::ATOMIC, IsCrossAddressSpaceOrdering,
                     AtomicOrdering::NotAtomic);
}

std::optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
    const MachineBasicBlock::iterator &MI) const {
  if (!(MI->mayLoad() && MI->mayStore()))
    return std::nullopt;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo();

  return constructFromMIWithMMO(MI);
}
SICacheControl::SICacheControl(const GCNSubtarget &ST) : ST(ST) {
  TII = ST.getInstrInfo();
  IV = getIsaVersion(ST.getCPU());
  InsertCacheInv = !AmdgcnSkipCacheInvalidations;
}

std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) {
  GCNSubtarget::Generation Generation = ST.getGeneration();
  if (ST.hasGFX940Insts())
    return std::make_unique<SIGfx940CacheControl>(ST);
  if (ST.hasGFX90AInsts())
    return std::make_unique<SIGfx90ACacheControl>(ST);
  if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
    return std::make_unique<SIGfx6CacheControl>(ST);
  if (Generation < AMDGPUSubtarget::GFX10)
    return std::make_unique<SIGfx7CacheControl>(ST);
  if (Generation < AMDGPUSubtarget::GFX11)
    return std::make_unique<SIGfx10CacheControl>(ST);
  if (Generation < AMDGPUSubtarget::GFX12)
    return std::make_unique<SIGfx11CacheControl>(ST);
  return std::make_unique<SIGfx12CacheControl>(ST);
}
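// Note (added for this excerpt, not in the original source): the pass keeps
// one SICacheControl per machine function, obtained in runOnMachineFunction
// roughly as
//
//   CC = SICacheControl::create(MF.getSubtarget<GCNSubtarget>());
//
// The hasGFX940Insts()/hasGFX90AInsts() checks run before the generation
// comparisons because those targets are GFX9-family parts that nevertheless
// need their own cache handling (gfx940, for instance, uses the SC0/SC1/NT
// bits instead of GLC/SLC).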
bool SIGfx6CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // Set the L1 cache policy to MISS_EVICT.
      Changed |= enableGLCBit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to bypass.
      break;
    }
  }

  return Changed;
}

bool SIGfx6CacheControl::enableStoreCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  // The L1 cache is write through, so it does not need to be bypassed; there
  // is no bypass control for the L2 cache at the ISA level.

  return Changed;
}

bool SIGfx6CacheControl::enableRMWCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  // RMW atomic operations implicitly bypass the L1 cache; their GLC bit
  // instead selects return versus no-return behavior.

  return Changed;
}
bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
  // Only loads and stores are handled here, not RMW atomics.
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsVolatile) {
    // Set the L1 cache policy to MISS_EVICT for loads; stores keep MISS_LRU.
    if (Op == SIMemOp::LOAD)
      Changed |= enableGLCBit(MI);

    // Ensure the operation completes before subsequent operations so they are
    // not re-ordered with respect to the volatile access.
    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER);
  }

  if (IsNonTemporal) {
    // Set the L1 cache policy to STREAM / MISS_EVICT.
    Changed |= enableGLCBit(MI);
    Changed |= enableSLCBit(MI);
  }

  return Changed;
}
bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                    SIAtomicScope Scope,
                                    SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                    bool IsCrossAddrSpaceOrdering,
                                    Position Pos) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  bool VMCnt = false;
  bool LGKMCnt = false;

  if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
      SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      VMCnt |= true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The L1 cache keeps memory operations in order for wavefronts in the
      // same work-group.
      break;
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
      // Without cross address space ordering no LGKMCNT wait is needed, as
      // LDS operations for all waves execute in a total global ordering
      // within the work-group.
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    }
  }

  if (VMCnt || LGKMCnt) {
    unsigned WaitCntImmediate =
        AMDGPU::encodeWaitcnt(IV, VMCnt ? 0 : getVmcntBitMask(IV),
                              getExpcntBitMask(IV),
                              LGKMCnt ? 0 : getLgkmcntBitMask(IV));
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_soft))
        .addImm(WaitCntImmediate);
    Changed = true;
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
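// Note (added for this excerpt, not in the original source): the encoded
// immediate requests vmcnt(0) when VMCnt is set, forcing all outstanding
// vector memory operations to complete, and lgkmcnt(0) when LGKMCnt is set,
// forcing outstanding LDS, GDS, and scalar memory operations to complete;
// the expcnt field is left at its "don't wait" bit mask.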
bool SIGfx6CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                       SIAtomicScope Scope,
                                       SIAtomicAddrSpace AddrSpace,
                                       Position Pos) const {
  if (!InsertCacheInv)
    return false;

  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBINVL1));
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to invalidate.
      break;
    }
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}

bool SIGfx6CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
                                       SIAtomicScope Scope,
                                       SIAtomicAddrSpace AddrSpace,
                                       bool IsCrossAddrSpaceOrdering,
                                       Position Pos) const {
  return insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
                    IsCrossAddrSpaceOrdering, Pos);
}
bool SIGfx7CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                       SIAtomicScope Scope,
                                       SIAtomicAddrSpace AddrSpace,
                                       Position Pos) const {
  if (!InsertCacheInv)
    return false;

  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();

  const GCNSubtarget &STM = MBB.getParent()->getSubtarget<GCNSubtarget>();
  const unsigned InvalidateL1 = STM.isAmdPalOS() || STM.isMesa3DOS()
                                    ? AMDGPU::BUFFER_WBINVL1
                                    : AMDGPU::BUFFER_WBINVL1_VOL;

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      BuildMI(MBB, MI, DL, TII->get(InvalidateL1));
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to invalidate.
      break;
    }
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
bool SIGfx90ACacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      Changed |= enableGLCBit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
      // In threadgroup split mode the waves of a work-group can be executing
      // on different CUs, so the per-CU L1 has to be bypassed.
      if (ST.isTgSplitEnabled())
        Changed |= enableGLCBit(MI);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to bypass.
      break;
    }
  }

  return Changed;
}

bool SIGfx90ACacheControl::enableStoreCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // Stores implicitly write through the L1 cache, so no bit needs to be
      // set.
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to bypass.
      break;
    }
  }

  return Changed;
}

bool SIGfx90ACacheControl::enableRMWCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // RMW atomic operations implicitly bypass the L1 cache.
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    }
  }

  return Changed;
}
bool SIGfx90ACacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsVolatile) {
    // Set the L1 cache policy to MISS_EVICT for loads; stores keep MISS_LRU.
    if (Op == SIMemOp::LOAD)
      Changed |= enableGLCBit(MI);

    // Ensure the operation completes before subsequent operations so they are
    // not re-ordered with respect to the volatile access.
    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER);
  }

  if (IsNonTemporal) {
    // Set the L1 and L2 cache policies to STREAM / MISS_EVICT.
    Changed |= enableGLCBit(MI);
    Changed |= enableSLCBit(MI);
  }

  return Changed;
}
bool SIGfx90ACacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                      SIAtomicScope Scope,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsCrossAddrSpaceOrdering,
                                      Position Pos) const {
  if (ST.isTgSplitEnabled()) {
    // In threadgroup split mode the waves of a work-group can be executing on
    // different CUs, so vector memory operations at work-group scope have to
    // be ordered as if they were agent scope.
    if (((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH |
                       SIAtomicAddrSpace::GDS)) != SIAtomicAddrSpace::NONE) &&
        (Scope == SIAtomicScope::WORKGROUP)) {
      Scope = SIAtomicScope::AGENT;
    }
    // LDS cannot be allocated in threadgroup split mode, so there is nothing
    // to wait for in that address space.
    AddrSpace &= ~SIAtomicAddrSpace::LDS;
  }
  return SIGfx7CacheControl::insertWait(MI, Scope, AddrSpace, Op,
                                        IsCrossAddrSpaceOrdering, Pos);
}
bool SIGfx90ACacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace,
                                         Position Pos) const {
  if (!InsertCacheInv)
    return false;

  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // Invalidate the L2 cache so following loads do not see stale data.
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INVL2));
      Changed = true;
      break;
    case SIAtomicScope::AGENT:
      // Handled below by the GFX7 L1 invalidate.
      break;
    case SIAtomicScope::WORKGROUP:
      // In threadgroup split mode the waves of a work-group can be executing
      // on different CUs, so the per-CU L1 also has to be invalidated; treat
      // the scope as agent for the call below.
      if (ST.isTgSplitEnabled()) {
        Scope = SIAtomicScope::AGENT;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to invalidate.
      break;
    }
  }

  if (Pos == Position::AFTER)
    --MI;

  Changed |= SIGfx7CacheControl::insertAcquire(MI, Scope, AddrSpace, Pos);

  return Changed;
}
bool SIGfx90ACacheControl::insertRelease(MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace,
                                         bool IsCrossAddrSpaceOrdering,
                                         Position Pos) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // Write back dirty L2 lines so earlier writes become visible to other
      // agents before the release completes.
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2));
      Changed = true;
      break;
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // Handled by the waits inserted below.
      break;
    }
  }

  if (Pos == Position::AFTER)
    --MI;

  Changed |= SIGfx7CacheControl::insertRelease(MI, Scope, AddrSpace,
                                               IsCrossAddrSpaceOrdering, Pos);

  return Changed;
}
bool SIGfx940CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // Set SC bits to indicate system scope.
      Changed |= enableSC0Bit(MI);
      Changed |= enableSC1Bit(MI);
      break;
    case SIAtomicScope::AGENT:
      // Set SC bits to indicate agent scope.
      Changed |= enableSC1Bit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
      // Set SC bits to indicate work-group scope, which bypasses the per-CU
      // L1 when threadgroup split mode places waves on different CUs.
      Changed |= enableSC0Bit(MI);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // Leave SC bits unset to indicate CU scope.
      break;
    }
  }

  return Changed;
}

bool SIGfx940CacheControl::enableStoreCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // Set SC bits to indicate system scope.
      Changed |= enableSC0Bit(MI);
      Changed |= enableSC1Bit(MI);
      break;
    case SIAtomicScope::AGENT:
      // Set SC bits to indicate agent scope.
      Changed |= enableSC1Bit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
      // Set SC bits to indicate work-group scope.
      Changed |= enableSC0Bit(MI);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // Leave SC bits unset to indicate CU scope.
      break;
    }
  }

  return Changed;
}

bool SIGfx940CacheControl::enableRMWCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // RMW atomics implicitly bypass the L1 cache; only SC1 is used to
      // request system scope, since SC0 indicates return versus no-return.
      Changed |= enableSC1Bit(MI);
      break;
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    }
  }

  return Changed;
}
bool SIGfx940CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsVolatile) {
    // Set SC bits to indicate system scope.
    Changed |= enableSC0Bit(MI);
    Changed |= enableSC1Bit(MI);

    // Ensure the operation completes before subsequent operations so they are
    // not re-ordered with respect to the volatile access.
    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER);
  }

  if (IsNonTemporal) {
    Changed |= enableNTBit(MI);
  }

  return Changed;
}
bool SIGfx940CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace,
                                         Position Pos) const {
  if (!InsertCacheInv)
    return false;

  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // Invalidate to system scope so following loads do not see stale data.
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INV))
          .addImm(AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1);
      Changed = true;
      break;
    case SIAtomicScope::AGENT:
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INV))
          .addImm(AMDGPU::CPol::SC1);
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
      // In threadgroup split mode the waves of a work-group can be executing
      // on different CUs, so the per-CU L1 has to be invalidated.
      if (ST.isTgSplitEnabled()) {
        BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INV))
            .addImm(AMDGPU::CPol::SC0);
        Changed = true;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to invalidate.
      break;
    }
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
bool SIGfx940CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace,
                                         bool IsCrossAddrSpaceOrdering,
                                         Position Pos) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // Write back dirty lines of both cache levels so earlier writes are
      // visible to the whole system before the release completes.
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2))
          .addImm(AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1);
      Changed = true;
      break;
    case SIAtomicScope::AGENT:
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2))
          .addImm(AMDGPU::CPol::SC1);
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // Handled by the waits inserted below.
      break;
    }
  }

  if (Pos == Position::AFTER)
    --MI;

  // Ensure the waits needed by any writeback, as well as by other memory
  // operations, are inserted.
  Changed |= insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
                        IsCrossAddrSpaceOrdering, Pos);

  return Changed;
}
bool SIGfx10CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // Set the L0 and L1 cache policies to MISS_EVICT.
      Changed |= enableGLCBit(MI);
      Changed |= enableDLCBit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU
      // of the WGP, so the per-CU L0 has to be bypassed. In CU mode all waves
      // of a work-group are on the same CU and no bypass is needed.
      if (!ST.isCuModeEnabled())
        Changed |= enableGLCBit(MI);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to bypass.
      break;
    }
  }

  return Changed;
}

bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsVolatile) {
    // Set the L0 and L1 cache policies to MISS_EVICT for loads; stores keep
    // MISS_LRU.
    if (Op == SIMemOp::LOAD) {
      Changed |= enableGLCBit(MI);
      Changed |= enableDLCBit(MI);
    }

    // Ensure the operation completes before subsequent operations so they are
    // not re-ordered with respect to the volatile access.
    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER);
  }

  if (IsNonTemporal) {
    // For loads, SLC selects the HIT_EVICT / STREAM policies; stores
    // additionally set GLC.
    if (Op == SIMemOp::STORE)
      Changed |= enableGLCBit(MI);
    Changed |= enableSLCBit(MI);
  }

  return Changed;
}
bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                     SIAtomicScope Scope,
                                     SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                     bool IsCrossAddrSpaceOrdering,
                                     Position Pos) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  bool VMCnt = false;
  bool VSCnt = false;
  bool LGKMCnt = false;

  if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
      SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
        VMCnt |= true;
      if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
        VSCnt |= true;
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU
      // of the WGP, so waits are needed. In CU mode the L0 keeps operations
      // of waves on the same CU in order.
      if (!ST.isCuModeEnabled()) {
        if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
          VMCnt |= true;
        if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
          VSCnt |= true;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
      // Without cross address space ordering no LGKMCNT wait is needed, as
      // LDS operations for all waves execute in a total global ordering
      // within the work-group.
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    }
  }

  if (VMCnt || LGKMCnt) {
    unsigned WaitCntImmediate =
        AMDGPU::encodeWaitcnt(IV, VMCnt ? 0 : getVmcntBitMask(IV),
                              getExpcntBitMask(IV),
                              LGKMCnt ? 0 : getLgkmcntBitMask(IV));
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_soft))
        .addImm(WaitCntImmediate);
    Changed = true;
  }

  if (VSCnt) {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_VSCNT_soft))
        .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
        .addImm(0);
    Changed = true;
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
bool SIGfx10CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                        SIAtomicScope Scope,
                                        SIAtomicAddrSpace AddrSpace,
                                        Position Pos) const {
  if (!InsertCacheInv)
    return false;

  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // Invalidate "outer in" (L1 before L0) so the L0 does not pull stale
      // data back in from the L1.
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL1_INV));
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU
      // of the WGP, so the per-CU L0 has to be invalidated.
      if (!ST.isCuModeEnabled()) {
        BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
        Changed = true;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to invalidate.
      break;
    }
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
bool SIGfx11CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // Set the L0 and L1 cache policies to MISS_EVICT.
      Changed |= enableGLCBit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU
      // of the WGP, so the per-CU L0 has to be bypassed. In CU mode all waves
      // of a work-group are on the same CU and no bypass is needed.
      if (!ST.isCuModeEnabled())
        Changed |= enableGLCBit(MI);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to bypass.
      break;
    }
  }

  return Changed;
}
bool SIGfx11CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsVolatile) {
    // Set the L0 and L1 cache policies to MISS_EVICT for loads; stores keep
    // MISS_LRU.
    if (Op == SIMemOp::LOAD)
      Changed |= enableGLCBit(MI);

    // Set MALL NOALLOC for load and store instructions.
    Changed |= enableDLCBit(MI);

    // Ensure the operation completes before subsequent operations so they are
    // not re-ordered with respect to the volatile access.
    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER);
  }

  if (IsNonTemporal) {
    // For loads, SLC selects the HIT_EVICT / STREAM policies; stores
    // additionally set GLC.
    if (Op == SIMemOp::STORE)
      Changed |= enableGLCBit(MI);
    Changed |= enableSLCBit(MI);

    // Set MALL NOALLOC for load and store instructions.
    Changed |= enableDLCBit(MI);
  }

  return Changed;
}
bool SIGfx12CacheControl::insertWaitsBeforeSystemScopeStore(
    const MachineBasicBlock::iterator MI) const {
  // Emits the wait instructions required before a system-scope store.
}

bool SIGfx12CacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                     SIAtomicScope Scope,
                                     SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                     bool IsCrossAddrSpaceOrdering,
                                     Position Pos) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();

  bool LOADCnt = false;
  bool DSCnt = false;
  bool STORECnt = false;

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
      SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
        LOADCnt |= true;
      if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
        STORECnt |= true;
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU
      // of the WGP, so waits are needed. In CU mode the L0 keeps operations
      // of waves on the same CU in order.
      if (!ST.isCuModeEnabled()) {
        if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
          LOADCnt |= true;
        if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
          STORECnt |= true;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
      // Without cross address space ordering no DSCNT wait is needed, as LDS
      // operations for all waves execute in a total global ordering within
      // the work-group.
      DSCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    }
  }

  // The corresponding wait-count instructions (loadcnt, storecnt, dscnt) are
  // emitted here based on LOADCnt, STORECnt, and DSCnt.

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
bool SIGfx12CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                        SIAtomicScope Scope,
                                        SIAtomicAddrSpace AddrSpace,
                                        Position Pos) const {
  if (!InsertCacheInv)
    return false;

  MachineBasicBlock &MBB = *MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();

  // Other address spaces do not have a cache.
  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) == SIAtomicAddrSpace::NONE)
    return false;

  AMDGPU::CPol::CPol ScopeImm = AMDGPU::CPol::SCOPE_DEV;
  switch (Scope) {
  case SIAtomicScope::SYSTEM:
    ScopeImm = AMDGPU::CPol::SCOPE_SYS;
    break;
  case SIAtomicScope::AGENT:
    ScopeImm = AMDGPU::CPol::SCOPE_DEV;
    break;
  case SIAtomicScope::WORKGROUP:
    // In WGP mode the waves of a work-group can be executing on either CU of
    // the WGP, so the per-CU L0 has to be invalidated. In CU mode it does not.
    if (ST.isCuModeEnabled())
      return false;
    ScopeImm = AMDGPU::CPol::SCOPE_SE;
    break;
  case SIAtomicScope::WAVEFRONT:
  case SIAtomicScope::SINGLETHREAD:
    // No cache to invalidate.
    return false;
  }

  if (Pos == Position::AFTER)
    ++MI;

  BuildMI(MBB, MI, DL, TII->get(AMDGPU::GLOBAL_INV)).addImm(ScopeImm);

  if (Pos == Position::AFTER)
    --MI;

  return true;
}
bool SIGfx12CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
                                        SIAtomicScope Scope,
                                        SIAtomicAddrSpace AddrSpace,
                                        bool IsCrossAddrSpaceOrdering,
                                        Position Pos) const {
  MachineBasicBlock &MBB = *MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();

  // Other address spaces do not have a cache.
  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) == SIAtomicAddrSpace::NONE)
    return false;

  if (Pos == Position::AFTER)
    ++MI;

  bool SkipWB = false;
  AMDGPU::CPol::CPol ScopeImm = AMDGPU::CPol::SCOPE_DEV;
  switch (Scope) {
  case SIAtomicScope::SYSTEM:
    ScopeImm = AMDGPU::CPol::SCOPE_SYS;
    break;
  case SIAtomicScope::AGENT:
    ScopeImm = AMDGPU::CPol::SCOPE_DEV;
    break;
  case SIAtomicScope::WORKGROUP:
    // In WGP mode writes must reach the L1 shared by both CUs, so a
    // SCOPE_SE writeback is needed; in CU mode the wait alone is enough.
    if (ST.isCuModeEnabled())
      SkipWB = true;
    else
      ScopeImm = AMDGPU::CPol::SCOPE_SE;
    break;
  case SIAtomicScope::WAVEFRONT:
  case SIAtomicScope::SINGLETHREAD:
    // No cache to write back.
    SkipWB = true;
    break;
  }

  if (!SkipWB)
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::GLOBAL_WB)).addImm(ScopeImm);

  if (Pos == Position::AFTER)
    --MI;

  // The release also needs the waits for preceding memory operations.
  insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
             IsCrossAddrSpaceOrdering, Pos);

  return true;
}
bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsLastUse) {
    // Set last-use temporal hint.
    Changed |= setTH(MI, AMDGPU::CPol::TH_LU);
  } else if (IsNonTemporal) {
    // Set non-temporal hint for all cache levels.
    Changed |= setTH(MI, AMDGPU::CPol::TH_NT);
  }

  if (IsVolatile) {
    Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SYS);

    if (Op == SIMemOp::STORE)
      Changed |= insertWaitsBeforeSystemScopeStore(MI);

    // Ensure the operation completes before subsequent operations so they are
    // not re-ordered with respect to the volatile access.
    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER);
  }

  return Changed;
}
bool SIGfx12CacheControl::expandSystemScopeStore(
    MachineBasicBlock::iterator &MI) const {
  MachineOperand *CPol = TII->getNamedOperand(*MI, AMDGPU::OpName::cpol);
  if (CPol && ((CPol->getImm() & AMDGPU::CPol::SCOPE) == AMDGPU::CPol::SCOPE_SYS))
    return insertWaitsBeforeSystemScopeStore(MI);

  return false;
}

bool SIGfx12CacheControl::setAtomicScope(const MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SYS);
      break;
    case SIAtomicScope::AGENT:
      Changed |= setScope(MI, AMDGPU::CPol::SCOPE_DEV);
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on different
      // CUs that access different L0s, so SCOPE_SE is required.
      if (!ST.isCuModeEnabled())
        Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SE);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to bypass.
      break;
    }
  }

  return Changed;
}
bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
  if (AtomicPseudoMIs.empty())
    return false;

  for (auto &MI : AtomicPseudoMIs)
    MI->eraseFromParent();

  AtomicPseudoMIs.clear();
  return true;
}
bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
                                   MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && !MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
        MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->enableLoadCacheBypass(MI, MOI.getScope(),
                                           MOI.getOrderingAddrSpace());
    }

    if (MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertWait(MI, MOI.getScope(), MOI.getOrderingAddrSpace(),
                                SIMemOp::LOAD | SIMemOp::STORE,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::BEFORE);

    if (MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->insertWait(MI, MOI.getScope(), MOI.getInstrAddrSpace(),
                                SIMemOp::LOAD,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::AFTER);
      Changed |= CC->insertAcquire(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   Position::AFTER);
    }

    return Changed;
  }

  // Atomic instructions already bypass caches to the scope specified by the
  // SyncScope operand; only non-atomic volatile, non-temporal, and last-use
  // loads need additional treatment.
  Changed |= CC->enableVolatileAndOrNonTemporal(
      MI, MOI.getInstrAddrSpace(), SIMemOp::LOAD, MOI.isVolatile(),
      MOI.isNonTemporal(), MOI.isLastUse());

  return Changed;
}
bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
                                    MachineBasicBlock::iterator &MI) {
  assert(!MI->mayLoad() && MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
        MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->enableStoreCacheBypass(MI, MOI.getScope(),
                                            MOI.getOrderingAddrSpace());
    }

    if (MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertRelease(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   MOI.getIsCrossAddressSpaceOrdering(),
                                   Position::BEFORE);

    return Changed;
  }

  // Atomic instructions already bypass caches to the scope specified by the
  // SyncScope operand.
  Changed |= CC->enableVolatileAndOrNonTemporal(
      MI, MOI.getInstrAddrSpace(), SIMemOp::STORE, MOI.isVolatile(),
      MOI.isNonTemporal());

  // GFX12 specific: a store whose cache-policy scope is system may need
  // additional waits inserted first.
  Changed |= CC->expandSystemScopeStore(MI);

  return Changed;
}
bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
                                          MachineBasicBlock::iterator &MI) {
  assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);

  AtomicPseudoMIs.push_back(MI);
  bool Changed = false;

  // Refine the fenced address space based on any "amdgpu-as" MMRAs.
  auto OrderingAddrSpace =
      getFenceAddrSpaceMMRA(*MI, MOI.getOrderingAddrSpace());

  if (MOI.isAtomic()) {
    if (MOI.getOrdering() == AtomicOrdering::Acquire)
      Changed |= CC->insertWait(
          MI, MOI.getScope(), OrderingAddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
          MOI.getIsCrossAddressSpaceOrdering(), Position::BEFORE);

    if (MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertRelease(MI, MOI.getScope(), OrderingAddrSpace,
                                   MOI.getIsCrossAddressSpaceOrdering(),
                                   Position::BEFORE);

    if (MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertAcquire(MI, MOI.getScope(), OrderingAddrSpace,
                                   Position::BEFORE);

    return Changed;
  }

  return Changed;
}
bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(
    const SIMemOpInfo &MOI, MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
        MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->enableRMWCacheBypass(MI, MOI.getScope(),
                                          MOI.getInstrAddrSpace());
    }

    if (MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
        MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertRelease(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   MOI.getIsCrossAddressSpaceOrdering(),
                                   Position::BEFORE);

    if (MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
        MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
        MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->insertWait(MI, MOI.getScope(), MOI.getInstrAddrSpace(),
                                isAtomicRet(*MI) ? SIMemOp::LOAD
                                                 : SIMemOp::STORE,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::AFTER);
      Changed |= CC->insertAcquire(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   Position::AFTER);
    }

    return Changed;
  }

  return Changed;
}
bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;

  const MachineModuleInfo &MMI =
      getAnalysis<MachineModuleInfoWrapperPass>().getMMI();

  SIMemOpAccess MOA(MMI.getObjFileInfo<AMDGPUMachineModuleInfo>());
  CC = SICacheControl::create(MF.getSubtarget<GCNSubtarget>());

  for (auto &MBB : MF) {
    for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {

      // Unbundle memory instructions so each one can be legalized
      // individually.
      if (MI->isBundle() && MI->mayLoadOrStore()) {
        MachineBasicBlock::instr_iterator II(MI->getIterator());
        for (MachineBasicBlock::instr_iterator I = ++II, E = MBB.instr_end();
             I != E && I->isBundledWithPred(); ++I) {
          I->unbundleFromPred();
          for (MachineOperand &MO : I->operands())
            if (MO.isReg())
              MO.setIsInternalRead(false);
        }

        MI->eraseFromParent();
        MI = II->getIterator();
      }

      if (const auto &MOI = MOA.getLoadInfo(MI))
        Changed |= expandLoad(*MOI, MI);
      else if (const auto &MOI = MOA.getStoreInfo(MI)) {
        Changed |= expandStore(*MOI, MI);
        Changed |= CC->tryForceStoreSC0SC1(*MOI, MI);
      } else if (const auto &MOI = MOA.getAtomicFenceInfo(MI))
        Changed |= expandAtomicFence(*MOI, MI);
      else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI))
        Changed |= expandAtomicCmpxchgOrRmw(*MOI, MI);
    }
  }

  Changed |= removeAtomicPseudoMIs();
  return Changed;
}
char SIMemoryLegalizer::ID = 0;
char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID;

FunctionPass *llvm::createSIMemoryLegalizerPass() {
  return new SIMemoryLegalizer();
}