#define DEBUG_TYPE "si-memory-legalizer"
#define PASS_NAME "SI Memory Legalizer"

static cl::opt<bool> AmdgcnSkipCacheInvalidations(
    "amdgcn-skip-cache-invalidations", cl::init(false), cl::Hidden,
    cl::desc("Use this to skip inserting cache invalidating instructions."));
enum class SIAtomicScope {
  NONE,
  SINGLETHREAD,
  WAVEFRONT,
  WORKGROUP,
  AGENT,
  SYSTEM
};

// Bitmask of the address spaces an instruction may access: SCRATCH, LDS, GDS,
// GLOBAL and OTHER, plus the FLAT, ATOMIC and ALL unions used below. The
// enumerator definitions and the LLVM_MARK_AS_BITMASK_ENUM marker are elided
// in this excerpt.
enum class SIAtomicAddrSpace {
  // ...
};

// Memory operation kinds handled by the legalizer; a bitmask so LOAD | STORE
// can be requested together.
enum class SIMemOp {
  NONE = 0u,
  LOAD = 1u << 0,
  STORE = 1u << 1,
  LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ STORE)
};

// Position to insert a new instruction relative to an existing instruction.
enum class Position {
  BEFORE,
  AFTER
};
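// Illustrative sketch (not part of the original pass): SIAtomicAddrSpace is a
// bitmask enum, so "does this access touch address space X" is spelled as an
// AND compared against NONE throughout this file. `touchesGlobal` is a
// hypothetical helper used only for illustration.
[[maybe_unused]] static bool touchesGlobal(SIAtomicAddrSpace AS) {
  return (AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE;
}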
class SIMemOpInfo final {
private:
  friend class SIMemOpAccess;

  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
  SIAtomicScope Scope = SIAtomicScope::SYSTEM;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  bool IsVolatile = false;
  bool IsNonTemporal = false;
  bool IsLastUse = false;

  SIMemOpInfo(
      AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
      SIAtomicScope Scope = SIAtomicScope::SYSTEM,
      SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC,
      SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::ALL,
      bool IsCrossAddressSpaceOrdering = true,
      AtomicOrdering FailureOrdering = AtomicOrdering::SequentiallyConsistent,
      bool IsVolatile = false, bool IsNonTemporal = false,
      bool IsLastUse = false)
      : Ordering(Ordering), FailureOrdering(FailureOrdering), Scope(Scope),
        OrderingAddrSpace(OrderingAddrSpace), InstrAddrSpace(InstrAddrSpace),
        IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
        IsVolatile(IsVolatile), IsNonTemporal(IsNonTemporal),
        IsLastUse(IsLastUse) {
    if (Ordering == AtomicOrdering::NotAtomic) {
      assert(Scope == SIAtomicScope::NONE &&
             OrderingAddrSpace == SIAtomicAddrSpace::NONE &&
             !IsCrossAddressSpaceOrdering &&
             FailureOrdering == AtomicOrdering::NotAtomic);
      return;
    }

    assert(Scope != SIAtomicScope::NONE &&
           (OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
               SIAtomicAddrSpace::NONE &&
           (InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
               SIAtomicAddrSpace::NONE);

    // There is no cross-address-space ordering if the ordering address space
    // equals the instruction address space and contains a single address
    // space.
    if ((OrderingAddrSpace == InstrAddrSpace) &&
        isPowerOf2_32(uint32_t(InstrAddrSpace)))
      this->IsCrossAddressSpaceOrdering = false;

    // Limit the scope to the maximum supported by the instruction's address
    // spaces.
    if ((InstrAddrSpace & ~SIAtomicAddrSpace::SCRATCH) ==
        SIAtomicAddrSpace::NONE) {
      this->Scope = std::min(Scope, SIAtomicScope::SINGLETHREAD);
    } else if ((InstrAddrSpace &
                ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS)) ==
               SIAtomicAddrSpace::NONE) {
      this->Scope = std::min(Scope, SIAtomicScope::WORKGROUP);
    } else if ((InstrAddrSpace &
                ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS |
                  SIAtomicAddrSpace::GDS)) == SIAtomicAddrSpace::NONE) {
      this->Scope = std::min(Scope, SIAtomicScope::AGENT);
    }
  }
public:
  SIAtomicScope getScope() const { return Scope; }

  AtomicOrdering getOrdering() const { return Ordering; }

  AtomicOrdering getFailureOrdering() const { return FailureOrdering; }

  SIAtomicAddrSpace getInstrAddrSpace() const { return InstrAddrSpace; }

  SIAtomicAddrSpace getOrderingAddrSpace() const { return OrderingAddrSpace; }

  bool getIsCrossAddressSpaceOrdering() const {
    return IsCrossAddressSpaceOrdering;
  }

  bool isVolatile() const { return IsVolatile; }

  bool isNonTemporal() const { return IsNonTemporal; }

  bool isLastUse() const { return IsLastUse; }

  bool isAtomic() const { return Ordering != AtomicOrdering::NotAtomic; }
};
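// Illustrative sketch (not part of the original pass): the constructor above
// clamps the requested scope to what the accessed address spaces can actually
// observe; restated here as a standalone helper. `clampScopeSketch` is a
// hypothetical name used only for illustration.
[[maybe_unused]] static SIAtomicScope
clampScopeSketch(SIAtomicScope Scope, SIAtomicAddrSpace InstrAddrSpace) {
  if ((InstrAddrSpace & ~SIAtomicAddrSpace::SCRATCH) == SIAtomicAddrSpace::NONE)
    return std::min(Scope, SIAtomicScope::SINGLETHREAD);
  if ((InstrAddrSpace &
       ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS)) ==
      SIAtomicAddrSpace::NONE)
    return std::min(Scope, SIAtomicScope::WORKGROUP);
  if ((InstrAddrSpace & ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS |
                          SIAtomicAddrSpace::GDS)) == SIAtomicAddrSpace::NONE)
    return std::min(Scope, SIAtomicScope::AGENT);
  return Scope;
}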
class SIMemOpAccess final {
private:
  const AMDGPUMachineModuleInfo *MMI = nullptr;

  /// Reports unsupported message \p Msg for \p MI to the LLVM context.
  void reportUnsupported(const MachineBasicBlock::iterator &MI,
                         const char *Msg) const;

  /// Determines the SI atomic scope, the ordered address spaces, and whether
  /// cross-address-space ordering is required for synchronization scope
  /// \p SSID on an instruction accessing \p InstrAddrSpace.
  std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
  toSIAtomicScope(SyncScope::ID SSID, SIAtomicAddrSpace InstrAddrSpace) const;

  /// Maps an LLVM IR address space \p AS to a SIAtomicAddrSpace bit set.
  SIAtomicAddrSpace toSIAtomicAddrSpace(unsigned AS) const;

  /// Constructs info from an instruction that has at least one machine memory
  /// operand.
  std::optional<SIMemOpInfo>
  constructFromMIWithMMO(const MachineBasicBlock::iterator &MI) const;

public:
  std::optional<SIMemOpInfo>
  getLoadInfo(const MachineBasicBlock::iterator &MI) const;

  std::optional<SIMemOpInfo>
  getStoreInfo(const MachineBasicBlock::iterator &MI) const;

  std::optional<SIMemOpInfo>
  getAtomicFenceInfo(const MachineBasicBlock::iterator &MI) const;

  std::optional<SIMemOpInfo>
  getAtomicCmpxchgOrRmwInfo(const MachineBasicBlock::iterator &MI) const;
};
class SICacheControl {
protected:
  /// AMDGPU subtarget info.
  const GCNSubtarget &ST;

  /// Instruction info.
  const SIInstrInfo *TII = nullptr;

  AMDGPU::IsaVersion IV;

  /// Whether to insert cache invalidating instructions.
  bool InsertCacheInv;

  SICacheControl(const GCNSubtarget &ST);

  // ... (named-bit manipulation helpers are elided in this excerpt)

public:
  static std::unique_ptr<SICacheControl> create(const GCNSubtarget &ST);

  virtual bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                                     SIAtomicScope Scope,
                                     SIAtomicAddrSpace AddrSpace) const = 0;

  virtual bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                                      SIAtomicScope Scope,
                                      SIAtomicAddrSpace AddrSpace) const = 0;

  virtual bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                                    SIAtomicScope Scope,
                                    SIAtomicAddrSpace AddrSpace) const = 0;

  virtual bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                              SIAtomicAddrSpace AddrSpace,
                                              SIMemOp Op, bool IsVolatile,
                                              bool IsNonTemporal,
                                              bool IsLastUse = false) const = 0;

  virtual bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                          SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                          bool IsCrossAddrSpaceOrdering, Position Pos,
                          AtomicOrdering Order) const = 0;

  virtual bool insertAcquire(MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace,
                             Position Pos) const = 0;

  virtual bool insertRelease(MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace,
                             bool IsCrossAddrSpaceOrdering,
                             Position Pos) const = 0;

  virtual bool expandSystemScopeStore(MachineBasicBlock::iterator &MI) const {
    return false;
  }

  virtual ~SICacheControl() = default;

  virtual bool tryForceStoreSC0SC1(const SIMemOpInfo &MOI,
                                   MachineBasicBlock::iterator &MI) const {
    return false;
  }
};
class SIGfx6CacheControl : public SICacheControl {
protected:
  // ... (GLC/SLC bit helpers are elided in this excerpt)

public:
  SIGfx6CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                              SIAtomicScope Scope,
                              SIAtomicAddrSpace AddrSpace) const override;

  bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                            SIAtomicScope Scope,
                            SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering, Position Pos,
                  AtomicOrdering Order) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;

  bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace,
                     bool IsCrossAddrSpaceOrdering,
                     Position Pos) const override;
};
class SIGfx7CacheControl : public SIGfx6CacheControl {
public:
  SIGfx7CacheControl(const GCNSubtarget &ST) : SIGfx6CacheControl(ST) {}

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;
};
class SIGfx90ACacheControl : public SIGfx7CacheControl {
public:
  SIGfx90ACacheControl(const GCNSubtarget &ST) : SIGfx7CacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                              SIAtomicScope Scope,
                              SIAtomicAddrSpace AddrSpace) const override;

  bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                            SIAtomicScope Scope,
                            SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering, Position Pos,
                  AtomicOrdering Order) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;

  bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace,
                     bool IsCrossAddrSpaceOrdering,
                     Position Pos) const override;
};
class SIGfx940CacheControl : public SIGfx90ACacheControl {
protected:
  // ... (SC0/SC1/NT bit helpers are elided in this excerpt)

public:
  SIGfx940CacheControl(const GCNSubtarget &ST) : SIGfx90ACacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                              SIAtomicScope Scope,
                              SIAtomicAddrSpace AddrSpace) const override;

  bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                            SIAtomicScope Scope,
                            SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;

  bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering,
                     Position Pos) const override;

  bool tryForceStoreSC0SC1(const SIMemOpInfo &MOI,
                           MachineBasicBlock::iterator &MI) const override {
    bool Changed = false;
    if (ST.hasForceStoreSC0SC1() &&
        (MOI.getInstrAddrSpace() & (SIAtomicAddrSpace::SCRATCH |
                                    SIAtomicAddrSpace::GLOBAL |
                                    SIAtomicAddrSpace::OTHER)) !=
            SIAtomicAddrSpace::NONE) {
      Changed |= enableSC0Bit(MI);
      Changed |= enableSC1Bit(MI);
    }
    return Changed;
  }
};
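// Illustrative sketch (not part of the original pass): the address-space
// filter used by tryForceStoreSC0SC1 above, restated as a standalone helper.
// Only stores that may touch the scratch, global, or "other" address spaces
// are forced to SC0/SC1. `mayNeedForcedSC0SC1Sketch` is a hypothetical name
// used only for illustration.
[[maybe_unused]] static bool mayNeedForcedSC0SC1Sketch(const SIMemOpInfo &MOI) {
  return (MOI.getInstrAddrSpace() &
          (SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::GLOBAL |
           SIAtomicAddrSpace::OTHER)) != SIAtomicAddrSpace::NONE;
}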
class SIGfx10CacheControl : public SIGfx7CacheControl {
protected:
  // ... (DLC bit helper is elided in this excerpt)

public:
  SIGfx10CacheControl(const GCNSubtarget &ST) : SIGfx7CacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering, Position Pos,
                  AtomicOrdering Order) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;
};
class SIGfx11CacheControl : public SIGfx10CacheControl {
public:
  SIGfx11CacheControl(const GCNSubtarget &ST) : SIGfx10CacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;
};
class SIGfx12CacheControl : public SIGfx11CacheControl {
protected:
  // ... (TH/scope CPol helpers and insertWaitsBeforeSystemScopeStore are
  // declared here; elided in this excerpt)

  /// Set the scope CPol operand of \p MI to the smallest scope that covers
  /// \p Scope for the address spaces \p AddrSpace.
  bool setAtomicScope(const MachineBasicBlock::iterator &MI,
                      SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace) const;

public:
  SIGfx12CacheControl(const GCNSubtarget &ST) : SIGfx11CacheControl(ST) {}

  bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering, Position Pos,
                  AtomicOrdering Order) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering,
                     Position Pos) const override;

  bool expandSystemScopeStore(MachineBasicBlock::iterator &MI) const override;

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override {
    return setAtomicScope(MI, Scope, AddrSpace);
  }

  bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                              SIAtomicScope Scope,
                              SIAtomicAddrSpace AddrSpace) const override {
    return setAtomicScope(MI, Scope, AddrSpace);
  }

  bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                            SIAtomicScope Scope,
                            SIAtomicAddrSpace AddrSpace) const override {
    return setAtomicScope(MI, Scope, AddrSpace);
  }
};
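// Illustrative sketch (not part of the original pass): a future subtarget
// would plug into this hierarchy by overriding only the hooks whose behavior
// changes and inheriting the rest. `SIGfxNextCacheControl` is a hypothetical
// class used only for illustration.
class SIGfxNextCacheControl : public SIGfx12CacheControl {
public:
  SIGfxNextCacheControl(const GCNSubtarget &ST) : SIGfx12CacheControl(ST) {}

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace,
                     Position Pos) const override {
    // Fall back to the GFX12 behavior in this sketch.
    return SIGfx12CacheControl::insertAcquire(MI, Scope, AddrSpace, Pos);
  }
};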
class SIMemoryLegalizer final : public MachineFunctionPass {
private:
  /// Cache control.
  std::unique_ptr<SICacheControl> CC = nullptr;

  /// List of atomic pseudo instructions.
  std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;

  /// Removes all processed atomic pseudo instructions.
  bool removeAtomicPseudoMIs();

  /// Expands load operation \p MI.
  bool expandLoad(const SIMemOpInfo &MOI, MachineBasicBlock::iterator &MI);

  /// Expands store operation \p MI.
  bool expandStore(const SIMemOpInfo &MOI, MachineBasicBlock::iterator &MI);

  /// Expands atomic fence operation \p MI.
  bool expandAtomicFence(const SIMemOpInfo &MOI,
                         MachineBasicBlock::iterator &MI);

  /// Expands atomic cmpxchg or rmw operation \p MI.
  bool expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
                                MachineBasicBlock::iterator &MI);

public:
  static char ID;

  SIMemoryLegalizer() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return PASS_NAME; }

  bool runOnMachineFunction(MachineFunction &MF) override;
};
static const StringMap<SIAtomicAddrSpace> ASNames = {{
    {"global", SIAtomicAddrSpace::GLOBAL},
    {"local", SIAtomicAddrSpace::LDS},
}};

void diagnoseUnknownMMRAASName(const MachineInstr &MI, StringRef AS) {
  const Function &Fn = MI.getMF()->getFunction();
  SmallString<128> Str;
  raw_svector_ostream OS(Str);
  OS << "unknown address space '" << AS << "'; expected one of ";
  ListSeparator LS;
  for (const auto &[Name, Val] : ASNames)
    OS << LS << '\'' << Name << '\'';
  DiagnosticInfoUnsupported BadTag(Fn, Str.str(), MI.getDebugLoc(), DS_Warning);
  Fn.getContext().diagnose(BadTag);
}
static SIAtomicAddrSpace getFenceAddrSpaceMMRA(const MachineInstr &MI,
                                               SIAtomicAddrSpace Default) {
  // ... (the FenceASPrefix constant and the MMRA metadata lookup are elided
  // in this excerpt)

  SIAtomicAddrSpace Result = SIAtomicAddrSpace::NONE;
  for (const auto &[Prefix, Suffix] : MMRA) {
    if (Prefix != FenceASPrefix)
      continue;

    if (auto It = ASNames.find(Suffix); It != ASNames.end())
      Result |= It->second;
    else
      diagnoseUnknownMMRAASName(MI, Suffix);
  }

  return (Result != SIAtomicAddrSpace::NONE) ? Result : Default;
}
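// Illustrative sketch (not part of the original pass): MMRA suffixes resolve
// through the ASNames table above, so a fence tagged with the "local" suffix
// only orders the LDS address space. `lookupLocalSketch` is a hypothetical
// helper used only for illustration.
[[maybe_unused]] static SIAtomicAddrSpace lookupLocalSketch() {
  return ASNames.lookup("local"); // yields SIAtomicAddrSpace::LDS
}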
void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI,
                                      const char *Msg) const {
  const Function &Func = MI->getParent()->getParent()->getFunction();
  DiagnosticInfoUnsupported Diag(Func, Msg, MI->getDebugLoc());
  Func.getContext().diagnose(Diag);
}
std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
                               SIAtomicAddrSpace InstrAddrSpace) const {
  if (SSID == SyncScope::System)
    return std::tuple(SIAtomicScope::SYSTEM, SIAtomicAddrSpace::ATOMIC, true);
  if (SSID == MMI->getAgentSSID())
    return std::tuple(SIAtomicScope::AGENT, SIAtomicAddrSpace::ATOMIC, true);
  if (SSID == MMI->getWorkgroupSSID())
    return std::tuple(SIAtomicScope::WORKGROUP, SIAtomicAddrSpace::ATOMIC,
                      true);
  if (SSID == MMI->getWavefrontSSID())
    return std::tuple(SIAtomicScope::WAVEFRONT, SIAtomicAddrSpace::ATOMIC,
                      true);
  if (SSID == SyncScope::SingleThread)
    return std::tuple(SIAtomicScope::SINGLETHREAD, SIAtomicAddrSpace::ATOMIC,
                      true);
  if (SSID == MMI->getSystemOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::SYSTEM,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getAgentOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::AGENT,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getWorkgroupOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::WORKGROUP,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getWavefrontOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::WAVEFRONT,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getSingleThreadOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::SINGLETHREAD,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  return std::nullopt;
}
SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(unsigned AS) const {
  if (AS == AMDGPUAS::FLAT_ADDRESS)
    return SIAtomicAddrSpace::FLAT;
  if (AS == AMDGPUAS::GLOBAL_ADDRESS)
    return SIAtomicAddrSpace::GLOBAL;
  if (AS == AMDGPUAS::LOCAL_ADDRESS)
    return SIAtomicAddrSpace::LDS;
  if (AS == AMDGPUAS::PRIVATE_ADDRESS)
    return SIAtomicAddrSpace::SCRATCH;
  if (AS == AMDGPUAS::REGION_ADDRESS)
    return SIAtomicAddrSpace::GDS;

  return SIAtomicAddrSpace::OTHER;
}
std::optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getNumMemOperands() > 0);

  SyncScope::ID SSID = SyncScope::SingleThread;
  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsNonTemporal = true;
  bool IsVolatile = false;
  bool IsLastUse = false;

  // Merge the properties of all memory operands.
  for (const auto &MMO : MI->memoperands()) {
    IsNonTemporal &= MMO->isNonTemporal();
    IsVolatile |= MMO->isVolatile();
    IsLastUse |= MMO->getFlags() & MOLastUse;
    InstrAddrSpace |=
        toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace());
    AtomicOrdering OpOrdering = MMO->getSuccessOrdering();
    if (OpOrdering != AtomicOrdering::NotAtomic) {
      const auto &IsSyncScopeInclusion =
          MMI->isSyncScopeInclusion(SSID, MMO->getSyncScopeID());
      if (!IsSyncScopeInclusion) {
        reportUnsupported(MI,
            "Unsupported non-inclusive atomic synchronization scope");
        return std::nullopt;
      }

      SSID = *IsSyncScopeInclusion ? SSID : MMO->getSyncScopeID();
      Ordering = getMergedAtomicOrdering(Ordering, OpOrdering);
      assert(MMO->getFailureOrdering() != AtomicOrdering::Release &&
             MMO->getFailureOrdering() != AtomicOrdering::AcquireRelease);
      FailureOrdering =
          getMergedAtomicOrdering(FailureOrdering, MMO->getFailureOrdering());
    }
  }

  SIAtomicScope Scope = SIAtomicScope::NONE;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  if (Ordering != AtomicOrdering::NotAtomic) {
    auto ScopeOrNone = toSIAtomicScope(SSID, InstrAddrSpace);
    if (!ScopeOrNone) {
      reportUnsupported(MI, "Unsupported atomic synchronization scope");
      return std::nullopt;
    }
    std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
        *ScopeOrNone;
    if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
        ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
         OrderingAddrSpace) ||
        ((InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) ==
         SIAtomicAddrSpace::NONE)) {
      reportUnsupported(MI, "Unsupported atomic address space");
      return std::nullopt;
    }
  }
  return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
                     IsCrossAddressSpaceOrdering, FailureOrdering, IsVolatile,
                     IsNonTemporal, IsLastUse);
}
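// Illustrative sketch (not part of the original pass): the loop above merges
// orderings across memory operands with getMergedAtomicOrdering, which yields
// an ordering at least as strong as both inputs, so merging a monotonic
// operand into an already-acquire result stays acquire.
// `checkMergedOrderingSketch` is a hypothetical helper used for illustration
// only.
[[maybe_unused]] static void checkMergedOrderingSketch() {
  AtomicOrdering Merged = getMergedAtomicOrdering(AtomicOrdering::Acquire,
                                                  AtomicOrdering::Monotonic);
  assert(Merged == AtomicOrdering::Acquire);
  (void)Merged;
}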
std::optional<SIMemOpInfo>
SIMemOpAccess::getLoadInfo(const MachineBasicBlock::iterator &MI) const {
  if (!(MI->mayLoad() && !MI->mayStore()))
    return std::nullopt;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo();

  return constructFromMIWithMMO(MI);
}
std::optional<SIMemOpInfo>
SIMemOpAccess::getStoreInfo(const MachineBasicBlock::iterator &MI) const {
  if (!(!MI->mayLoad() && MI->mayStore()))
    return std::nullopt;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo();

  return constructFromMIWithMMO(MI);
}
std::optional<SIMemOpInfo>
SIMemOpAccess::getAtomicFenceInfo(const MachineBasicBlock::iterator &MI) const {
  if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
    return std::nullopt;

  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI->getOperand(0).getImm());
  SyncScope::ID SSID = static_cast<SyncScope::ID>(MI->getOperand(1).getImm());

  auto ScopeOrNone = toSIAtomicScope(SSID, SIAtomicAddrSpace::ATOMIC);
  if (!ScopeOrNone) {
    reportUnsupported(MI, "Unsupported atomic synchronization scope");
    return std::nullopt;
  }

  SIAtomicScope Scope = SIAtomicScope::NONE;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
      *ScopeOrNone;

  if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
      ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
    reportUnsupported(MI, "Unsupported atomic address space");
    return std::nullopt;
  }

  return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace,
                     SIAtomicAddrSpace::ATOMIC, IsCrossAddressSpaceOrdering,
                     AtomicOrdering::NotAtomic);
}
std::optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
    const MachineBasicBlock::iterator &MI) const {
  if (!(MI->mayLoad() && MI->mayStore()))
    return std::nullopt;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo();

  return constructFromMIWithMMO(MI);
}
SICacheControl::SICacheControl(const GCNSubtarget &ST) : ST(ST) {
  TII = ST.getInstrInfo();
  IV = AMDGPU::getIsaVersion(ST.getCPU());
  InsertCacheInv = !AmdgcnSkipCacheInvalidations;
}
std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) {
  GCNSubtarget::Generation Generation = ST.getGeneration();
  if (ST.hasGFX940Insts())
    return std::make_unique<SIGfx940CacheControl>(ST);
  if (ST.hasGFX90AInsts())
    return std::make_unique<SIGfx90ACacheControl>(ST);
  if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
    return std::make_unique<SIGfx6CacheControl>(ST);
  if (Generation < AMDGPUSubtarget::GFX10)
    return std::make_unique<SIGfx7CacheControl>(ST);
  if (Generation < AMDGPUSubtarget::GFX11)
    return std::make_unique<SIGfx10CacheControl>(ST);
  if (Generation < AMDGPUSubtarget::GFX12)
    return std::make_unique<SIGfx11CacheControl>(ST);
  return std::make_unique<SIGfx12CacheControl>(ST);
}
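// Illustrative sketch (not part of the original pass): a caller only needs the
// function's GCNSubtarget to obtain the matching cache-control object, which
// is how runOnMachineFunction below sets up CC. `makeCacheControlSketch` is a
// hypothetical helper used for illustration only.
[[maybe_unused]] static std::unique_ptr<SICacheControl>
makeCacheControlSketch(const MachineFunction &MF) {
  return SICacheControl::create(MF.getSubtarget<GCNSubtarget>());
}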
bool SIGfx6CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // Set the L1 cache policy to MISS_EVICT; there is no L2 bypass control
      // at the ISA level.
      Changed |= enableGLCBit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // No cache to bypass.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  return Changed;
}
bool SIGfx6CacheControl::enableStoreCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;
  // The L1 cache is write-through, so stores need no bypass, and there is no
  // L2 bypass control at the ISA level.
  return Changed;
}

bool SIGfx6CacheControl::enableRMWCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;
  // RMW atomics implicitly bypass the L1 cache; GLC instead selects return
  // vs. no-return, so it must not be set here.
  return Changed;
}
bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
  // Only update loads and stores, not LLVM IR atomic read-modify-write
  // instructions.
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsVolatile) {
    // Set the L1 cache policy to MISS_EVICT for load instructions.
    if (Op == SIMemOp::LOAD)
      Changed |= enableGLCBit(MI);

    // Ensure the operation has completed at system scope so all volatile
    // operations become visible outside the program in a global order; no
    // cross-address-space ordering is requested.
    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER, AtomicOrdering::Unordered);

    return Changed;
  }

  if (IsNonTemporal) {
    // Setting both GLC and SLC configures the L1 policy to MISS_EVICT and the
    // L2 policy to STREAM.
    Changed |= enableGLCBit(MI);
    Changed |= enableSLCBit(MI);
    return Changed;
  }

  return Changed;
}
bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                    SIAtomicScope Scope,
                                    SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                    bool IsCrossAddrSpaceOrdering, Position Pos,
                                    AtomicOrdering Order) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  bool VMCnt = false;
  bool LGKMCnt = false;

  if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
      SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      VMCnt |= true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // The L1 keeps these in order within a work-group.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
      // Only cross-address-space ordering needs an lgkmcnt(0) wait, as LDS
      // operations for all waves execute in a total order within the
      // work-group.
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (VMCnt || LGKMCnt) {
    unsigned WaitCntImmediate = AMDGPU::encodeWaitcnt(
        IV, VMCnt ? 0 : AMDGPU::getVmcntBitMask(IV),
        AMDGPU::getExpcntBitMask(IV),
        LGKMCnt ? 0 : AMDGPU::getLgkmcntBitMask(IV));
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_soft))
        .addImm(WaitCntImmediate);
    Changed = true;
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
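// Illustrative sketch (not part of the original pass): the immediate built
// above packs vmcnt/expcnt/lgkmcnt for the target's ISA version. Requesting
// "all outstanding VMEM and LDS/GDS operations done" while leaving expcnt
// unconstrained looks like this; `buildFullWaitcntSketch` is a hypothetical
// helper used for illustration only.
[[maybe_unused]] static unsigned
buildFullWaitcntSketch(const AMDGPU::IsaVersion &IV) {
  return AMDGPU::encodeWaitcnt(IV, /*Vmcnt=*/0,
                               /*Expcnt=*/AMDGPU::getExpcntBitMask(IV),
                               /*Lgkmcnt=*/0);
}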
bool SIGfx6CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                       SIAtomicScope Scope,
                                       SIAtomicAddrSpace AddrSpace,
                                       Position Pos) const {
  if (!InsertCacheInv)
    return false;

  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBINVL1));
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // No cache to invalidate.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}

bool SIGfx6CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
                                       SIAtomicScope Scope,
                                       SIAtomicAddrSpace AddrSpace,
                                       bool IsCrossAddrSpaceOrdering,
                                       Position Pos) const {
  return insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
                    IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release);
}
bool SIGfx7CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                       SIAtomicScope Scope,
                                       SIAtomicAddrSpace AddrSpace,
                                       Position Pos) const {
  if (!InsertCacheInv)
    return false;

  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  const GCNSubtarget &STM = MBB.getParent()->getSubtarget<GCNSubtarget>();

  // The invalidate opcode depends on the OS being targeted.
  const unsigned InvalidateL1 = STM.isAmdPalOS() || STM.isMesa3DOS()
                                    ? AMDGPU::BUFFER_WBINVL1
                                    : AMDGPU::BUFFER_WBINVL1_VOL;

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      BuildMI(MBB, MI, DL, TII->get(InvalidateL1));
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // No cache to invalidate.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
bool SIGfx90ACacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      Changed |= enableGLCBit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
      // In threadgroup split mode the waves of a work-group can be executing
      // on different CUs, so the per-CU L1 must be bypassed; otherwise all
      // waves share one CU and the L1 need not be bypassed.
      if (ST.isTgSplitEnabled())
        Changed |= enableGLCBit(MI);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // No cache to bypass.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  return Changed;
}
bool SIGfx90ACacheControl::enableStoreCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // Atomic stores implicitly write through the L1 cache, so nothing to
      // bypass here.
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // No cache to bypass.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  return Changed;
}

bool SIGfx90ACacheControl::enableRMWCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // RMW atomics implicitly bypass the L1 cache; GLC instead selects
      // return vs. no-return, so it must not be set here.
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // No cache to bypass.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  return Changed;
}
bool SIGfx90ACacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
  // Only update loads and stores, not LLVM IR atomic read-modify-write
  // instructions.
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsVolatile) {
    // Set the L1 cache policy to MISS_EVICT for load instructions.
    if (Op == SIMemOp::LOAD)
      Changed |= enableGLCBit(MI);

    // Ensure the operation has completed at system scope, without forcing
    // cross-address-space ordering.
    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER, AtomicOrdering::Unordered);

    return Changed;
  }

  if (IsNonTemporal) {
    // Setting both GLC and SLC configures the L1 policy to MISS_EVICT and the
    // L2 policy to STREAM.
    Changed |= enableGLCBit(MI);
    Changed |= enableSLCBit(MI);
    return Changed;
  }

  return Changed;
}
bool SIGfx90ACacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                      SIAtomicScope Scope,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsCrossAddrSpaceOrdering,
                                      Position Pos,
                                      AtomicOrdering Order) const {
  if (ST.isTgSplitEnabled()) {
    // In threadgroup split mode the waves of a work-group can be executing on
    // different CUs, so global and GDS operations must be waited on as if at
    // agent scope to be visible to waves on other CUs.
    if (((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH |
                       SIAtomicAddrSpace::GDS)) != SIAtomicAddrSpace::NONE) &&
        (Scope == SIAtomicScope::WORKGROUP)) {
      // Same as GFX7 using agent scope.
      Scope = SIAtomicScope::AGENT;
    }
    // In threadgroup split mode LDS cannot be allocated, so there is no need
    // to wait for LDS operations.
    AddrSpace &= ~SIAtomicAddrSpace::LDS;
  }
  return SIGfx7CacheControl::insertWait(MI, Scope, AddrSpace, Op,
                                        IsCrossAddrSpaceOrdering, Pos, Order);
}
bool SIGfx90ACacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace,
                                         Position Pos) const {
  if (!InsertCacheInv)
    return false;

  bool Changed = false;

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // ... (additionally invalidates the L2 so following loads do not see
      // stale remote VMEM data, and sets Changed; elided in this excerpt)
      break;
    case SIAtomicScope::AGENT:
      break; // Same as GFX7.
    case SIAtomicScope::WORKGROUP:
      // In threadgroup split mode the per-CU L1 must be invalidated, which is
      // the same as GFX7 at agent scope.
      if (ST.isTgSplitEnabled()) {
        Scope = SIAtomicScope::AGENT;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // Same as GFX7.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (Pos == Position::AFTER)
    --MI;

  Changed |= SIGfx7CacheControl::insertAcquire(MI, Scope, AddrSpace, Pos);

  return Changed;
}
bool SIGfx90ACacheControl::insertRelease(MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace,
                                         bool IsCrossAddrSpaceOrdering,
                                         Position Pos) const {
  bool Changed = false;

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // ... (initiates an L2 writeback and sets Changed; the GFX7 release
      // below provides the vmcnt wait that makes it visible; elided in this
      // excerpt)
      break;
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // Handled by the GFX7 release below.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (Pos == Position::AFTER)
    --MI;

  Changed |= SIGfx7CacheControl::insertRelease(MI, Scope, AddrSpace,
                                               IsCrossAddrSpaceOrdering, Pos);

  return Changed;
}
bool SIGfx940CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // Set SC bits to indicate system scope.
      Changed |= enableSC0Bit(MI);
      Changed |= enableSC1Bit(MI);
      break;
    case SIAtomicScope::AGENT:
      // Set SC bits to indicate agent scope.
      Changed |= enableSC1Bit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
      // Set SC0 to indicate workgroup scope.
      Changed |= enableSC0Bit(MI);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // No cache to bypass.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  return Changed;
}
bool SIGfx940CacheControl::enableStoreCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // Set SC bits to indicate system scope.
      Changed |= enableSC0Bit(MI);
      Changed |= enableSC1Bit(MI);
      break;
    case SIAtomicScope::AGENT:
      // Set SC bits to indicate agent scope.
      Changed |= enableSC1Bit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
      // Set SC0 to indicate workgroup scope.
      Changed |= enableSC0Bit(MI);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // No cache to bypass.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  return Changed;
}
bool SIGfx940CacheControl::enableRMWCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // Set SC1 so the RMW is handled at system scope.
      Changed |= enableSC1Bit(MI);
      break;
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // RMW atomics implicitly bypass the caches at these scopes.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  return Changed;
}
bool SIGfx940CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
  // Only update loads and stores, not LLVM IR atomic read-modify-write
  // instructions.
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsVolatile) {
    // Set SC bits to indicate system scope.
    Changed |= enableSC0Bit(MI);
    Changed |= enableSC1Bit(MI);

    // Ensure the operation has completed at system scope, without forcing
    // cross-address-space ordering.
    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER, AtomicOrdering::Unordered);

    return Changed;
  }

  if (IsNonTemporal) {
    Changed |= enableNTBit(MI);
    return Changed;
  }

  return Changed;
}
bool SIGfx940CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace,
                                         Position Pos) const {
  if (!InsertCacheInv)
    return false;

  bool Changed = false;

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // ... (emits a system-scoped cache invalidate and sets Changed; elided
      // in this excerpt)
      break;
    case SIAtomicScope::AGENT:
      // ... (emits an agent-scoped cache invalidate and sets Changed; elided
      // in this excerpt)
      break;
    case SIAtomicScope::WORKGROUP:
      // In threadgroup split mode the per-CU L1 must also be invalidated.
      if (ST.isTgSplitEnabled()) {
        // ... (emits a workgroup-scoped invalidate and sets Changed; elided
        // in this excerpt)
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // No cache to invalidate.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
bool SIGfx940CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace,
                                         bool IsCrossAddrSpaceOrdering,
                                         Position Pos) const {
  bool Changed = false;

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // ... (initiates a system-scoped L2 writeback and sets Changed; the
      // wait inserted below ensures it has completed; elided in this excerpt)
      break;
    case SIAtomicScope::AGENT:
      // ... (same, scoped to the agent; elided in this excerpt)
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // No cache to write back.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (Pos == Position::AFTER)
    --MI;

  // Ensure the necessary waits are inserted as well.
  Changed |= insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
                        IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release);

  return Changed;
}
bool SIGfx10CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // Bypass both the L0/L1 and the L2 (GLC and DLC).
      Changed |= enableGLCBit(MI);
      Changed |= enableDLCBit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU
      // of the WGP, so the per-CU L0 must be bypassed; in CU mode it need not
      // be.
      if (!ST.isCuModeEnabled())
        Changed |= enableGLCBit(MI);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // No cache to bypass.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  return Changed;
}
bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
  // Only update loads and stores, not LLVM IR atomic read-modify-write
  // instructions.
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsVolatile) {
    // Bypass both the L0/L1 and the L2 for load instructions.
    if (Op == SIMemOp::LOAD) {
      Changed |= enableGLCBit(MI);
      Changed |= enableDLCBit(MI);
    }

    // Ensure the operation has completed at system scope, without forcing
    // cross-address-space ordering.
    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER, AtomicOrdering::Unordered);
    return Changed;
  }

  if (IsNonTemporal) {
    // For stores setting both GLC and SLC configures L0/L1 to MISS_EVICT and
    // L2 to STREAM; for loads setting SLC is sufficient.
    if (Op == SIMemOp::STORE)
      Changed |= enableGLCBit(MI);
    Changed |= enableSLCBit(MI);
    return Changed;
  }

  return Changed;
}
bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                     SIAtomicScope Scope,
                                     SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                     bool IsCrossAddrSpaceOrdering,
                                     Position Pos,
                                     AtomicOrdering Order) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  bool VMCnt = false;
  bool VSCnt = false;
  bool LGKMCnt = false;

  if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
      SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
        VMCnt |= true;
      if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
        VSCnt |= true;
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU
      // of the WGP, so waits are needed; in CU mode they are not.
      if (!ST.isCuModeEnabled()) {
        if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
          VMCnt |= true;
        if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
          VSCnt |= true;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // The hardware keeps these in order for a single wave.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
      // Only cross-address-space ordering needs an lgkmcnt(0) wait, as LDS
      // operations for all waves execute in a total order within the
      // work-group.
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (VMCnt || LGKMCnt) {
    unsigned WaitCntImmediate = AMDGPU::encodeWaitcnt(
        IV, VMCnt ? 0 : AMDGPU::getVmcntBitMask(IV),
        AMDGPU::getExpcntBitMask(IV),
        LGKMCnt ? 0 : AMDGPU::getLgkmcntBitMask(IV));
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_soft))
        .addImm(WaitCntImmediate);
    Changed = true;
  }

  // ... (when VSCnt is set, a separate store-counter wait on a null SGPR is
  // emitted as well and Changed is set; elided in this excerpt)

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
bool SIGfx10CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                        SIAtomicScope Scope,
                                        SIAtomicAddrSpace AddrSpace,
                                        Position Pos) const {
  if (!InsertCacheInv)
    return false;

  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL1_INV));
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU
      // of the WGP, so the per-CU L0 must be invalidated; in CU mode it need
      // not be.
      if (!ST.isCuModeEnabled()) {
        BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
        Changed = true;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // No cache to invalidate.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
bool SIGfx11CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      Changed |= enableGLCBit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU
      // of the WGP, so the per-CU L0 must be bypassed; in CU mode it need not
      // be.
      if (!ST.isCuModeEnabled())
        Changed |= enableGLCBit(MI);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // No cache to bypass.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  return Changed;
}
bool SIGfx11CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
  // Only update loads and stores, not LLVM IR atomic read-modify-write
  // instructions.
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsVolatile) {
    // Set the L0/L1 cache policy to MISS_EVICT for load instructions.
    if (Op == SIMemOp::LOAD)
      Changed |= enableGLCBit(MI);

    // Set MALL NOALLOC for both loads and stores.
    Changed |= enableDLCBit(MI);

    // Ensure the operation has completed at system scope, without forcing
    // cross-address-space ordering.
    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER, AtomicOrdering::Unordered);
    return Changed;
  }

  if (IsNonTemporal) {
    // For stores setting both GLC and SLC configures L0/L1 to MISS_EVICT and
    // L2 to STREAM; for loads setting SLC is sufficient.
    if (Op == SIMemOp::STORE)
      Changed |= enableGLCBit(MI);
    Changed |= enableSLCBit(MI);

    // Set MALL NOALLOC for both loads and stores.
    Changed |= enableDLCBit(MI);
    return Changed;
  }

  return Changed;
}
bool SIGfx12CacheControl::insertWaitsBeforeSystemScopeStore(
    const MachineBasicBlock::iterator MI) const {
  // ... (emits the counter waits required before a system-scope store; body
  // elided in this excerpt)
  return true;
}

bool SIGfx12CacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                     SIAtomicScope Scope,
                                     SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                     bool IsCrossAddrSpaceOrdering,
                                     Position Pos,
                                     AtomicOrdering Order) const {
  bool Changed = false;

  bool LOADCnt = false;
  bool DSCnt = false;
  bool STORECnt = false;

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
      SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
        LOADCnt |= true;
      if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
        STORECnt |= true;
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU
      // of the WGP, so waits are needed; in CU mode they are not.
      if (!ST.isCuModeEnabled()) {
        if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
          LOADCnt |= true;
        if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
          STORECnt |= true;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // The hardware keeps these in order for a single wave.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
      // Only cross-address-space ordering needs a DS-counter wait, as LDS
      // operations for all waves execute in a total order within the
      // work-group.
      DSCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  // ... (the actual S_WAIT_* instructions are emitted here based on LOADCnt,
  // STORECnt and DSCnt, setting Changed; among other details the code also
  // checks Order != AtomicOrdering::Acquire. Elided in this excerpt.)

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
bool SIGfx12CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                        SIAtomicScope Scope,
                                        SIAtomicAddrSpace AddrSpace,
                                        Position Pos) const {
  if (!InsertCacheInv)
    return false;

  // The caches only need invalidating for the global address space.
  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) == SIAtomicAddrSpace::NONE)
    return false;

  switch (Scope) {
  case SIAtomicScope::SYSTEM:
    // ... (selects a system-scope CPol for the invalidate; elided in this
    // excerpt)
    break;
  case SIAtomicScope::AGENT:
    // ... (selects a device-scope CPol; elided in this excerpt)
    break;
  case SIAtomicScope::WORKGROUP:
    // In WGP mode the per-CU L0 must be invalidated; in CU mode there is
    // nothing to do.
    if (ST.isCuModeEnabled())
      return false;
    // ... (selects an SE-scope CPol; elided in this excerpt)
    break;
  case SIAtomicScope::WAVEFRONT:
  case SIAtomicScope::SINGLETHREAD:
    return false; // No cache to invalidate.
  default:
    llvm_unreachable("Unsupported synchronization scope");
  }

  if (Pos == Position::AFTER)
    ++MI;

  // ... (emits the scoped global cache invalidate instruction; elided in this
  // excerpt)

  if (Pos == Position::AFTER)
    --MI;

  return true;
}
bool SIGfx12CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
                                        SIAtomicScope Scope,
                                        SIAtomicAddrSpace AddrSpace,
                                        bool IsCrossAddrSpaceOrdering,
                                        Position Pos) const {
  // Writebacks are only needed for the global address space.
  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) == SIAtomicAddrSpace::NONE)
    return false;

  if (Pos == Position::AFTER)
    ++MI;

  switch (Scope) {
  case SIAtomicScope::SYSTEM:
    // ... (emits a system-scoped global cache writeback; elided in this
    // excerpt)
    break;
  case SIAtomicScope::AGENT:
  case SIAtomicScope::WORKGROUP:
    // ... (emits a narrower-scoped writeback where required; elided in this
    // excerpt)
    break;
  case SIAtomicScope::WAVEFRONT:
  case SIAtomicScope::SINGLETHREAD:
    break; // No cache to write back.
  default:
    llvm_unreachable("Unsupported synchronization scope");
  }

  if (Pos == Position::AFTER)
    --MI;

  // Ensure the necessary waits are inserted as well.
  insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
             IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release);

  return true;
}
bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
  // Only update loads and stores, not LLVM IR atomic read-modify-write
  // instructions.
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsLastUse) {
    // ... (sets the last-use temporal hint; elided in this excerpt)
  } else if (IsNonTemporal) {
    // ... (sets the non-temporal hint; elided in this excerpt)
  }

  if (IsVolatile) {
    // ... (forces a system-scope CPol on the instruction; elided in this
    // excerpt)

    if (Op == SIMemOp::STORE)
      Changed |= insertWaitsBeforeSystemScopeStore(MI);

    // Ensure the operation has completed at system scope, without forcing
    // cross-address-space ordering.
    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER, AtomicOrdering::Unordered);
  }

  return Changed;
}
bool SIGfx12CacheControl::expandSystemScopeStore(
    MachineBasicBlock::iterator &MI) const {
  MachineOperand *CPol = TII->getNamedOperand(*MI, AMDGPU::OpName::cpol);
  if (CPol && ((CPol->getImm() & AMDGPU::CPol::SCOPE) ==
               AMDGPU::CPol::SCOPE_SYS))
    return insertWaitsBeforeSystemScopeStore(MI);

  return false;
}
bool SIGfx12CacheControl::setAtomicScope(const MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // ... (sets the CPol scope to system; elided in this excerpt)
      break;
    case SIAtomicScope::AGENT:
      // ... (sets the CPol scope to device; elided in this excerpt)
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode a wider scope is needed than in CU mode.
      if (!ST.isCuModeEnabled()) {
        // ... (sets the CPol scope to SE; elided in this excerpt)
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // No cache to bypass.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  return Changed;
}
bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
  if (AtomicPseudoMIs.empty())
    return false;

  for (auto &MI : AtomicPseudoMIs)
    MI->eraseFromParent();

  AtomicPseudoMIs.clear();
  return true;
}
bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
                                   MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && !MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    const AtomicOrdering Order = MOI.getOrdering();
    if (Order == AtomicOrdering::Monotonic ||
        Order == AtomicOrdering::Acquire ||
        Order == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->enableLoadCacheBypass(MI, MOI.getScope(),
                                           MOI.getOrderingAddrSpace());
    }

    if (Order == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertWait(MI, MOI.getScope(), MOI.getOrderingAddrSpace(),
                                SIMemOp::LOAD | SIMemOp::STORE,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::BEFORE, Order);

    if (Order == AtomicOrdering::Acquire ||
        Order == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->insertWait(
          MI, MOI.getScope(), MOI.getInstrAddrSpace(), SIMemOp::LOAD,
          MOI.getIsCrossAddressSpaceOrdering(), Position::AFTER, Order);
      Changed |= CC->insertAcquire(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   Position::AFTER);
    }

    return Changed;
  }

  // Atomic instructions already bypass caches to the scope specified by the
  // SyncScope operand; only non-atomic volatile, nontemporal and last-use
  // loads need additional treatment.
  Changed |= CC->enableVolatileAndOrNonTemporal(
      MI, MOI.getInstrAddrSpace(), SIMemOp::LOAD, MOI.isVolatile(),
      MOI.isNonTemporal(), MOI.isLastUse());

  return Changed;
}
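// Illustrative sketch (not part of the original pass): for an acquire load the
// hooks above run in this order -- bypass the caches on the load itself, wait
// for the load to complete, then invalidate so later reads observe remote
// writes. `expandAcquireLoadSketch` is a hypothetical standalone driver used
// for illustration only.
[[maybe_unused]] static bool
expandAcquireLoadSketch(SICacheControl &CC, const SIMemOpInfo &MOI,
                        MachineBasicBlock::iterator &MI) {
  bool Changed = CC.enableLoadCacheBypass(MI, MOI.getScope(),
                                          MOI.getOrderingAddrSpace());
  Changed |= CC.insertWait(MI, MOI.getScope(), MOI.getInstrAddrSpace(),
                           SIMemOp::LOAD, MOI.getIsCrossAddressSpaceOrdering(),
                           Position::AFTER, AtomicOrdering::Acquire);
  Changed |= CC.insertAcquire(MI, MOI.getScope(), MOI.getOrderingAddrSpace(),
                              Position::AFTER);
  return Changed;
}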
bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
                                    MachineBasicBlock::iterator &MI) {
  assert(!MI->mayLoad() && MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
        MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->enableStoreCacheBypass(MI, MOI.getScope(),
                                            MOI.getOrderingAddrSpace());
    }

    if (MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertRelease(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   MOI.getIsCrossAddressSpaceOrdering(),
                                   Position::BEFORE);

    return Changed;
  }

  // Atomic instructions already bypass caches to the scope specified by the
  // SyncScope operand.
  Changed |= CC->enableVolatileAndOrNonTemporal(
      MI, MOI.getInstrAddrSpace(), SIMemOp::STORE, MOI.isVolatile(),
      MOI.isNonTemporal());

  // GFX12 system-scope stores may need additional waits.
  Changed |= CC->expandSystemScopeStore(MI);

  return Changed;
}
bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
                                          MachineBasicBlock::iterator &MI) {
  assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);

  AtomicPseudoMIs.push_back(MI);
  bool Changed = false;

  // Refine the fenced address spaces based on MMRAs, if any.
  auto OrderingAddrSpace =
      getFenceAddrSpaceMMRA(*MI, MOI.getOrderingAddrSpace());

  if (MOI.isAtomic()) {
    const AtomicOrdering Order = MOI.getOrdering();
    if (Order == AtomicOrdering::Acquire) {
      Changed |= CC->insertWait(
          MI, MOI.getScope(), OrderingAddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
          MOI.getIsCrossAddressSpaceOrdering(), Position::BEFORE, Order);
    }

    if (Order == AtomicOrdering::Release ||
        Order == AtomicOrdering::AcquireRelease ||
        Order == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertRelease(MI, MOI.getScope(), OrderingAddrSpace,
                                   MOI.getIsCrossAddressSpaceOrdering(),
                                   Position::BEFORE);

    if (Order == AtomicOrdering::Acquire ||
        Order == AtomicOrdering::AcquireRelease ||
        Order == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertAcquire(MI, MOI.getScope(), OrderingAddrSpace,
                                   Position::BEFORE);

    return Changed;
  }

  return Changed;
}
bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(
    const SIMemOpInfo &MOI, MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    const AtomicOrdering Order = MOI.getOrdering();
    if (Order == AtomicOrdering::Monotonic ||
        Order == AtomicOrdering::Acquire || Order == AtomicOrdering::Release ||
        Order == AtomicOrdering::AcquireRelease ||
        Order == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->enableRMWCacheBypass(MI, MOI.getScope(),
                                          MOI.getInstrAddrSpace());
    }

    if (Order == AtomicOrdering::Release ||
        Order == AtomicOrdering::AcquireRelease ||
        Order == AtomicOrdering::SequentiallyConsistent ||
        MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertRelease(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   MOI.getIsCrossAddressSpaceOrdering(),
                                   Position::BEFORE);

    if (Order == AtomicOrdering::Acquire ||
        Order == AtomicOrdering::AcquireRelease ||
        Order == AtomicOrdering::SequentiallyConsistent ||
        MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
        MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->insertWait(
          MI, MOI.getScope(), MOI.getInstrAddrSpace(),
          isAtomicRet(*MI) ? SIMemOp::LOAD : SIMemOp::STORE,
          MOI.getIsCrossAddressSpaceOrdering(), Position::AFTER, Order);
      Changed |= CC->insertAcquire(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   Position::AFTER);
    }

    return Changed;
  }

  return Changed;
}
bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;

  const MachineModuleInfo &MMI =
      getAnalysis<MachineModuleInfoWrapperPass>().getMMI();

  SIMemOpAccess MOA(MMI.getObjFileInfo<AMDGPUMachineModuleInfo>());
  CC = SICacheControl::create(MF.getSubtarget<GCNSubtarget>());

  for (auto &MBB : MF) {
    for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {

      // Unbundle memory-access bundles so each access can be legalized
      // individually.
      if (MI->isBundle() && MI->mayLoadOrStore()) {
        MachineBasicBlock::instr_iterator II(MI->getIterator());
        for (MachineBasicBlock::instr_iterator I = ++II, E = MBB.instr_end();
             I != E && I->isBundledWithPred(); ++I) {
          I->unbundleFromPred();
          for (MachineOperand &MO : I->operands())
            if (MO.isReg())
              MO.setIsInternalRead(false);
        }

        MI->eraseFromParent();
        MI = II->getIterator();
      }

      if (const auto &MOI = MOA.getLoadInfo(MI))
        Changed |= expandLoad(*MOI, MI);
      else if (const auto &MOI = MOA.getStoreInfo(MI)) {
        Changed |= expandStore(*MOI, MI);
        Changed |= CC->tryForceStoreSC0SC1(*MOI, MI);
      } else if (const auto &MOI = MOA.getAtomicFenceInfo(MI))
        Changed |= expandAtomicFence(*MOI, MI);
      else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI))
        Changed |= expandAtomicCmpxchgOrRmw(*MOI, MI);
    }
  }

  Changed |= removeAtomicPseudoMIs();
  return Changed;
}
char SIMemoryLegalizer::ID = 0;
char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID;

FunctionPass *llvm::createSIMemoryLegalizerPass() {
  return new SIMemoryLegalizer();
}
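// Illustrative sketch (not part of the original pass): the pass is normally
// scheduled by the AMDGPU target pass configuration; a hypothetical standalone
// setup with the legacy pass manager would simply add the factory result
// (assumes llvm/IR/LegacyPassManager.h is available).
// `addSIMemoryLegalizerSketch` is an illustrative helper only.
[[maybe_unused]] static void
addSIMemoryLegalizerSketch(llvm::legacy::PassManager &PM) {
  PM.add(llvm::createSIMemoryLegalizerPass());
}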