// NOTE(review): extraction-garbled fragment — original source lines are elided
// and their numbers are fused into the text; code left byte-identical.
// Pass identification macros and the tail of a cl::opt command-line flag
// (presumably the option that disables cache-invalidation insertion — the
// cl::opt declaration itself is not visible here; confirm against the file).
35#define DEBUG_TYPE "si-memory-legalizer"
36#define PASS_NAME "SI Memory Legalizer"
 40 cl::desc(
"Use this to skip inserting cache invalidating instructions."));
// Scoped enums for the memory model: SIAtomicScope (synchronization scope)
// and SIAtomicAddrSpace (bitmask of address spaces). Enumerator lists are
// elided in this fragment; only the composite masks are visible.
62enum class SIAtomicScope {
74enum class SIAtomicAddrSpace {
 // FLAT covers the address spaces a flat operation may touch.
 83 FLAT = GLOBAL |
LDS | SCRATCH,
 // ATOMIC: spaces where atomics are supported; ALL additionally folds in OTHER.
 86 ATOMIC = GLOBAL |
LDS | SCRATCH | GDS,
 89 ALL = GLOBAL |
LDS | SCRATCH | GDS | OTHER,
// SIMemOpInfo: an immutable summary of the memory-model-relevant properties of
// one memory operation (ordering, scope, address spaces, volatile/nontemporal/
// last-use/cooperative flags). Constructed only by SIMemOpAccess (friend).
// NOTE(review): interior lines are elided in this fragment; code kept verbatim.
94class SIMemOpInfo final {
 97 friend class SIMemOpAccess;
 101 SIAtomicScope Scope = SIAtomicScope::SYSTEM;
 102 SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
 103 SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
 104 bool IsCrossAddressSpaceOrdering =
false;
 105 bool IsVolatile =
false;
 106 bool IsNonTemporal =
false;
 107 bool IsLastUse =
false;
 108 bool IsCooperative =
false;
 // Constructor: defaults describe the most conservative atomic op
 // (system scope, sequentially consistent, all address spaces).
 112 const GCNSubtarget &ST,
 114 SIAtomicScope Scope = SIAtomicScope::SYSTEM,
 115 SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC,
 116 SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::ALL,
 117 bool IsCrossAddressSpaceOrdering =
true,
 118 AtomicOrdering FailureOrdering = AtomicOrdering::SequentiallyConsistent,
 119 bool IsVolatile =
false,
bool IsNonTemporal =
false,
 120 bool IsLastUse =
false,
bool IsCooperative =
false)
 121 : Ordering(Ordering), FailureOrdering(FailureOrdering), Scope(Scope),
 122 OrderingAddrSpace(OrderingAddrSpace), InstrAddrSpace(InstrAddrSpace),
 123 IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
 124 IsVolatile(IsVolatile), IsNonTemporal(IsNonTemporal),
 125 IsLastUse(IsLastUse), IsCooperative(IsCooperative) {
 // Non-atomic ops must carry no scope/ordering state at all.
 127 if (Ordering == AtomicOrdering::NotAtomic) {
 128 assert(!IsCooperative &&
"Cannot be cooperative & non-atomic!");
 129 assert(Scope == SIAtomicScope::NONE &&
 130 OrderingAddrSpace == SIAtomicAddrSpace::NONE &&
 131 !IsCrossAddressSpaceOrdering &&
 132 FailureOrdering == AtomicOrdering::NotAtomic);
 136 assert(Scope != SIAtomicScope::NONE &&
 137 (OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
 138 SIAtomicAddrSpace::NONE &&
 139 (InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
 140 SIAtomicAddrSpace::NONE);
 // When ordering and instruction address spaces coincide there is no
 // cross-address-space ordering to maintain.
 145 if ((OrderingAddrSpace == InstrAddrSpace) &&
 147 this->IsCrossAddressSpaceOrdering =
false;
 // Narrow the scope when the instruction can only touch thread/workgroup/
 // agent-local address spaces — a wider scope would be wasted fencing.
 151 if ((InstrAddrSpace & ~SIAtomicAddrSpace::SCRATCH) ==
 152 SIAtomicAddrSpace::NONE) {
 153 this->Scope = std::min(Scope, SIAtomicScope::SINGLETHREAD);
 154 }
else if ((InstrAddrSpace &
 155 ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS)) ==
 156 SIAtomicAddrSpace::NONE) {
 157 this->Scope = std::min(Scope, SIAtomicScope::WORKGROUP);
 158 }
else if ((InstrAddrSpace &
 159 ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS |
 160 SIAtomicAddrSpace::GDS)) == SIAtomicAddrSpace::NONE) {
 161 this->Scope = std::min(Scope, SIAtomicScope::AGENT);
 // Targets without cluster support fall back from CLUSTER to AGENT scope.
 166 if (this->Scope == SIAtomicScope::CLUSTER && !
ST.hasClusters())
 167 this->Scope = SIAtomicScope::AGENT;
 // Trivial accessors for the fields above.
 173 SIAtomicScope getScope()
const {
 186 return FailureOrdering;
 191 SIAtomicAddrSpace getInstrAddrSpace()
const {
 192 return InstrAddrSpace;
 197 SIAtomicAddrSpace getOrderingAddrSpace()
const {
 198 return OrderingAddrSpace;
 203 bool getIsCrossAddressSpaceOrdering()
const {
 204 return IsCrossAddressSpaceOrdering;
 209 bool isVolatile()
const {
 215 bool isNonTemporal()
const {
 216 return IsNonTemporal;
 221 bool isLastUse()
const {
return IsLastUse; }
 224 bool isCooperative()
const {
return IsCooperative; }
 // An op is atomic iff its success ordering is anything but NotAtomic.
 228 bool isAtomic()
const {
 229 return Ordering != AtomicOrdering::NotAtomic;
// SIMemOpAccess: factory that inspects a MachineInstr (and its memory
// operands) and produces the SIMemOpInfo summaries consumed by the legalizer.
// NOTE(review): method names for several declarations are elided here.
234class SIMemOpAccess final {
 236 const AMDGPUMachineModuleInfo *MMI =
nullptr;
 237 const GCNSubtarget &ST;
 // Emits a diagnostic for an unsupported construct (signature partially elided).
 241 const char *Msg)
const;
 // Maps a SyncScope::ID to (scope, ordering addr-space, cross-AS flag);
 // std::nullopt on an unrecognized scope.
 247 std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
 248 toSIAtomicScope(
SyncScope::ID SSID, SIAtomicAddrSpace InstrAddrSpace)
const;
 // Maps a target address-space number to the SIAtomicAddrSpace bitmask.
 251 SIAtomicAddrSpace toSIAtomicAddrSpace(
unsigned AS)
const;
 255 std::optional<SIMemOpInfo>
 261 SIMemOpAccess(
const AMDGPUMachineModuleInfo &MMI,
const GCNSubtarget &ST);
 // Per-kind query entry points (load/store/fence/rmw/…); names elided.
 264 std::optional<SIMemOpInfo>
 269 std::optional<SIMemOpInfo>
 274 std::optional<SIMemOpInfo>
 279 std::optional<SIMemOpInfo>
 285 std::optional<SIMemOpInfo>
// SICacheControl: abstract interface for generation-specific cache/ordering
// lowering. Concrete subclasses (Gfx6/Gfx10/Gfx12 below) implement cache
// bypass, wait insertion, acquire/release sequences, etc.
289class SICacheControl {
 293 const GCNSubtarget &ST;
 296 const SIInstrInfo *TII =
nullptr;
 303 SICacheControl(
const GCNSubtarget &ST);
 // OR the given bits into an instruction's named cache-policy operand
 // (signature head elided).
 308 unsigned Bits)
const;
 312 bool canAffectGlobalAddrSpace(SIAtomicAddrSpace AS)
const;
 // Factory: picks the subclass for the subtarget's generation.
 318 static std::unique_ptr<SICacheControl> create(
const GCNSubtarget &ST);
 // Pure-virtual hooks; leading parameters elided in this fragment.
 325 SIAtomicAddrSpace AddrSpace)
const = 0;
 332 SIAtomicAddrSpace AddrSpace)
const = 0;
 339 SIAtomicAddrSpace AddrSpace)
const = 0;
 345 SIAtomicAddrSpace AddrSpace,
 346 SIMemOp
Op,
bool IsVolatile,
 348 bool IsLastUse =
false)
const = 0;
 // Default finalizeStore is a no-op hook; GFX12 overrides it.
 355 virtual bool finalizeStore(MachineInstr &
MI,
bool Atomic)
const {
 // Cooperative atomics are rejected by default (body partially elided).
 360 virtual bool handleCooperativeAtomic(MachineInstr &
MI)
const {
 362 "cooperative atomics are not available on this architecture");
 375 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
 376 bool IsCrossAddrSpaceOrdering, Position Pos,
 386 SIAtomicAddrSpace AddrSpace,
 387 Position Pos)
const = 0;
 397 SIAtomicAddrSpace AddrSpace,
 398 bool IsCrossAddrSpaceOrdering,
 399 Position Pos)
const = 0;
 // Default: nothing to do for non-volatile handling; GFX12 overrides.
 403 virtual bool handleNonVolatile(MachineInstr &
MI)
const {
return false; }
 406 virtual ~SICacheControl() =
default;
// SIGfx6CacheControl: cache-control lowering for pre-GFX10 targets
// (selected by SICacheControl::create for Generation < GFX10).
411class SIGfx6CacheControl final :
public SICacheControl {
 414 SIGfx6CacheControl(
const GCNSubtarget &ST) : SICacheControl(
ST) {}
 // Overrides of the SICacheControl hooks; leading params elided.
 418 SIAtomicAddrSpace AddrSpace)
const override;
 422 SIAtomicAddrSpace AddrSpace)
const override;
 426 SIAtomicAddrSpace AddrSpace)
const override;
 429 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
 430 bool IsVolatile,
bool IsNonTemporal,
 431 bool IsLastUse)
const override;
 434 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
 435 bool IsCrossAddrSpaceOrdering, Position Pos,
 440 SIAtomicAddrSpace AddrSpace,
 441 Position Pos)
const override;
 445 SIAtomicAddrSpace AddrSpace,
 446 bool IsCrossAddrSpaceOrdering,
 447 Position Pos)
const override;
// SIGfx10CacheControl: cache-control lowering for GFX10/GFX11
// (Generation in [GFX10, GFX12)).
451class SIGfx10CacheControl final :
public SICacheControl {
 453 SIGfx10CacheControl(
const GCNSubtarget &ST) : SICacheControl(
ST) {}
 457 SIAtomicAddrSpace AddrSpace)
const override;
 // Inline overrides whose bodies are elided in this fragment.
 461 SIAtomicAddrSpace AddrSpace)
const override {
 467 SIAtomicAddrSpace AddrSpace)
const override {
 472 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
 473 bool IsVolatile,
bool IsNonTemporal,
 474 bool IsLastUse)
const override;
 477 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
 478 bool IsCrossAddrSpaceOrdering, Position Pos,
 482 SIAtomicAddrSpace AddrSpace, Position Pos)
const override;
 // insertRelease is implemented as a release-ordered wait on loads+stores.
 485 SIAtomicAddrSpace AddrSpace,
bool IsCrossAddrSpaceOrdering,
 486 Position Pos)
const override {
 487 return insertWait(
MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
 488 IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release,
// SIGfx12CacheControl: cache-control lowering for GFX12+ (scope bits in the
// cpol operand rather than separate cache-bypass encodings).
493class SIGfx12CacheControl final :
public SICacheControl {
 // Shared helper: encodes the atomic scope into an instruction's cpol operand.
 515 SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace)
const;
 518 SIGfx12CacheControl(
const GCNSubtarget &ST) : SICacheControl(
ST) {
 // Invariant on supported target/CU-mode combinations (context elided).
 521 assert(!
ST.hasGFX1250Insts() ||
ST.hasGFX13Insts() ||
ST.isCuModeEnabled());
 525 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
 526 bool IsCrossAddrSpaceOrdering, Position Pos,
 530 SIAtomicAddrSpace AddrSpace, Position Pos)
const override;
 533 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
 534 bool IsVolatile,
bool IsNonTemporal,
 535 bool IsLastUse)
const override;
 537 bool finalizeStore(MachineInstr &
MI,
bool Atomic)
const override;
 539 bool handleCooperativeAtomic(MachineInstr &
MI)
const override;
 542 SIAtomicAddrSpace AddrSpace,
bool IsCrossAddrSpaceOrdering,
 543 Position Pos)
const override;
 // Load/store/RMW cache bypass all reduce to setAtomicScope on GFX12.
 547 SIAtomicAddrSpace AddrSpace)
const override {
 548 return setAtomicScope(
MI, Scope, AddrSpace);
 553 SIAtomicAddrSpace AddrSpace)
const override {
 554 return setAtomicScope(
MI, Scope, AddrSpace);
 559 SIAtomicAddrSpace AddrSpace)
const override {
 560 return setAtomicScope(
MI, Scope, AddrSpace);
 563 bool handleNonVolatile(MachineInstr &
MI)
const override;
// SIMemoryLegalizer: the pass driver. Walks machine instructions, classifies
// them via SIMemOpAccess, and expands loads/stores/fences/RMWs using the
// subtarget's SICacheControl.
566class SIMemoryLegalizer final {
 568 const MachineModuleInfo &MMI;
 570 std::unique_ptr<SICacheControl> CC =
nullptr;
 // ATOMIC_FENCE pseudo instructions collected for deletion after expansion.
 573 std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;
 577 bool isAtomicRet(
const MachineInstr &
MI)
const {
 583 bool removeAtomicPseudoMIs();
 // Expansion entry points, one per memory-operation kind.
 587 bool expandLoad(
const SIMemOpInfo &MOI,
 591 bool expandStore(
const SIMemOpInfo &MOI,
 595 bool expandAtomicFence(
const SIMemOpInfo &MOI,
 599 bool expandAtomicCmpxchgOrRmw(
const SIMemOpInfo &MOI,
 606 SIMemoryLegalizer(
const MachineModuleInfo &MMI) : MMI(MMI) {};
 607 bool run(MachineFunction &MF);
 // Legacy pass-manager wrapper around SIMemoryLegalizer.
 614 SIMemoryLegalizerLegacy() : MachineFunctionPass(ID) {}
 616 void getAnalysisUsage(AnalysisUsage &AU)
const override {
 621 StringRef getPassName()
const override {
 625 bool runOnMachineFunction(MachineFunction &MF)
override;
 // Table mapping MMRA suffix names to address spaces, the diagnostic for an
 // unknown name, and getSynchronizeAddrSpaceMD, which folds the
 // "amdgpu-synchronize-as" MMRA tags on a fence into an address-space mask.
 629 {
"global", SIAtomicAddrSpace::GLOBAL},
 630 {
"local", SIAtomicAddrSpace::LDS},
 // Diagnostic body: list the accepted names for an unknown AS string.
 638 OS <<
"unknown address space '" << AS <<
"'; expected one of ";
 640 for (
const auto &[Name, Val] : ASNames)
 641 OS <<
LS <<
'\'' <<
Name <<
'\'';
 649static std::optional<SIAtomicAddrSpace>
 651 static constexpr StringLiteral FenceASPrefix =
"amdgpu-synchronize-as";
 // Accumulate every recognized suffix into Result; diagnose unknown ones.
 657 SIAtomicAddrSpace
Result = SIAtomicAddrSpace::NONE;
 658 for (
const auto &[Prefix, Suffix] : MMRA) {
 659 if (Prefix != FenceASPrefix)
 662 if (
auto It = ASNames.find(Suffix); It != ASNames.end())
 665 diagnoseUnknownMMRAASName(
MI, Suffix);
 // No recognized tags -> no override (std::nullopt presumably; tail elided).
 668 if (Result == SIAtomicAddrSpace::NONE)
 // reportUnsupported: emit a DiagnosticInfoUnsupported at MI's debug location.
 677 const char *Msg)
const {
 679 Func.getContext().diagnose(
 680 DiagnosticInfoUnsupported(Func, Msg,
MI->getDebugLoc()));
// toSIAtomicScope: map a sync-scope ID to (scope, ordering-AS, cross-AS flag).
// First group (one-as variants elided from view) returns cross-AS = true with
// the full ATOMIC mask; second group intersects with InstrAddrSpace and
// returns cross-AS = false. Guard conditions are elided in this fragment.
683std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
 685 SIAtomicAddrSpace InstrAddrSpace)
const {
 687 return std::tuple(SIAtomicScope::SYSTEM, SIAtomicAddrSpace::ATOMIC,
true);
 689 return std::tuple(SIAtomicScope::AGENT, SIAtomicAddrSpace::ATOMIC,
true);
 691 return std::tuple(SIAtomicScope::CLUSTER, SIAtomicAddrSpace::ATOMIC,
true);
 693 return std::tuple(SIAtomicScope::WORKGROUP, SIAtomicAddrSpace::ATOMIC,
 696 return std::tuple(SIAtomicScope::WAVEFRONT, SIAtomicAddrSpace::ATOMIC,
 699 return std::tuple(SIAtomicScope::SINGLETHREAD, SIAtomicAddrSpace::ATOMIC,
 702 return std::tuple(SIAtomicScope::SYSTEM,
 703 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
 705 return std::tuple(SIAtomicScope::AGENT,
 706 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
 708 return std::tuple(SIAtomicScope::CLUSTER,
 709 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
 711 return std::tuple(SIAtomicScope::WORKGROUP,
 712 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
 714 return std::tuple(SIAtomicScope::WAVEFRONT,
 715 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
 717 return std::tuple(SIAtomicScope::SINGLETHREAD,
 718 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
// toSIAtomicAddrSpace: translate a numeric target address space into the
// SIAtomicAddrSpace bitmask (the switch cases themselves are elided here).
722SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(
unsigned AS)
const {
 724 return SIAtomicAddrSpace::FLAT;
 726 return SIAtomicAddrSpace::GLOBAL;
 728 return SIAtomicAddrSpace::LDS;
 730 return SIAtomicAddrSpace::SCRATCH;
 732 return SIAtomicAddrSpace::GDS;
 735 return SIAtomicAddrSpace::GLOBAL;
 // Anything unrecognized falls into OTHER.
 737 return SIAtomicAddrSpace::OTHER;
// Trivial constructor: stash the module info and subtarget references.
740SIMemOpAccess::SIMemOpAccess(
const AMDGPUMachineModuleInfo &MMI_,
 741 const GCNSubtarget &ST)
 742 : MMI(&MMI_),
ST(
ST) {}
// constructFromMIWithMMO: build a SIMemOpInfo by merging all of MI's memory
// operands (address spaces are unioned, non-temporal requires ALL operands to
// be non-temporal, last-use if ANY operand is). Returns std::nullopt after
// diagnosing an unsupported scope or address-space combination.
744std::optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
 746 assert(
MI->getNumMemOperands() > 0);
 751 SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
 // Starts true so the &= fold below means "all operands non-temporal".
 752 bool IsNonTemporal =
true;
 754 bool IsLastUse =
false;
 755 bool IsCooperative =
false;
 759 for (
const auto &MMO :
MI->memoperands()) {
 760 IsNonTemporal &= MMO->isNonTemporal();
 762 IsLastUse |= MMO->getFlags() &
MOLastUse;
 765 toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace());
 // Merge sync scopes across operands; reject non-inclusive combinations.
 767 if (OpOrdering != AtomicOrdering::NotAtomic) {
 768 const auto &IsSyncScopeInclusion =
 770 if (!IsSyncScopeInclusion) {
 771 reportUnsupported(
MI,
 772 "Unsupported non-inclusive atomic synchronization scope");
 776 SSID = *IsSyncScopeInclusion ? SSID : MMO->getSyncScopeID();
 778 assert(MMO->getFailureOrdering() != AtomicOrdering::Release &&
 779 MMO->getFailureOrdering() != AtomicOrdering::AcquireRelease);
 // (Context elided) ordering is normalized to Monotonic in some case.
 790 Ordering = AtomicOrdering::Monotonic;
 792 SIAtomicScope
Scope = SIAtomicScope::NONE;
 793 SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
 794 bool IsCrossAddressSpaceOrdering =
false;
 795 if (Ordering != AtomicOrdering::NotAtomic) {
 796 auto ScopeOrNone = toSIAtomicScope(SSID, InstrAddrSpace);
 798 reportUnsupported(
MI,
"Unsupported atomic synchronization scope");
 801 std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
 // Sanity-check the resulting address-space masks before building the info.
 803 if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
 804 ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace) ||
 805 ((InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) == SIAtomicAddrSpace::NONE)) {
 806 reportUnsupported(
MI,
"Unsupported atomic address space");
 810 return SIMemOpInfo(ST, Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
 811 IsCrossAddressSpaceOrdering, FailureOrdering, IsVolatile,
 812 IsNonTemporal, IsLastUse, IsCooperative);
// getLoadInfo / getStoreInfo: classify pure loads (mayLoad && !mayStore) and
// pure stores (!mayLoad && mayStore). An instruction without memory operands
// gets a conservative default SIMemOpInfo(ST).
815std::optional<SIMemOpInfo>
 819 if (!(
MI->mayLoad() && !
MI->mayStore()))
 823 if (
MI->getNumMemOperands() == 0)
 824 return SIMemOpInfo(ST);
 826 return constructFromMIWithMMO(
MI);
829std::optional<SIMemOpInfo>
 833 if (!(!
MI->mayLoad() &&
MI->mayStore()))
 837 if (
MI->getNumMemOperands() == 0)
 838 return SIMemOpInfo(ST);
 840 return constructFromMIWithMMO(
MI);
// getAtomicFenceInfo: classify ATOMIC_FENCE pseudos, honoring an optional
// "amdgpu-synchronize-as" MMRA override of the ordering address space.
843std::optional<SIMemOpInfo>
 847 if (
MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
 854 auto ScopeOrNone = toSIAtomicScope(SSID, SIAtomicAddrSpace::ATOMIC);
 856 reportUnsupported(
MI,
"Unsupported atomic synchronization scope");
 860 SIAtomicScope
Scope = SIAtomicScope::NONE;
 861 SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
 862 bool IsCrossAddressSpaceOrdering =
false;
 863 std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
 866 if (OrderingAddrSpace != SIAtomicAddrSpace::ATOMIC) {
 871 reportUnsupported(
MI,
"Unsupported atomic address space");
 // MMRA tags, when present, narrow the address spaces the fence orders.
 875 auto SynchronizeAS = getSynchronizeAddrSpaceMD(*
MI);
 877 OrderingAddrSpace = *SynchronizeAS;
 879 return SIMemOpInfo(ST, Ordering, Scope, OrderingAddrSpace,
 880 SIAtomicAddrSpace::ATOMIC, IsCrossAddressSpaceOrdering,
 881 AtomicOrdering::NotAtomic);
// getAtomicCmpxchgOrRmwInfo: classify read-modify-write ops (mayLoad && mayStore).
884std::optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
 888 if (!(
MI->mayLoad() &&
MI->mayStore()))
 892 if (
MI->getNumMemOperands() == 0)
 893 return SIMemOpInfo(ST);
 895 return constructFromMIWithMMO(
MI);
// (Fragment, name elided) another classifier delegating to constructFromMIWithMMO,
// plus a predicate reading thread-private/invariant flags off the first MMO.
898std::optional<SIMemOpInfo>
 905 return constructFromMIWithMMO(
MI);
 913 if (
MI.getNumMemOperands() == 0)
 916 return MMO->getFlags() & (MOThreadPrivate | MachineMemOperand::MOInvariant);
// SICacheControl constructor, the cpol-bit helper, the global-address-space
// predicate, and the generation-dispatch factory.
920SICacheControl::SICacheControl(
const GCNSubtarget &ST) :
ST(
ST) {
 921 TII =
ST.getInstrInfo();
 // OR Bits into the instruction's cpol operand (guard lines elided).
 927 unsigned Bits)
const {
 928 MachineOperand *CPol =
TII->getNamedOperand(*
MI, AMDGPU::OpName::cpol);
 932 CPol->setImm(
CPol->getImm() | Bits);
// True iff AS touches the global address space; asserts that scratch was
// already rewritten to flat when globally-addressable scratch is enabled.
936bool SICacheControl::canAffectGlobalAddrSpace(SIAtomicAddrSpace AS)
const {
 937 assert((!
ST.hasGloballyAddressableScratch() ||
 938 (AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ||
 939 (AS & SIAtomicAddrSpace::SCRATCH) == SIAtomicAddrSpace::NONE) &&
 940 "scratch instructions should already be replaced by flat "
 941 "instructions if GloballyAddressableScratch is enabled");
 942 return (AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE;
// Factory: Gfx6 handles everything before GFX10, Gfx10 up to (not incl.)
// GFX12, Gfx12 for the rest.
946std::unique_ptr<SICacheControl> SICacheControl::create(
const GCNSubtarget &ST) {
 947 GCNSubtarget::Generation Generation =
ST.getGeneration();
 948 if (Generation < AMDGPUSubtarget::GFX10)
 949 return std::make_unique<SIGfx6CacheControl>(ST);
 950 if (Generation < AMDGPUSubtarget::GFX12)
 951 return std::make_unique<SIGfx10CacheControl>(ST);
 952 return std::make_unique<SIGfx12CacheControl>(ST);
// Gfx6 cache-bypass hooks. Per-scope switch bodies are heavily elided; the
// visible structure shows GFX940/GFX90A special-casing and that only
// global-affecting address spaces get bypass bits.
955bool SIGfx6CacheControl::enableLoadCacheBypass(
 958 SIAtomicAddrSpace AddrSpace)
const {
 961 if (!canAffectGlobalAddrSpace(AddrSpace)) {
 973 case SIAtomicScope::SYSTEM:
 974 if (
ST.hasGFX940Insts()) {
 980 case SIAtomicScope::AGENT:
 981 if (
ST.hasGFX940Insts()) {
 990 case SIAtomicScope::WORKGROUP:
 991 if (
ST.hasGFX940Insts()) {
 998 }
else if (
ST.hasGFX90AInsts()) {
 1003 if (
ST.isTgSplitEnabled())
 // Wavefront/singlethread scopes need no cache bypass.
 1007 case SIAtomicScope::WAVEFRONT:
 1008 case SIAtomicScope::SINGLETHREAD:
// Store bypass: only GFX940 has store cache-policy bits to set here.
1018bool SIGfx6CacheControl::enableStoreCacheBypass(
 1020 SIAtomicScope Scope,
 1021 SIAtomicAddrSpace AddrSpace)
const {
 1029 if (
ST.hasGFX940Insts() && canAffectGlobalAddrSpace(AddrSpace)) {
 1031 case SIAtomicScope::SYSTEM:
 1035 case SIAtomicScope::AGENT:
 1039 case SIAtomicScope::WORKGROUP:
 1043 case SIAtomicScope::WAVEFRONT:
 1044 case SIAtomicScope::SINGLETHREAD:
// RMW bypass: likewise GFX940-only; system scope is the interesting case.
1062bool SIGfx6CacheControl::enableRMWCacheBypass(
 1064 SIAtomicScope Scope,
 1065 SIAtomicAddrSpace AddrSpace)
const {
 1075 if (
ST.hasGFX940Insts() && canAffectGlobalAddrSpace(AddrSpace)) {
 1077 case SIAtomicScope::SYSTEM:
 1081 case SIAtomicScope::AGENT:
 1082 case SIAtomicScope::WORKGROUP:
 1083 case SIAtomicScope::WAVEFRONT:
 1084 case SIAtomicScope::SINGLETHREAD:
// Gfx6 volatile/non-temporal handling: only plain loads/stores are expected;
// volatile ops get cache-policy bits plus a trailing system-scope wait so the
// access completes before later memory operations.
1098bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal(
 1100 bool IsVolatile,
bool IsNonTemporal,
bool IsLastUse =
false)
const {
 1110 assert(
Op == SIMemOp::LOAD ||
Op == SIMemOp::STORE);
 1115 if (
ST.hasGFX940Insts()) {
 1118 }
else if (
Op == SIMemOp::LOAD) {
 // Volatile: wait after the access at system scope (Unordered => plain wait).
 1130 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
 1131 Position::AFTER, AtomicOrdering::Unordered,
 1137 if (IsNonTemporal) {
 1138 if (
ST.hasGFX940Insts()) {
 // Gfx6 insertWait: build an S_WAITCNT covering vmcnt/lgkmcnt as required by
 // the scope and address spaces. Workgroup scope is widened to agent when
 // tgsplit makes LDS non-coherent across the split workgroup.
 1152 SIAtomicScope Scope,
 1153 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
 1154 bool IsCrossAddrSpaceOrdering, Position Pos,
 1156 bool AtomicsOnly)
const {
 1159 MachineBasicBlock &
MBB = *
MI->getParent();
 1162 if (Pos == Position::AFTER)
 1166 if (
ST.hasGFX90AInsts() &&
ST.isTgSplitEnabled()) {
 1174 if (((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH |
 1175 SIAtomicAddrSpace::GDS)) != SIAtomicAddrSpace::NONE) &&
 1176 (Scope == SIAtomicScope::WORKGROUP)) {
 1178 Scope = SIAtomicScope::AGENT;
 // With tgsplit, LDS state needs no wait here — drop it from the mask.
 1182 AddrSpace &= ~SIAtomicAddrSpace
::LDS;
 1186 bool LGKMCnt =
false;
 // Global/scratch traffic: vmcnt needed at agent scope and wider.
 1188 if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
 1189 SIAtomicAddrSpace::NONE) {
 1191 case SIAtomicScope::SYSTEM:
 1192 case SIAtomicScope::AGENT:
 1195 case SIAtomicScope::WORKGROUP:
 1196 case SIAtomicScope::WAVEFRONT:
 1197 case SIAtomicScope::SINGLETHREAD:
 // LDS: lgkmcnt only when ordering crosses address spaces.
 1206 if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
 1208 case SIAtomicScope::SYSTEM:
 1209 case SIAtomicScope::AGENT:
 1210 case SIAtomicScope::WORKGROUP:
 1217 LGKMCnt |= IsCrossAddrSpaceOrdering;
 1219 case SIAtomicScope::WAVEFRONT:
 1220 case SIAtomicScope::SINGLETHREAD:
 // GDS: same cross-address-space rule, but only at agent scope or wider.
 1229 if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
 1231 case SIAtomicScope::SYSTEM:
 1232 case SIAtomicScope::AGENT:
 1239 LGKMCnt |= IsCrossAddrSpaceOrdering;
 1241 case SIAtomicScope::WORKGROUP:
 1242 case SIAtomicScope::WAVEFRONT:
 1243 case SIAtomicScope::SINGLETHREAD:
 // Emit a single combined S_WAITCNT when anything must be waited on.
 1252 if (VMCnt || LGKMCnt) {
 1253 unsigned WaitCntImmediate =
 1259 .
addImm(WaitCntImmediate);
 1267 Scope == SIAtomicScope::WORKGROUP &&
 1268 (AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
 1273 if (Pos == Position::AFTER)
 // (Stray fragment) predicate on OS type — context elided; verify in file.
 1282 return !ST.isAmdPalOS() && !ST.isMesa3DOS();
 // Gfx6 insertAcquire: emit the cache-invalidate (BUFFER_WBINVL1[_VOL] or the
 // GFX90A/GFX940 equivalents — interiors elided) appropriate for the scope.
 // InsertCacheInv is the cl::opt escape hatch seen at the top of the file.
 1286 SIAtomicScope Scope,
 1287 SIAtomicAddrSpace AddrSpace,
 1288 Position Pos)
const {
 1289 if (!InsertCacheInv)
 1294 MachineBasicBlock &
MBB = *
MI->getParent();
 1297 if (Pos == Position::AFTER)
 // Pick the volatile variant when available (condition elided).
 1301 ? AMDGPU::BUFFER_WBINVL1_VOL
 1302 : AMDGPU::BUFFER_WBINVL1;
 1304 if (canAffectGlobalAddrSpace(AddrSpace)) {
 1306 case SIAtomicScope::SYSTEM:
 1307 if (
ST.hasGFX940Insts()) {
 1323 if (
ST.hasGFX90AInsts()) {
 1338 case SIAtomicScope::AGENT:
 1339 if (
ST.hasGFX940Insts()) {
 // Workgroup scope only needs an invalidate when tgsplit is enabled.
 1354 case SIAtomicScope::WORKGROUP:
 1355 if (
ST.isTgSplitEnabled()) {
 1356 if (
ST.hasGFX940Insts()) {
 1375 }
else if (
ST.hasGFX90AInsts()) {
 1381 case SIAtomicScope::WAVEFRONT:
 1382 case SIAtomicScope::SINGLETHREAD:
 1399 if (Pos == Position::AFTER)
 // Gfx6 insertRelease: on GFX90A+ a writeback instruction is emitted for
 // system/agent scope; in all cases a release-ordered insertWait finishes
 // the sequence.
 1406 SIAtomicScope Scope,
 1407 SIAtomicAddrSpace AddrSpace,
 1408 bool IsCrossAddrSpaceOrdering,
 1409 Position Pos)
const {
 1412 if (
ST.hasGFX90AInsts()) {
 1413 MachineBasicBlock &
MBB = *
MI->getParent();
 1416 if (Pos == Position::AFTER)
 1419 if (canAffectGlobalAddrSpace(AddrSpace)) {
 1421 case SIAtomicScope::SYSTEM:
 1433 case SIAtomicScope::AGENT:
 1434 if (
ST.hasGFX940Insts()) {
 1445 case SIAtomicScope::WORKGROUP:
 1446 case SIAtomicScope::WAVEFRONT:
 1447 case SIAtomicScope::SINGLETHREAD:
 1457 if (Pos == Position::AFTER)
 // Release always ends with a wait ordering both loads and stores.
 1463 Changed |= insertWait(
MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
 1464 IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release,
// Gfx10 load cache bypass: system/agent scope sets GLC-style bits (elided);
// workgroup scope only matters when CUs are not in CU mode (shared L0).
1470bool SIGfx10CacheControl::enableLoadCacheBypass(
 1472 SIAtomicAddrSpace AddrSpace)
const {
 1476 if (canAffectGlobalAddrSpace(AddrSpace)) {
 1478 case SIAtomicScope::SYSTEM:
 1479 case SIAtomicScope::AGENT:
 1486 case SIAtomicScope::WORKGROUP:
 1491 if (!
ST.isCuModeEnabled())
 1494 case SIAtomicScope::WAVEFRONT:
 1495 case SIAtomicScope::SINGLETHREAD:
// Gfx10 volatile/non-temporal: volatile loads get bypass bits plus a
// system-scope wait after the access; non-temporal stores get their own bits.
1513bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal(
 1515 bool IsVolatile,
bool IsNonTemporal,
bool IsLastUse =
false)
const {
 1526 assert(
Op == SIMemOp::LOAD ||
Op == SIMemOp::STORE);
 1534 if (
Op == SIMemOp::LOAD) {
 1547 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
 1548 Position::AFTER, AtomicOrdering::Unordered,
 1553 if (IsNonTemporal) {
 1558 if (
Op == SIMemOp::STORE)
 // Gfx10 insertWait: like Gfx6 but vmcnt/vscnt are split per direction
 // (LOAD vs STORE masks) and an S_WAITCNT_VSCNT with a null sgpr operand is
 // used for the store counter at workgroup scope when needed.
 1573 SIAtomicScope Scope,
 1574 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
 1575 bool IsCrossAddrSpaceOrdering,
 1577 bool AtomicsOnly)
const {
 1580 MachineBasicBlock &
MBB = *
MI->getParent();
 1583 if (Pos == Position::AFTER)
 1588 bool LGKMCnt =
false;
 1590 if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
 1591 SIAtomicAddrSpace::NONE) {
 1593 case SIAtomicScope::SYSTEM:
 1594 case SIAtomicScope::AGENT:
 1595 if ((
Op & SIMemOp::LOAD) != SIMemOp::NONE)
 1597 if ((
Op & SIMemOp::STORE) != SIMemOp::NONE)
 1600 case SIAtomicScope::WORKGROUP:
 1610 if ((
Op & SIMemOp::LOAD) != SIMemOp::NONE)
 1612 if ((
Op & SIMemOp::STORE) != SIMemOp::NONE)
 1616 case SIAtomicScope::WAVEFRONT:
 1617 case SIAtomicScope::SINGLETHREAD:
 // LDS: lgkmcnt on cross-address-space ordering, as on Gfx6.
 1626 if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
 1628 case SIAtomicScope::SYSTEM:
 1629 case SIAtomicScope::AGENT:
 1630 case SIAtomicScope::WORKGROUP:
 1637 LGKMCnt |= IsCrossAddrSpaceOrdering;
 1639 case SIAtomicScope::WAVEFRONT:
 1640 case SIAtomicScope::SINGLETHREAD:
 1649 if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
 1651 case SIAtomicScope::SYSTEM:
 1652 case SIAtomicScope::AGENT:
 1659 LGKMCnt |= IsCrossAddrSpaceOrdering;
 1661 case SIAtomicScope::WORKGROUP:
 1662 case SIAtomicScope::WAVEFRONT:
 1663 case SIAtomicScope::SINGLETHREAD:
 1672 if (VMCnt || LGKMCnt) {
 1673 unsigned WaitCntImmediate =
 1679 .
addImm(WaitCntImmediate);
 1687 Scope == SIAtomicScope::WORKGROUP &&
 1688 (AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
 // S_WAITCNT_VSCNT takes an sgpr operand; null register used here.
 1695 .
addReg(AMDGPU::SGPR_NULL, RegState::Undef)
 1700 if (Pos == Position::AFTER)
 // Gfx10 insertAcquire: emit GL0/GL1 invalidates (opcodes elided) by scope;
 // workgroup scope only invalidates when not in CU mode.
 1707 SIAtomicScope Scope,
 1708 SIAtomicAddrSpace AddrSpace,
 1709 Position Pos)
const {
 1710 if (!InsertCacheInv)
 1715 MachineBasicBlock &
MBB = *
MI->getParent();
 1718 if (Pos == Position::AFTER)
 1721 if (canAffectGlobalAddrSpace(AddrSpace)) {
 1723 case SIAtomicScope::SYSTEM:
 1724 case SIAtomicScope::AGENT:
 1732 case SIAtomicScope::WORKGROUP:
 1737 if (!
ST.isCuModeEnabled()) {
 1742 case SIAtomicScope::WAVEFRONT:
 1743 case SIAtomicScope::SINGLETHREAD:
 1758 if (Pos == Position::AFTER)
 // (Fragments) two helpers that fetch the cpol operand before mutating it —
 // presumably the GFX12 TH/scope setters; surrounding lines elided.
 1766 MachineOperand *
CPol =
TII->getNamedOperand(*
MI, OpName::cpol);
 1781 MachineOperand *
CPol =
TII->getNamedOperand(*
MI, OpName::cpol);
// Gfx12 helpers: waits before system-scope stores, and insertWait using the
// split LOADcnt/STOREcnt/DScnt counters introduced on GFX12.
1794bool SIGfx12CacheControl::insertWaitsBeforeSystemScopeStore(
 1798 MachineBasicBlock &
MBB = *
MI->getParent();
 // Image-capable targets need an extra counter waited on (elided).
 1802 if (
ST.hasImageInsts()) {
 1813 SIAtomicScope Scope,
 1814 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
 1815 bool IsCrossAddrSpaceOrdering,
 1817 bool AtomicsOnly)
const {
 1820 MachineBasicBlock &
MBB = *
MI->getParent();
 1823 bool LOADCnt =
false;
 1825 bool STORECnt =
false;
 1827 if (Pos == Position::AFTER)
 1830 if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
 1831 SIAtomicAddrSpace::NONE) {
 1833 case SIAtomicScope::SYSTEM:
 1834 case SIAtomicScope::AGENT:
 1835 case SIAtomicScope::CLUSTER:
 1836 if ((
Op & SIMemOp::LOAD) != SIMemOp::NONE)
 1838 if ((
Op & SIMemOp::STORE) != SIMemOp::NONE)
 1841 case SIAtomicScope::WORKGROUP:
 // Workgroup waits are needed unless CU mode guarantees a shared L0
 // (condition partially elided; GFX1250 special-cased).
 1858 if (!
ST.isCuModeEnabled() ||
ST.hasGFX1250Insts() ||
 1860 if ((
Op & SIMemOp::LOAD) != SIMemOp::NONE)
 1862 if ((
Op & SIMemOp::STORE) != SIMemOp::NONE)
 1866 case SIAtomicScope::WAVEFRONT:
 1867 case SIAtomicScope::SINGLETHREAD:
 1876 if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
 1878 case SIAtomicScope::SYSTEM:
 1879 case SIAtomicScope::AGENT:
 1880 case SIAtomicScope::CLUSTER:
 1881 case SIAtomicScope::WORKGROUP:
 1888 DSCnt |= IsCrossAddrSpaceOrdering;
 1890 case SIAtomicScope::WAVEFRONT:
 1891 case SIAtomicScope::SINGLETHREAD:
 // Image/sample counters only matter for non-atomic-only waits.
 1912 if (!AtomicsOnly &&
ST.hasImageInsts()) {
 1930 if (Pos == Position::AFTER)
 // Gfx12 insertAcquire: GLOBAL_INV-style invalidates scoped via cpol (opcodes
 // elided); some targets require a trailing wait after INV/WBL2.
 1937 SIAtomicScope Scope,
 1938 SIAtomicAddrSpace AddrSpace,
 1939 Position Pos)
const {
 1940 if (!InsertCacheInv)
 1943 MachineBasicBlock &
MBB = *
MI->getParent();
 1952 if (!canAffectGlobalAddrSpace(AddrSpace))
 1957 case SIAtomicScope::SYSTEM:
 1960 case SIAtomicScope::AGENT:
 1963 case SIAtomicScope::CLUSTER:
 1966 case SIAtomicScope::WORKGROUP:
 1974 if (
ST.isCuModeEnabled())
 1979 case SIAtomicScope::WAVEFRONT:
 1980 case SIAtomicScope::SINGLETHREAD:
 1987 if (Pos == Position::AFTER)
 1992 if (Pos == Position::AFTER)
 // Hardware that requires a waitcnt after INV at wide scope gets one here.
 1997 if (
ST.hasINVWBL2WaitCntRequirement() && Scope > SIAtomicScope::CLUSTER) {
 1998 insertWait(
MI, Scope, AddrSpace, SIMemOp::LOAD,
 1999 false, Pos, AtomicOrdering::Acquire,
 2002 if (Pos == Position::AFTER)
 // Gfx12 insertRelease: optional GLOBAL_WB (scope-dependent), then the
 // closing release-ordered wait.
 2010 SIAtomicScope Scope,
 2011 SIAtomicAddrSpace AddrSpace,
 2012 bool IsCrossAddrSpaceOrdering,
 2013 Position Pos)
const {
 2016 MachineBasicBlock &
MBB = *
MI->getParent();
 2023 if (canAffectGlobalAddrSpace(AddrSpace)) {
 2024 if (Pos == Position::AFTER)
 2033 std::optional<AMDGPU::CPol::CPol> NeedsWB;
 2035 case SIAtomicScope::SYSTEM:
 2038 case SIAtomicScope::AGENT:
 2040 if (
ST.hasGFX1250Insts())
 2043 case SIAtomicScope::CLUSTER:
 2044 case SIAtomicScope::WORKGROUP:
 2046 case SIAtomicScope::WAVEFRONT:
 2047 case SIAtomicScope::SINGLETHREAD:
 2058 if (
ST.hasINVWBL2WaitCntRequirement())
 2059 insertWait(
MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
 2061 AtomicOrdering::Release,
 2068 if (Pos == Position::AFTER)
 2075 Changed |= insertWait(
MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
 2076 IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release,
// Gfx12 handleNonVolatile (GFX1250-only cpol tweak) and the volatile/
// non-temporal hook, which on some targets also needs an xcnt wait.
2082bool SIGfx12CacheControl::handleNonVolatile(MachineInstr &
MI)
const {
 2084 if (!
ST.hasGFX1250Insts())
 2086 MachineOperand *
CPol =
TII->getNamedOperand(
MI, OpName::cpol);
2093bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
 2095 bool IsVolatile,
bool IsNonTemporal,
bool IsLastUse =
false)
const {
 2104 assert(
Op == SIMemOp::LOAD ||
Op == SIMemOp::STORE);
 2111 }
else if (IsNonTemporal) {
 // Some single-access instructions additionally require an XCnt wait
 // (condition partially elided).
 2119 if (
ST.requiresWaitXCntForSingleAccessInstructions() &&
 2121 MachineBasicBlock &
MBB = *
MI->getParent();
 // Volatile path ends with a system-scope wait after the access.
 2131 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
 2132 Position::AFTER, AtomicOrdering::Unordered,
// Gfx12 finalizeStore: post-processing applied to every store (atomic or
// not): optional xcnt wait for atomics, scope fixups via cpol, and waits
// before non-atomic system-scope stores on targets that require them.
2139bool SIGfx12CacheControl::finalizeStore(MachineInstr &
MI,
bool Atomic)
const {
 2140 assert(
MI.mayStore() &&
"Not a Store inst");
 2141 const bool IsRMW = (
MI.mayLoad() &&
MI.mayStore());
 2144 if (Atomic &&
ST.requiresWaitXCntForSingleAccessInstructions() &&
 2146 MachineBasicBlock &
MBB =
MI.getParent();
 2155 MachineOperand *
CPol =
TII->getNamedOperand(
MI, OpName::cpol);
 2161 if (
ST.requiresWaitsBeforeSystemScopeStores() && !Atomic &&
 2163 Changed |= insertWaitsBeforeSystemScopeStore(
MI.getIterator());
// Cooperative atomics: GFX1250-only; marks the cpol operand accordingly.
2168bool SIGfx12CacheControl::handleCooperativeAtomic(MachineInstr &
MI)
const {
 2169 if (!
ST.hasGFX1250Insts())
 2173 MachineOperand *
CPol =
TII->getNamedOperand(
MI, OpName::cpol);
 2174 assert(CPol &&
"No CPol operand?");
 // setAtomicScope: translate SIAtomicScope into cpol scope bits; workgroup
 // only needs a wider scope bit outside CU mode.
 2182 SIAtomicScope Scope,
 2183 SIAtomicAddrSpace AddrSpace)
const {
 2186 if (canAffectGlobalAddrSpace(AddrSpace)) {
 2188 case SIAtomicScope::SYSTEM:
 2191 case SIAtomicScope::AGENT:
 2194 case SIAtomicScope::CLUSTER:
 2197 case SIAtomicScope::WORKGROUP:
 2200 if (!
ST.isCuModeEnabled())
 2203 case SIAtomicScope::WAVEFRONT:
 2204 case SIAtomicScope::SINGLETHREAD:
// Delete the collected ATOMIC_FENCE pseudos once expansion is done.
2222bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
 2223 if (AtomicPseudoMIs.empty())
 2226 for (
auto &
MI : AtomicPseudoMIs)
 2227 MI->eraseFromParent();
 2229 AtomicPseudoMIs.clear();
// expandLoad: atomic loads get cache bypass (monotonic and stronger), a wait
// before seq_cst, and wait+acquire-invalidate after acquire/seq_cst.
// Non-atomic properties are handled via enableVolatileAndOrNonTemporal.
2233bool SIMemoryLegalizer::expandLoad(
const SIMemOpInfo &MOI,
 2239 if (MOI.isAtomic()) {
 2241 if (Order == AtomicOrdering::Monotonic ||
 2242 Order == AtomicOrdering::Acquire ||
 2243 Order == AtomicOrdering::SequentiallyConsistent) {
 2244 Changed |= CC->enableLoadCacheBypass(
MI, MOI.getScope(),
 2245 MOI.getOrderingAddrSpace());
 2250 if (MOI.isCooperative())
 2251 Changed |= CC->handleCooperativeAtomic(*
MI);
 // seq_cst: order against all prior loads AND stores before the access.
 2253 if (Order == AtomicOrdering::SequentiallyConsistent)
 2254 Changed |= CC->insertWait(
MI, MOI.getScope(), MOI.getOrderingAddrSpace(),
 2255 SIMemOp::LOAD | SIMemOp::STORE,
 2256 MOI.getIsCrossAddressSpaceOrdering(),
 2257 Position::BEFORE, Order,
false);
 2259 if (Order == AtomicOrdering::Acquire ||
 2260 Order == AtomicOrdering::SequentiallyConsistent) {
 // Acquire: wait for the load itself, then invalidate caches.
 2263 CC->insertWait(
MI, MOI.getScope(), MOI.getInstrAddrSpace(),
 2264 SIMemOp::LOAD, MOI.getIsCrossAddressSpaceOrdering(),
 2265 Position::AFTER, Order,
true);
 2266 Changed |= CC->insertAcquire(
MI, MOI.getScope(),
 2267 MOI.getOrderingAddrSpace(),
 2277 Changed |= CC->enableVolatileAndOrNonTemporal(
 2278 MI, MOI.getInstrAddrSpace(), SIMemOp::LOAD, MOI.isVolatile(),
 2279 MOI.isNonTemporal(), MOI.isLastUse());
// expandStore: atomic stores get cache bypass, a release sequence before the
// access for release/seq_cst, then finalizeStore(Atomic=true). Non-atomic
// stores get volatile/non-temporal handling and finalizeStore(Atomic=false).
2284bool SIMemoryLegalizer::expandStore(
const SIMemOpInfo &MOI,
 // Keep a reference before MI may be advanced (context elided).
 2290 MachineInstr &StoreMI = *
MI;
 2292 if (MOI.isAtomic()) {
 2293 if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
 2294 MOI.getOrdering() == AtomicOrdering::Release ||
 2295 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
 2296 Changed |= CC->enableStoreCacheBypass(
MI, MOI.getScope(),
 2297 MOI.getOrderingAddrSpace());
 2302 if (MOI.isCooperative())
 2303 Changed |= CC->handleCooperativeAtomic(*
MI);
 2305 if (MOI.getOrdering() == AtomicOrdering::Release ||
 2306 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
 2307 Changed |= CC->insertRelease(
MI, MOI.getScope(),
 2308 MOI.getOrderingAddrSpace(),
 2309 MOI.getIsCrossAddressSpaceOrdering(),
 2312 Changed |= CC->finalizeStore(StoreMI,
true);
 2319 Changed |= CC->enableVolatileAndOrNonTemporal(
 2320 MI, MOI.getInstrAddrSpace(), SIMemOp::STORE, MOI.isVolatile(),
 2321 MOI.isNonTemporal());
 2325 Changed |= CC->finalizeStore(StoreMI,
false);
// expandAtomicFence: lower ATOMIC_FENCE into wait / release / acquire
// sequences, then queue the pseudo for deletion.
2329bool SIMemoryLegalizer::expandAtomicFence(
const SIMemOpInfo &MOI,
 2331 assert(
MI->getOpcode() == AMDGPU::ATOMIC_FENCE);
 2333 AtomicPseudoMIs.push_back(
MI);
 2336 const SIAtomicAddrSpace OrderingAddrSpace = MOI.getOrderingAddrSpace();
 2338 if (MOI.isAtomic()) {
 // Pure acquire fence: atomics-only wait before the (removed) fence point.
 2340 if (Order == AtomicOrdering::Acquire) {
 2342 Changed |= CC->insertWait(
MI, MOI.getScope(), OrderingAddrSpace,
 2343 SIMemOp::LOAD | SIMemOp::STORE,
 2344 MOI.getIsCrossAddressSpaceOrdering(),
 2345 Position::BEFORE, Order,
true);
 2348 if (Order == AtomicOrdering::Release ||
 2349 Order == AtomicOrdering::AcquireRelease ||
 2350 Order == AtomicOrdering::SequentiallyConsistent)
 2358 Changed |= CC->insertRelease(
MI, MOI.getScope(), OrderingAddrSpace,
 2359 MOI.getIsCrossAddressSpaceOrdering(),
 2367 if (Order == AtomicOrdering::Acquire ||
 2368 Order == AtomicOrdering::AcquireRelease ||
 2369 Order == AtomicOrdering::SequentiallyConsistent)
 2370 Changed |= CC->insertAcquire(
MI, MOI.getScope(), OrderingAddrSpace,
// expandAtomicCmpxchgOrRmw: RMW/cmpxchg expansion — bypass for all atomic
// orderings, release before for release-like success/failure orderings,
// wait+acquire after for acquire-like ones. The post-wait direction depends
// on whether the RMW returns a value (isAtomicRet).
2379bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(
const SIMemOpInfo &MOI,
 2384 MachineInstr &RMWMI = *
MI;
 2386 if (MOI.isAtomic()) {
 2388 if (Order == AtomicOrdering::Monotonic ||
 2389 Order == AtomicOrdering::Acquire || Order == AtomicOrdering::Release ||
 2390 Order == AtomicOrdering::AcquireRelease ||
 2391 Order == AtomicOrdering::SequentiallyConsistent) {
 2392 Changed |= CC->enableRMWCacheBypass(
MI, MOI.getScope(),
 2393 MOI.getInstrAddrSpace());
 2396 if (Order == AtomicOrdering::Release ||
 2397 Order == AtomicOrdering::AcquireRelease ||
 2398 Order == AtomicOrdering::SequentiallyConsistent ||
 2399 MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
 2400 Changed |= CC->insertRelease(
MI, MOI.getScope(),
 2401 MOI.getOrderingAddrSpace(),
 2402 MOI.getIsCrossAddressSpaceOrdering(),
 2405 if (Order == AtomicOrdering::Acquire ||
 2406 Order == AtomicOrdering::AcquireRelease ||
 2407 Order == AtomicOrdering::SequentiallyConsistent ||
 2408 MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
 2409 MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
 2412 CC->insertWait(
MI, MOI.getScope(), MOI.getInstrAddrSpace(),
 2413 isAtomicRet(*
MI) ? SIMemOp::LOAD : SIMemOp::STORE,
 2414 MOI.getIsCrossAddressSpaceOrdering(), Position::AFTER,
 2416 Changed |= CC->insertAcquire(
MI, MOI.getScope(),
 2417 MOI.getOrderingAddrSpace(),
 2421 Changed |= CC->finalizeStore(RMWMI,
true);
// expandLDSDMA: LDS DMA only needs volatile/non-temporal/last-use handling.
2428bool SIMemoryLegalizer::expandLDSDMA(
const SIMemOpInfo &MOI,
 2440 return CC->enableVolatileAndOrNonTemporal(
 2441 MI, MOI.getInstrAddrSpace(), OpKind, MOI.isVolatile(),
 2442 MOI.isNonTemporal(), MOI.isLastUse());
2445bool SIMemoryLegalizerLegacy::runOnMachineFunction(MachineFunction &MF) {
2446 const MachineModuleInfo &MMI =
2447 getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
2448 return SIMemoryLegalizer(MMI).run(MF);
2455 .getCachedResult<MachineModuleAnalysis>(
2457 assert(MMI &&
"MachineModuleAnalysis must be available");
2458 if (!SIMemoryLegalizer(MMI->getMMI()).run(MF))
2468 CC = SICacheControl::create(ST);
2470 for (
auto &
MBB : MF) {
2474 if (
MI->isBundle() &&
MI->mayLoadOrStore()) {
2477 I != E &&
I->isBundledWithPred(); ++
I) {
2478 I->unbundleFromPred();
2481 MO.setIsInternalRead(
false);
2484 MI->eraseFromParent();
2485 MI =
II->getIterator();
2489 if (
const auto &MOI = MOA.getLoadInfo(
MI))
2491 else if (
const auto &MOI = MOA.getStoreInfo(
MI))
2493 else if (
const auto &MOI = MOA.getLDSDMAInfo(
MI))
2495 else if (
const auto &MOI = MOA.getAtomicFenceInfo(
MI))
2497 else if (
const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(
MI))
2498 Changed |= expandAtomicCmpxchgOrRmw(*MOI,
MI);
2506 Changed |= removeAtomicPseudoMIs();
2512char SIMemoryLegalizerLegacy::
ID = 0;
2516 return new SIMemoryLegalizerLegacy();
static std::optional< LoadInfo > getLoadInfo(const MachineInstr &MI)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
Provides AMDGPU specific target descriptions.
AMDGPU Machine Module Info.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Atomic ordering constants.
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
This header defines various interfaces for pass management in LLVM.
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static cl::opt< bool > AmdgcnSkipCacheInvalidations("amdgcn-skip-cache-invalidations", cl::init(false), cl::Hidden, cl::desc("Use this to skip inserting cache invalidating instructions."))
static bool isNonVolatileMemoryAccess(const MachineInstr &MI)
static bool canUseBUFFER_WBINVL1_VOL(const GCNSubtarget &ST)
static const uint32_t IV[8]
SyncScope::ID getWorkgroupSSID() const
SyncScope::ID getWavefrontSSID() const
SyncScope::ID getAgentSSID() const
SyncScope::ID getClusterOneAddressSpaceSSID() const
SyncScope::ID getClusterSSID() const
std::optional< bool > isSyncScopeInclusion(SyncScope::ID A, SyncScope::ID B) const
In the AMDGPU target, synchronization scopes are inclusive, meaning a larger synchronization scope is inclusive of a smaller synchronization scope.
SyncScope::ID getAgentOneAddressSpaceSSID() const
SyncScope::ID getSingleThreadOneAddressSpaceSSID() const
SyncScope::ID getWavefrontOneAddressSpaceSSID() const
SyncScope::ID getSystemOneAddressSpaceSSID() const
SyncScope::ID getWorkgroupOneAddressSpaceSSID() const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Represents analyses that only rely on functions' control flow.
Diagnostic information for unsupported feature in backend.
FunctionPass class - This class is used to implement most global optimizations.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
A helper class to return the specified delimiter string after the first invocation of operator String...
Instructions::iterator instr_iterator
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Representation of each machine instruction.
A description of a memory reference used in the backend.
Ty & getObjFileInfo()
Keep track of various per-module pieces of information for backends that would like to do so.
MachineOperand class - Representation of each machine instruction operand.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
static bool isVMEM(const MachineInstr &MI)
static bool mayWriteLDSThroughDMA(const MachineInstr &MI)
static bool isBUF(const MachineInstr &MI)
static bool isAtomicRet(const MachineInstr &MI)
static bool isAtomic(const MachineInstr &MI)
static bool isLDSDMA(const MachineInstr &MI)
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
StringRef - Represent a constant reference to a string, i.e.
A raw_ostream that writes to an SmallVector or SmallString.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ BUFFER_STRIDED_POINTER
Address space for 192-bit fat buffer pointers with an additional index.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ BUFFER_FAT_POINTER
Address space for 160-bit buffer fat pointers.
@ PRIVATE_ADDRESS
Address space for private memory.
@ BUFFER_RESOURCE
Address space for 128-bit buffer resources.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
bool isGFX10(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt)
Encodes Vmcnt, Expcnt and Lgkmcnt into Waitcnt for given isa Version.
unsigned getVmcntBitMask(const IsaVersion &Version)
unsigned getLgkmcntBitMask(const IsaVersion &Version)
unsigned getExpcntBitMask(const IsaVersion &Version)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
@ System
Synchronized with respect to all concurrently executing threads.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
Scope
Defines the scope in which this symbol should be visible: Default – Visible in the public interface o...
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
OuterAnalysisManagerProxy< ModuleAnalysisManager, MachineFunction > ModuleAnalysisManagerMachineFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
char & SIMemoryLegalizerID
@ LLVM_MARK_AS_BITMASK_ENUM
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
bool isReleaseOrStronger(AtomicOrdering AO)
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
AtomicOrdering getMergedAtomicOrdering(AtomicOrdering AO, AtomicOrdering Other)
Return a single atomic ordering that is at least as strong as both the AO and Other orderings for an ...
static const MachineMemOperand::Flags MOCooperative
Mark the MMO of cooperative load/store atomics.
AtomicOrdering
Atomic ordering for LLVM's memory model.
DWARFExpression::Operation Op
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
FunctionPass * createSIMemoryLegalizerPass()