30 #define DEBUG_TYPE "si-memory-legalizer"
31 #define PASS_NAME "SI Memory Legalizer"
35 cl::desc(
"Use this to skip inserting cache invalidating instructions."));
57 enum class SIAtomicScope {
68 enum class SIAtomicAddrSpace {
/// Value class describing one memory operation: the visible members and
/// accessors cover the atomic scope, the ordering and instruction address
/// spaces, the failure ordering, and the cross-address-space-ordering and
/// non-temporal flags.
/// NOTE(review): this excerpt elides many lines of the class; comments below
/// describe only what is visible.
88 class SIMemOpInfo final {
// SIMemOpAccess is a friend and may therefore use this class's non-public
// members.
91 friend class SIMemOpAccess;
// In-class member defaults visible here: system scope, no cross-address-space
// ordering, not non-temporal.
95 SIAtomicScope
Scope = SIAtomicScope::SYSTEM;
98 bool IsCrossAddressSpaceOrdering =
false;
100 bool IsNonTemporal =
false;
// Constructor parameter list (partly elided). Note that the parameter default
// for IsCrossAddressSpaceOrdering is true, unlike the member default above.
103 SIAtomicScope
Scope = SIAtomicScope::SYSTEM,
104 SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC,
106 bool IsCrossAddressSpaceOrdering =
true,
110 bool IsNonTemporal =
false)
111 : Ordering(Ordering), FailureOrdering(FailureOrdering),
112 Scope(
Scope), OrderingAddrSpace(OrderingAddrSpace),
113 InstrAddrSpace(InstrAddrSpace),
114 IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
116 IsNonTemporal(IsNonTemporal) {
121 !IsCrossAddressSpaceOrdering &&
127 (OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
129 (InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
// When the ordering and instruction address spaces are equal (together with a
// further condition that is not visible in this excerpt), the
// cross-address-space-ordering flag is cleared.
135 if ((OrderingAddrSpace == InstrAddrSpace) &&
137 this->IsCrossAddressSpaceOrdering =
false;
// The chain below tests InstrAddrSpace against address-space masks; the branch
// bodies are elided here.
141 if ((InstrAddrSpace & ~SIAtomicAddrSpace::SCRATCH) ==
144 }
else if ((InstrAddrSpace &
148 }
else if ((InstrAddrSpace &
171 return FailureOrdering;
// Accessors: each body that is visible simply returns the stored member.
176 SIAtomicAddrSpace getInstrAddrSpace()
const {
177 return InstrAddrSpace;
182 SIAtomicAddrSpace getOrderingAddrSpace()
const {
183 return OrderingAddrSpace;
188 bool getIsCrossAddressSpaceOrdering()
const {
189 return IsCrossAddressSpaceOrdering;
// Body of isVolatile() is not visible in this excerpt.
194 bool isVolatile()
const {
200 bool isNonTemporal()
const {
201 return IsNonTemporal;
// Body of isAtomic() is not visible in this excerpt.
206 bool isAtomic()
const {
/// Declares the helpers that translate sync scopes and address spaces into the
/// SIAtomicScope / SIAtomicAddrSpace enums, plus several members returning
/// std::optional<SIMemOpInfo> (their names are elided in this excerpt).
212 class SIMemOpAccess final {
218 const char *
Msg)
const;
// Returns, when engaged, a (scope, ordering address space, bool) tuple for the
// given sync scope ID; the definition shows the bool being unpacked into
// IsCrossAddressSpaceOrdering by callers.
224 std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
225 toSIAtomicScope(
SyncScope::ID SSID, SIAtomicAddrSpace InstrAddrSpace)
const;
// Maps a numeric address space to the SIAtomicAddrSpace enum.
228 SIAtomicAddrSpace toSIAtomicAddrSpace(
unsigned AS)
const;
232 std::optional<SIMemOpInfo>
241 std::optional<SIMemOpInfo>
246 std::optional<SIMemOpInfo>
251 std::optional<SIMemOpInfo>
256 std::optional<SIMemOpInfo>
/// Abstract cache-control interface. Every member visible below that ends in
/// `const = 0` is a pure virtual const member function which subclasses must
/// override; the function names themselves are elided in this excerpt.
260 class SICacheControl {
// Factory: returns the SICacheControl implementation chosen for subtarget ST.
284 static std::unique_ptr<SICacheControl> create(
const GCNSubtarget &
ST);
291 SIAtomicAddrSpace AddrSpace)
const = 0;
298 SIAtomicAddrSpace AddrSpace)
const = 0;
305 SIAtomicAddrSpace AddrSpace)
const = 0;
// Takes the address space, the memory-op kind and the volatile / non-temporal
// flags.
311 SIAtomicAddrSpace AddrSpace,
312 SIMemOp
Op,
bool IsVolatile,
313 bool IsNonTemporal)
const = 0;
// The remaining hooks take a Position argument (call sites in this file
// compare it against Position::AFTER).
324 SIAtomicAddrSpace AddrSpace,
326 bool IsCrossAddrSpaceOrdering,
327 Position Pos)
const = 0;
336 SIAtomicAddrSpace AddrSpace,
337 Position Pos)
const = 0;
347 SIAtomicAddrSpace AddrSpace,
348 bool IsCrossAddrSpaceOrdering,
349 Position Pos)
const = 0;
// Virtual destructor so implementations can be destroyed through a base
// pointer (create() hands out std::unique_ptr<SICacheControl>).
352 virtual ~SICacheControl() =
default;
356 class SIGfx6CacheControl :
public SICacheControl {
377 SIAtomicAddrSpace AddrSpace)
const override;
381 SIAtomicAddrSpace AddrSpace)
const override;
385 SIAtomicAddrSpace AddrSpace)
const override;
388 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
390 bool IsNonTemporal)
const override;
394 SIAtomicAddrSpace AddrSpace,
396 bool IsCrossAddrSpaceOrdering,
397 Position Pos)
const override;
401 SIAtomicAddrSpace AddrSpace,
402 Position Pos)
const override;
406 SIAtomicAddrSpace AddrSpace,
407 bool IsCrossAddrSpaceOrdering,
408 Position Pos)
const override;
411 class SIGfx7CacheControl :
public SIGfx6CacheControl {
418 SIAtomicAddrSpace AddrSpace,
419 Position Pos)
const override;
423 class SIGfx90ACacheControl :
public SIGfx7CacheControl {
430 SIAtomicAddrSpace AddrSpace)
const override;
434 SIAtomicAddrSpace AddrSpace)
const override;
438 SIAtomicAddrSpace AddrSpace)
const override;
441 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
443 bool IsNonTemporal)
const override;
447 SIAtomicAddrSpace AddrSpace,
449 bool IsCrossAddrSpaceOrdering,
450 Position Pos)
const override;
454 SIAtomicAddrSpace AddrSpace,
455 Position Pos)
const override;
459 SIAtomicAddrSpace AddrSpace,
460 bool IsCrossAddrSpaceOrdering,
461 Position Pos)
const override;
464 class SIGfx940CacheControl :
public SIGfx90ACacheControl {
491 SIAtomicAddrSpace AddrSpace)
const override;
495 SIAtomicAddrSpace AddrSpace)
const override;
499 SIAtomicAddrSpace AddrSpace)
const override;
502 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
504 bool IsNonTemporal)
const override;
507 SIAtomicAddrSpace AddrSpace, Position Pos)
const override;
510 SIAtomicAddrSpace AddrSpace,
bool IsCrossAddrSpaceOrdering,
511 Position Pos)
const override;
514 class SIGfx10CacheControl :
public SIGfx7CacheControl {
529 SIAtomicAddrSpace AddrSpace)
const override;
532 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
534 bool IsNonTemporal)
const override;
538 SIAtomicAddrSpace AddrSpace,
540 bool IsCrossAddrSpaceOrdering,
541 Position Pos)
const override;
545 SIAtomicAddrSpace AddrSpace,
546 Position Pos)
const override;
549 class SIGfx11CacheControl :
public SIGfx10CacheControl {
555 SIAtomicAddrSpace AddrSpace)
const override;
558 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
560 bool IsNonTemporal)
const override;
567 std::unique_ptr<SICacheControl>
CC =
nullptr;
570 std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;
580 bool removeAtomicPseudoMIs();
584 bool expandLoad(
const SIMemOpInfo &MOI,
588 bool expandStore(
const SIMemOpInfo &MOI,
592 bool expandAtomicFence(
const SIMemOpInfo &MOI,
596 bool expandAtomicCmpxchgOrRmw(
const SIMemOpInfo &MOI,
619 const char *
Msg)
const {
620 const Function &
Func =
MI->getParent()->getParent()->getFunction();
622 Func.getContext().diagnose(Diag);
/// Translates a sync scope ID into a (scope, ordering address space,
/// cross-address-space flag) tuple.
/// NOTE(review): the line naming this function and some guarding conditions
/// are elided in this excerpt; the name is taken from the matching declaration.
625 std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
627 SIAtomicAddrSpace InstrAddrSpace)
const {
// Plain scopes: the ordering address space is the full ATOMIC set. For the
// SYSTEM and AGENT returns the third element is visibly `true`; the third
// element of the remaining plain-scope returns is elided here.
629 return std::tuple(SIAtomicScope::SYSTEM, SIAtomicAddrSpace::ATOMIC,
true);
630 if (SSID == MMI->getAgentSSID())
631 return std::tuple(SIAtomicScope::AGENT, SIAtomicAddrSpace::ATOMIC,
true);
632 if (SSID == MMI->getWorkgroupSSID())
633 return std::tuple(SIAtomicScope::WORKGROUP, SIAtomicAddrSpace::ATOMIC,
635 if (SSID == MMI->getWavefrontSSID())
636 return std::tuple(SIAtomicScope::WAVEFRONT, SIAtomicAddrSpace::ATOMIC,
639 return std::tuple(SIAtomicScope::SINGLETHREAD, SIAtomicAddrSpace::ATOMIC,
// "One address space" scopes: the ordering address space is ATOMIC masked by
// the instruction's own address space, and the third element is `false`.
641 if (SSID == MMI->getSystemOneAddressSpaceSSID())
642 return std::tuple(SIAtomicScope::SYSTEM,
643 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
644 if (SSID == MMI->getAgentOneAddressSpaceSSID())
645 return std::tuple(SIAtomicScope::AGENT,
646 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
647 if (SSID == MMI->getWorkgroupOneAddressSpaceSSID())
648 return std::tuple(SIAtomicScope::WORKGROUP,
649 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
650 if (SSID == MMI->getWavefrontOneAddressSpaceSSID())
651 return std::tuple(SIAtomicScope::WAVEFRONT,
652 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
653 if (SSID == MMI->getSingleThreadOneAddressSpaceSSID())
654 return std::tuple(SIAtomicScope::SINGLETHREAD,
655 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
/// Maps the numeric address space AS to a SIAtomicAddrSpace value. The
/// conditions selecting each result are elided in this excerpt; the visible
/// results are SCRATCH, GDS and OTHER.
659 SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(
unsigned AS)
const {
667 return SIAtomicAddrSpace::SCRATCH;
669 return SIAtomicAddrSpace::GDS;
671 return SIAtomicAddrSpace::OTHER;
/// Builds a SIMemOpInfo by combining the memory operands of MI.
/// NOTE(review): several lines are elided in this excerpt; comments describe
/// only the visible statements.
678 std::optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
// Precondition: MI has at least one memory operand.
680 assert(
MI->getNumMemOperands() > 0);
// Starts true and is AND-ed with every operand, so the result is non-temporal
// only if all memory operands are non-temporal.
686 bool IsNonTemporal =
true;
691 for (
const auto &MMO :
MI->memoperands()) {
692 IsNonTemporal &= MMO->isNonTemporal();
695 toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace());
// Merge this operand's sync scope into SSID. An empty inclusion result is
// reported as unsupported; otherwise SSID is kept when the result is true and
// replaced by the operand's scope when it is false.
698 const auto &IsSyncScopeInclusion =
699 MMI->isSyncScopeInclusion(SSID, MMO->getSyncScopeID());
700 if (!IsSyncScopeInclusion) {
701 reportUnsupported(
MI,
702 "Unsupported non-inclusive atomic synchronization scope");
706 SSID = *IsSyncScopeInclusion ? SSID : MMO->getSyncScopeID();
717 bool IsCrossAddressSpaceOrdering =
false;
// Convert the merged sync scope; the tuple is unpacked into Scope,
// OrderingAddrSpace and IsCrossAddressSpaceOrdering below.
719 auto ScopeOrNone = toSIAtomicScope(SSID, InstrAddrSpace);
721 reportUnsupported(
MI,
"Unsupported atomic synchronization scope");
724 std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
// Part of a check that the ordering address space lies within ATOMIC; a
// failing check is reported as an unsupported atomic address space.
727 ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace) ||
729 reportUnsupported(
MI,
"Unsupported atomic address space");
733 return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
734 IsCrossAddressSpaceOrdering, FailureOrdering, IsVolatile,
738 std::optional<SIMemOpInfo>
742 if (!(
MI->mayLoad() && !
MI->mayStore()))
746 if (
MI->getNumMemOperands() == 0)
747 return SIMemOpInfo();
749 return constructFromMIWithMMO(
MI);
752 std::optional<SIMemOpInfo>
756 if (!(!
MI->mayLoad() &&
MI->mayStore()))
760 if (
MI->getNumMemOperands() == 0)
761 return SIMemOpInfo();
763 return constructFromMIWithMMO(
MI);
766 std::optional<SIMemOpInfo>
777 auto ScopeOrNone = toSIAtomicScope(SSID, SIAtomicAddrSpace::ATOMIC);
779 reportUnsupported(
MI,
"Unsupported atomic synchronization scope");
785 bool IsCrossAddressSpaceOrdering =
false;
786 std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
790 ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
791 reportUnsupported(
MI,
"Unsupported atomic address space");
795 return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, SIAtomicAddrSpace::ATOMIC,
/// Produces the SIMemOpInfo for an instruction that both loads and stores.
/// NOTE(review): the statement executed when the first check fails is elided
/// in this excerpt.
799 std::optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
// Only instructions that may both load and store are handled here.
803 if (!(
MI->mayLoad() &&
MI->mayStore()))
// Without memory operands, fall back to a default-constructed SIMemOpInfo.
807 if (
MI->getNumMemOperands() == 0)
808 return SIMemOpInfo();
810 return constructFromMIWithMMO(
MI);
814 TII =
ST.getInstrInfo();
/// Factory selecting the cache-control implementation for subtarget ST.
/// The GFX940 and GFX90A instruction-set checks are tested first, before the
/// generation-based selection.
/// NOTE(review): the conditions guarding the Gfx7, Gfx10 and Gfx11 returns are
/// elided in this excerpt.
830 std::unique_ptr<SICacheControl> SICacheControl::create(
const GCNSubtarget &
ST) {
831 GCNSubtarget::Generation Generation =
ST.getGeneration();
832 if (
ST.hasGFX940Insts())
833 return std::make_unique<SIGfx940CacheControl>(
ST);
834 if (
ST.hasGFX90AInsts())
835 return std::make_unique<SIGfx90ACacheControl>(
ST);
// Southern Islands and anything ordered before it use the Gfx6 implementation.
836 if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
837 return std::make_unique<SIGfx6CacheControl>(
ST);
839 return std::make_unique<SIGfx7CacheControl>(
ST);
841 return std::make_unique<SIGfx10CacheControl>(
ST);
842 return std::make_unique<SIGfx11CacheControl>(
ST);
/// Gfx6 load cache-bypass hook; `Changed` accumulates whether MI was modified.
/// NOTE(review): the switch header, breaks and return are elided in this
/// excerpt.
845 bool SIGfx6CacheControl::enableLoadCacheBypass(
848 SIAtomicAddrSpace AddrSpace)
const {
850 bool Changed =
false;
// System and agent scope: the GLC bit is enabled on MI.
854 case SIAtomicScope::SYSTEM:
855 case SIAtomicScope::AGENT:
858 Changed |= enableGLCBit(
MI);
// No statements are visible in this excerpt for the narrower scopes.
860 case SIAtomicScope::WORKGROUP:
861 case SIAtomicScope::WAVEFRONT:
862 case SIAtomicScope::SINGLETHREAD:
880 bool SIGfx6CacheControl::enableStoreCacheBypass(
883 SIAtomicAddrSpace AddrSpace)
const {
885 bool Changed =
false;
893 bool SIGfx6CacheControl::enableRMWCacheBypass(
896 SIAtomicAddrSpace AddrSpace)
const {
898 bool Changed =
false;
908 bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal(
922 bool Changed =
false;
929 Changed |= enableGLCBit(
MI);
936 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
945 Changed |= enableGLCBit(
MI);
946 Changed |= enableSLCBit(
MI);
955 SIAtomicAddrSpace AddrSpace,
957 bool IsCrossAddrSpaceOrdering,
958 Position Pos)
const {
959 bool Changed =
false;
964 if (Pos == Position::AFTER)
968 bool LGKMCnt =
false;
973 case SIAtomicScope::SYSTEM:
974 case SIAtomicScope::AGENT:
977 case SIAtomicScope::WORKGROUP:
978 case SIAtomicScope::WAVEFRONT:
979 case SIAtomicScope::SINGLETHREAD:
990 case SIAtomicScope::SYSTEM:
991 case SIAtomicScope::AGENT:
992 case SIAtomicScope::WORKGROUP:
999 LGKMCnt |= IsCrossAddrSpaceOrdering;
1001 case SIAtomicScope::WAVEFRONT:
1002 case SIAtomicScope::SINGLETHREAD:
1013 case SIAtomicScope::SYSTEM:
1014 case SIAtomicScope::AGENT:
1021 LGKMCnt |= IsCrossAddrSpaceOrdering;
1023 case SIAtomicScope::WORKGROUP:
1024 case SIAtomicScope::WAVEFRONT:
1025 case SIAtomicScope::SINGLETHREAD:
1034 if (VMCnt || LGKMCnt) {
1035 unsigned WaitCntImmediate =
1044 if (Pos == Position::AFTER)
1051 SIAtomicScope
Scope,
1052 SIAtomicAddrSpace AddrSpace,
1053 Position Pos)
const {
1054 if (!InsertCacheInv)
1057 bool Changed =
false;
1062 if (Pos == Position::AFTER)
1067 case SIAtomicScope::SYSTEM:
1068 case SIAtomicScope::AGENT:
1072 case SIAtomicScope::WORKGROUP:
1073 case SIAtomicScope::WAVEFRONT:
1074 case SIAtomicScope::SINGLETHREAD:
1089 if (Pos == Position::AFTER)
1096 SIAtomicScope
Scope,
1097 SIAtomicAddrSpace AddrSpace,
1098 bool IsCrossAddrSpaceOrdering,
1099 Position Pos)
const {
1101 IsCrossAddrSpaceOrdering, Pos);
1105 SIAtomicScope
Scope,
1106 SIAtomicAddrSpace AddrSpace,
1107 Position Pos)
const {
1108 if (!InsertCacheInv)
1111 bool Changed =
false;
1119 ? AMDGPU::BUFFER_WBINVL1
1120 : AMDGPU::BUFFER_WBINVL1_VOL;
1122 if (Pos == Position::AFTER)
1127 case SIAtomicScope::SYSTEM:
1128 case SIAtomicScope::AGENT:
1132 case SIAtomicScope::WORKGROUP:
1133 case SIAtomicScope::WAVEFRONT:
1134 case SIAtomicScope::SINGLETHREAD:
1149 if (Pos == Position::AFTER)
/// Gfx90A load cache-bypass hook; `Changed` accumulates whether MI was
/// modified.
/// NOTE(review): the switch header, breaks and return are elided in this
/// excerpt.
1155 bool SIGfx90ACacheControl::enableLoadCacheBypass(
1157 SIAtomicScope
Scope,
1158 SIAtomicAddrSpace AddrSpace)
const {
1160 bool Changed =
false;
// System and agent scope: the GLC bit is enabled on MI.
1164 case SIAtomicScope::SYSTEM:
1165 case SIAtomicScope::AGENT:
1168 Changed |= enableGLCBit(
MI);
// Workgroup scope: GLC is enabled only when the subtarget reports tg-split
// mode.
1170 case SIAtomicScope::WORKGROUP:
1175 if (
ST.isTgSplitEnabled())
1176 Changed |= enableGLCBit(
MI);
// No statements are visible in this excerpt for the narrower scopes.
1178 case SIAtomicScope::WAVEFRONT:
1179 case SIAtomicScope::SINGLETHREAD:
1197 bool SIGfx90ACacheControl::enableStoreCacheBypass(
1199 SIAtomicScope
Scope,
1200 SIAtomicAddrSpace AddrSpace)
const {
1202 bool Changed =
false;
1206 case SIAtomicScope::SYSTEM:
1207 case SIAtomicScope::AGENT:
1211 case SIAtomicScope::WORKGROUP:
1212 case SIAtomicScope::WAVEFRONT:
1213 case SIAtomicScope::SINGLETHREAD:
1232 bool SIGfx90ACacheControl::enableRMWCacheBypass(
1234 SIAtomicScope
Scope,
1235 SIAtomicAddrSpace AddrSpace)
const {
1237 bool Changed =
false;
1241 case SIAtomicScope::SYSTEM:
1242 case SIAtomicScope::AGENT:
1247 case SIAtomicScope::WORKGROUP:
1248 case SIAtomicScope::WAVEFRONT:
1249 case SIAtomicScope::SINGLETHREAD:
1260 bool SIGfx90ACacheControl::enableVolatileAndOrNonTemporal(
1274 bool Changed =
false;
1281 Changed |= enableGLCBit(
MI);
1288 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
1294 if (IsNonTemporal) {
1297 Changed |= enableGLCBit(
MI);
1298 Changed |= enableSLCBit(
MI);
1306 SIAtomicScope
Scope,
1307 SIAtomicAddrSpace AddrSpace,
1309 bool IsCrossAddrSpaceOrdering,
1310 Position Pos)
const {
1311 if (
ST.isTgSplitEnabled()) {
1321 (
Scope == SIAtomicScope::WORKGROUP)) {
1323 Scope = SIAtomicScope::AGENT;
1327 AddrSpace &= ~SIAtomicAddrSpace
::LDS;
1329 return SIGfx7CacheControl::insertWait(
MI,
Scope, AddrSpace,
Op,
1330 IsCrossAddrSpaceOrdering, Pos);
1334 SIAtomicScope
Scope,
1335 SIAtomicAddrSpace AddrSpace,
1336 Position Pos)
const {
1337 if (!InsertCacheInv)
1340 bool Changed =
false;
1345 if (Pos == Position::AFTER)
1350 case SIAtomicScope::SYSTEM:
1362 case SIAtomicScope::AGENT:
1365 case SIAtomicScope::WORKGROUP:
1370 if (
ST.isTgSplitEnabled()) {
1372 Scope = SIAtomicScope::AGENT;
1375 case SIAtomicScope::WAVEFRONT:
1376 case SIAtomicScope::SINGLETHREAD:
1391 if (Pos == Position::AFTER)
1394 Changed |= SIGfx7CacheControl::insertAcquire(
MI,
Scope, AddrSpace, Pos);
1400 SIAtomicScope
Scope,
1401 SIAtomicAddrSpace AddrSpace,
1402 bool IsCrossAddrSpaceOrdering,
1403 Position Pos)
const {
1404 bool Changed =
false;
1409 if (Pos == Position::AFTER)
1414 case SIAtomicScope::SYSTEM:
1428 case SIAtomicScope::AGENT:
1429 case SIAtomicScope::WORKGROUP:
1430 case SIAtomicScope::WAVEFRONT:
1431 case SIAtomicScope::SINGLETHREAD:
1439 if (Pos == Position::AFTER)
1443 SIGfx7CacheControl::insertRelease(
MI,
Scope, AddrSpace,
1444 IsCrossAddrSpaceOrdering, Pos);
/// Gfx940 load cache-bypass hook: selects SC0/SC1 bits per scope; `Changed`
/// accumulates whether MI was modified.
/// NOTE(review): the switch header, breaks, any guarding conditions and the
/// return are elided in this excerpt.
1449 bool SIGfx940CacheControl::enableLoadCacheBypass(
1451 SIAtomicAddrSpace AddrSpace)
const {
1453 bool Changed =
false;
// System scope: both SC0 and SC1 are enabled.
1457 case SIAtomicScope::SYSTEM:
1459 Changed |= enableSC0Bit(
MI);
1460 Changed |= enableSC1Bit(
MI);
// Agent scope: SC1 is enabled.
1462 case SIAtomicScope::AGENT:
1464 Changed |= enableSC1Bit(
MI);
// Workgroup scope: SC0 is enabled.
1466 case SIAtomicScope::WORKGROUP:
1472 Changed |= enableSC0Bit(
MI);
// No statements are visible in this excerpt for the narrower scopes.
1474 case SIAtomicScope::WAVEFRONT:
1475 case SIAtomicScope::SINGLETHREAD:
/// Gfx940 store cache-bypass hook: selects SC0/SC1 bits per scope; `Changed`
/// accumulates whether MI was modified.
/// NOTE(review): the switch header, breaks and return are elided in this
/// excerpt.
1493 bool SIGfx940CacheControl::enableStoreCacheBypass(
1495 SIAtomicScope
Scope, SIAtomicAddrSpace AddrSpace)
const {
1497 bool Changed =
false;
// System scope: both SC0 and SC1 are enabled.
1501 case SIAtomicScope::SYSTEM:
1503 Changed |= enableSC0Bit(
MI);
1504 Changed |= enableSC1Bit(
MI);
// Agent scope: SC1 is enabled.
1506 case SIAtomicScope::AGENT:
1508 Changed |= enableSC1Bit(
MI);
// Workgroup scope: SC0 is enabled.
1510 case SIAtomicScope::WORKGROUP:
1512 Changed |= enableSC0Bit(
MI);
// No statements are visible in this excerpt for the narrower scopes.
1514 case SIAtomicScope::WAVEFRONT:
1515 case SIAtomicScope::SINGLETHREAD:
/// Gfx940 read-modify-write cache-bypass hook; `Changed` accumulates whether
/// MI was modified.
/// NOTE(review): the switch header, breaks and return are elided in this
/// excerpt.
1533 bool SIGfx940CacheControl::enableRMWCacheBypass(
1535 SIAtomicAddrSpace AddrSpace)
const {
1537 bool Changed =
false;
// System scope: SC1 is enabled.
1541 case SIAtomicScope::SYSTEM:
1543 Changed |= enableSC1Bit(
MI);
// No statements are visible in this excerpt for the remaining scopes.
1545 case SIAtomicScope::AGENT:
1546 case SIAtomicScope::WORKGROUP:
1547 case SIAtomicScope::WAVEFRONT:
1548 case SIAtomicScope::SINGLETHREAD:
1562 bool SIGfx940CacheControl::enableVolatileAndOrNonTemporal(
1576 bool Changed =
false;
1580 Changed |= enableSC0Bit(
MI);
1581 Changed |= enableSC1Bit(
MI);
1588 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
1594 if (IsNonTemporal) {
1595 Changed |= enableNTBit(
MI);
1603 SIAtomicScope
Scope,
1604 SIAtomicAddrSpace AddrSpace,
1605 Position Pos)
const {
1606 if (!InsertCacheInv)
1609 bool Changed =
false;
1614 if (Pos == Position::AFTER)
1619 case SIAtomicScope::SYSTEM:
1633 case SIAtomicScope::AGENT:
1646 case SIAtomicScope::WORKGROUP:
1651 if (
ST.isTgSplitEnabled()) {
1665 case SIAtomicScope::WAVEFRONT:
1666 case SIAtomicScope::SINGLETHREAD:
1682 if (Pos == Position::AFTER)
1689 SIAtomicScope
Scope,
1690 SIAtomicAddrSpace AddrSpace,
1691 bool IsCrossAddrSpaceOrdering,
1692 Position Pos)
const {
1693 bool Changed =
false;
1698 if (Pos == Position::AFTER)
1703 case SIAtomicScope::SYSTEM:
1718 case SIAtomicScope::AGENT:
1728 case SIAtomicScope::WORKGROUP:
1729 case SIAtomicScope::WAVEFRONT:
1730 case SIAtomicScope::SINGLETHREAD:
1740 if (Pos == Position::AFTER)
1746 IsCrossAddrSpaceOrdering, Pos);
/// Gfx10 load cache-bypass hook; `Changed` accumulates whether MI was
/// modified.
/// NOTE(review): the switch header, breaks and return are elided in this
/// excerpt.
1751 bool SIGfx10CacheControl::enableLoadCacheBypass(
1753 SIAtomicScope
Scope,
1754 SIAtomicAddrSpace AddrSpace)
const {
1756 bool Changed =
false;
// System and agent scope: both the GLC and DLC bits are enabled.
1760 case SIAtomicScope::SYSTEM:
1761 case SIAtomicScope::AGENT:
1764 Changed |= enableGLCBit(
MI);
1765 Changed |= enableDLCBit(
MI);
// Workgroup scope: GLC is enabled only when the subtarget is not in CU mode.
1767 case SIAtomicScope::WORKGROUP:
1772 if (!
ST.isCuModeEnabled())
1773 Changed |= enableGLCBit(
MI);
// No statements are visible in this excerpt for the narrower scopes.
1775 case SIAtomicScope::WAVEFRONT:
1776 case SIAtomicScope::SINGLETHREAD:
1794 bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal(
1809 bool Changed =
false;
1816 Changed |= enableGLCBit(
MI);
1817 Changed |= enableDLCBit(
MI);
1825 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
1830 if (IsNonTemporal) {
1836 Changed |= enableGLCBit(
MI);
1837 Changed |= enableSLCBit(
MI);
1846 SIAtomicScope
Scope,
1847 SIAtomicAddrSpace AddrSpace,
1849 bool IsCrossAddrSpaceOrdering,
1850 Position Pos)
const {
1851 bool Changed =
false;
1856 if (Pos == Position::AFTER)
1861 bool LGKMCnt =
false;
1866 case SIAtomicScope::SYSTEM:
1867 case SIAtomicScope::AGENT:
1873 case SIAtomicScope::WORKGROUP:
1879 if (!
ST.isCuModeEnabled()) {
1886 case SIAtomicScope::WAVEFRONT:
1887 case SIAtomicScope::SINGLETHREAD:
1898 case SIAtomicScope::SYSTEM:
1899 case SIAtomicScope::AGENT:
1900 case SIAtomicScope::WORKGROUP:
1907 LGKMCnt |= IsCrossAddrSpaceOrdering;
1909 case SIAtomicScope::WAVEFRONT:
1910 case SIAtomicScope::SINGLETHREAD:
1921 case SIAtomicScope::SYSTEM:
1922 case SIAtomicScope::AGENT:
1929 LGKMCnt |= IsCrossAddrSpaceOrdering;
1931 case SIAtomicScope::WORKGROUP:
1932 case SIAtomicScope::WAVEFRONT:
1933 case SIAtomicScope::SINGLETHREAD:
1942 if (VMCnt || LGKMCnt) {
1943 unsigned WaitCntImmediate =
1959 if (Pos == Position::AFTER)
1966 SIAtomicScope
Scope,
1967 SIAtomicAddrSpace AddrSpace,
1968 Position Pos)
const {
1969 if (!InsertCacheInv)
1972 bool Changed =
false;
1977 if (Pos == Position::AFTER)
1982 case SIAtomicScope::SYSTEM:
1983 case SIAtomicScope::AGENT:
1988 case SIAtomicScope::WORKGROUP:
1993 if (!
ST.isCuModeEnabled()) {
1998 case SIAtomicScope::WAVEFRONT:
1999 case SIAtomicScope::SINGLETHREAD:
2014 if (Pos == Position::AFTER)
/// Gfx11 load cache-bypass hook; `Changed` accumulates whether MI was
/// modified. Unlike the Gfx10 version in this file, only GLC (no DLC) is
/// enabled in the visible system/agent case.
/// NOTE(review): the switch header, breaks and return are elided in this
/// excerpt.
2020 bool SIGfx11CacheControl::enableLoadCacheBypass(
2022 SIAtomicAddrSpace AddrSpace)
const {
2024 bool Changed =
false;
// System and agent scope: the GLC bit is enabled.
2028 case SIAtomicScope::SYSTEM:
2029 case SIAtomicScope::AGENT:
2032 Changed |= enableGLCBit(
MI);
// Workgroup scope: GLC is enabled only when the subtarget is not in CU mode.
2034 case SIAtomicScope::WORKGROUP:
2039 if (!
ST.isCuModeEnabled())
2040 Changed |= enableGLCBit(
MI);
// No statements are visible in this excerpt for the narrower scopes.
2042 case SIAtomicScope::WAVEFRONT:
2043 case SIAtomicScope::SINGLETHREAD:
2061 bool SIGfx11CacheControl::enableVolatileAndOrNonTemporal(
2076 bool Changed =
false;
2083 Changed |= enableGLCBit(
MI);
2086 Changed |= enableDLCBit(
MI);
2093 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
2098 if (IsNonTemporal) {
2104 Changed |= enableGLCBit(
MI);
2105 Changed |= enableSLCBit(
MI);
2108 Changed |= enableDLCBit(
MI);
/// Erases every instruction recorded in AtomicPseudoMIs from its parent and
/// then empties the list.
/// NOTE(review): the return statements are elided in this excerpt.
2115 bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
// Early-out check when nothing was recorded (its statement is elided here).
2116 if (AtomicPseudoMIs.empty())
2119 for (
auto &
MI : AtomicPseudoMIs)
2120 MI->eraseFromParent();
// Drop the now-dangling iterators.
2122 AtomicPseudoMIs.clear();
/// Applies the cache-control hooks required by a load described by MOI;
/// `Changed` accumulates whether any hook modified the code.
/// NOTE(review): some arguments, closing braces and the returns are elided in
/// this excerpt, so the exact nesting of the statements below is not visible.
2126 bool SIMemoryLegalizer::expandLoad(
const SIMemOpInfo &MOI,
2130 bool Changed =
false;
2132 if (MOI.isAtomic()) {
// Monotonic, acquire and seq_cst loads request a cache bypass for the
// operation's scope and ordering address space.
2133 if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
2134 MOI.getOrdering() == AtomicOrdering::Acquire ||
2135 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
2136 Changed |=
CC->enableLoadCacheBypass(
MI, MOI.getScope(),
2137 MOI.getOrderingAddrSpace());
// Seq_cst loads additionally get a wait over the ordering address space.
2140 if (MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
2141 Changed |=
CC->insertWait(
MI, MOI.getScope(),
2142 MOI.getOrderingAddrSpace(),
2144 MOI.getIsCrossAddressSpaceOrdering(),
// Acquire and seq_cst loads get a wait over the instruction's own address
// space followed by an acquire over the ordering address space.
2147 if (MOI.getOrdering() == AtomicOrdering::Acquire ||
2148 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
2149 Changed |=
CC->insertWait(
MI, MOI.getScope(),
2150 MOI.getInstrAddrSpace(),
2152 MOI.getIsCrossAddressSpaceOrdering(),
2154 Changed |=
CC->insertAcquire(
MI, MOI.getScope(),
2155 MOI.getOrderingAddrSpace(),
// Volatile / non-temporal handling, keyed on the instruction address space.
2165 Changed |=
CC->enableVolatileAndOrNonTemporal(
MI, MOI.getInstrAddrSpace(),
2167 MOI.isNonTemporal());
/// Applies the cache-control hooks required by a store described by MOI;
/// `Changed` accumulates whether any hook modified the code.
/// NOTE(review): some arguments, closing braces and the returns are elided in
/// this excerpt, so the exact nesting of the statements below is not visible.
2171 bool SIMemoryLegalizer::expandStore(
const SIMemOpInfo &MOI,
2175 bool Changed =
false;
2177 if (MOI.isAtomic()) {
// Monotonic, release and seq_cst stores request a cache bypass for the
// operation's scope and ordering address space.
2178 if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
2179 MOI.getOrdering() == AtomicOrdering::Release ||
2180 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
2181 Changed |=
CC->enableStoreCacheBypass(
MI, MOI.getScope(),
2182 MOI.getOrderingAddrSpace());
// Release and seq_cst stores get a release over the ordering address space.
2185 if (MOI.getOrdering() == AtomicOrdering::Release ||
2186 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
2187 Changed |=
CC->insertRelease(
MI, MOI.getScope(),
2188 MOI.getOrderingAddrSpace(),
2189 MOI.getIsCrossAddressSpaceOrdering(),
// Volatile / non-temporal handling (leading arguments are elided here).
2198 Changed |=
CC->enableVolatileAndOrNonTemporal(
2200 MOI.isNonTemporal());
/// Handles an atomic fence described by MOI: records the fence instruction
/// for later removal and inserts release / acquire operations as needed.
/// NOTE(review): lines between the first ordering check and the insertRelease
/// call, plus the returns, are elided in this excerpt.
2204 bool SIMemoryLegalizer::expandAtomicFence(
const SIMemOpInfo &MOI,
// Remember the instruction; removeAtomicPseudoMIs() erases everything in
// this list.
2208 AtomicPseudoMIs.push_back(
MI);
2209 bool Changed =
false;
2211 if (MOI.isAtomic()) {
2212 if (MOI.getOrdering() == AtomicOrdering::Acquire ||
2213 MOI.getOrdering() == AtomicOrdering::Release ||
2214 MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
2215 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
2223 Changed |=
CC->insertRelease(
MI, MOI.getScope(),
2224 MOI.getOrderingAddrSpace(),
2225 MOI.getIsCrossAddressSpaceOrdering(),
// Acquire, acq_rel and seq_cst fences get an acquire over the ordering
// address space.
2233 if (MOI.getOrdering() == AtomicOrdering::Acquire ||
2234 MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
2235 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
2236 Changed |=
CC->insertAcquire(
MI, MOI.getScope(),
2237 MOI.getOrderingAddrSpace(),
/// Applies the cache-control hooks required by an atomic cmpxchg / RMW
/// described by MOI; `Changed` accumulates whether any hook modified the code.
/// NOTE(review): some arguments, closing braces and the returns are elided in
/// this excerpt, so the exact nesting of the statements below is not visible.
2246 bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(
const SIMemOpInfo &MOI,
2250 bool Changed =
false;
2252 if (MOI.isAtomic()) {
// Any of the listed orderings requests an RMW cache bypass; note this call
// passes the instruction address space, not the ordering address space.
2253 if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
2254 MOI.getOrdering() == AtomicOrdering::Acquire ||
2255 MOI.getOrdering() == AtomicOrdering::Release ||
2256 MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
2257 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
2258 Changed |=
CC->enableRMWCacheBypass(
MI, MOI.getScope(),
2259 MOI.getInstrAddrSpace());
// Release side: triggered by a release-or-stronger success ordering or a
// seq_cst failure ordering.
2262 if (MOI.getOrdering() == AtomicOrdering::Release ||
2263 MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
2264 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
2265 MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
2266 Changed |=
CC->insertRelease(
MI, MOI.getScope(),
2267 MOI.getOrderingAddrSpace(),
2268 MOI.getIsCrossAddressSpaceOrdering(),
// Acquire side: triggered by an acquiring success ordering or by an acquire /
// seq_cst failure ordering; a wait over the instruction address space is
// followed by an acquire over the ordering address space.
2271 if (MOI.getOrdering() == AtomicOrdering::Acquire ||
2272 MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
2273 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
2274 MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
2275 MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
2276 Changed |=
CC->insertWait(
MI, MOI.getScope(),
2277 MOI.getInstrAddrSpace(),
2280 MOI.getIsCrossAddressSpaceOrdering(),
2282 Changed |=
CC->insertAcquire(
MI, MOI.getScope(),
2283 MOI.getOrderingAddrSpace(),
2294 bool Changed =
false;
2296 SIMemOpAccess MOA(MF);
2299 for (
auto &
MBB : MF) {
2303 if (
MI->isBundle() &&
MI->mayLoadOrStore()) {
2306 I !=
E &&
I->isBundledWithPred(); ++
I) {
2307 I->unbundleFromPred();
2310 MO.setIsInternalRead(
false);
2313 MI->eraseFromParent();
2314 MI = II->getIterator();
2320 if (
const auto &MOI = MOA.getLoadInfo(
MI))
2321 Changed |= expandLoad(*MOI,
MI);
2322 else if (
const auto &MOI = MOA.getStoreInfo(
MI))
2323 Changed |= expandStore(*MOI,
MI);
2324 else if (
const auto &MOI = MOA.getAtomicFenceInfo(
MI))
2325 Changed |= expandAtomicFence(*MOI,
MI);
2326 else if (
const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(
MI))
2327 Changed |= expandAtomicCmpxchgOrRmw(*MOI,
MI);
2331 Changed |= removeAtomicPseudoMIs();
2337 char SIMemoryLegalizer::
ID = 0;
2341 return new SIMemoryLegalizer();