#define DEBUG_TYPE "si-memory-legalizer"
#define PASS_NAME "SI Memory Legalizer"

  FLAT = GLOBAL | LDS | SCRATCH,
  ATOMIC = GLOBAL | LDS | SCRATCH | GDS,
  ALL = GLOBAL | LDS | SCRATCH | GDS | OTHER,
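// These are unions over the single-bit enumerators of SIAtomicAddrSpace
// (elided in this listing). With LLVM_MARK_AS_BITMASK_ENUM the values
// compose under the usual bitwise operators, e.g.:
//   SIAtomicAddrSpace AS = SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::LDS;
//   bool HasLDS = (AS & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE;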
template <uint16_t BitName>
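// The body of enableNamedBit is elided in this listing. A minimal sketch of
// what it does (reconstructed, not verbatim): find the named immediate
// operand and set it to 1, reporting whether the instruction changed.
//   bool enableNamedBit(const MachineBasicBlock::iterator &MI) {
//     int BitIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), BitName);
//     if (BitIdx == -1)
//       return false;                // Instruction has no such bit.
//     MachineOperand &Bit = MI->getOperand(BitIdx);
//     if (Bit.getImm() != 0)
//       return false;                // Bit already set.
//     Bit.setImm(1);
//     return true;
//   }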
class SIMemOpInfo final {
private:
  friend class SIMemOpAccess;
  bool IsCrossAddressSpaceOrdering = false;
  bool IsNonTemporal = false;
  SIMemOpInfo(AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
              SIAtomicScope Scope = SIAtomicScope::SYSTEM,
              SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC,
              SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::ALL,
              bool IsCrossAddressSpaceOrdering = true,
              AtomicOrdering FailureOrdering =
                  AtomicOrdering::SequentiallyConsistent,
              bool IsNonTemporal = false)
      : Ordering(Ordering), FailureOrdering(FailureOrdering),
        Scope(Scope), OrderingAddrSpace(OrderingAddrSpace),
        InstrAddrSpace(InstrAddrSpace),
        IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
        IsNonTemporal(IsNonTemporal) {
    if ((OrderingAddrSpace == InstrAddrSpace) &&
        isPowerOf2_32(uint32_t(InstrAddrSpace)))
      this->IsCrossAddressSpaceOrdering = false;
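    // i.e. when the ordering applies to exactly the single address space the
    // instruction accesses, there is nothing "cross" left to order against.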
  AtomicOrdering getFailureOrdering() const {
    return FailureOrdering;
  }
  SIAtomicAddrSpace getInstrAddrSpace() const {
    return InstrAddrSpace;
  }
  SIAtomicAddrSpace getOrderingAddrSpace() const {
    return OrderingAddrSpace;
  }
  bool getIsCrossAddressSpaceOrdering() const {
    return IsCrossAddressSpaceOrdering;
  }
  bool isNonTemporal() const {
    return IsNonTemporal;
  }
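  // SIMemOpInfo is a plain value summary: SIMemOpAccess builds one per
  // memory instruction, and the expand* routines below consult only these
  // getters rather than re-reading the MachineInstr's memory operands.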
class SIMemOpAccess final {

  void reportUnsupported(const MachineBasicBlock::iterator &MI,
                         const char *Msg) const;
class SICacheControl {

  static std::unique_ptr<SICacheControl> create(const GCNSubtarget &ST);
  virtual bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                          SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                          bool IsCrossAddrSpaceOrdering,
                          Position Pos) const = 0;

  virtual ~SICacheControl() = default;
class SIGfx6CacheControl : public SICacheControl {
protected:
  bool enableGLCBit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit<AMDGPU::OpName::glc>(MI);
  }

  bool enableSLCBit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit<AMDGPU::OpName::slc>(MI);
  }
public:
  SIGfx6CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {};

  bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering,
                  Position Pos) const override;
class SIGfx7CacheControl : public SIGfx6CacheControl {
public:
  SIGfx7CacheControl(const GCNSubtarget &ST) : SIGfx6CacheControl(ST) {};
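// The CI (gfx7) variant only needs to override insertCacheInvalidate: it can
// use BUFFER_WBINVL1_VOL, which SI lacks (see the selection logic below).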
class SIGfx10CacheControl : public SIGfx7CacheControl {
protected:
  bool enableDLCBit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit<AMDGPU::OpName::dlc>(MI);
  }

public:
  SIGfx10CacheControl(const GCNSubtarget &ST, bool CuMode) :
      SIGfx7CacheControl(ST), CuMode(CuMode) {};
  bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering,
                  Position Pos) const override;
};

// ... (members of class SIMemoryLegalizer:)
  std::unique_ptr<SICacheControl> CC = nullptr;
  std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;

  bool removeAtomicPseudoMIs();
  bool expandLoad(const SIMemOpInfo &MOI,
                  MachineBasicBlock::iterator &MI);
  bool expandStore(const SIMemOpInfo &MOI,
                   MachineBasicBlock::iterator &MI);
  bool expandAtomicFence(const SIMemOpInfo &MOI,
                         MachineBasicBlock::iterator &MI);
  bool expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
                                MachineBasicBlock::iterator &MI);
void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI,
                                      const char *Msg) const {
  const Function &Func = MI->getParent()->getParent()->getFunction();
  DiagnosticInfoUnsupported Diag(Func, Msg, MI->getDebugLoc());
  Func.getContext().diagnose(Diag);
}
  if (SSID == SyncScope::System)
    return std::make_tuple(SIAtomicScope::SYSTEM,
                           SIAtomicAddrSpace::ATOMIC, true);
  if (SSID == MMI->getAgentSSID())
    return std::make_tuple(SIAtomicScope::AGENT,
                           SIAtomicAddrSpace::ATOMIC, true);
  if (SSID == MMI->getWorkgroupSSID())
    return std::make_tuple(SIAtomicScope::WORKGROUP,
                           SIAtomicAddrSpace::ATOMIC, true);
  if (SSID == MMI->getWavefrontSSID())
    return std::make_tuple(SIAtomicScope::WAVEFRONT,
                           SIAtomicAddrSpace::ATOMIC, true);
  if (SSID == SyncScope::SingleThread)
    return std::make_tuple(SIAtomicScope::SINGLETHREAD,
                           SIAtomicAddrSpace::ATOMIC, true);
  if (SSID == MMI->getSystemOneAddressSpaceSSID())
    return std::make_tuple(SIAtomicScope::SYSTEM,
                           SIAtomicAddrSpace::ATOMIC & InstrScope, false);
  if (SSID == MMI->getAgentOneAddressSpaceSSID())
    return std::make_tuple(SIAtomicScope::AGENT,
                           SIAtomicAddrSpace::ATOMIC & InstrScope, false);
  if (SSID == MMI->getWorkgroupOneAddressSpaceSSID())
    return std::make_tuple(SIAtomicScope::WORKGROUP,
                           SIAtomicAddrSpace::ATOMIC & InstrScope, false);
  if (SSID == MMI->getWavefrontOneAddressSpaceSSID())
    return std::make_tuple(SIAtomicScope::WAVEFRONT,
                           SIAtomicAddrSpace::ATOMIC & InstrScope, false);
  if (SSID == MMI->getSingleThreadOneAddressSpaceSSID())
    return std::make_tuple(SIAtomicScope::SINGLETHREAD,
                           SIAtomicAddrSpace::ATOMIC & InstrScope, false);
  return None;
  if (AS == AMDGPUAS::PRIVATE_ADDRESS)
    return SIAtomicAddrSpace::SCRATCH;
  if (AS == AMDGPUAS::REGION_ADDRESS)
    return SIAtomicAddrSpace::GDS;

  return SIAtomicAddrSpace::OTHER;
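// The cases above map target address spaces onto the legalizer's
// SIAtomicAddrSpace lattice; the elided earlier cases map flat, global, and
// local memory to FLAT, GLOBAL, and LDS, and anything unrecognized falls
// through to the conservative OTHER.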
  assert(MI->getNumMemOperands() > 0);

  SyncScope::ID SSID = SyncScope::SingleThread;
  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsNonTemporal = true;

  for (const auto &MMO : MI->memoperands()) {
    IsNonTemporal &= MMO->isNonTemporal();
    InstrAddrSpace |=
        toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace());

    if (MMO->isAtomic()) {
      const auto &IsSyncScopeInclusion =
          MMI->isSyncScopeInclusion(SSID, MMO->getSyncScopeID());
      if (!IsSyncScopeInclusion) {
        reportUnsupported(MI,
            "Unsupported non-inclusive atomic synchronization scope");
        return None;
      }

      SSID = IsSyncScopeInclusion.getValue() ? SSID : MMO->getSyncScopeID();
      Ordering =
          isStrongerThan(Ordering, MMO->getOrdering()) ?
              Ordering : MMO->getOrdering();
      FailureOrdering =
          isStrongerThan(FailureOrdering, MMO->getFailureOrdering()) ?
              FailureOrdering : MMO->getFailureOrdering();
    }
  }
  SIAtomicScope Scope = SIAtomicScope::NONE;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  if (Ordering != AtomicOrdering::NotAtomic) {
    auto ScopeOrNone = toSIAtomicScope(SSID, InstrAddrSpace);
    if (!ScopeOrNone) {
      reportUnsupported(MI, "Unsupported atomic synchronization scope");
      return None;
    }
    std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
        ScopeOrNone.getValue();
    if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
        ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
      reportUnsupported(MI, "Unsupported atomic address space");
      return None;
    }
  }
  return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
                     IsCrossAddressSpaceOrdering, FailureOrdering,
                     IsNonTemporal);
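// constructFromMIWithMMO folds every memory operand into one conservative
// summary: the union of the address spaces touched, the strongest success
// and failure orderings seen, and nontemporal only if every MMO agrees.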
  if (!(MI->mayLoad() && !MI->mayStore()))
    return None;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo();

  return constructFromMIWithMMO(MI);
  if (!(!MI->mayLoad() && MI->mayStore()))
    return None;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo();

  return constructFromMIWithMMO(MI);
  auto ScopeOrNone = toSIAtomicScope(SSID, SIAtomicAddrSpace::ATOMIC);
  if (!ScopeOrNone) {
    reportUnsupported(MI, "Unsupported atomic synchronization scope");
    return None;
  }

  SIAtomicScope Scope = SIAtomicScope::NONE;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
      ScopeOrNone.getValue();

  if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
      ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
    reportUnsupported(MI, "Unsupported atomic address space");
    return None;
  }

  return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace,
                     SIAtomicAddrSpace::ATOMIC, IsCrossAddressSpaceOrdering);
  if (!(MI->mayLoad() && MI->mayStore()))
    return None;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo();

  return constructFromMIWithMMO(MI);
std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) {
  GCNSubtarget::Generation Generation = ST.getGeneration();
  if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
    return std::make_unique<SIGfx6CacheControl>(ST);
  if (Generation < AMDGPUSubtarget::GFX10)
    return std::make_unique<SIGfx7CacheControl>(ST);
  return std::make_unique<SIGfx10CacheControl>(ST, ST.isCuModeEnabled());
}
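// The factory keys the cache-control strategy off the subtarget generation:
// SI gets SIGfx6CacheControl, CI through GFX9 share SIGfx7CacheControl, and
// GFX10 additionally threads through whether CU mode is enabled, since that
// decides whether a work-group can span both CUs of a WGP.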
bool SIGfx6CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  assert(MI->mayLoad() && !MI->mayStore());
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      Changed |= enableGLCBit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to bypass at these scopes.
      break;
    }
  }

  return Changed;
}
bool SIGfx6CacheControl::enableNonTemporal(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->mayLoad() ^ MI->mayStore());
  bool Changed = false;

  Changed |= enableGLCBit(MI);
  Changed |= enableSLCBit(MI);

  return Changed;
}
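// SI has no dedicated nontemporal encoding; setting both GLC and SLC is how
// a nontemporal access is expressed for vector memory operations here.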
// SIGfx6CacheControl::insertCacheInvalidate (signature and the MBB/DL
// locals are elided in this listing):
  bool Changed = false;

  if (Pos == Position::AFTER)
    ++MI;

  switch (Scope) {
  case SIAtomicScope::SYSTEM:
  case SIAtomicScope::AGENT:
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBINVL1));
    Changed = true;
    break;
  case SIAtomicScope::WORKGROUP:
  case SIAtomicScope::WAVEFRONT:
  case SIAtomicScope::SINGLETHREAD:
    // No cache to invalidate at these scopes.
    break;
  }

  if (Pos == Position::AFTER)
    --MI;
bool SIGfx6CacheControl::insertWait(
    MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsCrossAddrSpaceOrdering, Position Pos) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  bool VMCnt = false;
  bool LGKMCnt = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      VMCnt |= true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The L1 keeps memory operations in order within a work-group.
      break;
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
      // An LDS waitcnt is only needed when also ordering against global/GDS
      // memory; LDS operations of all waves execute in one total order.
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    }
  }

  if (VMCnt || LGKMCnt) {
    unsigned WaitCntImmediate =
        AMDGPU::encodeWaitcnt(IV, VMCnt ? 0 : getVmcntBitMask(IV),
                              getExpcntBitMask(IV),
                              LGKMCnt ? 0 : getLgkmcntBitMask(IV));
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(WaitCntImmediate);
    Changed = true;
  }

  if (Pos == Position::AFTER)
    --MI;
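// In encodeWaitcnt (IV is the subtarget's IsaVersion member), a field of 0
// means "wait until that counter drains", while passing the counter's full
// bit mask (getVmcntBitMask/getLgkmcntBitMask) leaves it unconstrained;
// expcnt is always left unconstrained here. For example, VMCnt=true with
// LGKMCnt=false yields an "s_waitcnt vmcnt(0)" immediate.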
// SIGfx7CacheControl::insertCacheInvalidate (signature and the MBB/DL
// locals are elided; variable name below is reconstructed):
  bool Changed = false;

  const GCNSubtarget &STM = MBB.getParent()->getSubtarget<GCNSubtarget>();
  const unsigned InvalidateL1 = STM.isAmdPalOS() || STM.isMesa3DOS()
                                    ? AMDGPU::BUFFER_WBINVL1
                                    : AMDGPU::BUFFER_WBINVL1_VOL;

  if (Pos == Position::AFTER)
    ++MI;

  switch (Scope) {
  case SIAtomicScope::SYSTEM:
  case SIAtomicScope::AGENT:
    BuildMI(MBB, MI, DL, TII->get(InvalidateL1));
    Changed = true;
    break;
  case SIAtomicScope::WORKGROUP:
  case SIAtomicScope::WAVEFRONT:
  case SIAtomicScope::SINGLETHREAD:
    break;
  }

  if (Pos == Position::AFTER)
    --MI;
bool SIGfx10CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  assert(MI->mayLoad() && !MI->mayStore());
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      Changed |= enableGLCBit(MI);
      Changed |= enableDLCBit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can execute on either CU of
      // the WGP, so the per-CU L0 must be bypassed. In CU mode all waves of
      // a work-group share one CU (and its L0), so no bypass is needed.
      if (!CuMode)
        Changed |= enableGLCBit(MI);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    }
  }
bool SIGfx10CacheControl::enableNonTemporal(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->mayLoad() ^ MI->mayStore());
  bool Changed = false;

  Changed |= enableSLCBit(MI);

  return Changed;
}
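// Contrast with SIGfx6CacheControl::enableNonTemporal above: on gfx10 the
// SLC bit alone expresses a nontemporal access, while GLC/DLC keep their
// cache-bypass meaning from enableLoadCacheBypass.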
// SIGfx10CacheControl::insertCacheInvalidate (signature and the MBB/DL
// locals are elided in this listing):
  bool Changed = false;

  if (Pos == Position::AFTER)
    ++MI;

  switch (Scope) {
  case SIAtomicScope::SYSTEM:
  case SIAtomicScope::AGENT:
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL1_INV));
    Changed = true;
    break;
  case SIAtomicScope::WORKGROUP:
    // In WGP mode the L0 is per CU, so it must be invalidated when the
    // work-group may span both CUs; in CU mode no invalidation is needed.
    if (!CuMode) {
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
      Changed = true;
    }
    break;
  case SIAtomicScope::WAVEFRONT:
  case SIAtomicScope::SINGLETHREAD:
    break;
  }

  if (Pos == Position::AFTER)
    --MI;
bool SIGfx10CacheControl::insertWait(
    MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsCrossAddrSpaceOrdering, Position Pos) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  bool VMCnt = false;
  bool VSCnt = false;
  bool LGKMCnt = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      VMCnt |= (Op & SIMemOp::LOAD) != SIMemOp::NONE;
      VSCnt |= (Op & SIMemOp::STORE) != SIMemOp::NONE;
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group may run on both CUs, so waits
      // are needed even at work-group scope; not so in CU mode.
      if (!CuMode) {
        VMCnt |= (Op & SIMemOp::LOAD) != SIMemOp::NONE;
        VSCnt |= (Op & SIMemOp::STORE) != SIMemOp::NONE;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    }
  }

  if (VMCnt || LGKMCnt) {
    unsigned WaitCntImmediate =
        AMDGPU::encodeWaitcnt(IV, VMCnt ? 0 : getVmcntBitMask(IV),
                              getExpcntBitMask(IV),
                              LGKMCnt ? 0 : getLgkmcntBitMask(IV));
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(WaitCntImmediate);
    Changed = true;
  }

  if (VSCnt) {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_VSCNT))
        .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
        .addImm(0);
    Changed = true;
  }

  if (Pos == Position::AFTER)
    --MI;
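// IR atomic fences survive instruction selection only as ATOMIC_FENCE pseudo
// instructions. expandAtomicFence (below) inserts the real waits and cache
// invalidates around the pseudo and queues it in AtomicPseudoMIs;
// removeAtomicPseudoMIs then deletes the placeholders once the walk is done.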
bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
  if (AtomicPseudoMIs.empty())
    return false;

  for (auto &MI : AtomicPseudoMIs)
    MI->eraseFromParent();

  AtomicPseudoMIs.clear();
  return true;
}
bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
                                   MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && !MI->mayStore());
  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
        MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->enableLoadCacheBypass(MI, MOI.getScope(),
                                           MOI.getOrderingAddrSpace());

    if (MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertWait(MI, MOI.getScope(),
                                MOI.getOrderingAddrSpace(),
                                SIMemOp::LOAD | SIMemOp::STORE,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::BEFORE);

    if (MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->insertWait(MI, MOI.getScope(),
                                MOI.getInstrAddrSpace(),
                                SIMemOp::LOAD,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::AFTER);
      Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
                                           MOI.getOrderingAddrSpace(),
                                           Position::AFTER);
    }

    return Changed;
  }

  if (MOI.isNonTemporal()) {
    Changed |= CC->enableNonTemporal(MI);
    return Changed;
  }

  return Changed;
}
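// Note the asymmetry in expandLoad: seq_cst puts a wait *before* the load,
// while acquire-like orderings wait and invalidate *after* it so that
// subsequent accesses observe other agents' writes instead of stale L1 data.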
bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
                                    MachineBasicBlock::iterator &MI) {
  assert(!MI->mayLoad() && MI->mayStore());
  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertWait(MI, MOI.getScope(),
                                MOI.getOrderingAddrSpace(),
                                SIMemOp::LOAD | SIMemOp::STORE,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::BEFORE);
    return Changed;
  }

  if (MOI.isNonTemporal()) {
    Changed |= CC->enableNonTemporal(MI);
    return Changed;
  }

  return Changed;
}
bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
                                          MachineBasicBlock::iterator &MI) {
  AtomicPseudoMIs.push_back(MI);
  bool Changed = false;

  if (MOI.isAtomic()) {
    // For acquire/release/acq_rel/seq_cst fences (ordering checks elided):
    Changed |= CC->insertWait(MI, MOI.getScope(),
                              MOI.getOrderingAddrSpace(),
                              SIMemOp::LOAD | SIMemOp::STORE,
                              MOI.getIsCrossAddressSpaceOrdering(),
                              Position::BEFORE);

    // For fences with acquire semantics (ordering checks elided):
    Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
                                         MOI.getOrderingAddrSpace(),
                                         Position::BEFORE);
    return Changed;
  }

  return Changed;
}
bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(
    const SIMemOpInfo &MOI, MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());
  bool Changed = false;

  if (MOI.isAtomic()) {
    // Release-side wait before the operation (ordering checks elided):
    Changed |= CC->insertWait(MI, MOI.getScope(),
                              MOI.getOrderingAddrSpace(),
                              SIMemOp::LOAD | SIMemOp::STORE,
                              MOI.getIsCrossAddressSpaceOrdering(),
                              Position::BEFORE);

    // Acquire-side wait and invalidate after the operation (checks on the
    // success and failure orderings elided):
    Changed |= CC->insertWait(MI, MOI.getScope(),
                              MOI.getOrderingAddrSpace(),
                              isAtomicRet(*MI) ? SIMemOp::LOAD :
                                                 SIMemOp::STORE,
                              MOI.getIsCrossAddressSpaceOrdering(),
                              Position::AFTER);
    Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
                                         MOI.getOrderingAddrSpace(),
                                         Position::AFTER);
    return Changed;
  }

  return Changed;
}
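// A returning atomic produces its result via the vector memory load path, a
// non-returning one via the store path; the isAtomicRet() selection above
// (a helper elided in this listing, built on AMDGPU::getAtomicNoRetOp) picks
// the counter that actually tracks the operation's completion.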
bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;

  SIMemOpAccess MOA(MF);
  CC = SICacheControl::create(MF.getSubtarget<GCNSubtarget>());

  for (auto &MBB : MF) {
    for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
      if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
        continue;

      if (const auto &MOI = MOA.getLoadInfo(MI))
        Changed |= expandLoad(MOI.getValue(), MI);
      else if (const auto &MOI = MOA.getStoreInfo(MI))
        Changed |= expandStore(MOI.getValue(), MI);
      else if (const auto &MOI = MOA.getAtomicFenceInfo(MI))
        Changed |= expandAtomicFence(MOI.getValue(), MI);
      else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI))
        Changed |= expandAtomicCmpxchgOrRmw(MOI.getValue(), MI);
    }
  }
  Changed |= removeAtomicPseudoMIs();
  return Changed;
}

FunctionPass *llvm::createSIMemoryLegalizerPass() {
  return new SIMemoryLegalizer();
}
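// The pass is instantiated through createSIMemoryLegalizerPass() when the
// AMDGPU target builds its codegen pipeline; an INITIALIZE_PASS invocation
// using the DEBUG_TYPE and PASS_NAME defined at the top of this file (elided
// here) registers it with the PassRegistry.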