LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - SIMemoryLegalizer.cpp (source / functions)
Test: llvm-toolchain.info
Date: 2018-10-20 13:21:21
                    Hit    Total    Coverage
Lines:              217      268      81.0 %
Functions:           24       39      61.5 %
Legend: Lines: hit | not hit

          Line data    Source code
       1             : //===- SIMemoryLegalizer.cpp ----------------------------------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
       11             : /// Memory legalizer - implements the memory model. More information can be
      12             : /// found here:
      13             : ///   http://llvm.org/docs/AMDGPUUsage.html#memory-model
      14             : //
      15             : //===----------------------------------------------------------------------===//
      16             : 
      17             : #include "AMDGPU.h"
      18             : #include "AMDGPUMachineModuleInfo.h"
      19             : #include "AMDGPUSubtarget.h"
      20             : #include "SIDefines.h"
      21             : #include "SIInstrInfo.h"
      22             : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
      23             : #include "Utils/AMDGPUBaseInfo.h"
      24             : #include "llvm/ADT/BitmaskEnum.h"
      25             : #include "llvm/ADT/None.h"
      26             : #include "llvm/ADT/Optional.h"
      27             : #include "llvm/CodeGen/MachineBasicBlock.h"
      28             : #include "llvm/CodeGen/MachineFunction.h"
      29             : #include "llvm/CodeGen/MachineFunctionPass.h"
      30             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      31             : #include "llvm/CodeGen/MachineMemOperand.h"
      32             : #include "llvm/CodeGen/MachineModuleInfo.h"
      33             : #include "llvm/CodeGen/MachineOperand.h"
      34             : #include "llvm/IR/DebugLoc.h"
      35             : #include "llvm/IR/DiagnosticInfo.h"
      36             : #include "llvm/IR/Function.h"
      37             : #include "llvm/IR/LLVMContext.h"
      38             : #include "llvm/MC/MCInstrDesc.h"
      39             : #include "llvm/Pass.h"
      40             : #include "llvm/Support/AtomicOrdering.h"
      41             : #include "llvm/Support/MathExtras.h"
      42             : #include <cassert>
      43             : #include <list>
      44             : 
      45             : using namespace llvm;
      46             : using namespace llvm::AMDGPU;
      47             : 
      48             : #define DEBUG_TYPE "si-memory-legalizer"
      49             : #define PASS_NAME "SI Memory Legalizer"
      50             : 
      51             : namespace {
      52             : 
      53             : LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
      54             : 
      55             : /// Memory operation flags. Can be ORed together.
      56             : enum class SIMemOp {
      57             :   NONE = 0u,
      58             :   LOAD = 1u << 0,
      59             :   STORE = 1u << 1,
      60             :   LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ STORE)
      61             : };
      62             : 
      63             : /// Position to insert a new instruction relative to an existing
      64             : /// instruction.
      65             : enum class Position {
      66             :   BEFORE,
      67             :   AFTER
      68             : };
      69             : 
      70             : /// The atomic synchronization scopes supported by the AMDGPU target.
      71             : enum class SIAtomicScope {
      72             :   NONE,
      73             :   SINGLETHREAD,
      74             :   WAVEFRONT,
      75             :   WORKGROUP,
      76             :   AGENT,
      77             :   SYSTEM
      78             : };
      79             : 
      80             : /// The distinct address spaces supported by the AMDGPU target for
       81             : /// atomic memory operations. Can be ORed together.
      82             : enum class SIAtomicAddrSpace {
      83             :   NONE = 0u,
      84             :   GLOBAL = 1u << 0,
      85             :   LDS = 1u << 1,
      86             :   SCRATCH = 1u << 2,
      87             :   GDS = 1u << 3,
      88             :   OTHER = 1u << 4,
      89             : 
      90             :   /// The address spaces that can be accessed by a FLAT instruction.
      91             :   FLAT = GLOBAL | LDS | SCRATCH,
      92             : 
      93             :   /// The address spaces that support atomic instructions.
      94             :   ATOMIC = GLOBAL | LDS | SCRATCH | GDS,
      95             : 
      96             :   /// All address spaces.
      97             :   ALL = GLOBAL | LDS | SCRATCH | GDS | OTHER,
      98             : 
      99             :   LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ ALL)
     100             : };
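                      : 
                      : // A minimal sketch (illustrative only, not part of this file) of what the
                      : // LLVM_MARK_AS_BITMASK_ENUM markers above provide: the scoped enumerators
                      : // compose with the ordinary bitwise operators while remaining type-safe.
                      : //
                      : //   SIAtomicAddrSpace AS = SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::LDS;
                      : //   bool TouchesLDS = (AS & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE;
                      : //   bool OnlyFlat = (AS & ~SIAtomicAddrSpace::FLAT) == SIAtomicAddrSpace::NONE;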
     101             : 
     102             : /// Sets named bit \p BitName to "true" if present in instruction \p MI.
      103             : /// \returns True if \p MI is modified, false otherwise.
     104             : template <uint16_t BitName>
     105         173 : bool enableNamedBit(const MachineBasicBlock::iterator &MI) {
     106         173 :   int BitIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), BitName);
     107         173 :   if (BitIdx == -1)
     108             :     return false;
     109             : 
     110         173 :   MachineOperand &Bit = MI->getOperand(BitIdx);
     111         173 :   if (Bit.getImm() != 0)
     112             :     return false;
     113             : 
     114             :   Bit.setImm(1);
     115         159 :   return true;
     116             : }
     117          45 : 
     118          45 : class SIMemOpInfo final {
     119          45 : private:
     120             : 
     121             :   friend class SIMemOpAccess;
     122          45 : 
     123          45 :   AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
     124             :   AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
     125             :   SIAtomicScope Scope = SIAtomicScope::SYSTEM;
     126             :   SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
     127          45 :   SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
     128             :   bool IsCrossAddressSpaceOrdering = false;
     129         128 :   bool IsNonTemporal = false;
     130         128 : 
     131         128 :   SIMemOpInfo(AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
     132             :               SIAtomicScope Scope = SIAtomicScope::SYSTEM,
     133             :               SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC,
     134         128 :               SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::ALL,
     135         128 :               bool IsCrossAddressSpaceOrdering = true,
     136             :               AtomicOrdering FailureOrdering =
     137             :                 AtomicOrdering::SequentiallyConsistent,
     138             :               bool IsNonTemporal = false)
     139         114 :     : Ordering(Ordering), FailureOrdering(FailureOrdering),
     140             :       Scope(Scope), OrderingAddrSpace(OrderingAddrSpace),
     141             :       InstrAddrSpace(InstrAddrSpace),
     142             :       IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
     143             :       IsNonTemporal(IsNonTemporal) {
     144             :     // There is also no cross address space ordering if the ordering
     145             :     // address space is the same as the instruction address space and
     146             :     // only contains a single address space.
     147             :     if ((OrderingAddrSpace == InstrAddrSpace) &&
     148             :         isPowerOf2_32(uint32_t(InstrAddrSpace)))
     149             :       IsCrossAddressSpaceOrdering = false;
     150             :   }
     151             : 
     152             : public:
     153             :   /// \returns Atomic synchronization scope of the machine instruction used to
     154             :   /// create this SIMemOpInfo.
     155             :   SIAtomicScope getScope() const {
     156             :     return Scope;
     157             :   }
     158             : 
     159             :   /// \returns Ordering constraint of the machine instruction used to
     160             :   /// create this SIMemOpInfo.
     161             :   AtomicOrdering getOrdering() const {
     162             :     return Ordering;
     163             :   }
     164             : 
     165             :   /// \returns Failure ordering constraint of the machine instruction used to
     166             :   /// create this SIMemOpInfo.
     167          76 :   AtomicOrdering getFailureOrdering() const {
     168             :     return FailureOrdering;
     169             :   }
     170             : 
      171             :   /// \returns The address spaces accessed by the machine
      172             :   /// instruction used to create this SIMemOpInfo.
     173             :   SIAtomicAddrSpace getInstrAddrSpace() const {
     174             :     return InstrAddrSpace;
     175             :   }
     176             : 
     177             :   /// \returns The address spaces that must be ordered by the machine
      178             :   /// instruction used to create this SIMemOpInfo.
     179           0 :   SIAtomicAddrSpace getOrderingAddrSpace() const {
     180           0 :     return OrderingAddrSpace;
     181             :   }
     182             : 
      183             :   /// \returns True iff memory ordering of operations on
     184             :   /// different address spaces is required.
     185           0 :   bool getIsCrossAddressSpaceOrdering() const {
     186           0 :     return IsCrossAddressSpaceOrdering;
     187             :   }
     188             : 
     189             :   /// \returns True if memory access of the machine instruction used to
     190             :   /// create this SIMemOpInfo is non-temporal, false otherwise.
     191           0 :   bool isNonTemporal() const {
     192           0 :     return IsNonTemporal;
     193             :   }
     194             : 
     195             :   /// \returns True if ordering constraint of the machine instruction used to
     196             :   /// create this SIMemOpInfo is unordered or higher, false otherwise.
     197           0 :   bool isAtomic() const {
     198           0 :     return Ordering != AtomicOrdering::NotAtomic;
     199             :   }
     200             : 
     201             : };
     202             : 
     203           0 : class SIMemOpAccess final {
     204           0 : private:
     205             :   AMDGPUMachineModuleInfo *MMI = nullptr;
     206             : 
      207             :   /// Reports unsupported message \p Msg for \p MI to the LLVM context.
     208             :   void reportUnsupported(const MachineBasicBlock::iterator &MI,
     209           0 :                          const char *Msg) const;
     210           0 : 
      211             :   /// Inspects the target synchronization scope \p SSID and determines
     212             :   /// the SI atomic scope it corresponds to, the address spaces it
     213             :   /// covers, and whether the memory ordering applies between address
     214             :   /// spaces.
     215           0 :   Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
     216           0 :   toSIAtomicScope(SyncScope::ID SSID, SIAtomicAddrSpace InstrScope) const;
     217             : 
      218             :   /// \returns A bit set of the address spaces accessed by \p AS.
     219             :   SIAtomicAddrSpace toSIAtomicAddrSpace(unsigned AS) const;
     220             : 
      221           0 :   /// \returns Info constructed from \p MI, which has at least one machine
      222           0 :   /// memory operand.
     223             :   Optional<SIMemOpInfo> constructFromMIWithMMO(
     224             :       const MachineBasicBlock::iterator &MI) const;
     225             : 
     226             : public:
     227             :   /// Construct class to support accessing the machine memory operands
     228             :   /// of instructions in the machine function \p MF.
     229             :   SIMemOpAccess(MachineFunction &MF);
     230             : 
     231             :   /// \returns Load info if \p MI is a load operation, "None" otherwise.
     232             :   Optional<SIMemOpInfo> getLoadInfo(
     233             :       const MachineBasicBlock::iterator &MI) const;
     234             : 
     235             :   /// \returns Store info if \p MI is a store operation, "None" otherwise.
     236             :   Optional<SIMemOpInfo> getStoreInfo(
     237             :       const MachineBasicBlock::iterator &MI) const;
     238             : 
     239             :   /// \returns Atomic fence info if \p MI is an atomic fence operation,
     240             :   /// "None" otherwise.
     241             :   Optional<SIMemOpInfo> getAtomicFenceInfo(
     242             :       const MachineBasicBlock::iterator &MI) const;
     243             : 
     244             :   /// \returns Atomic cmpxchg/rmw info if \p MI is an atomic cmpxchg or
     245             :   /// rmw operation, "None" otherwise.
     246             :   Optional<SIMemOpInfo> getAtomicCmpxchgOrRmwInfo(
     247             :       const MachineBasicBlock::iterator &MI) const;
     248             : };
     249             : 
     250             : class SICacheControl {
     251             : protected:
     252             : 
     253             :   /// Instruction info.
     254             :   const SIInstrInfo *TII = nullptr;
     255             : 
     256             :   IsaVersion IV;
     257             : 
     258             :   SICacheControl(const GCNSubtarget &ST);
     259             : 
     260             : public:
     261             : 
     262             :   /// Create a cache control for the subtarget \p ST.
     263             :   static std::unique_ptr<SICacheControl> create(const GCNSubtarget &ST);
     264             : 
     265             :   /// Update \p MI memory load instruction to bypass any caches up to
     266             :   /// the \p Scope memory scope for address spaces \p
     267             :   /// AddrSpace. Return true iff the instruction was modified.
     268             :   virtual bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
     269             :                                      SIAtomicScope Scope,
     270             :                                      SIAtomicAddrSpace AddrSpace) const = 0;
     271             : 
     272             :   /// Update \p MI memory instruction to indicate it is
     273             :   /// nontemporal. Return true iff the instruction was modified.
     274             :   virtual bool enableNonTemporal(const MachineBasicBlock::iterator &MI)
     275             :     const = 0;
     276             : 
     277             :   /// Inserts any necessary instructions at position \p Pos relative
     278             :   /// to instruction \p MI to ensure any caches associated with
     279             :   /// address spaces \p AddrSpace for memory scopes up to memory scope
     280             :   /// \p Scope are invalidated. Returns true iff any instructions
      281             :   /// are inserted.
     282             :   virtual bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
     283             :                                      SIAtomicScope Scope,
     284             :                                      SIAtomicAddrSpace AddrSpace,
     285             :                                      Position Pos) const = 0;
     286             : 
     287             :   /// Inserts any necessary instructions at position \p Pos relative
     288             :   /// to instruction \p MI to ensure memory instructions of kind \p Op
     289             :   /// associated with address spaces \p AddrSpace have completed as
     290             :   /// observed by other memory instructions executing in memory scope
     291             :   /// \p Scope. \p IsCrossAddrSpaceOrdering indicates if the memory
     292             :   /// ordering is between address spaces. Returns true iff any
      293             :   /// instructions are inserted.
     294             :   virtual bool insertWait(MachineBasicBlock::iterator &MI,
     295             :                           SIAtomicScope Scope,
     296             :                           SIAtomicAddrSpace AddrSpace,
     297             :                           SIMemOp Op,
     298             :                           bool IsCrossAddrSpaceOrdering,
     299             :                           Position Pos) const = 0;
     300             : 
     301             :   /// Virtual destructor to allow derivations to be deleted.
     302             :   virtual ~SICacheControl() = default;
     303             : 
     304             : };
     305             : 
     306             : class SIGfx6CacheControl : public SICacheControl {
     307             : protected:
     308             : 
     309             :   /// Sets GLC bit to "true" if present in \p MI. Returns true if \p MI
     310             :   /// is modified, false otherwise.
     311             :   bool enableGLCBit(const MachineBasicBlock::iterator &MI) const {
     312             :     return enableNamedBit<AMDGPU::OpName::glc>(MI);
     313             :   }
     314             : 
     315             :   /// Sets SLC bit to "true" if present in \p MI. Returns true if \p MI
     316             :   /// is modified, false otherwise.
     317             :   bool enableSLCBit(const MachineBasicBlock::iterator &MI) const {
     318             :     return enableNamedBit<AMDGPU::OpName::slc>(MI);
     319             :   }
     320             : 
     321             : public:
     322             : 
     323             :   SIGfx6CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {};
     324             : 
     325             :   bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
     326             :                              SIAtomicScope Scope,
     327             :                              SIAtomicAddrSpace AddrSpace) const override;
     328             : 
     329             :   bool enableNonTemporal(const MachineBasicBlock::iterator &MI) const override;
     330             : 
     331             :   bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
     332             :                              SIAtomicScope Scope,
     333             :                              SIAtomicAddrSpace AddrSpace,
     334             :                              Position Pos) const override;
     335           0 : 
     336         128 :   bool insertWait(MachineBasicBlock::iterator &MI,
     337             :                   SIAtomicScope Scope,
     338             :                   SIAtomicAddrSpace AddrSpace,
     339             :                   SIMemOp Op,
     340             :                   bool IsCrossAddrSpaceOrdering,
     341           0 :                   Position Pos) const override;
     342          45 : };
     343             : 
     344             : class SIGfx7CacheControl : public SIGfx6CacheControl {
     345             : public:
     346             : 
     347       19828 :   SIGfx7CacheControl(const GCNSubtarget &ST) : SIGfx6CacheControl(ST) {};
     348             : 
     349             :   bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
     350             :                              SIAtomicScope Scope,
     351             :                              SIAtomicAddrSpace AddrSpace,
     352             :                              Position Pos) const override;
     353             : 
     354             : };
     355             : 
     356             : class SIMemoryLegalizer final : public MachineFunctionPass {
     357             : private:
     358             : 
     359             :   /// Cache Control.
     360             :   std::unique_ptr<SICacheControl> CC = nullptr;
     361             : 
     362             :   /// List of atomic pseudo instructions.
     363             :   std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;
     364             : 
      365             :   /// Return true iff instruction \p MI is an atomic instruction that
     366             :   /// returns a result.
     367             :   bool isAtomicRet(const MachineInstr &MI) const {
     368           0 :     return AMDGPU::getAtomicNoRetOp(MI.getOpcode()) != -1;
     369             :   }
     370             : 
     371       13826 :   /// Removes all processed atomic pseudo instructions from the current
      372             :   /// function. Returns true if the current function is modified, false otherwise.
     373             :   bool removeAtomicPseudoMIs();
     374             : 
     375             :   /// Expands load operation \p MI. Returns true if instructions are
     376             :   /// added/deleted or \p MI is modified, false otherwise.
     377             :   bool expandLoad(const SIMemOpInfo &MOI,
     378             :                   MachineBasicBlock::iterator &MI);
     379             :   /// Expands store operation \p MI. Returns true if instructions are
     380             :   /// added/deleted or \p MI is modified, false otherwise.
     381             :   bool expandStore(const SIMemOpInfo &MOI,
     382             :                    MachineBasicBlock::iterator &MI);
     383             :   /// Expands atomic fence operation \p MI. Returns true if
     384             :   /// instructions are added/deleted or \p MI is modified, false otherwise.
     385             :   bool expandAtomicFence(const SIMemOpInfo &MOI,
     386             :                          MachineBasicBlock::iterator &MI);
     387             :   /// Expands atomic cmpxchg or rmw operation \p MI. Returns true if
     388             :   /// instructions are added/deleted or \p MI is modified, false otherwise.
     389             :   bool expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
     390             :                                 MachineBasicBlock::iterator &MI);
     391           0 : 
     392        1140 : public:
     393             :   static char ID;
     394             : 
     395             :   SIMemoryLegalizer() : MachineFunctionPass(ID) {}
     396             : 
     397             :   void getAnalysisUsage(AnalysisUsage &AU) const override {
     398             :     AU.setPreservesCFG();
     399             :     MachineFunctionPass::getAnalysisUsage(AU);
     400             :   }
     401             : 
     402             :   StringRef getPassName() const override {
     403             :     return PASS_NAME;
     404             :   }
     405             : 
     406             :   bool runOnMachineFunction(MachineFunction &MF) override;
     407             : };
     408             : 
      409             : } // end anonymous namespace
     410             : 
     411             : void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI,
     412             :                                       const char *Msg) const {
     413             :   const Function &Func = MI->getParent()->getParent()->getFunction();
     414             :   DiagnosticInfoUnsupported Diag(Func, Msg, MI->getDebugLoc());
     415             :   Func.getContext().diagnose(Diag);
     416             : }
     417             : 
     418             : Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
     419        1971 : SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
     420             :                                SIAtomicAddrSpace InstrScope) const {
      421        1959 :   /// TODO: For now assume the OpenCL memory model, which treats each
     422        1959 :   /// address space as having a separate happens-before relation, and
     423        1959 :   /// so an instruction only has ordering with respect to the address
     424        1959 :   /// space it accesses, and if it accesses multiple address spaces it
     425             :   /// does not require ordering of operations in different address
     426        1959 :   /// spaces.
      427        1959 :   if (SSID == SyncScope::System)
     428             :     return std::make_tuple(SIAtomicScope::SYSTEM,
     429             :                            SIAtomicAddrSpace::ATOMIC & InstrScope,
     430             :                            false);
     431             :   if (SSID == MMI->getAgentSSID())
     432             :     return std::make_tuple(SIAtomicScope::AGENT,
     433             :                            SIAtomicAddrSpace::ATOMIC & InstrScope,
     434             :                            false);
     435           0 :   if (SSID == MMI->getWorkgroupSSID())
     436             :     return std::make_tuple(SIAtomicScope::WORKGROUP,
     437           0 :                            SIAtomicAddrSpace::ATOMIC & InstrScope,
     438           0 :                            false);
     439           0 :   if (SSID == MMI->getWavefrontSSID())
     440           0 :     return std::make_tuple(SIAtomicScope::WAVEFRONT,
     441             :                            SIAtomicAddrSpace::ATOMIC & InstrScope,
     442             :                            false);
     443        1514 :   if (SSID == SyncScope::SingleThread)
     444             :     return std::make_tuple(SIAtomicScope::SINGLETHREAD,
     445             :                            SIAtomicAddrSpace::ATOMIC & InstrScope,
     446             :                            false);
      447             :   /// TODO: To support the HSA memory model we need to add additional
      448             :   /// memory scopes that do require cross address space
      449             :   /// ordering.
     450             :   return None;
     451        1514 : }
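                      : 
                      : // For example (a sketch of the mapping above, assuming the sync scope
                      : // names registered by AMDGPUMachineModuleInfo): an IR fence such as
                      : //
                      : //   fence syncscope("agent") acquire
                      : //
                      : // reaches this function with SSID == MMI->getAgentSSID() and maps to
                      : // (SIAtomicScope::AGENT, SIAtomicAddrSpace::ATOMIC & InstrScope, false),
                      : // i.e. agent scope with no cross address space ordering.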
     452             : 
     453             : SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(unsigned AS) const {
     454             :   if (AS == AMDGPUAS::FLAT_ADDRESS)
     455         321 :     return SIAtomicAddrSpace::FLAT;
     456             :   if (AS == AMDGPUAS::GLOBAL_ADDRESS)
     457             :     return SIAtomicAddrSpace::GLOBAL;
     458             :   if (AS == AMDGPUAS::LOCAL_ADDRESS)
     459         244 :     return SIAtomicAddrSpace::LDS;
     460             :   if (AS == AMDGPUAS::PRIVATE_ADDRESS)
     461             :     return SIAtomicAddrSpace::SCRATCH;
     462             :   if (AS == AMDGPUAS::REGION_ADDRESS)
     463         152 :     return SIAtomicAddrSpace::GDS;
     464             : 
     465             :   return SIAtomicAddrSpace::OTHER;
     466             : }
     467          76 : 
     468             : SIMemOpAccess::SIMemOpAccess(MachineFunction &MF) {
     469             :   MMI = &MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
     470             : }
     471             : 
     472             : Optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
     473             :     const MachineBasicBlock::iterator &MI) const {
     474             :   assert(MI->getNumMemOperands() > 0);
     475             : 
     476             :   SyncScope::ID SSID = SyncScope::SingleThread;
     477           0 :   AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
     478       48177 :   AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
     479           0 :   SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
     480       46458 :   bool IsNonTemporal = true;
     481           0 : 
     482       14136 :   // Validator should check whether or not MMOs cover the entire set of
     483           0 :   // locations accessed by the memory instruction.
     484       14136 :   for (const auto &MMO : MI->memoperands()) {
     485           0 :     IsNonTemporal &= MMO->isNonTemporal();
     486        1224 :     InstrAddrSpace |=
     487           0 :       toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace());
     488             :     AtomicOrdering OpOrdering = MMO->getOrdering();
     489             :     if (OpOrdering != AtomicOrdering::NotAtomic) {
     490             :       const auto &IsSyncScopeInclusion =
     491             :           MMI->isSyncScopeInclusion(SSID, MMO->getSyncScopeID());
     492       19828 :       if (!IsSyncScopeInclusion) {
     493       39656 :         reportUnsupported(MI,
     494             :           "Unsupported non-inclusive atomic synchronization scope");
     495             :         return None;
     496       48132 :       }
     497             : 
     498             :       SSID = IsSyncScopeInclusion.getValue() ? SSID : MMO->getSyncScopeID();
     499             :       Ordering =
     500             :           isStrongerThan(Ordering, OpOrdering) ?
     501             :               Ordering : MMO->getOrdering();
     502             :       assert(MMO->getFailureOrdering() != AtomicOrdering::Release &&
     503             :              MMO->getFailureOrdering() != AtomicOrdering::AcquireRelease);
     504             :       FailureOrdering =
     505             :           isStrongerThan(FailureOrdering, MMO->getFailureOrdering()) ?
     506             :               FailureOrdering : MMO->getFailureOrdering();
     507             :     }
     508       96301 :   }
     509       48177 : 
     510             :   SIAtomicScope Scope = SIAtomicScope::NONE;
     511       48177 :   SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
     512       48177 :   bool IsCrossAddressSpaceOrdering = false;
     513       48177 :   if (Ordering != AtomicOrdering::NotAtomic) {
     514             :     auto ScopeOrNone = toSIAtomicScope(SSID, InstrAddrSpace);
     515        2890 :     if (!ScopeOrNone) {
     516        1445 :       reportUnsupported(MI, "Unsupported atomic synchronization scope");
     517           8 :       return None;
     518             :     }
     519             :     std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
     520             :       ScopeOrNone.getValue();
     521             :     if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
     522        1437 :         ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
     523             :       reportUnsupported(MI, "Unsupported atomic address space");
     524        1437 :       return None;
     525        1437 :     }
     526             :   }
     527             :   return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
     528             :                      IsCrossAddressSpaceOrdering, FailureOrdering, IsNonTemporal);
     529        2874 : }
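                      : 
                      : // A worked example of the merging rule above (illustrative values): given
                      : //
                      : //   MMO 1: ordering = monotonic, syncscope("workgroup")
                      : //   MMO 2: ordering = acquire,   syncscope("agent")
                      : //
                      : // the loop keeps the stronger ordering (acquire) and widens SSID to the
                      : // more inclusive scope (agent), so the merged operation is treated
                      : // conservatively rather than per operand.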
     530             : 
     531             : Optional<SIMemOpInfo> SIMemOpAccess::getLoadInfo(
     532             :     const MachineBasicBlock::iterator &MI) const {
     533             :   assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
     534       48124 : 
     535       48124 :   if (!(MI->mayLoad() && !MI->mayStore()))
     536       48124 :     return None;
     537       48124 : 
     538        1436 :   // Be conservative if there are no memory operands.
     539        1436 :   if (MI->getNumMemOperands() == 0)
     540           0 :     return SIMemOpInfo();
     541             : 
     542             :   return constructFromMIWithMMO(MI);
     543             : }
     544             : 
     545        1436 : Optional<SIMemOpInfo> SIMemOpAccess::getStoreInfo(
     546             :     const MachineBasicBlock::iterator &MI) const {
     547           4 :   assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
     548             : 
     549             :   if (!(!MI->mayLoad() && MI->mayStore()))
     550             :     return None;
     551       48120 : 
     552             :   // Be conservative if there are no memory operands.
     553             :   if (MI->getNumMemOperands() == 0)
     554             :     return SIMemOpInfo();
     555       48236 : 
     556             :   return constructFromMIWithMMO(MI);
     557             : }
     558             : 
     559       48236 : Optional<SIMemOpInfo> SIMemOpAccess::getAtomicFenceInfo(
     560             :     const MachineBasicBlock::iterator &MI) const {
     561             :   assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
     562             : 
     563       17774 :   if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
     564             :     return None;
     565             : 
     566       17760 :   AtomicOrdering Ordering =
     567             :     static_cast<AtomicOrdering>(MI->getOperand(0).getImm());
     568             : 
     569       30465 :   SyncScope::ID SSID = static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
     570             :   auto ScopeOrNone = toSIAtomicScope(SSID, SIAtomicAddrSpace::ATOMIC);
     571             :   if (!ScopeOrNone) {
     572             :     reportUnsupported(MI, "Unsupported atomic synchronization scope");
     573       30465 :     return None;
     574             :   }
     575             : 
     576             :   SIAtomicScope Scope = SIAtomicScope::NONE;
     577       28846 :   SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
     578             :   bool IsCrossAddressSpaceOrdering = false;
     579             :   std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
     580       28834 :     ScopeOrNone.getValue();
     581             : 
     582             :   if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
     583        1622 :       ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
     584             :     reportUnsupported(MI, "Unsupported atomic address space");
     585             :     return None;
     586             :   }
     587        3244 : 
     588             :   return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, SIAtomicAddrSpace::ATOMIC,
     589             :                      IsCrossAddressSpaceOrdering);
     590             : }
     591          78 : 
     592             : Optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
     593          78 :     const MachineBasicBlock::iterator &MI) const {
     594          78 :   assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
     595          78 : 
     596           2 :   if (!(MI->mayLoad() && MI->mayStore()))
     597             :     return None;
     598             : 
     599             :   // Be conservative if there are no memory operands.
     600             :   if (MI->getNumMemOperands() == 0)
     601             :     return SIMemOpInfo();
     602             : 
     603             :   return constructFromMIWithMMO(MI);
     604             : }
     605             : 
     606          76 : SICacheControl::SICacheControl(const GCNSubtarget &ST) {
     607             :   TII = ST.getInstrInfo();
     608           0 :   IV = getIsaVersion(ST.getCPU());
     609             : }
     610             : 
     611             : /* static */
     612             : std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) {
     613             :   GCNSubtarget::Generation Generation = ST.getGeneration();
     614             :   if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
     615             :     return make_unique<SIGfx6CacheControl>(ST);
     616        1546 :   return make_unique<SIGfx7CacheControl>(ST);
     617             : }
     618             : 
     619             : bool SIGfx6CacheControl::enableLoadCacheBypass(
     620        1546 :     const MachineBasicBlock::iterator &MI,
     621             :     SIAtomicScope Scope,
     622             :     SIAtomicAddrSpace AddrSpace) const {
     623             :   assert(MI->mayLoad() && !MI->mayStore());
     624        1538 :   bool Changed = false;
     625             : 
     626             :   if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
     627        1538 :     /// TODO: Do not set glc for rmw atomic operations as they
     628             :     /// implicitly bypass the L1 cache.
     629             : 
     630       19828 :     switch (Scope) {
     631       19828 :     case SIAtomicScope::SYSTEM:
     632       19828 :     case SIAtomicScope::AGENT:
     633       19828 :       Changed |= enableGLCBit(MI);
     634             :       break;
     635             :     case SIAtomicScope::WORKGROUP:
     636       19828 :     case SIAtomicScope::WAVEFRONT:
     637       19828 :     case SIAtomicScope::SINGLETHREAD:
     638       19828 :       // No cache to bypass.
     639        6002 :       break;
     640       13826 :     default:
     641             :       llvm_unreachable("Unsupported synchronization scope");
     642             :     }
     643           0 :   }
     644             : 
     645             :   /// The scratch address space does not need the global memory caches
     646             :   /// to be bypassed as all memory operations by the same thread are
     647             :   /// sequentially consistent, and no other thread can access scratch
     648             :   /// memory.
     649             : 
      650         119 :   /// Other address spaces do not have a cache.
     651             : 
     652             :   return Changed;
     653             : }
     654         119 : 
     655             : bool SIGfx6CacheControl::enableNonTemporal(
     656             :     const MachineBasicBlock::iterator &MI) const {
     657             :   assert(MI->mayLoad() ^ MI->mayStore());
     658           0 :   bool Changed = false;
     659             : 
     660             :   /// TODO: Do not enableGLCBit if rmw atomic.
     661             :   Changed |= enableGLCBit(MI);
     662             :   Changed |= enableSLCBit(MI);
     663             : 
     664           0 :   return Changed;
     665           0 : }
     666             : 
     667             : bool SIGfx6CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
     668             :                                                SIAtomicScope Scope,
     669             :                                                SIAtomicAddrSpace AddrSpace,
     670             :                                                Position Pos) const {
     671             :   bool Changed = false;
     672             : 
     673             :   MachineBasicBlock &MBB = *MI->getParent();
     674             :   DebugLoc DL = MI->getDebugLoc();
     675             : 
     676           0 :   if (Pos == Position::AFTER)
     677             :     ++MI;
     678             : 
     679           0 :   if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
     680             :     switch (Scope) {
     681             :     case SIAtomicScope::SYSTEM:
     682             :     case SIAtomicScope::AGENT:
     683             :       BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBINVL1));
     684             :       Changed = true;
     685             :       break;
     686          45 :     case SIAtomicScope::WORKGROUP:
     687             :     case SIAtomicScope::WAVEFRONT:
     688           0 :     case SIAtomicScope::SINGLETHREAD:
     689             :       // No cache to invalidate.
     690             :       break;
     691         133 :     default:
     692             :       llvm_unreachable("Unsupported synchronization scope");
     693             :     }
     694             :   }
     695             : 
     696             :   /// The scratch address space does not need the global memory cache
     697         133 :   /// to be flushed as all memory operations by the same thread are
     698             :   /// sequentially consistent, and no other thread can access scratch
     699             :   /// memory.
     700         133 : 
      701             :   /// Other address spaces do not have a cache.
     702             : 
     703         133 :   if (Pos == Position::AFTER)
     704         133 :     --MI;
     705         124 : 
     706             :   return Changed;
     707         248 : }
     708             : 
     709         124 : bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
     710             :                                     SIAtomicScope Scope,
     711             :                                     SIAtomicAddrSpace AddrSpace,
     712             :                                     SIMemOp Op,
     713             :                                     bool IsCrossAddrSpaceOrdering,
     714             :                                     Position Pos) const {
     715           0 :   bool Changed = false;
     716           0 : 
     717             :   MachineBasicBlock &MBB = *MI->getParent();
     718             :   DebugLoc DL = MI->getDebugLoc();
     719             : 
     720             :   if (Pos == Position::AFTER)
     721             :     ++MI;
     722             : 
     723             :   bool VMCnt = false;
     724             :   bool LGKMCnt = false;
     725             :   bool EXPCnt = false;
     726             : 
     727         133 :   if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
     728             :     switch (Scope) {
     729             :     case SIAtomicScope::SYSTEM:
     730         133 :     case SIAtomicScope::AGENT:
     731             :       VMCnt = true;
     732             :       break;
     733        2623 :     case SIAtomicScope::WORKGROUP:
     734             :     case SIAtomicScope::WAVEFRONT:
     735             :     case SIAtomicScope::SINGLETHREAD:
     736             :       // The L1 cache keeps all memory operations in order for
     737             :       // wavefronts in the same work-group.
     738             :       break;
     739             :     default:
     740             :       llvm_unreachable("Unsupported synchronization scope");
     741        2623 :     }
     742             :   }
     743             : 
     744        2623 :   if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
     745             :     switch (Scope) {
     746             :     case SIAtomicScope::SYSTEM:
     747             :     case SIAtomicScope::AGENT:
     748             :     case SIAtomicScope::WORKGROUP:
     749             :       // If no cross address space ordering then an LDS waitcnt is not
     750             :       // needed as LDS operations for all waves are executed in a
     751        2623 :       // total global ordering as observed by all waves. Required if
     752        2623 :       // also synchronizing with global/GDS memory as LDS operations
     753        2383 :       // could be reordered with respect to later global/GDS memory
     754             :       // operations of the same wave.
     755             :       LGKMCnt = IsCrossAddrSpaceOrdering;
     756        2383 :       break;
     757             :     case SIAtomicScope::WAVEFRONT:
     758             :     case SIAtomicScope::SINGLETHREAD:
     759             :       // The LDS keeps all memory operations in order for
      760             :       // the same wavefront.
     761             :       break;
     762             :     default:
     763           0 :       llvm_unreachable("Unsupported synchronization scope");
     764           0 :     }
     765             :   }
     766             : 
     767             :   if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
     768        2623 :     switch (Scope) {
     769        1387 :     case SIAtomicScope::SYSTEM:
     770        1238 :     case SIAtomicScope::AGENT:
      771             :       // If no cross address space ordering then a GDS waitcnt is not
     772             :       // needed as GDS operations for all waves are executed in a
     773             :       // total global ordering as observed by all waves. Required if
     774             :       // also synchronizing with global/LDS memory as GDS operations
     775             :       // could be reordered with respect to later global/LDS memory
     776             :       // operations of the same wave.
     777             :       EXPCnt = IsCrossAddrSpaceOrdering;
     778             :       break;
     779             :     case SIAtomicScope::WORKGROUP:
     780        1238 :     case SIAtomicScope::WAVEFRONT:
     781             :     case SIAtomicScope::SINGLETHREAD:
     782             :       // The GDS keeps all memory operations in order for
     783             :       // the same work-group.
     784             :       break;
     785             :     default:
     786           0 :       llvm_unreachable("Unsupported synchronization scope");
     787           0 :     }
     788             :   }
     789             : 
     790             :   if (VMCnt || LGKMCnt || EXPCnt) {
     791        2623 :     unsigned WaitCntImmediate =
     792         116 :       AMDGPU::encodeWaitcnt(IV,
     793          64 :                             VMCnt ? 0 : getVmcntBitMask(IV),
     794             :                             EXPCnt ? 0 : getExpcntBitMask(IV),
     795             :                             LGKMCnt ? 0 : getLgkmcntBitMask(IV));
     796             :     BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(WaitCntImmediate);
     797             :     Changed = true;
     798             :   }
     799             : 
     800             :   if (Pos == Position::AFTER)
     801             :     --MI;
     802          64 : 
     803             :   return Changed;
     804             : }
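                      : 
                      : // A concrete sketch of the result (assumed encoding; the exact immediate
                      : // comes from AMDGPU::encodeWaitcnt for the subtarget's ISA version): when
                      : // Scope == SIAtomicScope::AGENT orders GLOBAL loads and stores, only VMCnt
                      : // is set, so the inserted instruction corresponds to the assembly
                      : //
                      : //   s_waitcnt vmcnt(0)
                      : //
                      : // with the EXP and LGKM counters left at their "no wait" bit masks.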
     805             : 
     806             : bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
     807             :                                                SIAtomicScope Scope,
     808             :                                                SIAtomicAddrSpace AddrSpace,
     809           0 :                                                Position Pos) const {
     810           0 :   bool Changed = false;
     811             : 
     812             :   MachineBasicBlock &MBB = *MI->getParent();
     813             :   DebugLoc DL = MI->getDebugLoc();
     814        2623 : 
     815             :   if (Pos == Position::AFTER)
     816        7069 :     ++MI;
     817           0 : 
     818        2343 :   if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
     819        2343 :     switch (Scope) {
     820        4766 :     case SIAtomicScope::SYSTEM:
     821             :     case SIAtomicScope::AGENT:
     822             :       BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBINVL1_VOL));
     823             :       Changed = true;
     824        2623 :       break;
     825             :     case SIAtomicScope::WORKGROUP:
     826             :     case SIAtomicScope::WAVEFRONT:
     827        2623 :     case SIAtomicScope::SINGLETHREAD:
     828             :       // No cache to invalidate.
     829             :       break;
     830        1159 :     default:
     831             :       llvm_unreachable("Unsupported synchronization scope");
     832             :     }
     833             :   }
     834             : 
     835             :   /// The scratch address space does not need the global memory cache
     836        1159 :   /// to be flushed as all memory operations by the same thread are
     837             :   /// sequentially consistent, and no other thread can access scratch
     838             :   /// memory.
     839        1159 : 
      840             :   /// Other address spaces do not have a cache.
     841             : 
     842        1159 :   if (Pos == Position::AFTER)
     843        1159 :     --MI;
     844        1043 : 
     845             :   return Changed;
     846        2086 : }
     847             : 
     848        1043 : bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
     849             :   if (AtomicPseudoMIs.empty())
     850             :     return false;
     851             : 
     852             :   for (auto &MI : AtomicPseudoMIs)
     853             :     MI->eraseFromParent();
     854           0 : 
     855           0 :   AtomicPseudoMIs.clear();
     856             :   return true;
     857             : }
     858             : 
     859             : bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
     860             :                                    MachineBasicBlock::iterator &MI) {
     861             :   assert(MI->mayLoad() && !MI->mayStore());
     862             : 
     863             :   bool Changed = false;
     864             : 
     865             :   if (MOI.isAtomic()) {
     866        1159 :     if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
     867             :         MOI.getOrdering() == AtomicOrdering::Acquire ||
     868             :         MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
     869        1159 :       Changed |= CC->enableLoadCacheBypass(MI, MOI.getScope(),
     870             :                                            MOI.getOrderingAddrSpace());
     871             :     }
     872       19828 : 
     873       19828 :     if (MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
     874             :       Changed |= CC->insertWait(MI, MOI.getScope(),
     875             :                                 MOI.getOrderingAddrSpace(),
     876         144 :                                 SIMemOp::LOAD | SIMemOp::STORE,
     877          76 :                                 MOI.getIsCrossAddressSpaceOrdering(),
     878             :                                 Position::BEFORE);
     879             : 
     880          68 :     if (MOI.getOrdering() == AtomicOrdering::Acquire ||
     881             :         MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
     882             :       Changed |= CC->insertWait(MI, MOI.getScope(),
     883       17771 :                                 MOI.getInstrAddrSpace(),
     884             :                                 SIMemOp::LOAD,
     885             :                                 MOI.getIsCrossAddressSpaceOrdering(),
     886             :                                 Position::AFTER);
     887             :       Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
     888             :                                            MOI.getOrderingAddrSpace(),
     889       17771 :                                            Position::AFTER);
     890         119 :     }
     891         238 : 
     892             :     return Changed;
     893         119 :   }
     894             : 
     895             :   // Atomic instructions do not have the nontemporal attribute.
     896             :   if (MOI.isNonTemporal()) {
     897         139 :     Changed |= CC->enableNonTemporal(MI);
     898          79 :     return Changed;
     899             :   }
     900             : 
     901          79 :   return Changed;
     902             : }
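                      : 
                      : // Putting the steps above together (an illustrative sketch, not verbatim
                      : // compiler output): a sequentially consistent agent-scope global atomic
                      : // load expands roughly as
                      : //
                      : //   s_waitcnt vmcnt(0)              ; insertWait before the load
                      : //   flat_load_dword v0, v[1:2] glc  ; enableLoadCacheBypass sets glc
                      : //   s_waitcnt vmcnt(0)              ; insertWait after the load
                      : //   buffer_wbinvl1_vol              ; insertCacheInvalidate (gfx7;
                      : //                                   ; gfx6 uses buffer_wbinvl1)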
     903             : 
     904         139 : bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
     905             :                                     MachineBasicBlock::iterator &MI) {
     906          99 :   assert(!MI->mayLoad() && MI->mayStore());
     907             : 
     908             :   bool Changed = false;
     909          99 : 
     910             :   if (MOI.isAtomic()) {
     911          99 :     if (MOI.getOrdering() == AtomicOrdering::Release ||
     912             :         MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
     913          99 :       Changed |= CC->insertWait(MI, MOI.getScope(),
     914             :                                 MOI.getOrderingAddrSpace(),
     915             :                                 SIMemOp::LOAD | SIMemOp::STORE,
     916         139 :                                 MOI.getIsCrossAddressSpaceOrdering(),
     917             :                                 Position::BEFORE);
     918             : 
     919             :     return Changed;
     920       17632 :   }
     921             : 
     922          21 :   // Atomic instructions do not have the nontemporal attribute.
     923             :   if (MOI.isNonTemporal()) {
     924             :     Changed |= CC->enableNonTemporal(MI);
     925             :     return Changed;
     926             :   }
     927             : 
     928       28843 :   return Changed;
     929             : }
     930             : 
     931             : bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
     932             :                                           MachineBasicBlock::iterator &MI) {
     933             :   assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);
     934       28843 : 
     935         139 :   AtomicPseudoMIs.push_back(MI);
     936             :   bool Changed = false;
     937          99 : 
     938             :   if (MOI.isAtomic()) {
     939             :     if (MOI.getOrdering() == AtomicOrdering::Acquire ||
     940          99 :         MOI.getOrdering() == AtomicOrdering::Release ||
     941             :         MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
     942             :         MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
     943         139 :       /// TODO: This relies on a barrier always generating a waitcnt
     944             :       /// for LDS to ensure it is not reordered with the completion of
      945             :       /// the preceding LDS operations. If the barrier had a memory
      946             :       /// ordering and memory scope, then the library would not need to
     947       28704 :       /// generate a fence. Could add support in this file for
     948             :       /// barrier. SIInsertWaitcnt.cpp could then stop unconditionally
      949             :       /// adding a waitcnt before an S_BARRIER.
     950             :       Changed |= CC->insertWait(MI, MOI.getScope(),
     951             :                                 MOI.getOrderingAddrSpace(),
     952             :                                 SIMemOp::LOAD | SIMemOp::STORE,
     953             :                                 MOI.getIsCrossAddressSpaceOrdering(),
     954             :                                 Position::BEFORE);
     955          76 : 
     956             :     if (MOI.getOrdering() == AtomicOrdering::Acquire ||
     957             :         MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
     958             :         MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
     959          76 :       Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
     960             :                                            MOI.getOrderingAddrSpace(),
     961             :                                            Position::BEFORE);
     962          76 : 
     963          53 :     return Changed;
     964          30 :   }
     965          91 : 
     966             :   return Changed;
     967             : }
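                      : 
                      : // For instance (a sketch following the branches above): for
                      : //
                      : //   fence syncscope("agent") acquire
                      : //
                      : // this emits a wait on outstanding loads and stores at agent scope and
                      : // then an L1 invalidate, i.e. roughly
                      : //
                      : //   s_waitcnt vmcnt(0)
                      : //   buffer_wbinvl1_vol
                      : //
                      : // while the ATOMIC_FENCE pseudo itself is queued for deletion in
                      : // removeAtomicPseudoMIs().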
     968             : 
     969             : bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
     970             :   MachineBasicBlock::iterator &MI) {
     971             :   assert(MI->mayLoad() && MI->mayStore());
     972             : 
     973             :   bool Changed = false;
     974          76 : 
     975             :   if (MOI.isAtomic()) {
     976             :     if (MOI.getOrdering() == AtomicOrdering::Release ||
     977          76 :         MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
     978             :         MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
     979             :         MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
     980          53 :       Changed |= CC->insertWait(MI, MOI.getScope(),
     981         114 :                                 MOI.getOrderingAddrSpace(),
     982             :                                 SIMemOp::LOAD | SIMemOp::STORE,
     983          53 :                                 MOI.getIsCrossAddressSpaceOrdering(),
     984             :                                 Position::BEFORE);
     985          53 : 
     986             :     if (MOI.getOrdering() == AtomicOrdering::Acquire ||
     987          76 :         MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
     988             :         MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
     989             :         MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
     990             :         MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
     991             :       Changed |= CC->insertWait(MI, MOI.getScope(),
     992             :                                 MOI.getOrderingAddrSpace(),
     993        1532 :                                 isAtomicRet(*MI) ? SIMemOp::LOAD :
     994             :                                                    SIMemOp::STORE,
     995             :                                 MOI.getIsCrossAddressSpaceOrdering(),
     996             :                                 Position::AFTER);
     997             :       Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
     998             :                                            MOI.getOrderingAddrSpace(),
     999        1532 :                                            Position::AFTER);
    1000        1150 :     }
    1001        1084 : 
    1002        1180 :     return Changed;
    1003          50 :   }
    1004        1130 : 
    1005             :   return Changed;
    1006             : }
    1007        1130 : 
    1008             : bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
    1009             :   bool Changed = false;
    1010        1150 : 
    1011        1084 :   SIMemOpAccess MOA(MF);
    1012          50 :   CC = SICacheControl::create(MF.getSubtarget<GCNSubtarget>());
    1013        1230 : 
    1014             :   for (auto &MBB : MF) {
    1015        1757 :     for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
    1016             :       if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
    1017             :         continue;
    1018             : 
    1019        1140 :       if (const auto &MOI = MOA.getLoadInfo(MI))
    1020             :         Changed |= expandLoad(MOI.getValue(), MI);
    1021        1140 :       else if (const auto &MOI = MOA.getStoreInfo(MI))
    1022             :         Changed |= expandStore(MOI.getValue(), MI);
    1023        1140 :       else if (const auto &MOI = MOA.getAtomicFenceInfo(MI))
    1024             :         Changed |= expandAtomicFence(MOI.getValue(), MI);
    1025             :       else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI))
    1026        1180 :         Changed |= expandAtomicCmpxchgOrRmw(MOI.getValue(), MI);
    1027             :     }
    1028             :   }
    1029             : 
    1030             :   Changed |= removeAtomicPseudoMIs();
    1031             :   return Changed;
    1032       19828 : }
    1033             : 
    1034             : INITIALIZE_PASS(SIMemoryLegalizer, DEBUG_TYPE, PASS_NAME, false, false)
    1035             : 
    1036       39656 : char SIMemoryLegalizer::ID = 0;
    1037             : char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID;
    1038       42076 : 
    1039      340944 : FunctionPass *llvm::createSIMemoryLegalizerPass() {
    1040      318696 :   return new SIMemoryLegalizer();
    1041             : }

Generated by: LCOV version 1.13