LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - SIMemoryLegalizer.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 212 232 91.4 %
Date: 2018-07-13 00:08:38 Functions: 29 33 87.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- SIMemoryLegalizer.cpp ----------------------------------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// Memory legalizer - implements memory model. More information can be
      12             : /// found here:
      13             : ///   http://llvm.org/docs/AMDGPUUsage.html#memory-model
      14             : //
      15             : //===----------------------------------------------------------------------===//
      16             : 
      17             : #include "AMDGPU.h"
      18             : #include "AMDGPUMachineModuleInfo.h"
      19             : #include "AMDGPUSubtarget.h"
      20             : #include "SIDefines.h"
      21             : #include "SIInstrInfo.h"
      22             : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
      23             : #include "Utils/AMDGPUBaseInfo.h"
      24             : #include "llvm/ADT/BitmaskEnum.h"
      25             : #include "llvm/ADT/None.h"
      26             : #include "llvm/ADT/Optional.h"
      27             : #include "llvm/CodeGen/MachineBasicBlock.h"
      28             : #include "llvm/CodeGen/MachineFunction.h"
      29             : #include "llvm/CodeGen/MachineFunctionPass.h"
      30             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      31             : #include "llvm/CodeGen/MachineMemOperand.h"
      32             : #include "llvm/CodeGen/MachineModuleInfo.h"
      33             : #include "llvm/CodeGen/MachineOperand.h"
      34             : #include "llvm/IR/DebugLoc.h"
      35             : #include "llvm/IR/DiagnosticInfo.h"
      36             : #include "llvm/IR/Function.h"
      37             : #include "llvm/IR/LLVMContext.h"
      38             : #include "llvm/MC/MCInstrDesc.h"
      39             : #include "llvm/Pass.h"
      40             : #include "llvm/Support/AtomicOrdering.h"
      41             : #include "llvm/Support/MathExtras.h"
      42             : #include <cassert>
      43             : #include <list>
      44             : 
      45             : using namespace llvm;
      46             : using namespace llvm::AMDGPU;
      47             : 
      48             : #define DEBUG_TYPE "si-memory-legalizer"
      49             : #define PASS_NAME "SI Memory Legalizer"
      50             : 
      51             : namespace {
      52             : 
      53             : LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
      54             : 
      55             : /// Kinds of memory operation, one bit per kind. Can be ORed together.
      56             : enum class SIMemOp {
      57             :   NONE = 0u,
      58             :   LOAD = 1u << 0,
      59             :   STORE = 1u << 1,
      60             :   LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ STORE)
      61             : };
      62             : 
      63             : /// Where a new instruction is inserted relative to an existing
      64             : /// instruction: BEFORE it or AFTER it.
      65             : enum class Position {
      66             :   BEFORE,
      67             :   AFTER
      68             : };
      69             : 
      70             : /// The atomic synchronization scopes supported by the AMDGPU target. NONE is the scope constructFromMIWithMMO() records for operations that are not atomic.
      71             : enum class SIAtomicScope {
      72             :   NONE,
      73             :   SINGLETHREAD,
      74             :   WAVEFRONT,
      75             :   WORKGROUP,
      76             :   AGENT,
      77             :   SYSTEM
      78             : };
      79             : 
      80             : /// The distinct address spaces supported by the AMDGPU target for
      81             : /// atomic memory operations, one bit per address space. Can be ORed together.
      82             : enum class SIAtomicAddrSpace {
      83             :   NONE = 0u,
      84             :   GLOBAL = 1u << 0,
      85             :   LDS = 1u << 1,
      86             :   SCRATCH = 1u << 2,
      87             :   GDS = 1u << 3,
      88             :   OTHER = 1u << 4,
      89             : 
      90             :   /// The address spaces that can be accessed by a FLAT instruction.
      91             :   FLAT = GLOBAL | LDS | SCRATCH,
      92             : 
      93             :   /// The address spaces that support atomic instructions (everything except OTHER).
      94             :   ATOMIC = GLOBAL | LDS | SCRATCH | GDS,
      95             : 
      96             :   /// All address spaces, including OTHER.
      97             :   ALL = GLOBAL | LDS | SCRATCH | GDS | OTHER,
      98             : 
      99             :   LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ ALL)
     100             : };
     101             : 
     102             : /// Sets named bit \p BitName to 1 in instruction \p MI if the operand is present and its immediate is currently 0.
     103             : /// \returns true if \p MI is modified, false otherwise (operand absent or already non-zero).
     104             : template <uint16_t BitName>
     105         173 : bool enableNamedBit(const MachineBasicBlock::iterator &MI) {
     106         346 :   int BitIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), BitName);
     107         173 :   if (BitIdx == -1)
     108             :     return false;
     109             : 
     110         173 :   MachineOperand &Bit = MI->getOperand(BitIdx);
     111         173 :   if (Bit.getImm() != 0)
     112             :     return false;
     113             : 
     114             :   Bit.setImm(1);
     115         159 :   return true;
     116             : }
     117             : 
     118             : class SIMemOpInfo final {
                         // Describes the memory-model properties of one memory operation: its
                         // ordering and failure ordering, synchronization scope, the address
                         // spaces it accesses and orders, and whether it is non-temporal.
                         // The constructor is private; only the friend SIMemOpAccess creates
                         // instances.
     119             : private:
     120             : 
     121             :   friend class SIMemOpAccess;
     122             : 
     123             :   AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
     124             :   AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
     125             :   SIAtomicScope Scope = SIAtomicScope::SYSTEM;
     126             :   SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
     127             :   SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
     128             :   bool IsCrossAddressSpaceOrdering = false;
     129             :   bool IsNonTemporal = false;
     130             : 
                         // The default arguments describe the most conservative operation:
                         // sequentially consistent, system scope, ordering every atomic address
                         // space, accessing all address spaces, with cross address space
                         // ordering.
     131             :   SIMemOpInfo(AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
     132             :               SIAtomicScope Scope = SIAtomicScope::SYSTEM,
     133             :               SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC,
     134             :               SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::ALL,
     135             :               bool IsCrossAddressSpaceOrdering = true,
     136             :               AtomicOrdering FailureOrdering =
     137             :                 AtomicOrdering::SequentiallyConsistent,
     138             :               bool IsNonTemporal = false)
     139             :     : Ordering(Ordering), FailureOrdering(FailureOrdering),
     140             :       Scope(Scope), OrderingAddrSpace(OrderingAddrSpace),
     141             :       InstrAddrSpace(InstrAddrSpace),
     142             :       IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
     143       45817 :       IsNonTemporal(IsNonTemporal) {
     144             :     // There is also no cross address space ordering if the ordering
     145             :     // address space is the same as the instruction address space and
     146             :     // only contains a single address space.
                           // The constructor parameter shadows the member of the same name, so
                           // the member has to be named through this-> for the reset to take
                           // effect; an unqualified assignment would only change the parameter.
     147             :     if ((OrderingAddrSpace == InstrAddrSpace) &&
     148             :         isPowerOf2_32(uint32_t(InstrAddrSpace)))
     149             :       this->IsCrossAddressSpaceOrdering = false;
     150             :   }
     151             : 
     152             : public:
     153             :   /// \returns Atomic synchronization scope of the machine instruction used to
     154             :   /// create this SIMemOpInfo.
     155             :   SIAtomicScope getScope() const {
     156             :     return Scope;
     157             :   }
     158             : 
     159             :   /// \returns Ordering constraint of the machine instruction used to
     160             :   /// create this SIMemOpInfo.
     161             :   AtomicOrdering getOrdering() const {
     162             :     return Ordering;
     163             :   }
     164             : 
     165             :   /// \returns Failure ordering constraint of the machine instruction used to
     166             :   /// create this SIMemOpInfo.
     167             :   AtomicOrdering getFailureOrdering() const {
     168             :     return FailureOrdering;
     169             :   }
     170             : 
     171             :   /// \returns The address spaces accessed by the machine
     172             :   /// instruction used to create this SIMemOpInfo.
     173             :   SIAtomicAddrSpace getInstrAddrSpace() const {
     174             :     return InstrAddrSpace;
     175             :   }
     176             : 
     177             :   /// \returns The address spaces that must be ordered by the machine
     178             :   /// instruction used to create this SIMemOpInfo.
     179             :   SIAtomicAddrSpace getOrderingAddrSpace() const {
     180             :     return OrderingAddrSpace;
     181             :   }
     182             : 
     183             :   /// \returns True iff memory ordering of operations on
     184             :   /// different address spaces is required.
     185             :   bool getIsCrossAddressSpaceOrdering() const {
     186             :     return IsCrossAddressSpaceOrdering;
     187             :   }
     188             : 
     189             :   /// \returns True if memory access of the machine instruction used to
     190             :   /// create this SIMemOpInfo is non-temporal, false otherwise.
     191             :   bool isNonTemporal() const {
     192             :     return IsNonTemporal;
     193             :   }
     194             : 
     195             :   /// \returns True if ordering constraint of the machine instruction used to
     196             :   /// create this SIMemOpInfo is unordered or higher, false otherwise.
     197             :   bool isAtomic() const {
     198             :     return Ordering != AtomicOrdering::NotAtomic;
     199             :   }
     200             : 
     201             : };
     202             : 
     203             : class SIMemOpAccess final {
                         // Builds SIMemOpInfo descriptions for machine instructions, using the
                         // target's address space numbering and the module's synchronization
                         // scope IDs captured at construction.
     204             : private:
     205             : 
     206             :   AMDGPUAS SIAddrSpaceInfo;
     207             :   AMDGPUMachineModuleInfo *MMI = nullptr;
     208             : 
     209             :   /// Reports \p Msg as an unsupported-feature diagnostic for \p MI to the LLVM context.
     210             :   void reportUnsupported(const MachineBasicBlock::iterator &MI,
     211             :                          const char *Msg) const;
     212             : 
     213             :   /// Inspects the target synchronization scope \p SSID and determines
     214             :   /// the SI atomic scope it corresponds to, the address spaces it
     215             :   /// covers (restricted to \p InstrScope), and whether the memory ordering
     216             :   /// applies between address spaces. Yields "None" for an unrecognized \p SSID.
     217             :   Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
     218             :   toSIAtomicScope(SyncScope::ID SSID, SIAtomicAddrSpace InstrScope) const;
     219             : 
     220             :   /// \returns The bit set of SI atomic address spaces corresponding to address space \p AS.
     221             :   SIAtomicAddrSpace toSIAtomicAddrSpace(unsigned AS) const;
     222             : 
     223             :   /// \returns Info constructed from \p MI, which has at least one machine memory
     224             :   /// operand, or "None" if an unsupported scope or address space was reported.
     225             :   Optional<SIMemOpInfo> constructFromMIWithMMO(
     226             :       const MachineBasicBlock::iterator &MI) const;
     227             : 
     228             : public:
     229             :   /// Construct class to support accessing the machine memory operands
     230             :   /// of instructions in the machine function \p MF.
     231             :   SIMemOpAccess(MachineFunction &MF);
     232             : 
     233             :   /// \returns Load info if \p MI is a load operation, "None" otherwise.
     234             :   Optional<SIMemOpInfo> getLoadInfo(
     235             :       const MachineBasicBlock::iterator &MI) const;
     236             : 
     237             :   /// \returns Store info if \p MI is a store operation, "None" otherwise.
     238             :   Optional<SIMemOpInfo> getStoreInfo(
     239             :       const MachineBasicBlock::iterator &MI) const;
     240             : 
     241             :   /// \returns Atomic fence info if \p MI is an atomic fence operation,
     242             :   /// "None" otherwise.
     243             :   Optional<SIMemOpInfo> getAtomicFenceInfo(
     244             :       const MachineBasicBlock::iterator &MI) const;
     245             : 
     246             :   /// \returns Atomic cmpxchg/rmw info if \p MI is an atomic cmpxchg or
     247             :   /// rmw operation, "None" otherwise.
     248             :   Optional<SIMemOpInfo> getAtomicCmpxchgOrRmwInfo(
     249             :       const MachineBasicBlock::iterator &MI) const;
     250             : };
     251             : 
     252             : class SICacheControl {
                         // Abstract interface for the cache and wait operations the memory
                         // legalizer needs; every operation below is pure virtual. Instances are
                         // obtained through create(), the constructor being protected.
     253             : protected:
     254             : 
     255             :   /// Instruction info.
     256             :   const SIInstrInfo *TII = nullptr;
     257             : 
     258             :   IsaInfo::IsaVersion IV;
     259             : 
     260             :   SICacheControl(const SISubtarget &ST);
     261             : 
     262             : public:
     263             : 
     264             :   /// Create a cache control for the subtarget \p ST.
     265             :   static std::unique_ptr<SICacheControl> create(const SISubtarget &ST);
     266             : 
     267             :   /// Update \p MI memory load instruction to bypass any caches up to
     268             :   /// the \p Scope memory scope for address spaces \p
     269             :   /// AddrSpace. Return true iff the instruction was modified.
     270             :   virtual bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
     271             :                                      SIAtomicScope Scope,
     272             :                                      SIAtomicAddrSpace AddrSpace) const = 0;
     273             : 
     274             :   /// Update \p MI memory instruction to indicate it is
     275             :   /// nontemporal. Return true iff the instruction was modified.
     276             :   virtual bool enableNonTemporal(const MachineBasicBlock::iterator &MI)
     277             :     const = 0;
     278             : 
     279             :   /// Inserts any necessary instructions at position \p Pos relative
     280             :   /// to instruction \p MI to ensure any caches associated with
     281             :   /// address spaces \p AddrSpace for memory scopes up to memory scope
     282             :   /// \p Scope are invalidated. Returns true iff any instructions
     283             :   /// were inserted.
     284             :   virtual bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
     285             :                                      SIAtomicScope Scope,
     286             :                                      SIAtomicAddrSpace AddrSpace,
     287             :                                      Position Pos) const = 0;
     288             : 
     289             :   /// Inserts any necessary instructions at position \p Pos relative
     290             :   /// to instruction \p MI to ensure memory instructions of kind \p Op
     291             :   /// associated with address spaces \p AddrSpace have completed as
     292             :   /// observed by other memory instructions executing in memory scope
     293             :   /// \p Scope. \p IsCrossAddrSpaceOrdering indicates if the memory
     294             :   /// ordering is between address spaces. Returns true iff any
     295             :   /// instructions were inserted.
     296             :   virtual bool insertWait(MachineBasicBlock::iterator &MI,
     297             :                           SIAtomicScope Scope,
     298             :                           SIAtomicAddrSpace AddrSpace,
     299             :                           SIMemOp Op,
     300             :                           bool IsCrossAddrSpaceOrdering,
     301             :                           Position Pos) const = 0;
     302             : 
     303             :   /// Virtual destructor to allow derivations to be deleted.
     304             :   virtual ~SICacheControl() = default;
     305             : 
     306             : };
     307             : 
     308        5877 : class SIGfx6CacheControl : public SICacheControl {
                         // Concrete SICacheControl that overrides all four cache/wait
                         // operations; SIGfx7CacheControl derives from it. The GLC/SLC helpers
                         // are protected so derived classes can reuse them.
     309             : protected:
     310             : 
     311             :   /// Sets GLC bit to "true" if present in \p MI. Returns true if \p MI
     312             :   /// is modified, false otherwise.
     313             :   bool enableGLCBit(const MachineBasicBlock::iterator &MI) const {
     314         128 :     return enableNamedBit<AMDGPU::OpName::glc>(MI);
     315             :   }
     316             : 
     317             :   /// Sets SLC bit to "true" if present in \p MI. Returns true if \p MI
     318             :   /// is modified, false otherwise.
     319             :   bool enableSLCBit(const MachineBasicBlock::iterator &MI) const {
     320          45 :     return enableNamedBit<AMDGPU::OpName::slc>(MI);
     321             :   }
     322             : 
     323             : public:
     324             : 
                         /// Construct a cache control for the subtarget \p ST.
     325       23852 :   SIGfx6CacheControl(const SISubtarget &ST) : SICacheControl(ST) {}
     326             : 
     327             :   bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
     328             :                              SIAtomicScope Scope,
     329             :                              SIAtomicAddrSpace AddrSpace) const override;
     330             : 
     331             :   bool enableNonTemporal(const MachineBasicBlock::iterator &MI) const override;
     332             : 
     333             :   bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
     334             :                              SIAtomicScope Scope,
     335             :                              SIAtomicAddrSpace AddrSpace,
     336             :                              Position Pos) const override;
     337             : 
     338             :   bool insertWait(MachineBasicBlock::iterator &MI,
     339             :                   SIAtomicScope Scope,
     340             :                   SIAtomicAddrSpace AddrSpace,
     341             :                   SIMemOp Op,
     342             :                   bool IsCrossAddrSpaceOrdering,
     343             :                   Position Pos) const override;
     344             : };
     345             : 
     346       12096 : class SIGfx7CacheControl : public SIGfx6CacheControl {
                         // Variant of SIGfx6CacheControl that overrides only
                         // insertCacheInvalidate; every other operation is inherited.
     347             : public:
     348             : 
                         /// Construct a cache control for the subtarget \p ST.
     349       12096 :   SIGfx7CacheControl(const SISubtarget &ST) : SIGfx6CacheControl(ST) {}
     350             : 
     351             :   bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
     352             :                              SIAtomicScope Scope,
     353             :                              SIAtomicAddrSpace AddrSpace,
     354             :                              Position Pos) const override;
     355             : 
     356             : };
     357             : 
     358        5382 : class SIMemoryLegalizer final : public MachineFunctionPass {
                         // Machine function pass that expands loads, stores, atomic fences and
                         // atomic cmpxchg/rmw operations with the help of a SICacheControl.
     359             : private:
     360             : 
     361             :   /// Cache Control.
     362             :   std::unique_ptr<SICacheControl> CC = nullptr;
     363             : 
     364             :   /// List of atomic pseudo instructions.
     365             :   std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;
     366             : 
     367             :   /// Return true iff instruction \p MI is an atomic instruction that
     368             :   /// returns a result, i.e. AMDGPU::getAtomicNoRetOp() yields something other than -1 for its opcode.
     369             :   bool isAtomicRet(const MachineInstr &MI) const {
     370        2204 :     return AMDGPU::getAtomicNoRetOp(MI.getOpcode()) != -1;
     371             :   }
     372             : 
     373             :   /// Removes all processed atomic pseudo instructions from the current
     374             :   /// function. Returns true if current function is modified, false otherwise.
     375             :   bool removeAtomicPseudoMIs();
     376             : 
     377             :   /// Expands load operation \p MI. Returns true if instructions are
     378             :   /// added/deleted or \p MI is modified, false otherwise.
     379             :   bool expandLoad(const SIMemOpInfo &MOI,
     380             :                   MachineBasicBlock::iterator &MI);
     381             :   /// Expands store operation \p MI. Returns true if instructions are
     382             :   /// added/deleted or \p MI is modified, false otherwise.
     383             :   bool expandStore(const SIMemOpInfo &MOI,
     384             :                    MachineBasicBlock::iterator &MI);
     385             :   /// Expands atomic fence operation \p MI. Returns true if
     386             :   /// instructions are added/deleted or \p MI is modified, false otherwise.
     387             :   bool expandAtomicFence(const SIMemOpInfo &MOI,
     388             :                          MachineBasicBlock::iterator &MI);
     389             :   /// Expands atomic cmpxchg or rmw operation \p MI. Returns true if
     390             :   /// instructions are added/deleted or \p MI is modified, false otherwise.
     391             :   bool expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
     392             :                                 MachineBasicBlock::iterator &MI);
     393             : 
     394             : public:
     395             :   static char ID;
     396             : 
     397        1802 :   SIMemoryLegalizer() : MachineFunctionPass(ID) {}
     398             : 
                         // Declares that the pass preserves the CFG, then defers to the base
                         // class for the remaining analysis usage.
     399        1790 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
     400        1790 :     AU.setPreservesCFG();
     401        1790 :     MachineFunctionPass::getAnalysisUsage(AU);
     402        1790 :   }
     403             : 
     404        1790 :   StringRef getPassName() const override {
     405        1790 :     return PASS_NAME;
     406             :   }
     407             : 
     408             :   bool runOnMachineFunction(MachineFunction &MF) override;
     409             : };
     410             : 
     411             : } // end namespace anonymous
     412             : 
     413          14 : void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI,
     414             :                                       const char *Msg) const {
                         // Build a DiagnosticInfoUnsupported carrying Msg and MI's debug
                         // location for the function containing MI, and hand it to that
                         // function's LLVMContext.
     415          14 :   const Function &Func = MI->getParent()->getParent()->getFunction();
     416          28 :   DiagnosticInfoUnsupported Diag(Func, Msg, MI->getDebugLoc());
     417          14 :   Func.getContext().diagnose(Diag);
     418          14 : }
     419             : 
     420             : Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
     421        1473 : SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
     422             :                                SIAtomicAddrSpace InstrScope) const {
     423             :   /// TODO: For now assume OpenCL memory model which treats each
     424             :   /// address space as having a separate happens-before relation, and
     425             :   /// so an instruction only has ordering with respect to the address
     426             :   /// space it accesses, and if it accesses multiple address spaces it
     427             :   /// does not require ordering of operations in different address
     428             :   /// spaces.
                         // Consequently every recognized scope below yields the same two extra
                         // values: the atomic-capable subset of InstrScope as the ordered
                         // address spaces, and "false" for cross address space ordering.
     429        1473 :  if (SSID == SyncScope::System)
     430             :     return std::make_tuple(SIAtomicScope::SYSTEM,
     431             :                            SIAtomicAddrSpace::ATOMIC & InstrScope,
     432             :                            false);
     433         317 :   if (SSID == MMI->getAgentSSID())
     434             :     return std::make_tuple(SIAtomicScope::AGENT,
     435             :                            SIAtomicAddrSpace::ATOMIC & InstrScope,
     436             :                            false);
     437         240 :   if (SSID == MMI->getWorkgroupSSID())
     438             :     return std::make_tuple(SIAtomicScope::WORKGROUP,
     439             :                            SIAtomicAddrSpace::ATOMIC & InstrScope,
     440             :                            false);
     441         152 :   if (SSID == MMI->getWavefrontSSID())
     442             :     return std::make_tuple(SIAtomicScope::WAVEFRONT,
     443             :                            SIAtomicAddrSpace::ATOMIC & InstrScope,
     444             :                            false);
     445          76 :   if (SSID == SyncScope::SingleThread)
     446             :     return std::make_tuple(SIAtomicScope::SINGLETHREAD,
     447             :                            SIAtomicAddrSpace::ATOMIC & InstrScope,
     448             :                            false);
     449             :   /// TODO: To support the HSA Memory Model, additional memory
     450             :   /// scopes need to be added that do require cross address space
     451             :   /// ordering.
                         // SSID matched none of the scopes above.
     452             :   return None;
     453             : }
     454             : 
     455             : SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(unsigned AS) const {
                         // Compare AS against the address space numbers in SIAddrSpaceInfo.
                         // A flat address maps to the combined FLAT mask (GLOBAL | LDS |
                         // SCRATCH); anything unrecognized maps to OTHER.
     456       45801 :   if (AS == SIAddrSpaceInfo.FLAT_ADDRESS)
     457             :     return SIAtomicAddrSpace::FLAT;
     458       44148 :   if (AS == SIAddrSpaceInfo.GLOBAL_ADDRESS)
     459             :     return SIAtomicAddrSpace::GLOBAL;
     460       13471 :   if (AS == SIAddrSpaceInfo.LOCAL_ADDRESS)
     461             :     return SIAtomicAddrSpace::LDS;
     462       13471 :   if (AS == SIAddrSpaceInfo.PRIVATE_ADDRESS)
     463             :     return SIAtomicAddrSpace::SCRATCH;
     464         755 :   if (AS == SIAddrSpaceInfo.REGION_ADDRESS)
     465             :     return SIAtomicAddrSpace::GDS;
     466             : 
     467             :   return SIAtomicAddrSpace::OTHER;
     468             : }
     469             : 
     470       17974 : SIMemOpAccess::SIMemOpAccess(MachineFunction &MF) {
                         // Capture the address space numbering of MF's target and the
                         // AMDGPU-specific module info that supplies the sync scope IDs.
     471       17974 :   SIAddrSpaceInfo = getAMDGPUAS(MF.getTarget());
     472       17974 :   MMI = &MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
     473       17974 : }
     474             : 
     475       45756 : Optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
     476             :     const MachineBasicBlock::iterator &MI) const {
                         // Folds all memory operands of MI into one SIMemOpInfo. Returns None
                         // after reporting a diagnostic when the scopes or address spaces
                         // involved are unsupported.
     477             :   assert(MI->getNumMemOperands() > 0);
     478             : 
     479             :   SyncScope::ID SSID = SyncScope::SingleThread;
     480             :   AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
     481             :   AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
     482             :   SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
     483             :   bool IsNonTemporal = true;
     484             : 
     485             :   // Validator should check whether or not MMOs cover the entire set of
     486             :   // locations accessed by the memory instruction.
                         // Per operand: the instruction is non-temporal only if every operand
                         // is, and its address spaces are the union over all operands.
     487      137342 :   for (const auto &MMO : MI->memoperands()) {
     488       45801 :     IsNonTemporal &= MMO->isNonTemporal();
     489             :     InstrAddrSpace |=
     490       45801 :       toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace());
     491       45801 :     AtomicOrdering OpOrdering = MMO->getOrdering();
     492       45801 :     if (OpOrdering != AtomicOrdering::NotAtomic) {
                             // Merge this operand's sync scope into SSID: no answer from
                             // isSyncScopeInclusion() is reported as unsupported, "true" keeps
                             // SSID, "false" switches to the operand's scope.
     493             :       const auto &IsSyncScopeInclusion =
     494        2814 :           MMI->isSyncScopeInclusion(SSID, MMO->getSyncScopeID());
     495        1407 :       if (!IsSyncScopeInclusion) {
     496           8 :         reportUnsupported(MI,
     497             :           "Unsupported non-inclusive atomic synchronization scope");
     498             :         return None;
     499             :       }
     500             : 
     501        1399 :       SSID = IsSyncScopeInclusion.getValue() ? SSID : MMO->getSyncScopeID();
                             // Keep the accumulated ordering only when isStrongerThan() says
                             // it beats this operand's; same rule for the failure ordering.
     502             :       Ordering =
     503        1399 :           isStrongerThan(Ordering, OpOrdering) ?
     504        1399 :               Ordering : MMO->getOrdering();
     505             :       assert(MMO->getFailureOrdering() != AtomicOrdering::Release &&
     506             :              MMO->getFailureOrdering() != AtomicOrdering::AcquireRelease);
     507             :       FailureOrdering =
     508        2798 :           isStrongerThan(FailureOrdering, MMO->getFailureOrdering()) ?
     509             :               FailureOrdering : MMO->getFailureOrdering();
     510             :     }
     511             :   }
     512             : 
                         // Scope information is only resolved for atomic operations; a
                         // non-atomic one keeps scope NONE and orders no address space.
     513       45748 :   SIAtomicScope Scope = SIAtomicScope::NONE;
     514       45748 :   SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
     515       45748 :   bool IsCrossAddressSpaceOrdering = false;
     516       45748 :   if (Ordering != AtomicOrdering::NotAtomic) {
     517        1398 :     auto ScopeOrNone = toSIAtomicScope(SSID, InstrAddrSpace);
     518        1398 :     if (!ScopeOrNone) {
     519           0 :       reportUnsupported(MI, "Unsupported atomic synchronization scope");
     520             :       return None;
     521             :     }
     522             :     std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
     523             :       ScopeOrNone.getValue();
                           // Reject an atomic that orders no address space at all, or one
                           // whose ordered set has bits outside the ATOMIC mask.
     524        2792 :     if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
     525             :         ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
     526           4 :       reportUnsupported(MI, "Unsupported atomic address space");
     527             :       return None;
     528             :     }
     529             :   }
     530       45744 :   return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
     531             :                      IsCrossAddressSpaceOrdering, FailureOrdering, IsNonTemporal);
     532             : }
     533             : 
     534       45857 : Optional<SIMemOpInfo> SIMemOpAccess::getLoadInfo(
     535             :     const MachineBasicBlock::iterator &MI) const {
     536             :   assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
     537             : 
                         // Only instructions that may load and may not store count as loads.
     538       64169 :   if (!(MI->mayLoad() && !MI->mayStore()))
     539             :     return None;
     540             : 
     541             :   // Be conservative if there are no memory operands: a default SIMemOpInfo is sequentially consistent at system scope and orders all atomic address spaces.
     542       16974 :   if (MI->getNumMemOperands() == 0)
     543             :     return SIMemOpInfo();
     544             : 
     545       16960 :   return constructFromMIWithMMO(MI);
     546             : }
     547             : 
     548       28886 : Optional<SIMemOpInfo> SIMemOpAccess::getStoreInfo(  // Describes MI when it stores and does not load.
     549             :     const MachineBasicBlock::iterator &MI) const {
     550             :   assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);  // Precondition: MI carries the maybeAtomic flag.
     551             : 
     552       56431 :   if (!(!MI->mayLoad() && MI->mayStore()))  // Reject anything that is not store-only.
     553             :     return None;
     554             : 
     555             :   // Be conservative if there are no memory operands: use a default SIMemOpInfo.
     556       27470 :   if (MI->getNumMemOperands() == 0)
     557             :     return SIMemOpInfo();
     558             : 
     559       27458 :   return constructFromMIWithMMO(MI);  // Otherwise delegate to constructFromMIWithMMO.
     560             : }
     561             : 
     562        1419 : Optional<SIMemOpInfo> SIMemOpAccess::getAtomicFenceInfo(  // Describes MI when it is an ATOMIC_FENCE.
     563             :     const MachineBasicBlock::iterator &MI) const {
     564             :   assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);  // Precondition: MI carries the maybeAtomic flag.
     565             : 
     566        2838 :   if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)  // Only the fence pseudo is handled here.
     567             :     return None;
     568             : 
     569             :   AtomicOrdering Ordering =
     570          75 :     static_cast<AtomicOrdering>(MI->getOperand(0).getImm());  // Operand 0 is read as the ordering.
     571             : 
     572          75 :   SyncScope::ID SSID = static_cast<SyncScope::ID>(MI->getOperand(1).getImm());  // Operand 1 is read as the sync scope ID.
     573          75 :   auto ScopeOrNone = toSIAtomicScope(SSID, SIAtomicAddrSpace::ATOMIC);
     574          75 :   if (!ScopeOrNone) {  // The scope could not be translated: report and give up.
     575           2 :     reportUnsupported(MI, "Unsupported atomic synchronization scope");
     576             :     return None;
     577             :   }
     578             : 
     579             :   SIAtomicScope Scope = SIAtomicScope::NONE;
     580             :   SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
     581             :   bool IsCrossAddressSpaceOrdering = false;
     582             :   std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =  // Unpack the three-element result.
     583             :     ScopeOrNone.getValue();
     584             : 
     585         146 :   if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||  // Reject an empty ordering address space ...
     586             :       ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {  // ... or one with bits outside ATOMIC.
     587           0 :     reportUnsupported(MI, "Unsupported atomic address space");
     588             :     return None;
     589             :   }
     590             : 
     591             :   return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, SIAtomicAddrSpace::ATOMIC,  // ATOMIC is passed as the instruction address space.
     592             :                      IsCrossAddressSpaceOrdering);
     593             : }
     594             : 
     595        1346 : Optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(  // Describes MI when it both loads and stores.
     596             :     const MachineBasicBlock::iterator &MI) const {
     597             :   assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);  // Precondition: MI carries the maybeAtomic flag.
     598             : 
     599        2687 :   if (!(MI->mayLoad() && MI->mayStore()))  // Reject anything that does not do both.
     600             :     return None;
     601             : 
     602             :   // Be conservative if there are no memory operands: use a default SIMemOpInfo.
     603        1338 :   if (MI->getNumMemOperands() == 0)
     604             :     return SIMemOpInfo();
     605             : 
     606        1338 :   return constructFromMIWithMMO(MI);  // Otherwise delegate to constructFromMIWithMMO.
     607             : }
     608             : 
     609       17974 : SICacheControl::SICacheControl(const SISubtarget &ST) {  // Caches per-subtarget data used by the insert/enable helpers.
     610       17974 :   TII = ST.getInstrInfo();  // Instruction info, used to look up opcode descriptors.
     611       17974 :   IV = IsaInfo::getIsaVersion(ST.getFeatureBits());  // ISA version, used when encoding waitcnt immediates.
     612             : }
     613             : 
     614             : /* static */
     615       17974 : std::unique_ptr<SICacheControl> SICacheControl::create(const SISubtarget &ST) {  // Factory: picks the cache-control flavour for ST.
     616       17974 :   AMDGPUSubtarget::Generation Generation = ST.getGeneration();
     617       17974 :   if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)  // Generations up to SOUTHERN_ISLANDS use the Gfx6 variant.
     618       11756 :     return make_unique<SIGfx6CacheControl>(ST);
     619       24192 :   return make_unique<SIGfx7CacheControl>(ST);  // Every later generation uses the Gfx7 variant.
     620             : }
     621             : 
     622           0 : bool SIGfx6CacheControl::enableLoadCacheBypass(  // Sets the GLC bit on a load where scope/address space ask for it.
     623             :     const MachineBasicBlock::iterator &MI,
     624             :     SIAtomicScope Scope,
     625             :     SIAtomicAddrSpace AddrSpace) const {
     626             :   assert(MI->mayLoad() && !MI->mayStore());  // Only valid on load-only instructions.
     627             :   bool Changed = false;  // Becomes true once MI has been modified; this is the return value.
     628             : 
     629         119 :   if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {  // Only the GLOBAL address space is acted upon.
     630             :     /// TODO: Do not set glc for rmw atomic operations as they
     631             :     /// implicitly bypass the L1 cache.
     632             : 
     633         119 :     switch (Scope) {
     634             :     case SIAtomicScope::SYSTEM:
     635             :     case SIAtomicScope::AGENT:
     636             :       Changed |= enableGLCBit(MI);  // System and agent scope: request the bypass through GLC.
     637           0 :       break;
     638             :     case SIAtomicScope::WORKGROUP:
     639             :     case SIAtomicScope::WAVEFRONT:
     640             :     case SIAtomicScope::SINGLETHREAD:
     641             :       // No cache to bypass.
     642             :       break;
     643           0 :     default:
     644           0 :       llvm_unreachable("Unsupported synchronization scope");
     645             :     }
     646             :   }
     647             : 
     648             :   /// The scratch address space does not need the global memory caches
     649             :   /// to be bypassed as all memory operations by the same thread are
     650             :   /// sequentially consistent, and no other thread can access scratch
     651             :   /// memory.
     652             : 
     653             :   /// Other address spaces do not have a cache.
     654             : 
     655           0 :   return Changed;
     656             : }
     657             : 
     658           0 : bool SIGfx6CacheControl::enableNonTemporal(  // Marks a load or store as nontemporal by setting GLC and SLC.
     659             :     const MachineBasicBlock::iterator &MI) const {
     660             :   assert(MI->mayLoad() ^ MI->mayStore());  // Exactly one of load/store must hold.
     661             :   bool Changed = false;  // Becomes true if either bit helper reports a modification.
     662             : 
     663             :   /// TODO: Do not enableGLCBit if rmw atomic.
     664             :   Changed |= enableGLCBit(MI);
     665          45 :   Changed |= enableSLCBit(MI);
     666             : 
     667           0 :   return Changed;
     668             : }
     669             : 
     670         121 : bool SIGfx6CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,  // Emits BUFFER_WBINVL1 before/after MI when needed.
     671             :                                                SIAtomicScope Scope,
     672             :                                                SIAtomicAddrSpace AddrSpace,
     673             :                                                Position Pos) const {
     674             :   bool Changed = false;  // Set once an instruction has been inserted; this is the return value.
     675             : 
     676         121 :   MachineBasicBlock &MBB = *MI->getParent();
     677             :   DebugLoc DL = MI->getDebugLoc();  // The new instruction reuses MI's debug location.
     678             : 
     679         121 :   if (Pos == Position::AFTER)  // Step past MI so the insertion point follows it.
     680             :     ++MI;
     681             : 
     682         121 :   if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {  // Only the GLOBAL address space is acted upon.
     683         121 :     switch (Scope) {
     684         112 :     case SIAtomicScope::SYSTEM:
     685             :     case SIAtomicScope::AGENT:
     686         224 :       BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBINVL1));  // System and agent scope: emit the invalidate.
     687             :       Changed = true;
     688         112 :       break;
     689             :     case SIAtomicScope::WORKGROUP:
     690             :     case SIAtomicScope::WAVEFRONT:
     691             :     case SIAtomicScope::SINGLETHREAD:
     692             :       // No cache to invalidate.
     693             :       break;
     694           0 :     default:
     695           0 :       llvm_unreachable("Unsupported synchronization scope");
     696             :     }
     697             :   }
     698             : 
     699             :   /// The scratch address space does not need the global memory cache
     700             :   /// to be flushed as all memory operations by the same thread are
     701             :   /// sequentially consistent, and no other thread can access scratch
     702             :   /// memory.
     703             : 
     704             :   /// Other address spaces do not have a cache.
     705             : 
     706         121 :   if (Pos == Position::AFTER)  // Undo the earlier step; MI now refers to whatever precedes the original successor.
     707             :     --MI;
     708             : 
     709         121 :   return Changed;
     710             : }
     711             : 
     712        2544 : bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,  // Emits an S_WAITCNT before/after MI when scope and address space need one.
     713             :                                     SIAtomicScope Scope,
     714             :                                     SIAtomicAddrSpace AddrSpace,
     715             :                                     SIMemOp Op,  // NOTE(review): Op is never read in this body.
     716             :                                     bool IsCrossAddrSpaceOrdering,
     717             :                                     Position Pos) const {
     718             :   bool Changed = false;  // Set once an S_WAITCNT has been inserted; this is the return value.
     719             : 
     720        2544 :   MachineBasicBlock &MBB = *MI->getParent();
     721             :   DebugLoc DL = MI->getDebugLoc();  // The new instruction reuses MI's debug location.
     722             : 
     723        2544 :   if (Pos == Position::AFTER)  // Step past MI so the insertion point follows it.
     724             :     ++MI;
     725             : 
     726             :   bool VMCnt = false;  // Each flag records that the matching counter must be waited to zero.
     727             :   bool LGKMCnt = false;
     728             :   bool EXPCnt = false;
     729             : 
     730        2544 :   if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {  // GLOBAL ordering is expressed through vmcnt.
     731        2544 :     switch (Scope) {
     732        2308 :     case SIAtomicScope::SYSTEM:
     733             :     case SIAtomicScope::AGENT:
     734             :       VMCnt = true;
     735        2308 :       break;
     736             :     case SIAtomicScope::WORKGROUP:
     737             :     case SIAtomicScope::WAVEFRONT:
     738             :     case SIAtomicScope::SINGLETHREAD:
     739             :       // The L1 cache keeps all memory operations in order for
     740             :       // wavefronts in the same work-group.
     741             :       break;
     742           0 :     default:
     743           0 :       llvm_unreachable("Unsupported synchronization scope");
     744             :     }
     745             :   }
     746             : 
     747        2544 :   if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {  // LDS ordering is expressed through lgkmcnt.
     748        1382 :     switch (Scope) {
     749        1233 :     case SIAtomicScope::SYSTEM:
     750             :     case SIAtomicScope::AGENT:
     751             :     case SIAtomicScope::WORKGROUP:
     752             :       // If no cross address space ordering then an LDS waitcnt is not
     753             :       // needed as LDS operations for all waves are executed in a
     754             :       // total global ordering as observed by all waves. Required if
     755             :       // also synchronizing with global/GDS memory as LDS operations
     756             :       // could be reordered with respect to later global/GDS memory
     757             :       // operations of the same wave.
     758             :       LGKMCnt = IsCrossAddrSpaceOrdering;
     759        1233 :       break;
     760             :     case SIAtomicScope::WAVEFRONT:
     761             :     case SIAtomicScope::SINGLETHREAD:
     762             :       // The LDS keeps all memory operations in order for
     763             :       // the same wavefront.
     764             :       break;
     765           0 :     default:
     766           0 :       llvm_unreachable("Unsupported synchronization scope");
     767             :     }
     768             :   }
     769             : 
     770        2544 :   if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {  // GDS ordering is expressed through expcnt in this code.
     771         113 :     switch (Scope) {
     772          65 :     case SIAtomicScope::SYSTEM:
     773             :     case SIAtomicScope::AGENT:
     774             :       // If no cross address space ordering then a GDS waitcnt is not
     775             :       // needed as GDS operations for all waves are executed in a
     776             :       // total global ordering as observed by all waves. Required if
     777             :       // also synchronizing with global/LDS memory as GDS operations
     778             :       // could be reordered with respect to later global/LDS memory
     779             :       // operations of the same wave.
     780             :       EXPCnt = IsCrossAddrSpaceOrdering;  // NOTE(review): confirm expcnt (rather than lgkmcnt) is the counter that tracks GDS completion.
     781          65 :       break;
     782             :     case SIAtomicScope::WORKGROUP:
     783             :     case SIAtomicScope::WAVEFRONT:
     784             :     case SIAtomicScope::SINGLETHREAD:
     785             :       // The GDS keeps all memory operations in order for
     786             :       // the same work-group.
     787             :       break;
     788           0 :     default:
     789           0 :       llvm_unreachable("Unsupported synchronization scope");
     790             :     }
     791             :   }
     792             : 
     793        2544 :   if (VMCnt || LGKMCnt || EXPCnt) {  // Emit a single S_WAITCNT covering every requested counter.
     794             :     unsigned WaitCntImmediate =
     795        6844 :       AMDGPU::encodeWaitcnt(IV,
     796           0 :                             VMCnt ? 0 : getVmcntBitMask(IV),  // 0 for a requested counter, its full bit mask otherwise.
     797        2268 :                             EXPCnt ? 0 : getExpcntBitMask(IV),
     798        4576 :                             LGKMCnt ? 0 : getLgkmcntBitMask(IV));
     799        6924 :     BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(WaitCntImmediate);
     800             :     Changed = true;
     801             :   }
     802             : 
     803        2544 :   if (Pos == Position::AFTER)  // Undo the earlier step; MI now refers to whatever precedes the original successor.
     804             :     --MI;
     805             : 
     806        2544 :   return Changed;
     807             : }
     808             : 
     809        1132 : bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,  // Emits BUFFER_WBINVL1_VOL before/after MI when needed.
     810             :                                                SIAtomicScope Scope,
     811             :                                                SIAtomicAddrSpace AddrSpace,
     812             :                                                Position Pos) const {
     813             :   bool Changed = false;  // Set once an instruction has been inserted; this is the return value.
     814             : 
     815        1132 :   MachineBasicBlock &MBB = *MI->getParent();
     816             :   DebugLoc DL = MI->getDebugLoc();  // The new instruction reuses MI's debug location.
     817             : 
     818        1132 :   if (Pos == Position::AFTER)  // Step past MI so the insertion point follows it.
     819             :     ++MI;
     820             : 
     821        1132 :   if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {  // Only the GLOBAL address space is acted upon.
     822        1132 :     switch (Scope) {
     823        1018 :     case SIAtomicScope::SYSTEM:
     824             :     case SIAtomicScope::AGENT:
     825        2036 :       BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBINVL1_VOL));  // The _VOL opcode is the only difference from the Gfx6 version.
     826             :       Changed = true;
     827        1018 :       break;
     828             :     case SIAtomicScope::WORKGROUP:
     829             :     case SIAtomicScope::WAVEFRONT:
     830             :     case SIAtomicScope::SINGLETHREAD:
     831             :       // No cache to invalidate.
     832             :       break;
     833           0 :     default:
     834           0 :       llvm_unreachable("Unsupported synchronization scope");
     835             :     }
     836             :   }
     837             : 
     838             :   /// The scratch address space does not need the global memory cache
     839             :   /// to be flushed as all memory operations by the same thread are
     840             :   /// sequentially consistent, and no other thread can access scratch
     841             :   /// memory.
     842             : 
     843             :   /// Other address spaces do not have a cache.
     844             : 
     845        1132 :   if (Pos == Position::AFTER)  // Undo the earlier step; MI now refers to whatever precedes the original successor.
     846             :     --MI;
     847             : 
     848        1132 :   return Changed;
     849             : }
     850             : 
     851       17974 : bool SIMemoryLegalizer::removeAtomicPseudoMIs() {  // Erases every instruction queued in AtomicPseudoMIs.
     852       17974 :   if (AtomicPseudoMIs.empty())  // Nothing queued, so nothing changes.
     853             :     return false;
     854             : 
     855         140 :   for (auto &MI : AtomicPseudoMIs)
     856          73 :     MI->eraseFromParent();
     857             : 
     858             :   AtomicPseudoMIs.clear();  // Drop the now-erased entries.
     859          67 :   return true;  // At least one instruction was erased.
     860             : }
     861             : 
     862       16971 : bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,  // Applies cache-control and wait insertion to a load; returns true on change.
     863             :                                    MachineBasicBlock::iterator &MI) {
     864             :   assert(MI->mayLoad() && !MI->mayStore());  // Only valid on load-only instructions.
     865             : 
     866             :   bool Changed = false;
     867             : 
     868       16971 :   if (MOI.isAtomic()) {
     869         119 :     if (MOI.getOrdering() == AtomicOrdering::Monotonic ||  // Monotonic, acquire and seq_cst loads request a cache bypass.
     870         238 :         MOI.getOrdering() == AtomicOrdering::Acquire ||
     871             :         MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
     872         119 :       Changed |= CC->enableLoadCacheBypass(MI, MOI.getScope(),
     873             :                                            MOI.getOrderingAddrSpace());
     874             :     }
     875             : 
     876         139 :     if (MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)  // seq_cst additionally gets a wait in front of the load.
     877          79 :       Changed |= CC->insertWait(MI, MOI.getScope(),
     878             :                                 MOI.getOrderingAddrSpace(),
     879             :                                 SIMemOp::LOAD | SIMemOp::STORE,
     880          79 :                                 MOI.getIsCrossAddressSpaceOrdering(),
     881             :                                 Position::BEFORE);
     882             : 
     883         139 :     if (MOI.getOrdering() == AtomicOrdering::Acquire ||  // acquire and seq_cst: wait after the load, then invalidate.
     884             :         MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
     885          99 :       Changed |= CC->insertWait(MI, MOI.getScope(),
     886             :                                 MOI.getInstrAddrSpace(),  // This wait uses the instruction address space, not the ordering one.
     887             :                                 SIMemOp::LOAD,
     888          99 :                                 MOI.getIsCrossAddressSpaceOrdering(),
     889             :                                 Position::AFTER);
     890          99 :       Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
     891             :                                            MOI.getOrderingAddrSpace(),
     892          99 :                                            Position::AFTER);
     893             :     }
     894             : 
     895             :     return Changed;
     896             :   }
     897             : 
     898             :   // Atomic instructions do not have the nontemporal attribute.
     899       16832 :   if (MOI.isNonTemporal()) {  // Non-atomic path: only the nontemporal hint is applied.
     900             :     Changed |= CC->enableNonTemporal(MI);
     901          21 :     return Changed;
     902             :   }
     903             : 
     904             :   return Changed;
     905             : }
     906             : 
     907       27467 : bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,  // Applies wait insertion or the nontemporal hint to a store; returns true on change.
     908             :                                     MachineBasicBlock::iterator &MI) {
     909             :   assert(!MI->mayLoad() && MI->mayStore());  // Only valid on store-only instructions.
     910             : 
     911             :   bool Changed = false;
     912             : 
     913       27467 :   if (MOI.isAtomic()) {
     914         139 :     if (MOI.getOrdering() == AtomicOrdering::Release ||  // release and seq_cst stores get a wait in front of them.
     915             :         MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
     916          99 :       Changed |= CC->insertWait(MI, MOI.getScope(),
     917             :                                 MOI.getOrderingAddrSpace(),
     918             :                                 SIMemOp::LOAD | SIMemOp::STORE,
     919          99 :                                 MOI.getIsCrossAddressSpaceOrdering(),
     920             :                                 Position::BEFORE);
     921             : 
     922             :     return Changed;
     923             :   }
     924             : 
     925             :   // Atomic instructions do not have the nontemporal attribute.
     926       27328 :   if (MOI.isNonTemporal()) {  // Non-atomic path: only the nontemporal hint is applied.
     927             :     Changed |= CC->enableNonTemporal(MI);
     928          24 :     return Changed;
     929             :   }
     930             : 
     931             :   return Changed;
     932             : }
     933             : 
     934          73 : bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,  // Replaces an ATOMIC_FENCE with wait/invalidate instructions.
     935             :                                           MachineBasicBlock::iterator &MI) {
     936             :   assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);
     937             : 
     938          73 :   AtomicPseudoMIs.push_back(MI);  // Queue the fence itself; it is not erased here.
     939             :   bool Changed = false;
     940             : 
     941          73 :   if (MOI.isAtomic()) {
     942          51 :     if (MOI.getOrdering() == AtomicOrdering::Acquire ||  // Every ordering from acquire upwards gets a wait before the fence.
     943          30 :         MOI.getOrdering() == AtomicOrdering::Release ||
     944          88 :         MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
     945             :         MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
     946             :       /// TODO: This relies on a barrier always generating a waitcnt
     947             :       /// for LDS to ensure it is not reordered with the completion of
     948             :       /// the preceding LDS operations. If barrier had a memory
     949             :       /// ordering and memory scope, then library does not need to
     950             :       /// generate a fence. Could add support in this file for
     951             :       /// barrier. SIInsertWaitcnt.cpp could then stop unconditionally
     952             :       /// adding waitcnt before a S_BARRIER.
     953          73 :       Changed |= CC->insertWait(MI, MOI.getScope(),
     954             :                                 MOI.getOrderingAddrSpace(),
     955             :                                 SIMemOp::LOAD | SIMemOp::STORE,
     956          73 :                                 MOI.getIsCrossAddressSpaceOrdering(),
     957             :                                 Position::BEFORE);
     958             : 
     959         124 :     if (MOI.getOrdering() == AtomicOrdering::Acquire ||  // Orderings with acquire semantics also get a cache invalidate.
     960         109 :         MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
     961             :         MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
     962          52 :       Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
     963             :                                            MOI.getOrderingAddrSpace(),
     964          52 :                                            Position::BEFORE);
     965             : 
     966             :     return Changed;
     967             :   }
     968             : 
     969             :   return Changed;  // A non-atomic MOI inserts nothing; the fence stays queued.
     970             : }
     971             : 
     972        1332 : bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,  // Applies wait/invalidate insertion to a load+store atomic.
     973             :   MachineBasicBlock::iterator &MI) {
     974             :   assert(MI->mayLoad() && MI->mayStore());  // Only valid on instructions that both load and store.
     975             : 
     976             :   bool Changed = false;
     977             : 
     978        1332 :   if (MOI.isAtomic()) {
     979        1112 :     if (MOI.getOrdering() == AtomicOrdering::Release ||  // Release-like success ordering, or seq_cst failure ordering: wait before.
     980        1082 :         MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
     981        1192 :         MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
     982          50 :         MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
     983        1092 :       Changed |= CC->insertWait(MI, MOI.getScope(),
     984             :                                 MOI.getOrderingAddrSpace(),
     985             :                                 SIMemOp::LOAD | SIMemOp::STORE,
     986        1092 :                                 MOI.getIsCrossAddressSpaceOrdering(),
     987             :                                 Position::BEFORE);
     988             : 
     989        2254 :     if (MOI.getOrdering() == AtomicOrdering::Acquire ||  // Acquire-like success or failure ordering: wait after, then invalidate.
     990        1082 :         MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
     991          50 :         MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
     992        1232 :         MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
     993             :         MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
     994        2204 :       Changed |= CC->insertWait(MI, MOI.getScope(),
     995             :                                 MOI.getOrderingAddrSpace(),
     996             :                                 isAtomicRet(*MI) ? SIMemOp::LOAD :  // Passed as LOAD when isAtomicRet(*MI) holds, STORE otherwise.
     997             :                                                    SIMemOp::STORE,
     998        1102 :                                 MOI.getIsCrossAddressSpaceOrdering(),
     999             :                                 Position::AFTER);
    1000        1102 :       Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
    1001             :                                            MOI.getOrderingAddrSpace(),
    1002        1102 :                                            Position::AFTER);
    1003             :     }
    1004             : 
    1005             :     return Changed;
    1006             :   }
    1007             : 
    1008             :   return Changed;  // A non-atomic MOI leaves the instruction untouched.
    1009             : }
    1010             : 
    1011       17974 : bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {  // Pass entry point; returns true if MF was modified.
    1012             :   bool Changed = false;
    1013             : 
    1014       17974 :   SIMemOpAccess MOA(MF);
    1015       35948 :   CC = SICacheControl::create(MF.getSubtarget<SISubtarget>());  // Pick the cache-control implementation for this subtarget.
    1016             : 
    1017       38079 :   for (auto &MBB : MF) {
    1018      333686 :     for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {  // Explicit iterator: the expand* helpers take MI by reference.
    1019      293476 :       if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))  // Skip instructions not flagged maybeAtomic.
    1020      247619 :         continue;
    1021             : 
    1022       45857 :       if (const auto &MOI = MOA.getLoadInfo(MI))  // Classifiers are tried in order; the first to yield a value wins.
    1023       16971 :         Changed |= expandLoad(MOI.getValue(), MI);
    1024       28886 :       else if (const auto &MOI = MOA.getStoreInfo(MI))
    1025       27467 :         Changed |= expandStore(MOI.getValue(), MI);
    1026        1419 :       else if (const auto &MOI = MOA.getAtomicFenceInfo(MI))
    1027          73 :         Changed |= expandAtomicFence(MOI.getValue(), MI);
    1028        1346 :       else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI))
    1029        1332 :         Changed |= expandAtomicCmpxchgOrRmw(MOI.getValue(), MI);
    1030             :     }
    1031             :   }
    1032             : 
    1033       17974 :   Changed |= removeAtomicPseudoMIs();  // Erase queued pseudo instructions only after the walk has finished.
    1034       17974 :   return Changed;
    1035             : }
    1036             : 
    1037      342570 : INITIALIZE_PASS(SIMemoryLegalizer, DEBUG_TYPE, PASS_NAME, false, false)  // Registers the pass under DEBUG_TYPE / PASS_NAME.
    1038             : 
    1039             : char SIMemoryLegalizer::ID = 0;  // Definition of the pass's static ID member.
    1040             : char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID;  // llvm-namespace reference bound to that ID.
    1041             : 
    1042        1795 : FunctionPass *llvm::createSIMemoryLegalizerPass() {  // Factory returning a newly allocated SIMemoryLegalizer.
    1043        3590 :   return new SIMemoryLegalizer();  // Returned as a raw pointer; ownership handling is not visible here.
    1044             : }

Generated by: LCOV version 1.13