SIMemoryLegalizer.cpp (LLVM 10.0.0svn)
1 //===- SIMemoryLegalizer.cpp ----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Memory legalizer - implements memory model. More information can be
11 /// found here:
12 /// http://llvm.org/docs/AMDGPUUsage.html#memory-model
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AMDGPU.h"
17 #include "AMDGPUMachineModuleInfo.h"
18 #include "AMDGPUSubtarget.h"
19 #include "SIDefines.h"
20 #include "SIInstrInfo.h"
21 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
22 #include "Utils/AMDGPUBaseInfo.h"
23 #include "llvm/ADT/BitmaskEnum.h"
24 #include "llvm/ADT/None.h"
25 #include "llvm/ADT/Optional.h"
26 #include "llvm/CodeGen/MachineBasicBlock.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/MachineFunctionPass.h"
29 #include "llvm/CodeGen/MachineInstrBuilder.h"
30 #include "llvm/CodeGen/MachineMemOperand.h"
31 #include "llvm/CodeGen/MachineModuleInfo.h"
32 #include "llvm/CodeGen/MachineOperand.h"
33 #include "llvm/IR/DebugLoc.h"
34 #include "llvm/IR/DiagnosticInfo.h"
35 #include "llvm/IR/Function.h"
36 #include "llvm/IR/LLVMContext.h"
37 #include "llvm/MC/MCInstrDesc.h"
38 #include "llvm/Pass.h"
39 #include "llvm/Support/AtomicOrdering.h"
40 #include "llvm/Support/MathExtras.h"
41 #include <cassert>
42 #include <list>
43 
44 using namespace llvm;
45 using namespace llvm::AMDGPU;
46 
47 #define DEBUG_TYPE "si-memory-legalizer"
48 #define PASS_NAME "SI Memory Legalizer"
49 
50 namespace {
51 
52 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
53 
54 /// Memory operation flags. Can be ORed together.
55 enum class SIMemOp {
56  NONE = 0u,
57  LOAD = 1u << 0,
58  STORE = 1u << 1,
59  LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ STORE)
60 };
61 
62 /// Position to insert a new instruction relative to an existing
63 /// instruction.
64 enum class Position {
65  BEFORE,
66  AFTER
67 };
68 
69 /// The atomic synchronization scopes supported by the AMDGPU target.
70 enum class SIAtomicScope {
71  NONE,
72  SINGLETHREAD,
73  WAVEFRONT,
74  WORKGROUP,
75  AGENT,
76  SYSTEM
77 };
78 
79 /// The distinct address spaces supported by the AMDGPU target for
80 /// atomic memory operations. Can be ORed together.
81 enum class SIAtomicAddrSpace {
82  NONE = 0u,
83  GLOBAL = 1u << 0,
84  LDS = 1u << 1,
85  SCRATCH = 1u << 2,
86  GDS = 1u << 3,
87  OTHER = 1u << 4,
88 
89  /// The address spaces that can be accessed by a FLAT instruction.
90  FLAT = GLOBAL | LDS | SCRATCH,
91 
92  /// The address spaces that support atomic instructions.
93  ATOMIC = GLOBAL | LDS | SCRATCH | GDS,
94 
95  /// All address spaces.
96  ALL = GLOBAL | LDS | SCRATCH | GDS | OTHER,
97 
98  LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ ALL)
99 };
100 
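Both SIMemOp and SIAtomicAddrSpace rely on LLVM's bitmask-enum support, so individual flags can be combined with | and tested with & against NONE, exactly as the legalizer does below. A minimal standalone sketch of the pattern (plain operator overloads instead of LLVM_MARK_AS_BITMASK_ENUM; the names are illustrative only):

#include <cstdint>
#include <cstdio>

enum class AddrSpace : uint32_t {
  NONE = 0u,
  GLOBAL = 1u << 0,
  LDS = 1u << 1,
  SCRATCH = 1u << 2,
};
constexpr AddrSpace operator|(AddrSpace A, AddrSpace B) {
  return AddrSpace(uint32_t(A) | uint32_t(B));
}
constexpr AddrSpace operator&(AddrSpace A, AddrSpace B) {
  return AddrSpace(uint32_t(A) & uint32_t(B));
}

int main() {
  AddrSpace Ordered = AddrSpace::GLOBAL | AddrSpace::LDS;
  // Membership tests mirror the "(X & Y) != NONE" checks used throughout
  // this pass.
  if ((Ordered & AddrSpace::GLOBAL) != AddrSpace::NONE)
    std::puts("global memory must be ordered");
  if ((Ordered & AddrSpace::SCRATCH) == AddrSpace::NONE)
    std::puts("scratch is not part of the ordering");
}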
101 /// Sets named bit \p BitName to "true" if present in instruction \p MI.
102 /// \returns Returns true if \p MI is modified, false otherwise.
103 template <uint16_t BitName>
104 bool enableNamedBit(const MachineBasicBlock::iterator &MI) {
105  int BitIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), BitName);
106  if (BitIdx == -1)
107  return false;
108 
109  MachineOperand &Bit = MI->getOperand(BitIdx);
110  if (Bit.getImm() != 0)
111  return false;
112 
113  Bit.setImm(1);
114  return true;
115 }
116 
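enableNamedBit is the common helper behind enableGLCBit/enableSLCBit/enableDLCBit below: look up the named immediate operand, and report a change only when the bit actually flips from 0 to 1. A hypothetical standalone sketch of the same pattern; the Operand struct and getOperandIdx here are stand-ins for MachineOperand and AMDGPU::getNamedOperandIdx:

#include <cstdint>
#include <cstdio>
#include <vector>

struct Operand { int64_t Imm; };

// Stand-in for AMDGPU::getNamedOperandIdx: -1 means the operand is absent.
int getOperandIdx(const std::vector<Operand> &Ops, int NamedIdx) {
  return NamedIdx < (int)Ops.size() ? NamedIdx : -1;
}

bool enableBit(std::vector<Operand> &Ops, int NamedIdx) {
  int BitIdx = getOperandIdx(Ops, NamedIdx);
  if (BitIdx == -1)
    return false;            // instruction has no such operand
  if (Ops[BitIdx].Imm != 0)
    return false;            // bit already set: nothing modified
  Ops[BitIdx].Imm = 1;
  return true;               // bit flipped, instruction modified
}

int main() {
  std::vector<Operand> Ops = {{0}, {1}};
  std::printf("%d %d %d\n", enableBit(Ops, 0), enableBit(Ops, 1),
              enableBit(Ops, 5)); // prints: 1 0 0
}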
117 class SIMemOpInfo final {
118 private:
119 
120  friend class SIMemOpAccess;
121 
122  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
123  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
124  SIAtomicScope Scope = SIAtomicScope::SYSTEM;
125  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
126  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
127  bool IsCrossAddressSpaceOrdering = false;
128  bool IsNonTemporal = false;
129 
130  SIMemOpInfo(AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
131  SIAtomicScope Scope = SIAtomicScope::SYSTEM,
132  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC,
133  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::ALL,
134  bool IsCrossAddressSpaceOrdering = true,
135  AtomicOrdering FailureOrdering =
136  AtomicOrdering::SequentiallyConsistent,
137  bool IsNonTemporal = false)
138  : Ordering(Ordering), FailureOrdering(FailureOrdering),
139  Scope(Scope), OrderingAddrSpace(OrderingAddrSpace),
140  InstrAddrSpace(InstrAddrSpace),
141  IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
142  IsNonTemporal(IsNonTemporal) {
143  // There is also no cross address space ordering if the ordering
144  // address space is the same as the instruction address space and
145  // only contains a single address space.
146  if ((OrderingAddrSpace == InstrAddrSpace) &&
147  isPowerOf2_32(uint32_t(InstrAddrSpace)))
148  this->IsCrossAddressSpaceOrdering = false;
149  }
150 
151 public:
152  /// \returns Atomic synchronization scope of the machine instruction used to
153  /// create this SIMemOpInfo.
154  SIAtomicScope getScope() const {
155  return Scope;
156  }
157 
158  /// \returns Ordering constraint of the machine instruction used to
159  /// create this SIMemOpInfo.
160  AtomicOrdering getOrdering() const {
161  return Ordering;
162  }
163 
164  /// \returns Failure ordering constraint of the machine instruction used to
165  /// create this SIMemOpInfo.
166  AtomicOrdering getFailureOrdering() const {
167  return FailureOrdering;
168  }
169 
170  /// \returns The address spaces accessed by the machine
171  /// instruction used to create this SIMemOpInfo.
172  SIAtomicAddrSpace getInstrAddrSpace() const {
173  return InstrAddrSpace;
174  }
175 
176  /// \returns The address spaces that must be ordered by the machine
177  /// instruction used to create this SIMemOpInfo.
178  SIAtomicAddrSpace getOrderingAddrSpace() const {
179  return OrderingAddrSpace;
180  }
181 
182  /// \returns True iff memory ordering of operations on
183  /// different address spaces is required.
184  bool getIsCrossAddressSpaceOrdering() const {
185  return IsCrossAddressSpaceOrdering;
186  }
187 
188  /// \returns True if memory access of the machine instruction used to
189  /// create this SIMemOpInfo is non-temporal, false otherwise.
190  bool isNonTemporal() const {
191  return IsNonTemporal;
192  }
193 
194  /// \returns True if ordering constraint of the machine instruction used to
195  /// create this SIMemOpInfo is unordered or higher, false otherwise.
196  bool isAtomic() const {
197  return Ordering != AtomicOrdering::NotAtomic;
198  }
199 
200 };
201 
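The constructor's cross-address-space check above boils down to: an ORed address-space mask names exactly one address space iff its underlying value is a power of two. A standalone sketch of that test; isPowerOf2_32 is reimplemented here only for self-containment and matches LLVM's semantics of returning true only for powers of two greater than zero:

#include <cassert>
#include <cstdint>

constexpr bool isPowerOf2_32(uint32_t V) { return V && !(V & (V - 1)); }

int main() {
  constexpr uint32_t GLOBAL = 1u << 0, LDS = 1u << 1;
  assert(isPowerOf2_32(GLOBAL));        // single space: ordering cannot
                                        // cross address spaces
  assert(!isPowerOf2_32(GLOBAL | LDS)); // two spaces: ordering may cross
}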
202 class SIMemOpAccess final {
203 private:
204  AMDGPUMachineModuleInfo *MMI = nullptr;
205 
206  /// Reports unsupported message \p Msg for \p MI to LLVM context.
207  void reportUnsupported(const MachineBasicBlock::iterator &MI,
208  const char *Msg) const;
209 
210  /// Inspects the target synchronization scope \p SSID and determines
211  /// the SI atomic scope it corresponds to, the address spaces it
212  /// covers, and whether the memory ordering applies between address
213  /// spaces.
214  Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
215  toSIAtomicScope(SyncScope::ID SSID, SIAtomicAddrSpace InstrScope) const;
216 
217  /// \return Return a bit set of the address spaces accessed by \p AS.
218  SIAtomicAddrSpace toSIAtomicAddrSpace(unsigned AS) const;
219 
220  /// \returns Info constructed from \p MI, which has at least machine memory
221  /// operand.
222  Optional<SIMemOpInfo> constructFromMIWithMMO(
223  const MachineBasicBlock::iterator &MI) const;
224 
225 public:
226  /// Construct class to support accessing the machine memory operands
227  /// of instructions in the machine function \p MF.
228  SIMemOpAccess(MachineFunction &MF);
229 
230  /// \returns Load info if \p MI is a load operation, "None" otherwise.
231  Optional<SIMemOpInfo> getLoadInfo(
232  const MachineBasicBlock::iterator &MI) const;
233 
234  /// \returns Store info if \p MI is a store operation, "None" otherwise.
235  Optional<SIMemOpInfo> getStoreInfo(
236  const MachineBasicBlock::iterator &MI) const;
237 
238  /// \returns Atomic fence info if \p MI is an atomic fence operation,
239  /// "None" otherwise.
240  Optional<SIMemOpInfo> getAtomicFenceInfo(
241  const MachineBasicBlock::iterator &MI) const;
242 
243  /// \returns Atomic cmpxchg/rmw info if \p MI is an atomic cmpxchg or
244  /// rmw operation, "None" otherwise.
245  Optional<SIMemOpInfo> getAtomicCmpxchgOrRmwInfo(
246  const MachineBasicBlock::iterator &MI) const;
247 };
248 
249 class SICacheControl {
250 protected:
251 
252  /// Instruction info.
253  const SIInstrInfo *TII = nullptr;
254 
255  IsaVersion IV;
256 
257  SICacheControl(const GCNSubtarget &ST);
258 
259 public:
260 
261  /// Create a cache control for the subtarget \p ST.
262  static std::unique_ptr<SICacheControl> create(const GCNSubtarget &ST);
263 
264  /// Update \p MI memory load instruction to bypass any caches up to
265  /// the \p Scope memory scope for address spaces \p
266  /// AddrSpace. Return true iff the instruction was modified.
267  virtual bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
268  SIAtomicScope Scope,
269  SIAtomicAddrSpace AddrSpace) const = 0;
270 
271  /// Update \p MI memory instruction to indicate it is
272  /// nontemporal. Return true iff the instruction was modified.
273  virtual bool enableNonTemporal(const MachineBasicBlock::iterator &MI)
274  const = 0;
275 
276  /// Inserts any necessary instructions at position \p Pos relative
277  /// to instruction \p MI to ensure any caches associated with
278  /// address spaces \p AddrSpace for memory scopes up to memory scope
279  /// \p Scope are invalidated. Returns true iff any instructions
280  /// inserted.
281  virtual bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
282  SIAtomicScope Scope,
283  SIAtomicAddrSpace AddrSpace,
284  Position Pos) const = 0;
285 
286  /// Inserts any necessary instructions at position \p Pos relative
287  /// to instruction \p MI to ensure memory instructions of kind \p Op
288  /// associated with address spaces \p AddrSpace have completed as
289  /// observed by other memory instructions executing in memory scope
290  /// \p Scope. \p IsCrossAddrSpaceOrdering indicates if the memory
291  /// ordering is between address spaces. Returns true iff any
292  /// instructions inserted.
293  virtual bool insertWait(MachineBasicBlock::iterator &MI,
294  SIAtomicScope Scope,
295  SIAtomicAddrSpace AddrSpace,
296  SIMemOp Op,
297  bool IsCrossAddrSpaceOrdering,
298  Position Pos) const = 0;
299 
300  /// Virtual destructor to allow derivations to be deleted.
301  virtual ~SICacheControl() = default;
302 
303 };
304 
305 class SIGfx6CacheControl : public SICacheControl {
306 protected:
307 
308  /// Sets GLC bit to "true" if present in \p MI. Returns true if \p MI
309  /// is modified, false otherwise.
310  bool enableGLCBit(const MachineBasicBlock::iterator &MI) const {
311  return enableNamedBit<AMDGPU::OpName::glc>(MI);
312  }
313 
314  /// Sets SLC bit to "true" if present in \p MI. Returns true if \p MI
315  /// is modified, false otherwise.
316  bool enableSLCBit(const MachineBasicBlock::iterator &MI) const {
317  return enableNamedBit<AMDGPU::OpName::slc>(MI);
318  }
319 
320 public:
321 
322  SIGfx6CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {};
323 
324  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
325  SIAtomicScope Scope,
326  SIAtomicAddrSpace AddrSpace) const override;
327 
328  bool enableNonTemporal(const MachineBasicBlock::iterator &MI) const override;
329 
330  bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
331  SIAtomicScope Scope,
332  SIAtomicAddrSpace AddrSpace,
333  Position Pos) const override;
334 
335  bool insertWait(MachineBasicBlock::iterator &MI,
336  SIAtomicScope Scope,
337  SIAtomicAddrSpace AddrSpace,
338  SIMemOp Op,
339  bool IsCrossAddrSpaceOrdering,
340  Position Pos) const override;
341 };
342 
343 class SIGfx7CacheControl : public SIGfx6CacheControl {
344 public:
345 
346  SIGfx7CacheControl(const GCNSubtarget &ST) : SIGfx6CacheControl(ST) {};
347 
348  bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
349  SIAtomicScope Scope,
350  SIAtomicAddrSpace AddrSpace,
351  Position Pos) const override;
352 
353 };
354 
355 class SIGfx10CacheControl : public SIGfx7CacheControl {
356 protected:
357  bool CuMode = false;
358 
359  /// Sets DLC bit to "true" if present in \p MI. Returns true if \p MI
360  /// is modified, false otherwise.
361  bool enableDLCBit(const MachineBasicBlock::iterator &MI) const {
362  return enableNamedBit<AMDGPU::OpName::dlc>(MI);
363  }
364 
365 public:
366 
367  SIGfx10CacheControl(const GCNSubtarget &ST, bool CuMode) :
368  SIGfx7CacheControl(ST), CuMode(CuMode) {};
369 
370  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
371  SIAtomicScope Scope,
372  SIAtomicAddrSpace AddrSpace) const override;
373 
374  bool enableNonTemporal(const MachineBasicBlock::iterator &MI) const override;
375 
376  bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
377  SIAtomicScope Scope,
378  SIAtomicAddrSpace AddrSpace,
379  Position Pos) const override;
380 
381  bool insertWait(MachineBasicBlock::iterator &MI,
382  SIAtomicScope Scope,
383  SIAtomicAddrSpace AddrSpace,
384  SIMemOp Op,
385  bool IsCrossAddrSpaceOrdering,
386  Position Pos) const override;
387 };
388 
389 class SIMemoryLegalizer final : public MachineFunctionPass {
390 private:
391 
392  /// Cache Control.
393  std::unique_ptr<SICacheControl> CC = nullptr;
394 
395  /// List of atomic pseudo instructions.
396  std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;
397 
398  /// Return true iff instruction \p MI is an atomic instruction that
399  /// returns a result.
400  bool isAtomicRet(const MachineInstr &MI) const {
401  return AMDGPU::getAtomicNoRetOp(MI.getOpcode()) != -1;
402  }
403 
404  /// Removes all processed atomic pseudo instructions from the current
405  /// function. Returns true if current function is modified, false otherwise.
406  bool removeAtomicPseudoMIs();
407 
408  /// Expands load operation \p MI. Returns true if instructions are
409  /// added/deleted or \p MI is modified, false otherwise.
410  bool expandLoad(const SIMemOpInfo &MOI,
411  MachineBasicBlock::iterator &MI);
412  /// Expands store operation \p MI. Returns true if instructions are
413  /// added/deleted or \p MI is modified, false otherwise.
414  bool expandStore(const SIMemOpInfo &MOI,
415  MachineBasicBlock::iterator &MI);
416  /// Expands atomic fence operation \p MI. Returns true if
417  /// instructions are added/deleted or \p MI is modified, false otherwise.
418  bool expandAtomicFence(const SIMemOpInfo &MOI,
419  MachineBasicBlock::iterator &MI);
420  /// Expands atomic cmpxchg or rmw operation \p MI. Returns true if
421  /// instructions are added/deleted or \p MI is modified, false otherwise.
422  bool expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
423  MachineBasicBlock::iterator &MI);
424 
425 public:
426  static char ID;
427 
428  SIMemoryLegalizer() : MachineFunctionPass(ID) {}
429 
430  void getAnalysisUsage(AnalysisUsage &AU) const override {
431  AU.setPreservesCFG();
432  MachineFunctionPass::getAnalysisUsage(AU);
433  }
434 
435  StringRef getPassName() const override {
436  return PASS_NAME;
437  }
438 
439  bool runOnMachineFunction(MachineFunction &MF) override;
440 };
441 
442 } // end namespace anonymous
443 
444 void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI,
445  const char *Msg) const {
446  const Function &Func = MI->getParent()->getParent()->getFunction();
447  DiagnosticInfoUnsupported Diag(Func, Msg, MI->getDebugLoc());
448  Func.getContext().diagnose(Diag);
449 }
450 
451 Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
452 SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
453  SIAtomicAddrSpace InstrScope) const {
454  if (SSID == SyncScope::System)
455  return std::make_tuple(SIAtomicScope::SYSTEM,
456  SIAtomicAddrSpace::ATOMIC,
457  true);
458  if (SSID == MMI->getAgentSSID())
459  return std::make_tuple(SIAtomicScope::AGENT,
460  SIAtomicAddrSpace::ATOMIC,
461  true);
462  if (SSID == MMI->getWorkgroupSSID())
463  return std::make_tuple(SIAtomicScope::WORKGROUP,
464  SIAtomicAddrSpace::ATOMIC,
465  true);
466  if (SSID == MMI->getWavefrontSSID())
467  return std::make_tuple(SIAtomicScope::WAVEFRONT,
468  SIAtomicAddrSpace::ATOMIC,
469  true);
470  if (SSID == SyncScope::SingleThread)
471  return std::make_tuple(SIAtomicScope::SINGLETHREAD,
472  SIAtomicAddrSpace::ATOMIC,
473  true);
474  if (SSID == MMI->getSystemOneAddressSpaceSSID())
475  return std::make_tuple(SIAtomicScope::SYSTEM,
476  SIAtomicAddrSpace::ATOMIC & InstrScope,
477  false);
478  if (SSID == MMI->getAgentOneAddressSpaceSSID())
479  return std::make_tuple(SIAtomicScope::AGENT,
480  SIAtomicAddrSpace::ATOMIC & InstrScope,
481  false);
482  if (SSID == MMI->getWorkgroupOneAddressSpaceSSID())
483  return std::make_tuple(SIAtomicScope::WORKGROUP,
484  SIAtomicAddrSpace::ATOMIC & InstrScope,
485  false);
486  if (SSID == MMI->getWavefrontOneAddressSpaceSSID())
487  return std::make_tuple(SIAtomicScope::WAVEFRONT,
488  SIAtomicAddrSpace::ATOMIC & InstrScope,
489  false);
490  if (SSID == MMI->getSingleThreadOneAddressSpaceSSID())
491  return std::make_tuple(SIAtomicScope::SINGLETHREAD,
492  SIAtomicAddrSpace::ATOMIC & InstrScope,
493  false);
494  return None;
495 }
496 
497 SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(unsigned AS) const {
498  if (AS == AMDGPUAS::FLAT_ADDRESS)
499  return SIAtomicAddrSpace::FLAT;
500  if (AS == AMDGPUAS::GLOBAL_ADDRESS)
501  return SIAtomicAddrSpace::GLOBAL;
502  if (AS == AMDGPUAS::LOCAL_ADDRESS)
503  return SIAtomicAddrSpace::LDS;
504  if (AS == AMDGPUAS::PRIVATE_ADDRESS)
505  return SIAtomicAddrSpace::SCRATCH;
506  if (AS == AMDGPUAS::REGION_ADDRESS)
507  return SIAtomicAddrSpace::GDS;
508 
509  return SIAtomicAddrSpace::OTHER;
510 }
511 
512 SIMemOpAccess::SIMemOpAccess(MachineFunction &MF) {
513  MMI = &MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
514 }
515 
516 Optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
517  const MachineBasicBlock::iterator &MI) const {
518  assert(MI->getNumMemOperands() > 0);
519 
520  SyncScope::ID SSID = SyncScope::SingleThread;
521  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
522  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
523  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
524  bool IsNonTemporal = true;
525 
526  // Validator should check whether or not MMOs cover the entire set of
527  // locations accessed by the memory instruction.
528  for (const auto &MMO : MI->memoperands()) {
529  IsNonTemporal &= MMO->isNonTemporal();
530  InstrAddrSpace |=
531  toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace());
532  AtomicOrdering OpOrdering = MMO->getOrdering();
533  if (OpOrdering != AtomicOrdering::NotAtomic) {
534  const auto &IsSyncScopeInclusion =
535  MMI->isSyncScopeInclusion(SSID, MMO->getSyncScopeID());
536  if (!IsSyncScopeInclusion) {
537  reportUnsupported(MI,
538  "Unsupported non-inclusive atomic synchronization scope");
539  return None;
540  }
541 
542  SSID = IsSyncScopeInclusion.getValue() ? SSID : MMO->getSyncScopeID();
543  Ordering =
544  isStrongerThan(Ordering, OpOrdering) ?
545  Ordering : MMO->getOrdering();
546  assert(MMO->getFailureOrdering() != AtomicOrdering::Release &&
547  MMO->getFailureOrdering() != AtomicOrdering::AcquireRelease);
548  FailureOrdering =
549  isStrongerThan(FailureOrdering, MMO->getFailureOrdering()) ?
550  FailureOrdering : MMO->getFailureOrdering();
551  }
552  }
553 
554  SIAtomicScope Scope = SIAtomicScope::NONE;
555  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
556  bool IsCrossAddressSpaceOrdering = false;
557  if (Ordering != AtomicOrdering::NotAtomic) {
558  auto ScopeOrNone = toSIAtomicScope(SSID, InstrAddrSpace);
559  if (!ScopeOrNone) {
560  reportUnsupported(MI, "Unsupported atomic synchronization scope");
561  return None;
562  }
563  std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
564  ScopeOrNone.getValue();
565  if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
566  ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
567  reportUnsupported(MI, "Unsupported atomic address space");
568  return None;
569  }
570  }
571  return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
572  IsCrossAddressSpaceOrdering, FailureOrdering, IsNonTemporal);
573 }
574 
575 Optional<SIMemOpInfo> SIMemOpAccess::getLoadInfo(
576  const MachineBasicBlock::iterator &MI) const {
577  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
578 
579  if (!(MI->mayLoad() && !MI->mayStore()))
580  return None;
581 
582  // Be conservative if there are no memory operands.
583  if (MI->getNumMemOperands() == 0)
584  return SIMemOpInfo();
585 
586  return constructFromMIWithMMO(MI);
587 }
588 
589 Optional<SIMemOpInfo> SIMemOpAccess::getStoreInfo(
590  const MachineBasicBlock::iterator &MI) const {
591  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
592 
593  if (!(!MI->mayLoad() && MI->mayStore()))
594  return None;
595 
596  // Be conservative if there are no memory operands.
597  if (MI->getNumMemOperands() == 0)
598  return SIMemOpInfo();
599 
600  return constructFromMIWithMMO(MI);
601 }
602 
603 Optional<SIMemOpInfo> SIMemOpAccess::getAtomicFenceInfo(
604  const MachineBasicBlock::iterator &MI) const {
605  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
606 
607  if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
608  return None;
609 
610  AtomicOrdering Ordering =
611  static_cast<AtomicOrdering>(MI->getOperand(0).getImm());
612 
613  SyncScope::ID SSID = static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
614  auto ScopeOrNone = toSIAtomicScope(SSID, SIAtomicAddrSpace::ATOMIC);
615  if (!ScopeOrNone) {
616  reportUnsupported(MI, "Unsupported atomic synchronization scope");
617  return None;
618  }
619 
620  SIAtomicScope Scope = SIAtomicScope::NONE;
621  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
622  bool IsCrossAddressSpaceOrdering = false;
623  std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
624  ScopeOrNone.getValue();
625 
626  if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
627  ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
628  reportUnsupported(MI, "Unsupported atomic address space");
629  return None;
630  }
631 
632  return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, SIAtomicAddrSpace::ATOMIC,
633  IsCrossAddressSpaceOrdering);
634 }
635 
636 Optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
637  const MachineBasicBlock::iterator &MI) const {
638  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
639 
640  if (!(MI->mayLoad() && MI->mayStore()))
641  return None;
642 
643  // Be conservative if there are no memory operands.
644  if (MI->getNumMemOperands() == 0)
645  return SIMemOpInfo();
646 
647  return constructFromMIWithMMO(MI);
648 }
649 
650 SICacheControl::SICacheControl(const GCNSubtarget &ST) {
651  TII = ST.getInstrInfo();
652  IV = getIsaVersion(ST.getCPU());
653 }
654 
655 /* static */
656 std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) {
657  GCNSubtarget::Generation Generation = ST.getGeneration();
658  if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
659  return std::make_unique<SIGfx6CacheControl>(ST);
660  if (Generation < AMDGPUSubtarget::GFX10)
661  return std::make_unique<SIGfx7CacheControl>(ST);
662  return std::make_unique<SIGfx10CacheControl>(ST, ST.isCuModeEnabled());
663 }
664 
665 bool SIGfx6CacheControl::enableLoadCacheBypass(
666  const MachineBasicBlock::iterator &MI,
667  SIAtomicScope Scope,
668  SIAtomicAddrSpace AddrSpace) const {
669  assert(MI->mayLoad() && !MI->mayStore());
670  bool Changed = false;
671 
672  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
673  /// TODO: Do not set glc for rmw atomic operations as they
674  /// implicitly bypass the L1 cache.
675 
676  switch (Scope) {
677  case SIAtomicScope::SYSTEM:
678  case SIAtomicScope::AGENT:
679  Changed |= enableGLCBit(MI);
680  break;
681  case SIAtomicScope::WORKGROUP:
682  case SIAtomicScope::WAVEFRONT:
683  case SIAtomicScope::SINGLETHREAD:
684  // No cache to bypass.
685  break;
686  default:
687  llvm_unreachable("Unsupported synchronization scope");
688  }
689  }
690 
691  /// The scratch address space does not need the global memory caches
692  /// to be bypassed as all memory operations by the same thread are
693  /// sequentially consistent, and no other thread can access scratch
694  /// memory.
695 
696  /// Other address spaces do not have a cache.
697 
698  return Changed;
699 }
700 
701 bool SIGfx6CacheControl::enableNonTemporal(
702  const MachineBasicBlock::iterator &MI) const {
703  assert(MI->mayLoad() ^ MI->mayStore());
704  bool Changed = false;
705 
706  /// TODO: Do not enableGLCBit if rmw atomic.
707  Changed |= enableGLCBit(MI);
708  Changed |= enableSLCBit(MI);
709 
710  return Changed;
711 }
712 
713 bool SIGfx6CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
714  SIAtomicScope Scope,
715  SIAtomicAddrSpace AddrSpace,
716  Position Pos) const {
717  bool Changed = false;
718 
719  MachineBasicBlock &MBB = *MI->getParent();
720  DebugLoc DL = MI->getDebugLoc();
721 
722  if (Pos == Position::AFTER)
723  ++MI;
724 
725  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
726  switch (Scope) {
727  case SIAtomicScope::SYSTEM:
728  case SIAtomicScope::AGENT:
729  BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBINVL1));
730  Changed = true;
731  break;
732  case SIAtomicScope::WORKGROUP:
733  case SIAtomicScope::WAVEFRONT:
734  case SIAtomicScope::SINGLETHREAD:
735  // No cache to invalidate.
736  break;
737  default:
738  llvm_unreachable("Unsupported synchronization scope");
739  }
740  }
741 
742  /// The scratch address space does not need the global memory cache
743  /// to be flushed as all memory operations by the same thread are
744  /// sequentially consistent, and no other thread can access scratch
745  /// memory.
746 
747  /// Other address spaces do not have a cache.
748 
749  if (Pos == Position::AFTER)
750  --MI;
751 
752  return Changed;
753 }
754 
755 bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
756  SIAtomicScope Scope,
757  SIAtomicAddrSpace AddrSpace,
758  SIMemOp Op,
759  bool IsCrossAddrSpaceOrdering,
760  Position Pos) const {
761  bool Changed = false;
762 
763  MachineBasicBlock &MBB = *MI->getParent();
764  DebugLoc DL = MI->getDebugLoc();
765 
766  if (Pos == Position::AFTER)
767  ++MI;
768 
769  bool VMCnt = false;
770  bool LGKMCnt = false;
771 
772  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
773  switch (Scope) {
774  case SIAtomicScope::SYSTEM:
775  case SIAtomicScope::AGENT:
776  VMCnt |= true;
777  break;
778  case SIAtomicScope::WORKGROUP:
779  case SIAtomicScope::WAVEFRONT:
780  case SIAtomicScope::SINGLETHREAD:
781  // The L1 cache keeps all memory operations in order for
782  // wavefronts in the same work-group.
783  break;
784  default:
785  llvm_unreachable("Unsupported synchronization scope");
786  }
787  }
788 
789  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
790  switch (Scope) {
791  case SIAtomicScope::SYSTEM:
792  case SIAtomicScope::AGENT:
793  case SIAtomicScope::WORKGROUP:
794  // If no cross address space ordering then an LDS waitcnt is not
795  // needed as LDS operations for all waves are executed in a
796  // total global ordering as observed by all waves. Required if
797  // also synchronizing with global/GDS memory as LDS operations
798  // could be reordered with respect to later global/GDS memory
799  // operations of the same wave.
800  LGKMCnt |= IsCrossAddrSpaceOrdering;
801  break;
802  case SIAtomicScope::WAVEFRONT:
803  case SIAtomicScope::SINGLETHREAD:
804  // The LDS keeps all memory operations in order for
805  // the same wavefront.
806  break;
807  default:
808  llvm_unreachable("Unsupported synchronization scope");
809  }
810  }
811 
812  if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
813  switch (Scope) {
814  case SIAtomicScope::SYSTEM:
815  case SIAtomicScope::AGENT:
816  // If no cross address space ordering then a GDS waitcnt is not
817  // needed as GDS operations for all waves are executed in a
818  // total global ordering as observed by all waves. Required if
819  // also synchronizing with global/LDS memory as GDS operations
820  // could be reordered with respect to later global/LDS memory
821  // operations of the same wave.
822  LGKMCnt |= IsCrossAddrSpaceOrdering;
823  break;
824  case SIAtomicScope::WORKGROUP:
825  case SIAtomicScope::WAVEFRONT:
826  case SIAtomicScope::SINGLETHREAD:
827  // The GDS keeps all memory operations in order for
828  // the same work-group.
829  break;
830  default:
831  llvm_unreachable("Unsupported synchronization scope");
832  }
833  }
834 
835  if (VMCnt || LGKMCnt) {
836  unsigned WaitCntImmediate =
837  encodeWaitcnt(IV,
838  VMCnt ? 0 : getVmcntBitMask(IV),
839  getExpcntBitMask(IV),
840  LGKMCnt ? 0 : getLgkmcntBitMask(IV));
841  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(WaitCntImmediate);
842  Changed = true;
843  }
844 
845  if (Pos == Position::AFTER)
846  --MI;
847 
848  return Changed;
849 }
850 
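The tail of insertWait turns the per-counter decisions into a single S_WAITCNT immediate: a counter that must drain contributes 0, and every other field is left at its ISA-dependent maximum (its bit mask). A rough standalone sketch of that encoding step; the field widths and shifts here are purely illustrative, since the real values come from getVmcntBitMask/getExpcntBitMask/getLgkmcntBitMask and encodeWaitcnt for the current IsaVersion:

#include <cstdio>

// Illustrative field widths only; the real widths depend on the ISA version.
constexpr unsigned VmcntMask = 0xF, ExpcntMask = 0x7, LgkmcntMask = 0xF;

unsigned encodeWaitcnt(unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  return (Vmcnt & VmcntMask) | ((Expcnt & ExpcntMask) << 4) |
         ((Lgkmcnt & LgkmcntMask) << 8);
}

int main() {
  // E.g. ordering global memory at agent scope: wait for vmcnt(0), leave
  // expcnt and lgkmcnt unconstrained.
  bool VMCnt = true, LGKMCnt = false;
  unsigned Imm = encodeWaitcnt(VMCnt ? 0 : VmcntMask, ExpcntMask,
                               LGKMCnt ? 0 : LgkmcntMask);
  std::printf("illustrative S_WAITCNT immediate: 0x%x\n", Imm);
}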
851 bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
852  SIAtomicScope Scope,
853  SIAtomicAddrSpace AddrSpace,
854  Position Pos) const {
855  bool Changed = false;
856 
857  MachineBasicBlock &MBB = *MI->getParent();
858  DebugLoc DL = MI->getDebugLoc();
859 
860  const GCNSubtarget &STM = MBB.getParent()->getSubtarget<GCNSubtarget>();
861 
862  const unsigned Flush = STM.isAmdPalOS() || STM.isMesa3DOS()
863  ? AMDGPU::BUFFER_WBINVL1
864  : AMDGPU::BUFFER_WBINVL1_VOL;
865 
866  if (Pos == Position::AFTER)
867  ++MI;
868 
869  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
870  switch (Scope) {
871  case SIAtomicScope::SYSTEM:
872  case SIAtomicScope::AGENT:
873  BuildMI(MBB, MI, DL, TII->get(Flush));
874  Changed = true;
875  break;
876  case SIAtomicScope::WORKGROUP:
877  case SIAtomicScope::WAVEFRONT:
878  case SIAtomicScope::SINGLETHREAD:
879  // No cache to invalidate.
880  break;
881  default:
882  llvm_unreachable("Unsupported synchronization scope");
883  }
884  }
885 
886  /// The scratch address space does not need the global memory cache
887  /// to be flushed as all memory operations by the same thread are
888  /// sequentially consistent, and no other thread can access scratch
889  /// memory.
890 
891  /// Other address spaces do not have a cache.
892 
893  if (Pos == Position::AFTER)
894  --MI;
895 
896  return Changed;
897 }
898 
899 bool SIGfx10CacheControl::enableLoadCacheBypass(
900  const MachineBasicBlock::iterator &MI,
901  SIAtomicScope Scope,
902  SIAtomicAddrSpace AddrSpace) const {
903  assert(MI->mayLoad() && !MI->mayStore());
904  bool Changed = false;
905 
906  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
907  /// TODO Do not set glc for rmw atomic operations as they
908  /// implicitly bypass the L0/L1 caches.
909 
910  switch (Scope) {
911  case SIAtomicScope::SYSTEM:
912  case SIAtomicScope::AGENT:
913  Changed |= enableGLCBit(MI);
914  Changed |= enableDLCBit(MI);
915  break;
916  case SIAtomicScope::WORKGROUP:
917  // In WGP mode the waves of a work-group can be executing on either CU of
918  // the WGP, so the L0, which is per CU, must be bypassed. In CU mode all
919  // waves of a work-group are on the same CU, so the L0 does not need to
920  // be bypassed.
921  if (!CuMode) Changed |= enableGLCBit(MI);
922  break;
923  case SIAtomicScope::WAVEFRONT:
924  case SIAtomicScope::SINGLETHREAD:
925  // No cache to bypass.
926  break;
927  default:
928  llvm_unreachable("Unsupported synchronization scope");
929  }
930  }
931 
932  /// The scratch address space does not need the global memory caches
933  /// to be bypassed as all memory operations by the same thread are
934  /// sequentially consistent, and no other thread can access scratch
935  /// memory.
936 
937  /// Other address spaces do not have a cache.
938 
939  return Changed;
940 }
941 
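For gfx10 the bypass decision also depends on whether the subtarget runs work-groups in CU mode or WGP mode. A hypothetical sketch that condenses the switch above into a small decision table; Scope and Bits are illustrative stand-ins for SIAtomicScope and the GLC/DLC operands, not part of the pass:

#include <cstdio>

enum class Scope { SingleThread, Wavefront, Workgroup, Agent, System };
struct Bits { bool GLC, DLC; };

Bits loadBypassBits(Scope S, bool CuMode) {
  switch (S) {
  case Scope::System:
  case Scope::Agent:
    return {true, true};       // bypass both L0 and L1
  case Scope::Workgroup:
    return {!CuMode, false};   // L0 is per CU: bypass only in WGP mode
  default:
    return {false, false};     // nothing to bypass within a wavefront
  }
}

int main() {
  Bits B = loadBypassBits(Scope::Workgroup, /*CuMode=*/false);
  std::printf("workgroup load in WGP mode: glc=%d dlc=%d\n", B.GLC, B.DLC);
}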
942 bool SIGfx10CacheControl::enableNonTemporal(
943  const MachineBasicBlock::iterator &MI) const {
944  assert(MI->mayLoad() ^ MI->mayStore());
945  bool Changed = false;
946 
947  Changed |= enableSLCBit(MI);
948  /// TODO for store (non-rmw atomic) instructions also enableGLCBit(MI)
949 
950  return Changed;
951 }
952 
953 bool SIGfx10CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
954  SIAtomicScope Scope,
955  SIAtomicAddrSpace AddrSpace,
956  Position Pos) const {
957  bool Changed = false;
958 
959  MachineBasicBlock &MBB = *MI->getParent();
960  DebugLoc DL = MI->getDebugLoc();
961 
962  if (Pos == Position::AFTER)
963  ++MI;
964 
965  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
966  switch (Scope) {
967  case SIAtomicScope::SYSTEM:
968  case SIAtomicScope::AGENT:
969  BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
970  BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL1_INV));
971  Changed = true;
972  break;
973  case SIAtomicScope::WORKGROUP:
974  // In WGP mode the waves of a work-group can be executing on either CU of
975  // the WGP, so the L0, which is per CU, must be invalidated. In CU mode
976  // all waves of a work-group are on the same CU, so the L0 does not need
977  // to be invalidated.
978  if (!CuMode) {
979  BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
980  Changed = true;
981  }
982  break;
983  case SIAtomicScope::WAVEFRONT:
984  case SIAtomicScope::SINGLETHREAD:
985  // No cache to invalidate.
986  break;
987  default:
988  llvm_unreachable("Unsupported synchronization scope");
989  }
990  }
991 
992  /// The scratch address space does not need the global memory cache
993  /// to be flushed as all memory operations by the same thread are
994  /// sequentially consistent, and no other thread can access scratch
995  /// memory.
996 
997  /// Other address spaces do not have a cache.
998 
999  if (Pos == Position::AFTER)
1000  --MI;
1001 
1002  return Changed;
1003 }
1004 
1005 bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
1006  SIAtomicScope Scope,
1007  SIAtomicAddrSpace AddrSpace,
1008  SIMemOp Op,
1009  bool IsCrossAddrSpaceOrdering,
1010  Position Pos) const {
1011  bool Changed = false;
1012 
1013  MachineBasicBlock &MBB = *MI->getParent();
1014  DebugLoc DL = MI->getDebugLoc();
1015 
1016  if (Pos == Position::AFTER)
1017  ++MI;
1018 
1019  bool VMCnt = false;
1020  bool VSCnt = false;
1021  bool LGKMCnt = false;
1022 
1023  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1024  switch (Scope) {
1025  case SIAtomicScope::SYSTEM:
1026  case SIAtomicScope::AGENT:
1027  if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
1028  VMCnt |= true;
1029  if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
1030  VSCnt |= true;
1031  break;
1032  case SIAtomicScope::WORKGROUP:
1033  // In WGP mode the waves of a work-group can be executing on either CU of
1034  // the WGP, so operations must be waited on to complete so that they are
1035  // visible to waves on the other CU, as the L0 is per CU. In CU mode all
1036  // waves of a work-group are on the same CU and share the same L0, so no
1037  // wait is needed.
1038  if (!CuMode) {
1039  if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
1040  VMCnt |= true;
1041  if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
1042  VSCnt |= true;
1043  }
1044  break;
1045  case SIAtomicScope::WAVEFRONT:
1046  case SIAtomicScope::SINGLETHREAD:
1047  // The L0 cache keeps all memory operations in order for
1048  // work-items in the same wavefront.
1049  break;
1050  default:
1051  llvm_unreachable("Unsupported synchronization scope");
1052  }
1053  }
1054 
1055  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
1056  switch (Scope) {
1057  case SIAtomicScope::SYSTEM:
1058  case SIAtomicScope::AGENT:
1059  case SIAtomicScope::WORKGROUP:
1060  // If no cross address space ordering then an LDS waitcnt is not
1061  // needed as LDS operations for all waves are executed in a
1062  // total global ordering as observed by all waves. Required if
1063  // also synchronizing with global/GDS memory as LDS operations
1064  // could be reordered with respect to later global/GDS memory
1065  // operations of the same wave.
1066  LGKMCnt |= IsCrossAddrSpaceOrdering;
1067  break;
1068  case SIAtomicScope::WAVEFRONT:
1069  case SIAtomicScope::SINGLETHREAD:
1070  // The LDS keeps all memory operations in order for
1071  // the same wavefront.
1072  break;
1073  default:
1074  llvm_unreachable("Unsupported synchronization scope");
1075  }
1076  }
1077 
1078  if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
1079  switch (Scope) {
1080  case SIAtomicScope::SYSTEM:
1081  case SIAtomicScope::AGENT:
1082  // If no cross address space ordering then a GDS waitcnt is not
1083  // needed as GDS operations for all waves are executed in a
1084  // total global ordering as observed by all waves. Required if
1085  // also synchronizing with global/LDS memory as GDS operations
1086  // could be reordered with respect to later global/LDS memory
1087  // operations of the same wave.
1088  LGKMCnt |= IsCrossAddrSpaceOrdering;
1089  break;
1090  case SIAtomicScope::WORKGROUP:
1091  case SIAtomicScope::WAVEFRONT:
1092  case SIAtomicScope::SINGLETHREAD:
1093  // The GDS keeps all memory operations in order for
1094  // the same work-group.
1095  break;
1096  default:
1097  llvm_unreachable("Unsupported synchronization scope");
1098  }
1099  }
1100 
1101  if (VMCnt || LGKMCnt) {
1102  unsigned WaitCntImmediate =
1103  encodeWaitcnt(IV,
1104  VMCnt ? 0 : getVmcntBitMask(IV),
1105  getExpcntBitMask(IV),
1106  LGKMCnt ? 0 : getLgkmcntBitMask(IV));
1107  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(WaitCntImmediate);
1108  Changed = true;
1109  }
1110 
1111  if (VSCnt) {
1112  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_VSCNT))
1113  .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
1114  .addImm(0);
1115  Changed = true;
1116  }
1117 
1118  if (Pos == Position::AFTER)
1119  --MI;
1120 
1121  return Changed;
1122 }
1123 
1124 bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
1125  if (AtomicPseudoMIs.empty())
1126  return false;
1127 
1128  for (auto &MI : AtomicPseudoMIs)
1129  MI->eraseFromParent();
1130 
1131  AtomicPseudoMIs.clear();
1132  return true;
1133 }
1134 
1135 bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
1136  MachineBasicBlock::iterator &MI) {
1137  assert(MI->mayLoad() && !MI->mayStore());
1138 
1139  bool Changed = false;
1140 
1141  if (MOI.isAtomic()) {
1142  if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
1143  MOI.getOrdering() == AtomicOrdering::Acquire ||
1144  MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
1145  Changed |= CC->enableLoadCacheBypass(MI, MOI.getScope(),
1146  MOI.getOrderingAddrSpace());
1147  }
1148 
1149  if (MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
1150  Changed |= CC->insertWait(MI, MOI.getScope(),
1151  MOI.getOrderingAddrSpace(),
1152  SIMemOp::LOAD | SIMemOp::STORE,
1153  MOI.getIsCrossAddressSpaceOrdering(),
1154  Position::BEFORE);
1155 
1156  if (MOI.getOrdering() == AtomicOrdering::Acquire ||
1157  MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
1158  Changed |= CC->insertWait(MI, MOI.getScope(),
1159  MOI.getInstrAddrSpace(),
1160  SIMemOp::LOAD,
1161  MOI.getIsCrossAddressSpaceOrdering(),
1162  Position::AFTER);
1163  Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
1164  MOI.getOrderingAddrSpace(),
1165  Position::AFTER);
1166  }
1167 
1168  return Changed;
1169  }
1170 
1171  // Atomic instructions do not have the nontemporal attribute.
1172  if (MOI.isNonTemporal()) {
1173  Changed |= CC->enableNonTemporal(MI);
1174  return Changed;
1175  }
1176 
1177  return Changed;
1178 }
1179 
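expandLoad above implements a small policy table: monotonic/acquire/seq_cst loads bypass the caches, seq_cst additionally waits before the load, and acquire/seq_cst wait and invalidate after it. A hypothetical standalone restatement of that policy; the names here are illustrative and not part of the pass:

#include <cstdio>

enum class Ordering { Monotonic, Acquire, Release, SequentiallyConsistent };

struct LoadActions { bool BypassCache, WaitBefore, WaitAndInvalidateAfter; };

LoadActions loadActionsFor(Ordering O) {
  return {
      /*BypassCache=*/O == Ordering::Monotonic || O == Ordering::Acquire ||
          O == Ordering::SequentiallyConsistent,
      /*WaitBefore=*/O == Ordering::SequentiallyConsistent,
      /*WaitAndInvalidateAfter=*/O == Ordering::Acquire ||
          O == Ordering::SequentiallyConsistent,
  };
}

int main() {
  LoadActions A = loadActionsFor(Ordering::Acquire);
  std::printf("acquire load: bypass=%d waitBefore=%d invalidateAfter=%d\n",
              A.BypassCache, A.WaitBefore, A.WaitAndInvalidateAfter);
}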
1180 bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
1181  MachineBasicBlock::iterator &MI) {
1182  assert(!MI->mayLoad() && MI->mayStore());
1183 
1184  bool Changed = false;
1185 
1186  if (MOI.isAtomic()) {
1187  if (MOI.getOrdering() == AtomicOrdering::Release ||
1188  MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
1189  Changed |= CC->insertWait(MI, MOI.getScope(),
1190  MOI.getOrderingAddrSpace(),
1191  SIMemOp::LOAD | SIMemOp::STORE,
1192  MOI.getIsCrossAddressSpaceOrdering(),
1193  Position::BEFORE);
1194 
1195  return Changed;
1196  }
1197 
1198  // Atomic instructions do not have the nontemporal attribute.
1199  if (MOI.isNonTemporal()) {
1200  Changed |= CC->enableNonTemporal(MI);
1201  return Changed;
1202  }
1203 
1204  return Changed;
1205 }
1206 
1207 bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
1208  MachineBasicBlock::iterator &MI) {
1209  assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);
1210 
1211  AtomicPseudoMIs.push_back(MI);
1212  bool Changed = false;
1213 
1214  if (MOI.isAtomic()) {
1215  if (MOI.getOrdering() == AtomicOrdering::Acquire ||
1216  MOI.getOrdering() == AtomicOrdering::Release ||
1217  MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
1218  MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
1219  /// TODO: This relies on a barrier always generating a waitcnt
1220  /// for LDS to ensure it is not reordered with the completion of
1221  /// the preceding LDS operations. If barrier had a memory
1222  /// ordering and memory scope, then the library does not need to
1223  /// generate a fence. Could add support in this file for
1224  /// barrier. SIInsertWaitcnt.cpp could then stop unconditionally
1225  /// adding waitcnt before a S_BARRIER.
1226  Changed |= CC->insertWait(MI, MOI.getScope(),
1227  MOI.getOrderingAddrSpace(),
1228  SIMemOp::LOAD | SIMemOp::STORE,
1229  MOI.getIsCrossAddressSpaceOrdering(),
1230  Position::BEFORE);
1231 
1232  if (MOI.getOrdering() == AtomicOrdering::Acquire ||
1233  MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
1234  MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
1235  Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
1236  MOI.getOrderingAddrSpace(),
1237  Position::BEFORE);
1238 
1239  return Changed;
1240  }
1241 
1242  return Changed;
1243 }
1244 
1245 bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
1246  MachineBasicBlock::iterator &MI) {
1247  assert(MI->mayLoad() && MI->mayStore());
1248 
1249  bool Changed = false;
1250 
1251  if (MOI.isAtomic()) {
1252  if (MOI.getOrdering() == AtomicOrdering::Release ||
1253  MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
1254  MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
1255  MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
1256  Changed |= CC->insertWait(MI, MOI.getScope(),
1257  MOI.getOrderingAddrSpace(),
1258  SIMemOp::LOAD | SIMemOp::STORE,
1259  MOI.getIsCrossAddressSpaceOrdering(),
1260  Position::BEFORE);
1261 
1262  if (MOI.getOrdering() == AtomicOrdering::Acquire ||
1263  MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
1264  MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
1265  MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
1266  MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
1267  Changed |= CC->insertWait(MI, MOI.getScope(),
1268  MOI.getOrderingAddrSpace(),
1269  isAtomicRet(*MI) ? SIMemOp::LOAD :
1270  SIMemOp::STORE,
1271  MOI.getIsCrossAddressSpaceOrdering(),
1272  Position::AFTER);
1273  Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
1274  MOI.getOrderingAddrSpace(),
1275  Position::AFTER);
1276  }
1277 
1278  return Changed;
1279  }
1280 
1281  return Changed;
1282 }
1283 
1284 bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
1285  bool Changed = false;
1286 
1287  SIMemOpAccess MOA(MF);
1288  CC = SICacheControl::create(MF.getSubtarget<GCNSubtarget>());
1289 
1290  for (auto &MBB : MF) {
1291  for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
1292  if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
1293  continue;
1294 
1295  if (const auto &MOI = MOA.getLoadInfo(MI))
1296  Changed |= expandLoad(MOI.getValue(), MI);
1297  else if (const auto &MOI = MOA.getStoreInfo(MI))
1298  Changed |= expandStore(MOI.getValue(), MI);
1299  else if (const auto &MOI = MOA.getAtomicFenceInfo(MI))
1300  Changed |= expandAtomicFence(MOI.getValue(), MI);
1301  else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI))
1302  Changed |= expandAtomicCmpxchgOrRmw(MOI.getValue(), MI);
1303  }
1304  }
1305 
1306  Changed |= removeAtomicPseudoMIs();
1307  return Changed;
1308 }
1309 
1310 INITIALIZE_PASS(SIMemoryLegalizer, DEBUG_TYPE, PASS_NAME, false, false)
1311 
1312 char SIMemoryLegalizer::ID = 0;
1313 char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID;
1314 
1315 FunctionPass *llvm::createSIMemoryLegalizerPass() {
1316  return new SIMemoryLegalizer();
1317 }