LLVM  9.0.0svn
SIMemoryLegalizer.cpp
1 //===- SIMemoryLegalizer.cpp ----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Memory legalizer - implements memory model. More information can be
11 /// found here:
12 /// http://llvm.org/docs/AMDGPUUsage.html#memory-model
13 //
14 //===----------------------------------------------------------------------===//
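//
// As a rough illustration of what this pass emits (a sketch of the gfx6
// lowering only): an agent-scope acquire atomic load is handled by setting
// the GLC bit on the load so it bypasses the L1 cache, then inserting
// "s_waitcnt vmcnt(0)" and "buffer_wbinvl1" after it, so the load completes
// and stale cache lines are invalidated before any following accesses.
//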
15 
16 #include "AMDGPU.h"
17 #include "AMDGPUMachineModuleInfo.h"
18 #include "AMDGPUSubtarget.h"
19 #include "SIDefines.h"
20 #include "SIInstrInfo.h"
21 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
22 #include "Utils/AMDGPUBaseInfo.h"
23 #include "llvm/ADT/BitmaskEnum.h"
24 #include "llvm/ADT/None.h"
25 #include "llvm/ADT/Optional.h"
26 #include "llvm/CodeGen/MachineBasicBlock.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/MachineFunctionPass.h"
29 #include "llvm/CodeGen/MachineInstrBuilder.h"
30 #include "llvm/CodeGen/MachineMemOperand.h"
31 #include "llvm/CodeGen/MachineModuleInfo.h"
32 #include "llvm/CodeGen/MachineOperand.h"
33 #include "llvm/IR/DebugLoc.h"
34 #include "llvm/IR/DiagnosticInfo.h"
35 #include "llvm/IR/Function.h"
36 #include "llvm/IR/LLVMContext.h"
37 #include "llvm/MC/MCInstrDesc.h"
38 #include "llvm/Pass.h"
39 #include "llvm/Support/AtomicOrdering.h"
40 #include "llvm/Support/MathExtras.h"
41 #include <cassert>
42 #include <list>
43 
44 using namespace llvm;
45 using namespace llvm::AMDGPU;
46 
47 #define DEBUG_TYPE "si-memory-legalizer"
48 #define PASS_NAME "SI Memory Legalizer"
49 
50 namespace {
51 
52 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
53 
54 /// Memory operation flags. Can be ORed together.
55 enum class SIMemOp {
56  NONE = 0u,
57  LOAD = 1u << 0,
58  STORE = 1u << 1,
59  LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ STORE)
60 };
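// For example, insertWait() below is passed SIMemOp::LOAD | SIMemOp::STORE
// when both outstanding loads and stores must complete, and just
// SIMemOp::LOAD when only loads need to be waited on.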
61 
62 /// Position to insert a new instruction relative to an existing
63 /// instruction.
64 enum class Position {
65  BEFORE,
66  AFTER
67 };
68 
69 /// The atomic synchronization scopes supported by the AMDGPU target.
70 enum class SIAtomicScope {
71  NONE,
72  SINGLETHREAD,
73  WAVEFRONT,
74  WORKGROUP,
75  AGENT,
76  SYSTEM
77 };
78 
79 /// The distinct address spaces supported by the AMDGPU target for
80 /// atomic memory operations. Can be ORed together.
81 enum class SIAtomicAddrSpace {
82  NONE = 0u,
83  GLOBAL = 1u << 0,
84  LDS = 1u << 1,
85  SCRATCH = 1u << 2,
86  GDS = 1u << 3,
87  OTHER = 1u << 4,
88 
89  /// The address spaces that can be accessed by a FLAT instruction.
90  FLAT = GLOBAL | LDS | SCRATCH,
91 
92  /// The address spaces that support atomic instructions.
93  ATOMIC = GLOBAL | LDS | SCRATCH | GDS,
94 
95  /// All address spaces.
96  ALL = GLOBAL | LDS | SCRATCH | GDS | OTHER,
97 
98  LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ ALL)
99 };
100 
101 /// Sets named bit \p BitName to "true" if present in instruction \p MI.
102 /// \returns Returns true if \p MI is modified, false otherwise.
103 template <uint16_t BitName>
104 bool enableNamedBit(const MachineBasicBlock::iterator &MI) {
105  int BitIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), BitName);
106  if (BitIdx == -1)
107  return false;
108 
109  MachineOperand &Bit = MI->getOperand(BitIdx);
110  if (Bit.getImm() != 0)
111  return false;
112 
113  Bit.setImm(1);
114  return true;
115 }
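// For instance, enableNamedBit<AMDGPU::OpName::glc>(MI) (wrapped as
// enableGLCBit() below) sets the GLC cache-bypass bit, but only on
// instructions that actually have a glc operand.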
116 
117 class SIMemOpInfo final {
118 private:
119 
120  friend class SIMemOpAccess;
121 
122  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
123  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
124  SIAtomicScope Scope = SIAtomicScope::SYSTEM;
125  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
126  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
127  bool IsCrossAddressSpaceOrdering = false;
128  bool IsNonTemporal = false;
129 
130  SIMemOpInfo(AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
131  SIAtomicScope Scope = SIAtomicScope::SYSTEM,
132  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC,
133  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::ALL,
134  bool IsCrossAddressSpaceOrdering = true,
135  AtomicOrdering FailureOrdering =
136  AtomicOrdering::SequentiallyConsistent,
137  bool IsNonTemporal = false)
138  : Ordering(Ordering), FailureOrdering(FailureOrdering),
139  Scope(Scope), OrderingAddrSpace(OrderingAddrSpace),
140  InstrAddrSpace(InstrAddrSpace),
141  IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
142  IsNonTemporal(IsNonTemporal) {
143  // There is also no cross address space ordering if the ordering
144  // address space is the same as the instruction address space and
145  // only contains a single address space.
146  if ((OrderingAddrSpace == InstrAddrSpace) &&
147  isPowerOf2_32(uint32_t(InstrAddrSpace)))
148  IsCrossAddressSpaceOrdering = false;
149  }
150 
151 public:
152  /// \returns Atomic synchronization scope of the machine instruction used to
153  /// create this SIMemOpInfo.
154  SIAtomicScope getScope() const {
155  return Scope;
156  }
157 
158  /// \returns Ordering constraint of the machine instruction used to
159  /// create this SIMemOpInfo.
160  AtomicOrdering getOrdering() const {
161  return Ordering;
162  }
163 
164  /// \returns Failure ordering constraint of the machine instruction used to
165  /// create this SIMemOpInfo.
166  AtomicOrdering getFailureOrdering() const {
167  return FailureOrdering;
168  }
169 
170  /// \returns The address spaces accessed by the machine
171  /// instruction used to create this SIMemOpInfo.
172  SIAtomicAddrSpace getInstrAddrSpace() const {
173  return InstrAddrSpace;
174  }
175 
176  /// \returns The address spaces that must be ordered by the machine
177  /// instruction used to create this SIMemOpInfo.
178  SIAtomicAddrSpace getOrderingAddrSpace() const {
179  return OrderingAddrSpace;
180  }
181 
182  /// \returns True iff memory ordering of operations on
183  /// different address spaces is required.
184  bool getIsCrossAddressSpaceOrdering() const {
185  return IsCrossAddressSpaceOrdering;
186  }
187 
188  /// \returns True if memory access of the machine instruction used to
189  /// create this SIMemOpInfo is non-temporal, false otherwise.
190  bool isNonTemporal() const {
191  return IsNonTemporal;
192  }
193 
194  /// \returns True if ordering constraint of the machine instruction used to
195  /// create this SIMemOpInfo is unordered or higher, false otherwise.
196  bool isAtomic() const {
197  return Ordering != AtomicOrdering::NotAtomic;
198  }
199 
200 };
201 
202 class SIMemOpAccess final {
203 private:
204  AMDGPUMachineModuleInfo *MMI = nullptr;
205 
206  /// Reports unsupported message \p Msg for \p MI to LLVM context.
207  void reportUnsupported(const MachineBasicBlock::iterator &MI,
208  const char *Msg) const;
209 
210  /// Inspects the target synchronization scope \p SSID and determines
211  /// the SI atomic scope it corresponds to, the address spaces it
212  /// covers, and whether the memory ordering applies between address
213  /// spaces.
214  Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
215  toSIAtomicScope(SyncScope::ID SSID, SIAtomicAddrSpace InstrScope) const;
216 
217  /// \returns A bit set of the SI atomic address spaces corresponding to the
218  /// target address space \p AS.
218  SIAtomicAddrSpace toSIAtomicAddrSpace(unsigned AS) const;
219 
220  /// \returns Info constructed from \p MI, which has at least one machine
221  /// memory operand.
222  Optional<SIMemOpInfo> constructFromMIWithMMO(
223  const MachineBasicBlock::iterator &MI) const;
224 
225 public:
226  /// Construct class to support accessing the machine memory operands
227  /// of instructions in the machine function \p MF.
228  SIMemOpAccess(MachineFunction &MF);
229 
230  /// \returns Load info if \p MI is a load operation, "None" otherwise.
231  Optional<SIMemOpInfo> getLoadInfo(
232  const MachineBasicBlock::iterator &MI) const;
233 
234  /// \returns Store info if \p MI is a store operation, "None" otherwise.
235  Optional<SIMemOpInfo> getStoreInfo(
236  const MachineBasicBlock::iterator &MI) const;
237 
238  /// \returns Atomic fence info if \p MI is an atomic fence operation,
239  /// "None" otherwise.
240  Optional<SIMemOpInfo> getAtomicFenceInfo(
241  const MachineBasicBlock::iterator &MI) const;
242 
243  /// \returns Atomic cmpxchg/rmw info if \p MI is an atomic cmpxchg or
244  /// rmw operation, "None" otherwise.
245  Optional<SIMemOpInfo> getAtomicCmpxchgOrRmwInfo(
246  const MachineBasicBlock::iterator &MI) const;
247 };
248 
249 class SICacheControl {
250 protected:
251 
252  /// Instruction info.
253  const SIInstrInfo *TII = nullptr;
254 
255  IsaVersion IV;
256 
257  SICacheControl(const GCNSubtarget &ST);
258 
259 public:
260 
261  /// Create a cache control for the subtarget \p ST.
262  static std::unique_ptr<SICacheControl> create(const GCNSubtarget &ST);
263 
264  /// Update \p MI memory load instruction to bypass any caches up to
265  /// the \p Scope memory scope for address spaces \p
266  /// AddrSpace. Return true iff the instruction was modified.
267  virtual bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
268  SIAtomicScope Scope,
269  SIAtomicAddrSpace AddrSpace) const = 0;
270 
271  /// Update \p MI memory instruction to indicate it is
272  /// nontemporal. Return true iff the instruction was modified.
273  virtual bool enableNonTemporal(const MachineBasicBlock::iterator &MI)
274  const = 0;
275 
276  /// Inserts any necessary instructions at position \p Pos relative
277  /// to instruction \p MI to ensure any caches associated with
278  /// address spaces \p AddrSpace for memory scopes up to memory scope
279  /// \p Scope are invalidated. Returns true iff any instructions were
280  /// inserted.
281  virtual bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
282  SIAtomicScope Scope,
283  SIAtomicAddrSpace AddrSpace,
284  Position Pos) const = 0;
285 
286  /// Inserts any necessary instructions at position \p Pos relative
287  /// to instruction \p MI to ensure memory instructions of kind \p Op
288  /// associated with address spaces \p AddrSpace have completed as
289  /// observed by other memory instructions executing in memory scope
290  /// \p Scope. \p IsCrossAddrSpaceOrdering indicates if the memory
291  /// ordering is between address spaces. Returns true iff any
292  /// instructions were inserted.
293  virtual bool insertWait(MachineBasicBlock::iterator &MI,
294  SIAtomicScope Scope,
295  SIAtomicAddrSpace AddrSpace,
296  SIMemOp Op,
297  bool IsCrossAddrSpaceOrdering,
298  Position Pos) const = 0;
299 
300  /// Virtual destructor to allow derived classes to be deleted.
301  virtual ~SICacheControl() = default;
302 
303 };
304 
305 class SIGfx6CacheControl : public SICacheControl {
306 protected:
307 
308  /// Sets GLC bit to "true" if present in \p MI. Returns true if \p MI
309  /// is modified, false otherwise.
310  bool enableGLCBit(const MachineBasicBlock::iterator &MI) const {
311  return enableNamedBit<AMDGPU::OpName::glc>(MI);
312  }
313 
314  /// Sets SLC bit to "true" if present in \p MI. Returns true if \p MI
315  /// is modified, false otherwise.
316  bool enableSLCBit(const MachineBasicBlock::iterator &MI) const {
317  return enableNamedBit<AMDGPU::OpName::slc>(MI);
318  }
319 
320 public:
321 
322  SIGfx6CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {};
323 
324  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
325  SIAtomicScope Scope,
326  SIAtomicAddrSpace AddrSpace) const override;
327 
328  bool enableNonTemporal(const MachineBasicBlock::iterator &MI) const override;
329 
330  bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
331  SIAtomicScope Scope,
332  SIAtomicAddrSpace AddrSpace,
333  Position Pos) const override;
334 
335  bool insertWait(MachineBasicBlock::iterator &MI,
336  SIAtomicScope Scope,
337  SIAtomicAddrSpace AddrSpace,
338  SIMemOp Op,
339  bool IsCrossAddrSpaceOrdering,
340  Position Pos) const override;
341 };
342 
343 class SIGfx7CacheControl : public SIGfx6CacheControl {
344 public:
345 
346  SIGfx7CacheControl(const GCNSubtarget &ST) : SIGfx6CacheControl(ST) {};
347 
348  bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
349  SIAtomicScope Scope,
350  SIAtomicAddrSpace AddrSpace,
351  Position Pos) const override;
352 
353 };
354 
355 class SIMemoryLegalizer final : public MachineFunctionPass {
356 private:
357 
358  /// Cache Control.
359  std::unique_ptr<SICacheControl> CC = nullptr;
360 
361  /// List of atomic pseudo instructions.
362  std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;
363 
364  /// Return true iff instruction \p MI is an atomic instruction that
365  /// returns a result.
366  bool isAtomicRet(const MachineInstr &MI) const {
367  return AMDGPU::getAtomicNoRetOp(MI.getOpcode()) != -1;
368  }
369 
370  /// Removes all processed atomic pseudo instructions from the current
371  /// function. Returns true if current function is modified, false otherwise.
372  bool removeAtomicPseudoMIs();
373 
374  /// Expands load operation \p MI. Returns true if instructions are
375  /// added/deleted or \p MI is modified, false otherwise.
376  bool expandLoad(const SIMemOpInfo &MOI,
377  MachineBasicBlock::iterator &MI);
378  /// Expands store operation \p MI. Returns true if instructions are
379  /// added/deleted or \p MI is modified, false otherwise.
380  bool expandStore(const SIMemOpInfo &MOI,
381  MachineBasicBlock::iterator &MI);
382  /// Expands atomic fence operation \p MI. Returns true if
383  /// instructions are added/deleted or \p MI is modified, false otherwise.
384  bool expandAtomicFence(const SIMemOpInfo &MOI,
385  MachineBasicBlock::iterator &MI);
386  /// Expands atomic cmpxchg or rmw operation \p MI. Returns true if
387  /// instructions are added/deleted or \p MI is modified, false otherwise.
388  bool expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
389  MachineBasicBlock::iterator &MI);
390 
391 public:
392  static char ID;
393 
394  SIMemoryLegalizer() : MachineFunctionPass(ID) {}
395 
396  void getAnalysisUsage(AnalysisUsage &AU) const override {
397  AU.setPreservesCFG();
398  MachineFunctionPass::getAnalysisUsage(AU);
399  }
400 
401  StringRef getPassName() const override {
402  return PASS_NAME;
403  }
404 
405  bool runOnMachineFunction(MachineFunction &MF) override;
406 };
407 
408 } // end namespace anonymous
409 
410 void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI,
411  const char *Msg) const {
412  const Function &Func = MI->getParent()->getParent()->getFunction();
413  DiagnosticInfoUnsupported Diag(Func, Msg, MI->getDebugLoc());
414  Func.getContext().diagnose(Diag);
415 }
416 
417 Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
418 SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
419  SIAtomicAddrSpace InstrScope) const {
420  /// TODO: For now assume OpenCL memory model which treats each
421  /// address space as having a separate happens-before relation, and
422  /// so an instruction only has ordering with respect to the address
423  /// space it accesses, and if it accesses multiple address spaces it
424  /// does not require ordering of operations in different address
425  /// spaces.
426  if (SSID == SyncScope::System)
427  return std::make_tuple(SIAtomicScope::SYSTEM,
428  SIAtomicAddrSpace::ATOMIC & InstrScope,
429  false);
430  if (SSID == MMI->getAgentSSID())
431  return std::make_tuple(SIAtomicScope::AGENT,
432  SIAtomicAddrSpace::ATOMIC & InstrScope,
433  false);
434  if (SSID == MMI->getWorkgroupSSID())
435  return std::make_tuple(SIAtomicScope::WORKGROUP,
436  SIAtomicAddrSpace::ATOMIC & InstrScope,
437  false);
438  if (SSID == MMI->getWavefrontSSID())
439  return std::make_tuple(SIAtomicScope::WAVEFRONT,
440  SIAtomicAddrSpace::ATOMIC & InstrScope,
441  false);
442  if (SSID == SyncScope::SingleThread)
443  return std::make_tuple(SIAtomicScope::SINGLETHREAD,
444  SIAtomicAddrSpace::ATOMIC & InstrScope,
445  false);
446  /// TODO: To support the HSA memory model, additional memory
447  /// scopes that do require cross address space ordering need to
448  /// be added.
449  return None;
450 }
451 
452 SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(unsigned AS) const {
453  if (AS == AMDGPUAS::FLAT_ADDRESS)
454  return SIAtomicAddrSpace::FLAT;
455  if (AS == AMDGPUAS::GLOBAL_ADDRESS)
456  return SIAtomicAddrSpace::GLOBAL;
457  if (AS == AMDGPUAS::LOCAL_ADDRESS)
458  return SIAtomicAddrSpace::LDS;
459  if (AS == AMDGPUAS::PRIVATE_ADDRESS)
460  return SIAtomicAddrSpace::SCRATCH;
461  if (AS == AMDGPUAS::REGION_ADDRESS)
462  return SIAtomicAddrSpace::GDS;
463 
464  return SIAtomicAddrSpace::OTHER;
465 }
466 
467 SIMemOpAccess::SIMemOpAccess(MachineFunction &MF) {
468  MMI = &MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
469 }
470 
471 Optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
472  const MachineBasicBlock::iterator &MI) const {
473  assert(MI->getNumMemOperands() > 0);
474 
475  SyncScope::ID SSID = SyncScope::SingleThread;
476  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
477  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
478  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
479  bool IsNonTemporal = true;
480 
481  // Validator should check whether or not MMOs cover the entire set of
482  // locations accessed by the memory instruction.
483  for (const auto &MMO : MI->memoperands()) {
484  IsNonTemporal &= MMO->isNonTemporal();
485  InstrAddrSpace |=
486  toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace());
487  AtomicOrdering OpOrdering = MMO->getOrdering();
488  if (OpOrdering != AtomicOrdering::NotAtomic) {
489  const auto &IsSyncScopeInclusion =
490  MMI->isSyncScopeInclusion(SSID, MMO->getSyncScopeID());
491  if (!IsSyncScopeInclusion) {
492  reportUnsupported(MI,
493  "Unsupported non-inclusive atomic synchronization scope");
494  return None;
495  }
496 
497  SSID = IsSyncScopeInclusion.getValue() ? SSID : MMO->getSyncScopeID();
498  Ordering =
499  isStrongerThan(Ordering, OpOrdering) ?
500  Ordering : MMO->getOrdering();
501  assert(MMO->getFailureOrdering() != AtomicOrdering::Release &&
502  MMO->getFailureOrdering() != AtomicOrdering::AcquireRelease);
503  FailureOrdering =
504  isStrongerThan(FailureOrdering, MMO->getFailureOrdering()) ?
505  FailureOrdering : MMO->getFailureOrdering();
506  }
507  }
508 
509  SIAtomicScope Scope = SIAtomicScope::NONE;
510  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
511  bool IsCrossAddressSpaceOrdering = false;
512  if (Ordering != AtomicOrdering::NotAtomic) {
513  auto ScopeOrNone = toSIAtomicScope(SSID, InstrAddrSpace);
514  if (!ScopeOrNone) {
515  reportUnsupported(MI, "Unsupported atomic synchronization scope");
516  return None;
517  }
518  std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
519  ScopeOrNone.getValue();
520  if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
521  ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
522  reportUnsupported(MI, "Unsupported atomic address space");
523  return None;
524  }
525  }
526  return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
527  IsCrossAddressSpaceOrdering, FailureOrdering, IsNonTemporal);
528 }
529 
530 Optional<SIMemOpInfo> SIMemOpAccess::getLoadInfo(
531  const MachineBasicBlock::iterator &MI) const {
532  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
533 
534  if (!(MI->mayLoad() && !MI->mayStore()))
535  return None;
536 
537  // Be conservative if there are no memory operands.
538  if (MI->getNumMemOperands() == 0)
539  return SIMemOpInfo();
540 
541  return constructFromMIWithMMO(MI);
542 }
543 
544 Optional<SIMemOpInfo> SIMemOpAccess::getStoreInfo(
545  const MachineBasicBlock::iterator &MI) const {
546  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
547 
548  if (!(!MI->mayLoad() && MI->mayStore()))
549  return None;
550 
551  // Be conservative if there are no memory operands.
552  if (MI->getNumMemOperands() == 0)
553  return SIMemOpInfo();
554 
555  return constructFromMIWithMMO(MI);
556 }
557 
558 Optional<SIMemOpInfo> SIMemOpAccess::getAtomicFenceInfo(
559  const MachineBasicBlock::iterator &MI) const {
560  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
561 
562  if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
563  return None;
564 
565  AtomicOrdering Ordering =
566  static_cast<AtomicOrdering>(MI->getOperand(0).getImm());
567 
568  SyncScope::ID SSID = static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
569  auto ScopeOrNone = toSIAtomicScope(SSID, SIAtomicAddrSpace::ATOMIC);
570  if (!ScopeOrNone) {
571  reportUnsupported(MI, "Unsupported atomic synchronization scope");
572  return None;
573  }
574 
575  SIAtomicScope Scope = SIAtomicScope::NONE;
576  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
577  bool IsCrossAddressSpaceOrdering = false;
578  std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
579  ScopeOrNone.getValue();
580 
581  if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
582  ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
583  reportUnsupported(MI, "Unsupported atomic address space");
584  return None;
585  }
586 
587  return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, SIAtomicAddrSpace::ATOMIC,
588  IsCrossAddressSpaceOrdering);
589 }
590 
591 Optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
592  const MachineBasicBlock::iterator &MI) const {
593  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
594 
595  if (!(MI->mayLoad() && MI->mayStore()))
596  return None;
597 
598  // Be conservative if there are no memory operands.
599  if (MI->getNumMemOperands() == 0)
600  return SIMemOpInfo();
601 
602  return constructFromMIWithMMO(MI);
603 }
604 
605 SICacheControl::SICacheControl(const GCNSubtarget &ST) {
606  TII = ST.getInstrInfo();
607  IV = getIsaVersion(ST.getCPU());
608 }
609 
610 /* static */
611 std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) {
612  GCNSubtarget::Generation Generation = ST.getGeneration();
613  if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
614  return make_unique<SIGfx6CacheControl>(ST);
615  return make_unique<SIGfx7CacheControl>(ST);
616 }
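// Only insertCacheInvalidate currently differs between the two
// implementations, so every generation after Southern Islands shares the
// SIGfx7CacheControl handling.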
617 
618 bool SIGfx6CacheControl::enableLoadCacheBypass(
619  const MachineBasicBlock::iterator &MI,
620  SIAtomicScope Scope,
621  SIAtomicAddrSpace AddrSpace) const {
622  assert(MI->mayLoad() && !MI->mayStore());
623  bool Changed = false;
624 
625  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
626  /// TODO: Do not set glc for rmw atomic operations as they
627  /// implicitly bypass the L1 cache.
628 
629  switch (Scope) {
630  case SIAtomicScope::SYSTEM:
631  case SIAtomicScope::AGENT:
632  Changed |= enableGLCBit(MI);
633  break;
634  case SIAtomicScope::WORKGROUP:
635  case SIAtomicScope::WAVEFRONT:
636  case SIAtomicScope::SINGLETHREAD:
637  // No cache to bypass.
638  break;
639  default:
640  llvm_unreachable("Unsupported synchronization scope");
641  }
642  }
643 
644  /// The scratch address space does not need the global memory caches
645  /// to be bypassed as all memory operations by the same thread are
646  /// sequentially consistent, and no other thread can access scratch
647  /// memory.
648 
649  /// Other address spaces do not have a cache.
650 
651  return Changed;
652 }
653 
654 bool SIGfx6CacheControl::enableNonTemporal(
655  const MachineBasicBlock::iterator &MI) const {
656  assert(MI->mayLoad() ^ MI->mayStore());
657  bool Changed = false;
658 
659  /// TODO: Do not enableGLCBit if rmw atomic.
660  Changed |= enableGLCBit(MI);
661  Changed |= enableSLCBit(MI);
662 
663  return Changed;
664 }
665 
666 bool SIGfx6CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
667  SIAtomicScope Scope,
668  SIAtomicAddrSpace AddrSpace,
669  Position Pos) const {
670  bool Changed = false;
671 
672  MachineBasicBlock &MBB = *MI->getParent();
673  DebugLoc DL = MI->getDebugLoc();
674 
675  if (Pos == Position::AFTER)
676  ++MI;
677 
678  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
679  switch (Scope) {
680  case SIAtomicScope::SYSTEM:
681  case SIAtomicScope::AGENT:
682  BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBINVL1));
683  Changed = true;
684  break;
685  case SIAtomicScope::WORKGROUP:
686  case SIAtomicScope::WAVEFRONT:
687  case SIAtomicScope::SINGLETHREAD:
688  // No cache to invalidate.
689  break;
690  default:
691  llvm_unreachable("Unsupported synchronization scope");
692  }
693  }
694 
695  /// The scratch address space does not need the global memory cache
696  /// to be flushed as all memory operations by the same thread are
697  /// sequentially consistent, and no other thread can access scratch
698  /// memory.
699 
700  /// Other address spaces do not have a cache.
701 
702  if (Pos == Position::AFTER)
703  --MI;
704 
705  return Changed;
706 }
707 
708 bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
709  SIAtomicScope Scope,
710  SIAtomicAddrSpace AddrSpace,
711  SIMemOp Op,
712  bool IsCrossAddrSpaceOrdering,
713  Position Pos) const {
714  bool Changed = false;
715 
716  MachineBasicBlock &MBB = *MI->getParent();
717  DebugLoc DL = MI->getDebugLoc();
718 
719  if (Pos == Position::AFTER)
720  ++MI;
721 
722  bool VMCnt = false;
723  bool LGKMCnt = false;
724  bool EXPCnt = false;
725 
726  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
727  switch (Scope) {
728  case SIAtomicScope::SYSTEM:
729  case SIAtomicScope::AGENT:
730  VMCnt = true;
731  break;
732  case SIAtomicScope::WORKGROUP:
733  case SIAtomicScope::WAVEFRONT:
734  case SIAtomicScope::SINGLETHREAD:
735  // The L1 cache keeps all memory operations in order for
736  // wavefronts in the same work-group.
737  break;
738  default:
739  llvm_unreachable("Unsupported synchronization scope");
740  }
741  }
742 
743  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
744  switch (Scope) {
745  case SIAtomicScope::SYSTEM:
746  case SIAtomicScope::AGENT:
747  case SIAtomicScope::WORKGROUP:
748  // If no cross address space ordering then an LDS waitcnt is not
749  // needed as LDS operations for all waves are executed in a
750  // total global ordering as observed by all waves. Required if
751  // also synchronizing with global/GDS memory as LDS operations
752  // could be reordered with respect to later global/GDS memory
753  // operations of the same wave.
754  LGKMCnt = IsCrossAddrSpaceOrdering;
755  break;
756  case SIAtomicScope::WAVEFRONT:
757  case SIAtomicScope::SINGLETHREAD:
758  // The LDS keeps all memory operations in order for
759  // the same wavefront.
760  break;
761  default:
762  llvm_unreachable("Unsupported synchronization scope");
763  }
764  }
765 
766  if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
767  switch (Scope) {
768  case SIAtomicScope::SYSTEM:
769  case SIAtomicScope::AGENT:
770  // If no cross address space ordering then a GDS waitcnt is not
771  // needed as GDS operations for all waves are executed in a
772  // total global ordering as observed by all waves. Required if
773  // also synchronizing with global/LDS memory as GDS operations
774  // could be reordered with respect to later global/LDS memory
775  // operations of the same wave.
776  EXPCnt = IsCrossAddrSpaceOrdering;
777  break;
778  case SIAtomicScope::WORKGROUP:
779  case SIAtomicScope::WAVEFRONT:
780  case SIAtomicScope::SINGLETHREAD:
781  // The GDS keeps all memory operations in order for
782  // the same work-group.
783  break;
784  default:
785  llvm_unreachable("Unsupported synchronization scope");
786  }
787  }
788 
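  // Counters that do not need to be waited on are encoded at their maximum
  // (no-wait) value, so the emitted S_WAITCNT only waits for the selected
  // counters to reach zero.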
789  if (VMCnt || LGKMCnt || EXPCnt) {
790  unsigned WaitCntImmediate =
791  AMDGPU::encodeWaitcnt(IV,
792  VMCnt ? 0 : getVmcntBitMask(IV),
793  EXPCnt ? 0 : getExpcntBitMask(IV),
794  LGKMCnt ? 0 : getLgkmcntBitMask(IV));
795  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(WaitCntImmediate);
796  Changed = true;
797  }
798 
799  if (Pos == Position::AFTER)
800  --MI;
801 
802  return Changed;
803 }
804 
805 bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
806  SIAtomicScope Scope,
807  SIAtomicAddrSpace AddrSpace,
808  Position Pos) const {
809  bool Changed = false;
810 
811  MachineBasicBlock &MBB = *MI->getParent();
812  DebugLoc DL = MI->getDebugLoc();
813 
814  const GCNSubtarget &STM = MBB.getParent()->getSubtarget<GCNSubtarget>();
815 
816  const unsigned Flush = STM.isAmdPalOS() || STM.isMesa3DOS()
817  ? AMDGPU::BUFFER_WBINVL1
818  : AMDGPU::BUFFER_WBINVL1_VOL;
819 
820  if (Pos == Position::AFTER)
821  ++MI;
822 
823  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
824  switch (Scope) {
825  case SIAtomicScope::SYSTEM:
826  case SIAtomicScope::AGENT:
827  BuildMI(MBB, MI, DL, TII->get(Flush));
828  Changed = true;
829  break;
830  case SIAtomicScope::WORKGROUP:
831  case SIAtomicScope::WAVEFRONT:
832  case SIAtomicScope::SINGLETHREAD:
833  // No cache to invalidate.
834  break;
835  default:
836  llvm_unreachable("Unsupported synchronization scope");
837  }
838  }
839 
840  /// The scratch address space does not need the global memory cache
841  /// to be flushed as all memory operations by the same thread are
842  /// sequentially consistent, and no other thread can access scratch
843  /// memory.
844 
845  /// Other address spaces do not have a cache.
846 
847  if (Pos == Position::AFTER)
848  --MI;
849 
850  return Changed;
851 }
852 
853 bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
854  if (AtomicPseudoMIs.empty())
855  return false;
856 
857  for (auto &MI : AtomicPseudoMIs)
858  MI->eraseFromParent();
859 
860  AtomicPseudoMIs.clear();
861  return true;
862 }
863 
864 bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
865  MachineBasicBlock::iterator &MI) {
866  assert(MI->mayLoad() && !MI->mayStore());
867 
868  bool Changed = false;
869 
870  if (MOI.isAtomic()) {
871  if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
872  MOI.getOrdering() == AtomicOrdering::Acquire ||
873  MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
874  Changed |= CC->enableLoadCacheBypass(MI, MOI.getScope(),
875  MOI.getOrderingAddrSpace());
876  }
877 
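  // A sequentially consistent load must not be issued until all prior
  // memory operations in the ordering address spaces have completed, so a
  // wait is inserted before it.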
878  if (MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
879  Changed |= CC->insertWait(MI, MOI.getScope(),
880  MOI.getOrderingAddrSpace(),
881  SIMemOp::LOAD | SIMemOp::STORE,
882  MOI.getIsCrossAddressSpaceOrdering(),
883  Position::BEFORE);
884 
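  // An acquire load additionally waits for the load itself to complete and
  // then invalidates the caches, so later accesses cannot read stale data
  // from before the value the load observed.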
885  if (MOI.getOrdering() == AtomicOrdering::Acquire ||
886  MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
887  Changed |= CC->insertWait(MI, MOI.getScope(),
888  MOI.getInstrAddrSpace(),
889  SIMemOp::LOAD,
890  MOI.getIsCrossAddressSpaceOrdering(),
891  Position::AFTER);
892  Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
893  MOI.getOrderingAddrSpace(),
894  Position::AFTER);
895  }
896 
897  return Changed;
898  }
899 
900  // Atomic instructions do not have the nontemporal attribute.
901  if (MOI.isNonTemporal()) {
902  Changed |= CC->enableNonTemporal(MI);
903  return Changed;
904  }
905 
906  return Changed;
907 }
908 
909 bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
910  MachineBasicBlock::iterator &MI) {
911  assert(!MI->mayLoad() && MI->mayStore());
912 
913  bool Changed = false;
914 
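  // A release store must not be performed until all prior memory operations
  // in the ordering address spaces have completed, so a wait is inserted
  // before it.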
915  if (MOI.isAtomic()) {
916  if (MOI.getOrdering() == AtomicOrdering::Release ||
917  MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
918  Changed |= CC->insertWait(MI, MOI.getScope(),
919  MOI.getOrderingAddrSpace(),
920  SIMemOp::LOAD | SIMemOp::STORE,
921  MOI.getIsCrossAddressSpaceOrdering(),
922  Position::BEFORE);
923 
924  return Changed;
925  }
926 
927  // Atomic instructions do not have the nontemporal attribute.
928  if (MOI.isNonTemporal()) {
929  Changed |= CC->enableNonTemporal(MI);
930  return Changed;
931  }
932 
933  return Changed;
934 }
935 
936 bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
937  MachineBasicBlock::iterator &MI) {
938  assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);
939 
940  AtomicPseudoMIs.push_back(MI);
941  bool Changed = false;
942 
943  if (MOI.isAtomic()) {
944  if (MOI.getOrdering() == AtomicOrdering::Acquire ||
945  MOI.getOrdering() == AtomicOrdering::Release ||
946  MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
947  MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
948  /// TODO: This relies on a barrier always generating a waitcnt
949  /// for LDS to ensure it is not reordered with the completion of
950  /// the preceding LDS operations. If the barrier had a memory
951  /// ordering and memory scope, then the library would not need to
952  /// generate a fence. Could add support in this file for
953  /// barrier. SIInsertWaitcnt.cpp could then stop unconditionally
954  /// adding waitcnt before a S_BARRIER.
955  Changed |= CC->insertWait(MI, MOI.getScope(),
956  MOI.getOrderingAddrSpace(),
957  SIMemOp::LOAD | SIMemOp::STORE,
958  MOI.getIsCrossAddressSpaceOrdering(),
959  Position::BEFORE);
960 
961  if (MOI.getOrdering() == AtomicOrdering::Acquire ||
962  MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
963  MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
964  Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
965  MOI.getOrderingAddrSpace(),
966  Position::BEFORE);
967 
968  return Changed;
969  }
970 
971  return Changed;
972 }
973 
974 bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
975  MachineBasicBlock::iterator &MI) {
976  assert(MI->mayLoad() && MI->mayStore());
977 
978  bool Changed = false;
979 
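  // Read-modify-write atomics combine the release and acquire handling
  // above: a wait before the operation for release (and seq_cst) orderings,
  // and a wait plus cache invalidate after it for acquire orderings, waiting
  // on the load result only when the atomic returns a value.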
980  if (MOI.isAtomic()) {
981  if (MOI.getOrdering() == AtomicOrdering::Release ||
982  MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
983  MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
984  MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
985  Changed |= CC->insertWait(MI, MOI.getScope(),
986  MOI.getOrderingAddrSpace(),
987  SIMemOp::LOAD | SIMemOp::STORE,
988  MOI.getIsCrossAddressSpaceOrdering(),
989  Position::BEFORE);
990 
991  if (MOI.getOrdering() == AtomicOrdering::Acquire ||
992  MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
993  MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
994  MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
995  MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
996  Changed |= CC->insertWait(MI, MOI.getScope(),
997  MOI.getOrderingAddrSpace(),
998  isAtomicRet(*MI) ? SIMemOp::LOAD :
999  SIMemOp::STORE,
1000  MOI.getIsCrossAddressSpaceOrdering(),
1001  Position::AFTER);
1002  Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
1003  MOI.getOrderingAddrSpace(),
1004  Position::AFTER);
1005  }
1006 
1007  return Changed;
1008  }
1009 
1010  return Changed;
1011 }
1012 
1013 bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
1014  bool Changed = false;
1015 
1016  SIMemOpAccess MOA(MF);
1017  CC = SICacheControl::create(MF.getSubtarget<GCNSubtarget>());
1018 
1019  for (auto &MBB : MF) {
1020  for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
1021  if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
1022  continue;
1023 
1024  if (const auto &MOI = MOA.getLoadInfo(MI))
1025  Changed |= expandLoad(MOI.getValue(), MI);
1026  else if (const auto &MOI = MOA.getStoreInfo(MI))
1027  Changed |= expandStore(MOI.getValue(), MI);
1028  else if (const auto &MOI = MOA.getAtomicFenceInfo(MI))
1029  Changed |= expandAtomicFence(MOI.getValue(), MI);
1030  else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI))
1031  Changed |= expandAtomicCmpxchgOrRmw(MOI.getValue(), MI);
1032  }
1033  }
1034 
1035  Changed |= removeAtomicPseudoMIs();
1036  return Changed;
1037 }
1038 
1039 INITIALIZE_PASS(SIMemoryLegalizer, DEBUG_TYPE, PASS_NAME, false, false)
1040 
1041 char SIMemoryLegalizer::ID = 0;
1042 char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID;
1043 
1044 FunctionPass *llvm::createSIMemoryLegalizerPass() {
1045  return new SIMemoryLegalizer();
1046 }