1 //===- SIMemoryLegalizer.cpp ----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Memory legalizer - implements memory model. More information can be
11 /// found here:
12 /// http://llvm.org/docs/AMDGPUUsage.html#memory-model
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AMDGPU.h"
17 #include "AMDGPUMachineModuleInfo.h"
18 #include "AMDGPUSubtarget.h"
19 #include "SIDefines.h"
20 #include "SIInstrInfo.h"
21 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
22 #include "Utils/AMDGPUBaseInfo.h"
23 #include "llvm/ADT/BitmaskEnum.h"
24 #include "llvm/ADT/None.h"
25 #include "llvm/ADT/Optional.h"
26 #include "llvm/CodeGen/MachineBasicBlock.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/MachineFunctionPass.h"
29 #include "llvm/CodeGen/MachineInstrBuilder.h"
30 #include "llvm/CodeGen/MachineMemOperand.h"
31 #include "llvm/CodeGen/MachineModuleInfo.h"
32 #include "llvm/CodeGen/MachineOperand.h"
33 #include "llvm/IR/DebugLoc.h"
34 #include "llvm/IR/DiagnosticInfo.h"
35 #include "llvm/IR/Function.h"
36 #include "llvm/IR/LLVMContext.h"
37 #include "llvm/MC/MCInstrDesc.h"
38 #include "llvm/Pass.h"
39 #include "llvm/Support/AtomicOrdering.h"
40 #include "llvm/Support/MathExtras.h"
41 #include <cassert>
42 #include <list>
43 
44 using namespace llvm;
45 using namespace llvm::AMDGPU;
46 
47 #define DEBUG_TYPE "si-memory-legalizer"
48 #define PASS_NAME "SI Memory Legalizer"
49 
50 namespace {
51 
52 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
53 
54 /// Memory operation flags. Can be ORed together.
55 enum class SIMemOp {
56  NONE = 0u,
57  LOAD = 1u << 0,
58  STORE = 1u << 1,
59  LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ STORE)
60 };
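// For illustration: because of LLVM_MARK_AS_BITMASK_ENUM above, these flags
// compose and can be queried with ordinary bitwise operators, e.g.
//   SIMemOp Op = SIMemOp::LOAD | SIMemOp::STORE;
//   bool WaitsOnLoads = (Op & SIMemOp::LOAD) != SIMemOp::NONE;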
61 
62 /// Position to insert a new instruction relative to an existing
63 /// instruction.
64 enum class Position {
65  BEFORE,
66  AFTER
67 };
68 
69 /// The atomic synchronization scopes supported by the AMDGPU target.
70 enum class SIAtomicScope {
71  NONE,
72  SINGLETHREAD,
73  WAVEFRONT,
74  WORKGROUP,
75  AGENT,
76  SYSTEM
77 };
78 
79 /// The distinct address spaces supported by the AMDGPU target for
80 /// atomic memory operations. Can be ORed together.
81 enum class SIAtomicAddrSpace {
82  NONE = 0u,
83  GLOBAL = 1u << 0,
84  LDS = 1u << 1,
85  SCRATCH = 1u << 2,
86  GDS = 1u << 3,
87  OTHER = 1u << 4,
88 
89  /// The address spaces that can be accessed by a FLAT instruction.
90  FLAT = GLOBAL | LDS | SCRATCH,
91 
92  /// The address spaces that support atomic instructions.
93  ATOMIC = GLOBAL | LDS | SCRATCH | GDS,
94 
95  /// All address spaces.
96  ALL = GLOBAL | LDS | SCRATCH | GDS | OTHER,
97 
98  LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ ALL)
99 };
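// The pass queries these masks with the usual membership idiom, e.g.
//   if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE)
//     ...act on the global address space...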
100 
101 /// Sets named bit \p BitName to "true" if present in instruction \p MI.
102 /// \returns Returns true if \p MI is modified, false otherwise.
103 template <uint16_t BitName>
104 bool enableNamedBit(const MachineBasicBlock::iterator &MI) {
105  int BitIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), BitName);
106  if (BitIdx == -1)
107  return false;
108 
109  MachineOperand &Bit = MI->getOperand(BitIdx);
110  if (Bit.getImm() != 0)
111  return false;
112 
113  Bit.setImm(1);
114  return true;
115 }
116 
117 class SIMemOpInfo final {
118 private:
119 
120  friend class SIMemOpAccess;
121 
122  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
123  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
124  SIAtomicScope Scope = SIAtomicScope::SYSTEM;
125  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
126  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
127  bool IsCrossAddressSpaceOrdering = false;
128  bool IsNonTemporal = false;
129 
130  SIMemOpInfo(AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
131  SIAtomicScope Scope = SIAtomicScope::SYSTEM,
132  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC,
133  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::ALL,
134  bool IsCrossAddressSpaceOrdering = true,
135  AtomicOrdering FailureOrdering =
136  AtomicOrdering::SequentiallyConsistent,
137  bool IsNonTemporal = false)
138  : Ordering(Ordering), FailureOrdering(FailureOrdering),
139  Scope(Scope), OrderingAddrSpace(OrderingAddrSpace),
140  InstrAddrSpace(InstrAddrSpace),
141  IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
142  IsNonTemporal(IsNonTemporal) {
143  // There is also no cross address space ordering if the ordering
144  // address space is the same as the instruction address space and
145  // only contains a single address space.
146  if ((OrderingAddrSpace == InstrAddrSpace) &&
147  isPowerOf2_32(uint32_t(InstrAddrSpace)))
148  IsCrossAddressSpaceOrdering = false;
149  }
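// Worked example of the check above: an ordering constraint limited to the
// LDS address space on an instruction that only accesses LDS gives
// OrderingAddrSpace == InstrAddrSpace == SIAtomicAddrSpace::LDS, a single
// bit, so the cross address space ordering flag is cleared.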
150 
151 public:
152  /// \returns Atomic synchronization scope of the machine instruction used to
153  /// create this SIMemOpInfo.
154  SIAtomicScope getScope() const {
155  return Scope;
156  }
157 
158  /// \returns Ordering constraint of the machine instruction used to
159  /// create this SIMemOpInfo.
160  AtomicOrdering getOrdering() const {
161  return Ordering;
162  }
163 
164  /// \returns Failure ordering constraint of the machine instruction used to
165  /// create this SIMemOpInfo.
166  AtomicOrdering getFailureOrdering() const {
167  return FailureOrdering;
168  }
169 
170  /// \returns The address spaces accessed by the machine
171  /// instruction used to create this SIMemOpInfo.
172  SIAtomicAddrSpace getInstrAddrSpace() const {
173  return InstrAddrSpace;
174  }
175 
176  /// \returns The address spaces that must be ordered by the machine
177  /// instruction used to create this SIMemOpInfo.
178  SIAtomicAddrSpace getOrderingAddrSpace() const {
179  return OrderingAddrSpace;
180  }
181 
182  /// \returns Return true iff memory ordering of operations on
183  /// different address spaces is required.
184  bool getIsCrossAddressSpaceOrdering() const {
185  return IsCrossAddressSpaceOrdering;
186  }
187 
188  /// \returns True if memory access of the machine instruction used to
189  /// create this SIMemOpInfo is non-temporal, false otherwise.
190  bool isNonTemporal() const {
191  return IsNonTemporal;
192  }
193 
194  /// \returns True if ordering constraint of the machine instruction used to
195  /// create this SIMemOpInfo is unordered or higher, false otherwise.
196  bool isAtomic() const {
197  return Ordering != AtomicOrdering::NotAtomic;
198  }
199 
200 };
201 
202 class SIMemOpAccess final {
203 private:
204  AMDGPUMachineModuleInfo *MMI = nullptr;
205 
206  /// Reports unsupported message \p Msg for \p MI to LLVM context.
207  void reportUnsupported(const MachineBasicBlock::iterator &MI,
208  const char *Msg) const;
209 
210  /// Inspects the target synchronization scope \p SSID and determines
211  /// the SI atomic scope it corresponds to, the address spaces it
212  /// covers, and whether the memory ordering applies between address
213  /// spaces.
214  Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
215  toSIAtomicScope(SyncScope::ID SSID, SIAtomicAddrSpace InstrScope) const;
216 
217  /// \returns The SI atomic address space bit corresponding to the target address space \p AS.
218  SIAtomicAddrSpace toSIAtomicAddrSpace(unsigned AS) const;
219 
220  /// \returns Info constructed from \p MI, which has at least one machine memory
221  /// operand.
222  Optional<SIMemOpInfo> constructFromMIWithMMO(
223  const MachineBasicBlock::iterator &MI) const;
224 
225 public:
226  /// Construct class to support accessing the machine memory operands
227  /// of instructions in the machine function \p MF.
228  SIMemOpAccess(MachineFunction &MF);
229 
230  /// \returns Load info if \p MI is a load operation, "None" otherwise.
231  Optional<SIMemOpInfo> getLoadInfo(
232  const MachineBasicBlock::iterator &MI) const;
233 
234  /// \returns Store info if \p MI is a store operation, "None" otherwise.
235  Optional<SIMemOpInfo> getStoreInfo(
236  const MachineBasicBlock::iterator &MI) const;
237 
238  /// \returns Atomic fence info if \p MI is an atomic fence operation,
239  /// "None" otherwise.
240  Optional<SIMemOpInfo> getAtomicFenceInfo(
241  const MachineBasicBlock::iterator &MI) const;
242 
243  /// \returns Atomic cmpxchg/rmw info if \p MI is an atomic cmpxchg or
244  /// rmw operation, "None" otherwise.
245  Optional<SIMemOpInfo> getAtomicCmpxchgOrRmwInfo(
246  const MachineBasicBlock::iterator &MI) const;
247 };
248 
249 class SICacheControl {
250 protected:
251 
252  /// Instruction info.
253  const SIInstrInfo *TII = nullptr;
254 
255  IsaVersion IV;
256 
257  SICacheControl(const GCNSubtarget &ST);
258 
259 public:
260 
261  /// Create a cache control for the subtarget \p ST.
262  static std::unique_ptr<SICacheControl> create(const GCNSubtarget &ST);
263 
264  /// Update \p MI memory load instruction to bypass any caches up to
265  /// the \p Scope memory scope for address spaces \p
266  /// AddrSpace. Return true iff the instruction was modified.
267  virtual bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
268  SIAtomicScope Scope,
269  SIAtomicAddrSpace AddrSpace) const = 0;
270 
271  /// Update \p MI memory instruction to indicate it is
272  /// nontemporal. Return true iff the instruction was modified.
273  virtual bool enableNonTemporal(const MachineBasicBlock::iterator &MI)
274  const = 0;
275 
276  /// Inserts any necessary instructions at position \p Pos relative
277  /// to instruction \p MI to ensure any caches associated with
278  /// address spaces \p AddrSpace for memory scopes up to memory scope
279  /// \p Scope are invalidated. Returns true iff any instructions
280  /// inserted.
281  virtual bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
282  SIAtomicScope Scope,
283  SIAtomicAddrSpace AddrSpace,
284  Position Pos) const = 0;
285 
286  /// Inserts any necessary instructions at position \p Pos relative
287  /// to instruction \p MI to ensure memory instructions of kind \p Op
288  /// associated with address spaces \p AddrSpace have completed as
289  /// observed by other memory instructions executing in memory scope
290  /// \p Scope. \p IsCrossAddrSpaceOrdering indicates if the memory
291  /// ordering is between address spaces. Returns true iff any
292  /// instructions inserted.
293  virtual bool insertWait(MachineBasicBlock::iterator &MI,
294  SIAtomicScope Scope,
295  SIAtomicAddrSpace AddrSpace,
296  SIMemOp Op,
297  bool IsCrossAddrSpaceOrdering,
298  Position Pos) const = 0;
299 
300  /// Virtual destructor to allow derivations to be deleted.
301  virtual ~SICacheControl() = default;
302 
303 };
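// Sketch of how the legalizer composes these hooks (see expandLoad below):
// an acquire load at agent scope first gets its caches bypassed
// (enableLoadCacheBypass), a wait is inserted after it so the load has
// completed (insertWait), and the L1 cache is then invalidated
// (insertCacheInvalidate) so later accesses cannot hit stale data.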
304 
305 class SIGfx6CacheControl : public SICacheControl {
306 protected:
307 
308  /// Sets GLC bit to "true" if present in \p MI. Returns true if \p MI
309  /// is modified, false otherwise.
310  bool enableGLCBit(const MachineBasicBlock::iterator &MI) const {
311  return enableNamedBit<AMDGPU::OpName::glc>(MI);
312  }
313 
314  /// Sets SLC bit to "true" if present in \p MI. Returns true if \p MI
315  /// is modified, false otherwise.
316  bool enableSLCBit(const MachineBasicBlock::iterator &MI) const {
317  return enableNamedBit<AMDGPU::OpName::slc>(MI);
318  }
319 
320 public:
321 
322  SIGfx6CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {}
323 
324  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
325  SIAtomicScope Scope,
326  SIAtomicAddrSpace AddrSpace) const override;
327 
328  bool enableNonTemporal(const MachineBasicBlock::iterator &MI) const override;
329 
330  bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
331  SIAtomicScope Scope,
332  SIAtomicAddrSpace AddrSpace,
333  Position Pos) const override;
334 
335  bool insertWait(MachineBasicBlock::iterator &MI,
336  SIAtomicScope Scope,
337  SIAtomicAddrSpace AddrSpace,
338  SIMemOp Op,
339  bool IsCrossAddrSpaceOrdering,
340  Position Pos) const override;
341 };
342 
343 class SIGfx7CacheControl : public SIGfx6CacheControl {
344 public:
345 
346  SIGfx7CacheControl(const GCNSubtarget &ST) : SIGfx6CacheControl(ST) {}
347 
348  bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
349  SIAtomicScope Scope,
350  SIAtomicAddrSpace AddrSpace,
351  Position Pos) const override;
352 
353 };
354 
355 class SIMemoryLegalizer final : public MachineFunctionPass {
356 private:
357 
358  /// Cache Control.
359  std::unique_ptr<SICacheControl> CC = nullptr;
360 
361  /// List of atomic pseudo instructions.
362  std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;
363 
364  /// Return true iff instruction \p MI is an atomic instruction that
365  /// returns a result.
366  bool isAtomicRet(const MachineInstr &MI) const {
367  return AMDGPU::getAtomicNoRetOp(MI.getOpcode()) != -1;
368  }
369 
370  /// Removes all processed atomic pseudo instructions from the current
371  /// function. Returns true if current function is modified, false otherwise.
372  bool removeAtomicPseudoMIs();
373 
374  /// Expands load operation \p MI. Returns true if instructions are
375  /// added/deleted or \p MI is modified, false otherwise.
376  bool expandLoad(const SIMemOpInfo &MOI,
377  MachineBasicBlock::iterator &MI);
378  /// Expands store operation \p MI. Returns true if instructions are
379  /// added/deleted or \p MI is modified, false otherwise.
380  bool expandStore(const SIMemOpInfo &MOI,
381  MachineBasicBlock::iterator &MI);
382  /// Expands atomic fence operation \p MI. Returns true if
383  /// instructions are added/deleted or \p MI is modified, false otherwise.
384  bool expandAtomicFence(const SIMemOpInfo &MOI,
385  MachineBasicBlock::iterator &MI);
386  /// Expands atomic cmpxchg or rmw operation \p MI. Returns true if
387  /// instructions are added/deleted or \p MI is modified, false otherwise.
388  bool expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
389  MachineBasicBlock::iterator &MI);
390 
391 public:
392  static char ID;
393 
394  SIMemoryLegalizer() : MachineFunctionPass(ID) {}
395 
396  void getAnalysisUsage(AnalysisUsage &AU) const override {
397  AU.setPreservesCFG();
398  MachineFunctionPass::getAnalysisUsage(AU);
399  }
400 
401  StringRef getPassName() const override {
402  return PASS_NAME;
403  }
404 
405  bool runOnMachineFunction(MachineFunction &MF) override;
406 };
407 
408 } // end namespace anonymous
409 
410 void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI,
411  const char *Msg) const {
412  const Function &Func = MI->getParent()->getParent()->getFunction();
413  DiagnosticInfoUnsupported Diag(Func, Msg, MI->getDebugLoc());
414  Func.getContext().diagnose(Diag);
415 }
416 
417 Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
418 SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
419  SIAtomicAddrSpace InstrScope) const {
420  if (SSID == SyncScope::System)
421  return std::make_tuple(SIAtomicScope::SYSTEM,
422  SIAtomicAddrSpace::ATOMIC,
423  true);
424  if (SSID == MMI->getAgentSSID())
425  return std::make_tuple(SIAtomicScope::AGENT,
426  SIAtomicAddrSpace::ATOMIC,
427  true);
428  if (SSID == MMI->getWorkgroupSSID())
429  return std::make_tuple(SIAtomicScope::WORKGROUP,
430  SIAtomicAddrSpace::ATOMIC,
431  true);
432  if (SSID == MMI->getWavefrontSSID())
433  return std::make_tuple(SIAtomicScope::WAVEFRONT,
434  SIAtomicAddrSpace::ATOMIC,
435  true);
436  if (SSID == SyncScope::SingleThread)
437  return std::make_tuple(SIAtomicScope::SINGLETHREAD,
438  SIAtomicAddrSpace::ATOMIC,
439  true);
440  if (SSID == MMI->getSystemOneAddressSpaceSSID())
441  return std::make_tuple(SIAtomicScope::SYSTEM,
442  SIAtomicAddrSpace::ATOMIC & InstrScope,
443  false);
444  if (SSID == MMI->getAgentOneAddressSpaceSSID())
445  return std::make_tuple(SIAtomicScope::AGENT,
446  SIAtomicAddrSpace::ATOMIC & InstrScope,
447  false);
448  if (SSID == MMI->getWorkgroupOneAddressSpaceSSID())
449  return std::make_tuple(SIAtomicScope::WORKGROUP,
450  SIAtomicAddrSpace::ATOMIC & InstrScope,
451  false);
452  if (SSID == MMI->getWavefrontOneAddressSpaceSSID())
453  return std::make_tuple(SIAtomicScope::WAVEFRONT,
454  SIAtomicAddrSpace::ATOMIC & InstrScope,
455  false);
456  if (SSID == MMI->getSingleThreadOneAddressSpaceSSID())
457  return std::make_tuple(SIAtomicScope::SINGLETHREAD,
458  SIAtomicAddrSpace::ATOMIC & InstrScope,
459  false);
460  return None;
461 }
462 
463 SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(unsigned AS) const {
464  if (AS == AMDGPUAS::FLAT_ADDRESS)
465  return SIAtomicAddrSpace::FLAT;
466  if (AS == AMDGPUAS::GLOBAL_ADDRESS)
467  return SIAtomicAddrSpace::GLOBAL;
468  if (AS == AMDGPUAS::LOCAL_ADDRESS)
469  return SIAtomicAddrSpace::LDS;
470  if (AS == AMDGPUAS::PRIVATE_ADDRESS)
471  return SIAtomicAddrSpace::SCRATCH;
472  if (AS == AMDGPUAS::REGION_ADDRESS)
473  return SIAtomicAddrSpace::GDS;
474 
475  return SIAtomicAddrSpace::OTHER;
476 }
477 
478 SIMemOpAccess::SIMemOpAccess(MachineFunction &MF) {
479  MMI = &MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
480 }
481 
482 Optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
483  const MachineBasicBlock::iterator &MI) const {
484  assert(MI->getNumMemOperands() > 0);
485 
486  SyncScope::ID SSID = SyncScope::SingleThread;
487  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
488  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
489  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
490  bool IsNonTemporal = true;
491 
492  // Validator should check whether or not MMOs cover the entire set of
493  // locations accessed by the memory instruction.
494  for (const auto &MMO : MI->memoperands()) {
495  IsNonTemporal &= MMO->isNonTemporal();
496  InstrAddrSpace |=
497  toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace());
498  AtomicOrdering OpOrdering = MMO->getOrdering();
499  if (OpOrdering != AtomicOrdering::NotAtomic) {
500  const auto &IsSyncScopeInclusion =
501  MMI->isSyncScopeInclusion(SSID, MMO->getSyncScopeID());
502  if (!IsSyncScopeInclusion) {
503  reportUnsupported(MI,
504  "Unsupported non-inclusive atomic synchronization scope");
505  return None;
506  }
507 
508  SSID = IsSyncScopeInclusion.getValue() ? SSID : MMO->getSyncScopeID();
509  Ordering =
510  isStrongerThan(Ordering, OpOrdering) ?
511  Ordering : MMO->getOrdering();
512  assert(MMO->getFailureOrdering() != AtomicOrdering::Release &&
513  MMO->getFailureOrdering() != AtomicOrdering::AcquireRelease);
514  FailureOrdering =
515  isStrongerThan(FailureOrdering, MMO->getFailureOrdering()) ?
516  FailureOrdering : MMO->getFailureOrdering();
517  }
518  }
519 
520  SIAtomicScope Scope = SIAtomicScope::NONE;
521  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
522  bool IsCrossAddressSpaceOrdering = false;
523  if (Ordering != AtomicOrdering::NotAtomic) {
524  auto ScopeOrNone = toSIAtomicScope(SSID, InstrAddrSpace);
525  if (!ScopeOrNone) {
526  reportUnsupported(MI, "Unsupported atomic synchronization scope");
527  return None;
528  }
529  std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
530  ScopeOrNone.getValue();
531  if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
532  ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
533  reportUnsupported(MI, "Unsupported atomic address space");
534  return None;
535  }
536  }
537  return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
538  IsCrossAddressSpaceOrdering, FailureOrdering, IsNonTemporal);
539 }
540 
541 Optional<SIMemOpInfo> SIMemOpAccess::getLoadInfo(
542  const MachineBasicBlock::iterator &MI) const {
543  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
544 
545  if (!(MI->mayLoad() && !MI->mayStore()))
546  return None;
547 
548  // Be conservative if there are no memory operands.
549  if (MI->getNumMemOperands() == 0)
550  return SIMemOpInfo();
551 
552  return constructFromMIWithMMO(MI);
553 }
554 
555 Optional<SIMemOpInfo> SIMemOpAccess::getStoreInfo(
556  const MachineBasicBlock::iterator &MI) const {
557  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
558 
559  if (!(!MI->mayLoad() && MI->mayStore()))
560  return None;
561 
562  // Be conservative if there are no memory operands.
563  if (MI->getNumMemOperands() == 0)
564  return SIMemOpInfo();
565 
566  return constructFromMIWithMMO(MI);
567 }
568 
569 Optional<SIMemOpInfo> SIMemOpAccess::getAtomicFenceInfo(
570  const MachineBasicBlock::iterator &MI) const {
571  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
572 
573  if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
574  return None;
575 
576  AtomicOrdering Ordering =
577  static_cast<AtomicOrdering>(MI->getOperand(0).getImm());
578 
579  SyncScope::ID SSID = static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
580  auto ScopeOrNone = toSIAtomicScope(SSID, SIAtomicAddrSpace::ATOMIC);
581  if (!ScopeOrNone) {
582  reportUnsupported(MI, "Unsupported atomic synchronization scope");
583  return None;
584  }
585 
586  SIAtomicScope Scope = SIAtomicScope::NONE;
587  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
588  bool IsCrossAddressSpaceOrdering = false;
589  std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
590  ScopeOrNone.getValue();
591 
592  if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
593  ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
594  reportUnsupported(MI, "Unsupported atomic address space");
595  return None;
596  }
597 
598  return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, SIAtomicAddrSpace::ATOMIC,
599  IsCrossAddressSpaceOrdering);
600 }
601 
602 Optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
603  const MachineBasicBlock::iterator &MI) const {
604  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
605 
606  if (!(MI->mayLoad() && MI->mayStore()))
607  return None;
608 
609  // Be conservative if there are no memory operands.
610  if (MI->getNumMemOperands() == 0)
611  return SIMemOpInfo();
612 
613  return constructFromMIWithMMO(MI);
614 }
615 
616 SICacheControl::SICacheControl(const GCNSubtarget &ST) {
617  TII = ST.getInstrInfo();
618  IV = getIsaVersion(ST.getCPU());
619 }
620 
621 /* static */
622 std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) {
623  GCNSubtarget::Generation Generation = ST.getGeneration();
624  if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
625  return make_unique<SIGfx6CacheControl>(ST);
626  return make_unique<SIGfx7CacheControl>(ST);
627 }
628 
629 bool SIGfx6CacheControl::enableLoadCacheBypass(
630  const MachineBasicBlock::iterator &MI,
631  SIAtomicScope Scope,
632  SIAtomicAddrSpace AddrSpace) const {
633  assert(MI->mayLoad() && !MI->mayStore());
634  bool Changed = false;
635 
636  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
637  /// TODO: Do not set glc for rmw atomic operations as they
638  /// implicitly bypass the L1 cache.
639 
640  switch (Scope) {
641  case SIAtomicScope::SYSTEM:
642  case SIAtomicScope::AGENT:
643  Changed |= enableGLCBit(MI);
644  break;
645  case SIAtomicScope::WORKGROUP:
646  case SIAtomicScope::WAVEFRONT:
647  case SIAtomicScope::SINGLETHREAD:
648  // No cache to bypass.
649  break;
650  default:
651  llvm_unreachable("Unsupported synchronization scope");
652  }
653  }
654 
655  /// The scratch address space does not need the global memory caches
656  /// to be bypassed as all memory operations by the same thread are
657  /// sequentially consistent, and no other thread can access scratch
658  /// memory.
659 
660  /// Other address spaces do not have a cache.
661 
662  return Changed;
663 }
664 
665 bool SIGfx6CacheControl::enableNonTemporal(
666  const MachineBasicBlock::iterator &MI) const {
667  assert(MI->mayLoad() ^ MI->mayStore());
668  bool Changed = false;
669 
670  /// TODO: Do not enableGLCBit if rmw atomic.
671  Changed |= enableGLCBit(MI);
672  Changed |= enableSLCBit(MI);
673 
674  return Changed;
675 }
676 
677 bool SIGfx6CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
678  SIAtomicScope Scope,
679  SIAtomicAddrSpace AddrSpace,
680  Position Pos) const {
681  bool Changed = false;
682 
683  MachineBasicBlock &MBB = *MI->getParent();
684  DebugLoc DL = MI->getDebugLoc();
685 
686  if (Pos == Position::AFTER)
687  ++MI;
688 
689  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
690  switch (Scope) {
691  case SIAtomicScope::SYSTEM:
692  case SIAtomicScope::AGENT:
693  BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBINVL1));
694  Changed = true;
695  break;
696  case SIAtomicScope::WORKGROUP:
697  case SIAtomicScope::WAVEFRONT:
698  case SIAtomicScope::SINGLETHREAD:
699  // No cache to invalidate.
700  break;
701  default:
702  llvm_unreachable("Unsupported synchronization scope");
703  }
704  }
705 
706  /// The scratch address space does not need the global memory cache
707  /// to be flushed as all memory operations by the same thread are
708  /// sequentially consistent, and no other thread can access scratch
709  /// memory.
710 
711  /// Other address spaces do not have a cache.
712 
713  if (Pos == Position::AFTER)
714  --MI;
715 
716  return Changed;
717 }
718 
719 bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
720  SIAtomicScope Scope,
721  SIAtomicAddrSpace AddrSpace,
722  SIMemOp Op,
723  bool IsCrossAddrSpaceOrdering,
724  Position Pos) const {
725  bool Changed = false;
726 
727  MachineBasicBlock &MBB = *MI->getParent();
728  DebugLoc DL = MI->getDebugLoc();
729 
730  if (Pos == Position::AFTER)
731  ++MI;
732 
733  bool VMCnt = false;
734  bool LGKMCnt = false;
735 
736  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
737  switch (Scope) {
738  case SIAtomicScope::SYSTEM:
739  case SIAtomicScope::AGENT:
740  VMCnt |= true;
741  break;
742  case SIAtomicScope::WORKGROUP:
743  case SIAtomicScope::WAVEFRONT:
744  case SIAtomicScope::SINGLETHREAD:
745  // The L1 cache keeps all memory operations in order for
746  // wavefronts in the same work-group.
747  break;
748  default:
749  llvm_unreachable("Unsupported synchronization scope");
750  }
751  }
752 
753  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
754  switch (Scope) {
755  case SIAtomicScope::SYSTEM:
756  case SIAtomicScope::AGENT:
757  case SIAtomicScope::WORKGROUP:
758  // If no cross address space ordering then an LDS waitcnt is not
759  // needed as LDS operations for all waves are executed in a
760  // total global ordering as observed by all waves. Required if
761  // also synchronizing with global/GDS memory as LDS operations
762  // could be reordered with respect to later global/GDS memory
763  // operations of the same wave.
764  LGKMCnt |= IsCrossAddrSpaceOrdering;
765  break;
766  case SIAtomicScope::WAVEFRONT:
767  case SIAtomicScope::SINGLETHREAD:
768  // The LDS keeps all memory operations in order for
769  // the same wavefront.
770  break;
771  default:
772  llvm_unreachable("Unsupported synchronization scope");
773  }
774  }
775 
776  if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
777  switch (Scope) {
778  case SIAtomicScope::SYSTEM:
779  case SIAtomicScope::AGENT:
780  // If no cross address space ordering then a GDS waitcnt is not
781  // needed as GDS operations for all waves are executed in a
782  // total global ordering as observed by all waves. Required if
783  // also synchronizing with global/LDS memory as GDS operations
784  // could be reordered with respect to later global/LDS memory
785  // operations of the same wave.
786  LGKMCnt |= IsCrossAddrSpaceOrdering;
787  break;
788  case SIAtomicScope::WORKGROUP:
789  case SIAtomicScope::WAVEFRONT:
790  case SIAtomicScope::SINGLETHREAD:
791  // The GDS keeps all memory operations in order for
792  // the same work-group.
793  break;
794  default:
795  llvm_unreachable("Unsupported synchronization scope");
796  }
797  }
798 
799  if (VMCnt || LGKMCnt) {
800  unsigned WaitCntImmediate =
801  AMDGPU::encodeWaitcnt(IV,
802  VMCnt ? 0 : getVmcntBitMask(IV),
803  getExpcntBitMask(IV),
804  LGKMCnt ? 0 : getLgkmcntBitMask(IV));
805  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(WaitCntImmediate);
806  Changed = true;
807  }
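// Note on the encodeWaitcnt arguments above: a count of 0 makes S_WAITCNT
// drain all outstanding operations of that kind, while passing the full bit
// mask (getVmcntBitMask / getExpcntBitMask / getLgkmcntBitMask) leaves that
// counter unconstrained, so only the requested counters are waited on.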
808 
809  if (Pos == Position::AFTER)
810  --MI;
811 
812  return Changed;
813 }
814 
815 bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
816  SIAtomicScope Scope,
817  SIAtomicAddrSpace AddrSpace,
818  Position Pos) const {
819  bool Changed = false;
820 
821  MachineBasicBlock &MBB = *MI->getParent();
822  DebugLoc DL = MI->getDebugLoc();
823 
824  const GCNSubtarget &STM = MBB.getParent()->getSubtarget<GCNSubtarget>();
825 
826  const unsigned Flush = STM.isAmdPalOS() || STM.isMesa3DOS()
827  ? AMDGPU::BUFFER_WBINVL1
828  : AMDGPU::BUFFER_WBINVL1_VOL;
829 
830  if (Pos == Position::AFTER)
831  ++MI;
832 
833  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
834  switch (Scope) {
835  case SIAtomicScope::SYSTEM:
836  case SIAtomicScope::AGENT:
837  BuildMI(MBB, MI, DL, TII->get(Flush));
838  Changed = true;
839  break;
840  case SIAtomicScope::WORKGROUP:
841  case SIAtomicScope::WAVEFRONT:
842  case SIAtomicScope::SINGLETHREAD:
843  // No cache to invalidate.
844  break;
845  default:
846  llvm_unreachable("Unsupported synchronization scope");
847  }
848  }
849 
850  /// The scratch address space does not need the global memory cache
851  /// to be flushed as all memory operations by the same thread are
852  /// sequentially consistent, and no other thread can access scratch
853  /// memory.
854 
855  /// Other address spaces do not have a cache.
856 
857  if (Pos == Position::AFTER)
858  --MI;
859 
860  return Changed;
861 }
862 
863 bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
864  if (AtomicPseudoMIs.empty())
865  return false;
866 
867  for (auto &MI : AtomicPseudoMIs)
868  MI->eraseFromParent();
869 
870  AtomicPseudoMIs.clear();
871  return true;
872 }
873 
874 bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
875  MachineBasicBlock::iterator &MI) {
876  assert(MI->mayLoad() && !MI->mayStore());
877 
878  bool Changed = false;
879 
880  if (MOI.isAtomic()) {
881  if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
882  MOI.getOrdering() == AtomicOrdering::Acquire ||
883  MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
884  Changed |= CC->enableLoadCacheBypass(MI, MOI.getScope(),
885  MOI.getOrderingAddrSpace());
886  }
887 
888  if (MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
889  Changed |= CC->insertWait(MI, MOI.getScope(),
890  MOI.getOrderingAddrSpace(),
891  SIMemOp::LOAD | SIMemOp::STORE,
892  MOI.getIsCrossAddressSpaceOrdering(),
893  Position::BEFORE);
894 
895  if (MOI.getOrdering() == AtomicOrdering::Acquire ||
896  MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
897  Changed |= CC->insertWait(MI, MOI.getScope(),
898  MOI.getInstrAddrSpace(),
899  SIMemOp::LOAD,
900  MOI.getIsCrossAddressSpaceOrdering(),
901  Position::AFTER);
902  Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
903  MOI.getOrderingAddrSpace(),
904  Position::AFTER);
905  }
906 
907  return Changed;
908  }
909 
910  // Atomic instructions do not have the nontemporal attribute.
911  if (MOI.isNonTemporal()) {
912  Changed |= CC->enableNonTemporal(MI);
913  return Changed;
914  }
915 
916  return Changed;
917 }
918 
919 bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
920  MachineBasicBlock::iterator &MI) {
921  assert(!MI->mayLoad() && MI->mayStore());
922 
923  bool Changed = false;
924 
925  if (MOI.isAtomic()) {
926  if (MOI.getOrdering() == AtomicOrdering::Release ||
927  MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
928  Changed |= CC->insertWait(MI, MOI.getScope(),
929  MOI.getOrderingAddrSpace(),
930  SIMemOp::LOAD | SIMemOp::STORE,
931  MOI.getIsCrossAddressSpaceOrdering(),
932  Position::BEFORE);
933 
934  return Changed;
935  }
936 
937  // Atomic instructions do not have the nontemporal attribute.
938  if (MOI.isNonTemporal()) {
939  Changed |= CC->enableNonTemporal(MI);
940  return Changed;
941  }
942 
943  return Changed;
944 }
945 
946 bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
947  MachineBasicBlock::iterator &MI) {
948  assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);
949 
950  AtomicPseudoMIs.push_back(MI);
951  bool Changed = false;
952 
953  if (MOI.isAtomic()) {
954  if (MOI.getOrdering() == AtomicOrdering::Acquire ||
955  MOI.getOrdering() == AtomicOrdering::Release ||
956  MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
957  MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
958  /// TODO: This relies on a barrier always generating a waitcnt
959  /// for LDS to ensure it is not reordered with the completion of
960  /// the preceding LDS operations. If the barrier had a memory
961  /// ordering and memory scope, then the library would not need to
962  /// generate a fence. Could add support in this file for
963  /// barrier. SIInsertWaitcnt.cpp could then stop unconditionally
964  /// adding waitcnt before an S_BARRIER.
965  Changed |= CC->insertWait(MI, MOI.getScope(),
966  MOI.getOrderingAddrSpace(),
967  SIMemOp::LOAD | SIMemOp::STORE,
968  MOI.getIsCrossAddressSpaceOrdering(),
969  Position::BEFORE);
970 
971  if (MOI.getOrdering() == AtomicOrdering::Acquire ||
972  MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
973  MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
974  Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
975  MOI.getOrderingAddrSpace(),
976  Position::BEFORE);
977 
978  return Changed;
979  }
980 
981  return Changed;
982 }
983 
984 bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
985  MachineBasicBlock::iterator &MI) {
986  assert(MI->mayLoad() && MI->mayStore());
987 
988  bool Changed = false;
989 
990  if (MOI.isAtomic()) {
991  if (MOI.getOrdering() == AtomicOrdering::Release ||
992  MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
993  MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
994  MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
995  Changed |= CC->insertWait(MI, MOI.getScope(),
996  MOI.getOrderingAddrSpace(),
997  SIMemOp::LOAD | SIMemOp::STORE,
998  MOI.getIsCrossAddressSpaceOrdering(),
999  Position::BEFORE);
1000 
1001  if (MOI.getOrdering() == AtomicOrdering::Acquire ||
1002  MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
1003  MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
1004  MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
1005  MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
1006  Changed |= CC->insertWait(MI, MOI.getScope(),
1007  MOI.getOrderingAddrSpace(),
1008  isAtomicRet(*MI) ? SIMemOp::LOAD :
1009  SIMemOp::STORE,
1010  MOI.getIsCrossAddressSpaceOrdering(),
1011  Position::AFTER);
1012  Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
1013  MOI.getOrderingAddrSpace(),
1014  Position::AFTER);
1015  }
1016 
1017  return Changed;
1018  }
1019 
1020  return Changed;
1021 }
1022 
1023 bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
1024  bool Changed = false;
1025 
1026  SIMemOpAccess MOA(MF);
1027  CC = SICacheControl::create(MF.getSubtarget<GCNSubtarget>());
1028 
1029  for (auto &MBB : MF) {
1030  for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
1031  if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
1032  continue;
1033 
1034  if (const auto &MOI = MOA.getLoadInfo(MI))
1035  Changed |= expandLoad(MOI.getValue(), MI);
1036  else if (const auto &MOI = MOA.getStoreInfo(MI))
1037  Changed |= expandStore(MOI.getValue(), MI);
1038  else if (const auto &MOI = MOA.getAtomicFenceInfo(MI))
1039  Changed |= expandAtomicFence(MOI.getValue(), MI);
1040  else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI))
1041  Changed |= expandAtomicCmpxchgOrRmw(MOI.getValue(), MI);
1042  }
1043  }
1044 
1045  Changed |= removeAtomicPseudoMIs();
1046  return Changed;
1047 }
1048 
1049 INITIALIZE_PASS(SIMemoryLegalizer, DEBUG_TYPE, PASS_NAME, false, false)
1050 
1051 char SIMemoryLegalizer::ID = 0;
1052 char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID;
1053 
1054 FunctionPass *llvm::createSIMemoryLegalizerPass() {
1055  return new SIMemoryLegalizer();
1056 }