LLVM  10.0.0svn
LSUnit.h
Go to the documentation of this file.
1 //===------------------------- LSUnit.h --------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// A Load/Store unit class that models load/store queues and that implements
11 /// a simple weak memory consistency model.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_MCA_LSUNIT_H
16 #define LLVM_MCA_LSUNIT_H
17 
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/MCA/HardwareUnit.h"
#include "llvm/MCA/Instruction.h"
23 
24 namespace llvm {
25 namespace mca {
26 
27 class Scheduler;
28 
29 /// A node of a memory dependency graph. A MemoryGroup describes a set of
30 /// instructions with same memory dependencies.
31 ///
32 /// By construction, instructions of a MemoryGroup don't depend on each other.
33 /// At dispatch stage, instructions are mapped by the LSUnit to MemoryGroups.
34 /// A Memory group identifier is then stored as a "token" in field
35 /// Instruction::LSUTokenID of each dispatched instructions. That token is used
36 /// internally by the LSUnit to track memory dependencies.
37 class MemoryGroup {
38  unsigned NumPredecessors;
39  unsigned NumExecutingPredecessors;
40  unsigned NumExecutedPredecessors;
41 
42  unsigned NumInstructions;
43  unsigned NumExecuting;
44  unsigned NumExecuted;
46 
47  CriticalDependency CriticalPredecessor;
48  InstRef CriticalMemoryInstruction;
49 
50  MemoryGroup(const MemoryGroup &) = delete;
51  MemoryGroup &operator=(const MemoryGroup &) = delete;
52 
53 public:
55  : NumPredecessors(0), NumExecutingPredecessors(0),
56  NumExecutedPredecessors(0), NumInstructions(0), NumExecuting(0),
57  NumExecuted(0), CriticalPredecessor(), CriticalMemoryInstruction() {}
58  MemoryGroup(MemoryGroup &&) = default;
59 
60  ArrayRef<MemoryGroup *> getSuccessors() const { return Succ; }
61  unsigned getNumSuccessors() const { return Succ.size(); }
62  unsigned getNumPredecessors() const { return NumPredecessors; }
63  unsigned getNumExecutingPredecessors() const {
64  return NumExecutingPredecessors;
65  }
66  unsigned getNumExecutedPredecessors() const {
67  return NumExecutedPredecessors;
68  }
69  unsigned getNumInstructions() const { return NumInstructions; }
70  unsigned getNumExecuting() const { return NumExecuting; }
71  unsigned getNumExecuted() const { return NumExecuted; }
72 
74  return CriticalMemoryInstruction;
75  }
77  return CriticalPredecessor;
78  }
79 
80  void addSuccessor(MemoryGroup *Group) {
81  Group->NumPredecessors++;
82  assert(!isExecuted() && "Should have been removed!");
83  if (isExecuting())
84  Group->onGroupIssued(CriticalMemoryInstruction);
85  Succ.emplace_back(Group);
86  }
87 
88  bool isWaiting() const {
89  return NumPredecessors >
90  (NumExecutingPredecessors + NumExecutedPredecessors);
91  }
92  bool isPending() const {
93  return NumExecutingPredecessors &&
94  ((NumExecutedPredecessors + NumExecutingPredecessors) ==
95  NumPredecessors);
96  }
97  bool isReady() const { return NumExecutedPredecessors == NumPredecessors; }
98  bool isExecuting() const {
99  return NumExecuting && (NumExecuting == (NumInstructions - NumExecuted));
100  }
101  bool isExecuted() const { return NumInstructions == NumExecuted; }
102 
103  void onGroupIssued(const InstRef &IR) {
104  assert(!isReady() && "Unexpected group-start event!");
105  NumExecutingPredecessors++;
106 
107  unsigned Cycles = IR.getInstruction()->getCyclesLeft();
108  if (CriticalPredecessor.Cycles < Cycles) {
109  CriticalPredecessor.IID = IR.getSourceIndex();
110  CriticalPredecessor.Cycles = Cycles;
111  }
112  }
113 
115  assert(!isReady() && "Inconsistent state found!");
116  NumExecutingPredecessors--;
117  NumExecutedPredecessors++;
118  }
119 
121  assert(!isExecuting() && "Invalid internal state!");
122  ++NumExecuting;
123 
124  // update the CriticalMemDep.
125  const Instruction &IS = *IR.getInstruction();
126  if ((bool)CriticalMemoryInstruction) {
127  const Instruction &OtherIS = *CriticalMemoryInstruction.getInstruction();
128  if (OtherIS.getCyclesLeft() < IS.getCyclesLeft())
129  CriticalMemoryInstruction = IR;
130  } else {
131  CriticalMemoryInstruction = IR;
132  }
133 
134  if (!isExecuting())
135  return;
136 
137  // Notify successors that this group started execution.
138  for (MemoryGroup *MG : Succ)
139  MG->onGroupIssued(CriticalMemoryInstruction);
140  }
141 
143  assert(isReady() && !isExecuted() && "Invalid internal state!");
144  --NumExecuting;
145  ++NumExecuted;
146 
147  if (!isExecuted())
148  return;
149 
150  // Notify successors that this group has finished execution.
151  for (MemoryGroup *MG : Succ)
152  MG->onGroupExecuted();
153  }
154 
155  void addInstruction() {
156  assert(!getNumSuccessors() && "Cannot add instructions to this group!");
157  ++NumInstructions;
158  }
159 
160  void cycleEvent() {
161  if (isWaiting() && CriticalPredecessor.Cycles)
162  CriticalPredecessor.Cycles--;
163  }
164 };
165 
166 /// Abstract base interface for LS (load/store) units in llvm-mca.
167 class LSUnitBase : public HardwareUnit {
168  /// Load queue size.
169  ///
170  /// A value of zero for this field means that the load queue is unbounded.
171  /// Processor models can declare the size of a load queue via tablegen (see
172  /// the definition of tablegen class LoadQueue in
173  /// llvm/Target/TargetSchedule.td).
174  unsigned LQSize;
175 
176  /// Load queue size.
177  ///
178  /// A value of zero for this field means that the store queue is unbounded.
179  /// Processor models can declare the size of a store queue via tablegen (see
180  /// the definition of tablegen class StoreQueue in
181  /// llvm/Target/TargetSchedule.td).
182  unsigned SQSize;
183 
184  unsigned UsedLQEntries;
185  unsigned UsedSQEntries;
186 
187  /// True if loads don't alias with stores.
188  ///
189  /// By default, the LS unit assumes that loads and stores don't alias with
190  /// eachother. If this field is set to false, then loads are always assumed to
191  /// alias with stores.
192  const bool NoAlias;
193 
194  /// Used to map group identifiers to MemoryGroups.
196  unsigned NextGroupID;
197 
198 public:
199  LSUnitBase(const MCSchedModel &SM, unsigned LoadQueueSize,
200  unsigned StoreQueueSize, bool AssumeNoAlias);
201 
202  virtual ~LSUnitBase();
203 
204  /// Returns the total number of entries in the load queue.
205  unsigned getLoadQueueSize() const { return LQSize; }
206 
207  /// Returns the total number of entries in the store queue.
208  unsigned getStoreQueueSize() const { return SQSize; }
209 
210  unsigned getUsedLQEntries() const { return UsedLQEntries; }
211  unsigned getUsedSQEntries() const { return UsedSQEntries; }
212  unsigned assignLQSlot() { return UsedLQEntries++; }
213  unsigned assignSQSlot() { return UsedSQEntries++; }
214 
215  bool assumeNoAlias() const { return NoAlias; }
216 
217  enum Status {
218  LSU_AVAILABLE = 0,
219  LSU_LQUEUE_FULL, // Load Queue unavailable
220  LSU_SQUEUE_FULL // Store Queue unavailable
221  };
222 
223  /// This method checks the availability of the load/store buffers.
224  ///
225  /// Returns LSU_AVAILABLE if there are enough load/store queue entries to
226  /// accomodate instruction IR. By default, LSU_AVAILABLE is returned if IR is
227  /// not a memory operation.
228  virtual Status isAvailable(const InstRef &IR) const = 0;
229 
230  /// Allocates LS resources for instruction IR.
231  ///
232  /// This method assumes that a previous call to `isAvailable(IR)` succeeded
233  /// with a LSUnitBase::Status value of LSU_AVAILABLE.
234  /// Returns the GroupID associated with this instruction. That value will be
235  /// used to set the LSUTokenID field in class Instruction.
236  virtual unsigned dispatch(const InstRef &IR) = 0;
237 
238  bool isSQEmpty() const { return !UsedSQEntries; }
239  bool isLQEmpty() const { return !UsedLQEntries; }
240  bool isSQFull() const { return SQSize && SQSize == UsedSQEntries; }
241  bool isLQFull() const { return LQSize && LQSize == UsedLQEntries; }
242 
243  bool isValidGroupID(unsigned Index) const {
244  return Index && (Groups.find(Index) != Groups.end());
245  }
246 
247  /// Check if a peviously dispatched instruction IR is now ready for execution.
248  bool isReady(const InstRef &IR) const {
249  unsigned GroupID = IR.getInstruction()->getLSUTokenID();
250  const MemoryGroup &Group = getGroup(GroupID);
251  return Group.isReady();
252  }
253 
254  /// Check if instruction IR only depends on memory instructions that are
255  /// currently executing.
256  bool isPending(const InstRef &IR) const {
257  unsigned GroupID = IR.getInstruction()->getLSUTokenID();
258  const MemoryGroup &Group = getGroup(GroupID);
259  return Group.isPending();
260  }
261 
262  /// Check if instruction IR is still waiting on memory operations, and the
263  /// wait time is still unknown.
264  bool isWaiting(const InstRef &IR) const {
265  unsigned GroupID = IR.getInstruction()->getLSUTokenID();
266  const MemoryGroup &Group = getGroup(GroupID);
267  return Group.isWaiting();
268  }
269 
270  bool hasDependentUsers(const InstRef &IR) const {
271  unsigned GroupID = IR.getInstruction()->getLSUTokenID();
272  const MemoryGroup &Group = getGroup(GroupID);
273  return !Group.isExecuted() && Group.getNumSuccessors();
274  }
275 
276  const MemoryGroup &getGroup(unsigned Index) const {
277  assert(isValidGroupID(Index) && "Group doesn't exist!");
278  return *Groups.find(Index)->second;
279  }
280 
282  assert(isValidGroupID(Index) && "Group doesn't exist!");
283  return *Groups.find(Index)->second;
284  }
285 
286  unsigned createMemoryGroup() {
287  Groups.insert(
288  std::make_pair(NextGroupID, std::make_unique<MemoryGroup>()));
289  return NextGroupID++;
290  }
291 
292  // Instruction executed event handlers.
293  virtual void onInstructionExecuted(const InstRef &IR);
294 
295  virtual void onInstructionIssued(const InstRef &IR) {
296  unsigned GroupID = IR.getInstruction()->getLSUTokenID();
297  Groups[GroupID]->onInstructionIssued(IR);
298  }
299 
300  virtual void cycleEvent();
301 
302 #ifndef NDEBUG
303  void dump() const;
304 #endif
305 };
306 
307 /// Default Load/Store Unit (LS Unit) for simulated processors.
308 ///
309 /// Each load (or store) consumes one entry in the load (or store) queue.
310 ///
311 /// Rules are:
312 /// 1) A younger load is allowed to pass an older load only if there are no
313 /// stores nor barriers in between the two loads.
/// 2) A younger store is not allowed to pass an older store.
315 /// 3) A younger store is not allowed to pass an older load.
316 /// 4) A younger load is allowed to pass an older store only if the load does
317 /// not alias with the store.
318 ///
319 /// This class optimistically assumes that loads don't alias store operations.
320 /// Under this assumption, younger loads are always allowed to pass older
/// stores (this would only affect rule 4).
/// Essentially, this class doesn't perform any sort of alias analysis to
323 /// identify aliasing loads and stores.
324 ///
325 /// To enforce aliasing between loads and stores, flag `AssumeNoAlias` must be
326 /// set to `false` by the constructor of LSUnit.
327 ///
328 /// Note that this class doesn't know about the existence of different memory
329 /// types for memory operations (example: write-through, write-combining, etc.).
330 /// Derived classes are responsible for implementing that extra knowledge, and
331 /// provide different sets of rules for loads and stores by overriding method
332 /// `isReady()`.
333 /// To emulate a write-combining memory type, rule 2. must be relaxed in a
334 /// derived class to enable the reordering of non-aliasing store operations.
335 ///
336 /// No assumptions are made by this class on the size of the store buffer. This
337 /// class doesn't know how to identify cases where store-to-load forwarding may
338 /// occur.
339 ///
340 /// LSUnit doesn't attempt to predict whether a load or store hits or misses
341 /// the L1 cache. To be more specific, LSUnit doesn't know anything about
342 /// cache hierarchy and memory types.
343 /// It only knows if an instruction "mayLoad" and/or "mayStore". For loads, the
344 /// scheduling model provides an "optimistic" load-to-use latency (which usually
345 /// matches the load-to-use latency for when there is a hit in the L1D).
346 /// Derived classes may expand this knowledge.
347 ///
348 /// Class MCInstrDesc in LLVM doesn't know about serializing operations, nor
349 /// memory-barrier like instructions.
350 /// LSUnit conservatively assumes that an instruction which `mayLoad` and has
/// `unmodeled side effects` behaves like a "soft" load-barrier. That means, it
352 /// serializes loads without forcing a flush of the load queue.
353 /// Similarly, instructions that both `mayStore` and have `unmodeled side
354 /// effects` are treated like store barriers. A full memory
355 /// barrier is a 'mayLoad' and 'mayStore' instruction with unmodeled side
356 /// effects. This is obviously inaccurate, but this is the best that we can do
357 /// at the moment.
358 ///
359 /// Each load/store barrier consumes one entry in the load/store queue. A
360 /// load/store barrier enforces ordering of loads/stores:
361 /// - A younger load cannot pass a load barrier.
362 /// - A younger store cannot pass a store barrier.
363 ///
364 /// A younger load has to wait for the memory load barrier to execute.
365 /// A load/store barrier is "executed" when it becomes the oldest entry in
366 /// the load/store queue(s). That also means, all the older loads/stores have
367 /// already been executed.
class LSUnit : public LSUnitBase {
  // This class doesn't know about the latency of a load instruction. So, it
  // conservatively/pessimistically assumes that the latency of a load opcode
  // matches the instruction latency.
  //
  // FIXME: In the absence of cache misses (i.e. L1I/L1D/iTLB/dTLB hits/misses),
  // and load/store conflicts, the latency of a load is determined by the depth
  // of the load pipeline. So, we could use field `LoadLatency` in the
  // MCSchedModel to model that latency.
  // Field `LoadLatency` often matches the so-called 'load-to-use' latency from
  // L1D, and it usually already accounts for any extra latency due to data
  // forwarding.
  // When doing throughput analysis, `LoadLatency` is likely to
  // be a better predictor of load latency than instruction latency. This is
  // particularly true when simulating code with temporal/spatial locality of
  // memory accesses.
  // Using `LoadLatency` (instead of the instruction latency) is also expected
  // to improve the load queue allocation for long latency instructions with
  // folded memory operands (See PR39829).
  //
  // FIXME: On some processors, load/store operations are split into multiple
  // uOps. For example, X86 AMD Jaguar natively supports 128-bit data types, but
  // not 256-bit data types. So, a 256-bit load is effectively split into two
  // 128-bit loads, and each split load consumes one 'LoadQueue' entry. For
  // simplicity, this class optimistically assumes that a load instruction only
  // consumes one entry in the LoadQueue. Similarly, store instructions only
  // consume a single entry in the StoreQueue.
  // In future, we should reassess the quality of this design, and consider
  // alternative approaches that let instructions specify the number of
  // load/store queue entries which they consume at dispatch stage (See
  // PR39830).
  //
  // An instruction that both 'mayStore' and 'HasUnmodeledSideEffects' is
  // conservatively treated as a store barrier. It forces older store to be
  // executed before newer stores are issued.
  //
  // An instruction that both 'MayLoad' and 'HasUnmodeledSideEffects' is
  // conservatively treated as a load barrier. It forces older loads to execute
  // before newer loads are issued.
  //
  // Identifiers (LSUnitBase group tokens; 0 means "none") of the memory
  // groups that new loads/stores/barriers must be ordered against.
  unsigned CurrentLoadGroupID;
  unsigned CurrentLoadBarrierGroupID;
  unsigned CurrentStoreGroupID;

public:
  // Convenience constructors: unbounded queues (size 0) and/or
  // conservative aliasing (NoAlias = false) by default.
  LSUnit(const MCSchedModel &SM)
      : LSUnit(SM, /* LQSize */ 0, /* SQSize */ 0, /* NoAlias */ false) {}
  LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ)
      : LSUnit(SM, LQ, SQ, /* NoAlias */ false) {}
  LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ, bool AssumeNoAlias)
      : LSUnitBase(SM, LQ, SQ, AssumeNoAlias), CurrentLoadGroupID(0),
        CurrentLoadBarrierGroupID(0), CurrentStoreGroupID(0) {}

  /// Returns LSU_AVAILABLE if there are enough load/store queue entries to
  /// accommodate instruction IR.
  Status isAvailable(const InstRef &IR) const override;

  /// Allocates LS resources for instruction IR.
  ///
  /// This method assumes that a previous call to `isAvailable(IR)` succeeded
  /// returning LSU_AVAILABLE.
  ///
  /// Rules are:
  /// By default, rules are:
  /// 1. A store may not pass a previous store.
  /// 2. A load may not pass a previous store unless flag 'NoAlias' is set.
  /// 3. A load may pass a previous load.
  /// 4. A store may not pass a previous load (regardless of flag 'NoAlias').
  /// 5. A load has to wait until an older load barrier is fully executed.
  /// 6. A store has to wait until an older store barrier is fully executed.
  unsigned dispatch(const InstRef &IR) override;

  // FIXME: For simplicity, we optimistically assume a similar behavior for
  // store instructions. In practice, store operations don't tend to leave the
  // store queue until they reach the 'Retired' stage (See PR39830).
  void onInstructionExecuted(const InstRef &IR) override;
};
444 
445 } // namespace mca
446 } // namespace llvm
447 
448 #endif // LLVM_MCA_LSUNIT_H
Instruction * getInstruction()
Definition: Instruction.h:576
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:641
An instruction propagated through the simulated instruction pipeline.
Definition: Instruction.h:445
void onGroupExecuted()
Definition: LSUnit.h:114
unsigned assignLQSlot()
Definition: LSUnit.h:212
This class represents lattice values for constants.
Definition: AllocatorList.h:23
bool isPending() const
Definition: LSUnit.h:92
int getCyclesLeft() const
Definition: Instruction.h:506
bool isValidGroupID(unsigned Index) const
Definition: LSUnit.h:243
unsigned getLoadQueueSize() const
Returns the total number of entries in the load queue.
Definition: LSUnit.h:205
bool isWaiting() const
Definition: LSUnit.h:88
bool isExecuting() const
Definition: LSUnit.h:98
unsigned getNumInstructions() const
Definition: LSUnit.h:69
The two locations do not alias at all.
Definition: AliasAnalysis.h:84
bool isSQEmpty() const
Definition: LSUnit.h:238
LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ)
Definition: LSUnit.h:414
An InstRef contains both a SourceMgr index and Instruction pair.
Definition: Instruction.h:562
A node of a memory dependency graph.
Definition: LSUnit.h:37
const InstRef & getCriticalMemoryInstruction() const
Definition: LSUnit.h:73
void onInstructionIssued(const InstRef &IR)
Definition: LSUnit.h:120
unsigned getSourceIndex() const
Definition: Instruction.h:575
bool isSQFull() const
Definition: LSUnit.h:240
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:195
Default Load/Store Unit (LS Unit) for simulated processors.
Definition: LSUnit.h:368
unsigned createMemoryGroup()
Definition: LSUnit.h:286
bool isLQFull() const
Definition: LSUnit.h:241
LSUnit(const MCSchedModel &SM)
Definition: LSUnit.h:412
unsigned getUsedLQEntries() const
Definition: LSUnit.h:210
ArrayRef< MemoryGroup * > getSuccessors() const
Definition: LSUnit.h:60
unsigned getLSUTokenID() const
Definition: Instruction.h:499
Abstract base interface for LS (load/store) units in llvm-mca.
Definition: LSUnit.h:167
bool assumeNoAlias() const
Definition: LSUnit.h:215
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
unsigned getNumExecuted() const
Definition: LSUnit.h:71
unsigned assignSQSlot()
Definition: LSUnit.h:213
void onInstructionExecuted()
Definition: LSUnit.h:142
unsigned getNumExecutingPredecessors() const
Definition: LSUnit.h:63
bool isAvailable()
Definition: Compression.cpp:47
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
bool hasDependentUsers(const InstRef &IR) const
Definition: LSUnit.h:270
bool isExecuted() const
Definition: LSUnit.h:101
unsigned getNumExecutedPredecessors() const
Definition: LSUnit.h:66
This file defines a base class for describing a simulated hardware unit.
unsigned getUsedSQEntries() const
Definition: LSUnit.h:211
size_t size() const
Definition: SmallVector.h:52
A critical data dependency descriptor.
Definition: Instruction.h:87
static const X86InstrFMA3Group Groups[]
bool isReady() const
Definition: LSUnit.h:97
bool isLQEmpty() const
Definition: LSUnit.h:239
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
void addSuccessor(MemoryGroup *Group)
Definition: LSUnit.h:80
const CriticalDependency & getCriticalPredecessor() const
Definition: LSUnit.h:76
unsigned getNumExecuting() const
Definition: LSUnit.h:70
unsigned getNumPredecessors() const
Definition: LSUnit.h:62
unsigned getStoreQueueSize() const
Returns the total number of entries in the store queue.
Definition: LSUnit.h:208
bool isPending(const InstRef &IR) const
Check if instruction IR only depends on memory instructions that are currently executing.
Definition: LSUnit.h:256
unsigned getNumSuccessors() const
Definition: LSUnit.h:61
MemoryGroup & getGroup(unsigned Index)
Definition: LSUnit.h:281
bool isWaiting(const InstRef &IR) const
Check if instruction IR is still waiting on memory operations, and the wait time is still unknown...
Definition: LSUnit.h:264
This file defines abstractions used by the Pipeline to model register reads, register writes and inst...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Machine Instruction Scheduler
void onGroupIssued(const InstRef &IR)
Definition: LSUnit.h:103
LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ, bool AssumeNoAlias)
Definition: LSUnit.h:416
virtual void onInstructionIssued(const InstRef &IR)
Definition: LSUnit.h:295
Machine model for scheduling, bundling, and heuristics.
Definition: MCSchedule.h:244
Statically lint checks LLVM IR
Definition: Lint.cpp:192
const MemoryGroup & getGroup(unsigned Index) const
Definition: LSUnit.h:276
bool isReady(const InstRef &IR) const
Check if a peviously dispatched instruction IR is now ready for execution.
Definition: LSUnit.h:248