LLVM  16.0.0git
SIModeRegister.cpp
Go to the documentation of this file.
1 //===-- SIModeRegister.cpp - Mode Register --------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This pass inserts changes to the Mode register settings as required.
10 /// Note that currently it only deals with the Double Precision Floating Point
11 /// rounding mode setting, but is intended to be generic enough to be easily
12 /// expanded.
13 ///
14 //===----------------------------------------------------------------------===//
15 //
16 #include "AMDGPU.h"
17 #include "GCNSubtarget.h"
19 #include "llvm/ADT/Statistic.h"
21 #include <queue>
22 
23 #define DEBUG_TYPE "si-mode-register"
24 
25 STATISTIC(NumSetregInserted, "Number of setreg of mode register inserted.");
26 
27 using namespace llvm;
28 
/// Describes what is known about the hardware Mode register at one point in
/// the program.
///
/// A '1' in Mask marks a Mode-register bit whose value is known; Mode holds the
/// values of those bits. The two-argument constructor clears every Mode bit
/// that lies outside Mask, so equal knowledge always compares equal.
struct Status {
  // Mask is a bitmask where a '1' indicates the corresponding Mode bit has a
  // known value
  unsigned Mask;
  unsigned Mode;

  /// Nothing known: empty mask, zero mode.
  Status() : Mask(0), Mode(0) {}

  /// Known bits \p NewMask with values \p NewMode (bits outside the mask are
  /// discarded).
  Status(unsigned NewMask, unsigned NewMode)
      : Mask(NewMask), Mode(NewMode & NewMask) {}

  // merge two status values such that only values that don't conflict are
  // preserved. Where both know a bit, the value from S wins.
  Status merge(const Status &S) const {
    return Status((Mask | S.Mask), ((Mode & ~S.Mask) | (S.Mode & S.Mask)));
  }

  // merge an unknown value by using the unknown value's mask to remove bits
  // from the result
  Status mergeUnknown(unsigned newMask) const {
    return Status(Mask & ~newMask, Mode & ~newMask);
  }

  // intersect two Status values to produce a mode and mask that is a subset
  // of both values: a bit survives only if both sides know it and agree on it.
  Status intersect(const Status &S) const {
    unsigned NewMask = (Mask & S.Mask) & (Mode ^ ~S.Mode);
    unsigned NewMode = (Mode & NewMask);
    return Status(NewMask, NewMode);
  }

  // produce the delta required to change the Mode to the required Mode: the
  // bits S needs that either differ from ours or are unknown to us.
  Status delta(const Status &S) const {
    return Status((S.Mask & (Mode ^ S.Mode)) | (~Mask & S.Mask), S.Mode);
  }

  bool operator==(const Status &S) const {
    return (Mask == S.Mask) && (Mode == S.Mode);
  }

  bool operator!=(const Status &S) const { return !(*this == S); }

  // true if every bit S requires is known here and already has the value S
  // requires.
  bool isCompatible(const Status &S) const {
    return ((Mask & S.Mask) == S.Mask) && ((Mode & S.Mask) == S.Mode);
  }

  // true if S can be folded into this status: either the two touch disjoint
  // bits, or this status already satisfies S.
  bool isCombinable(const Status &S) const {
    return !(Mask & S.Mask) || isCompatible(S);
  }
};
78 
79 class BlockData {
80 public:
81  // The Status that represents the mode register settings required by the
82  // FirstInsertionPoint (if any) in this block. Calculated in Phase 1.
84 
85  // The Status that represents the net changes to the Mode register made by
86  // this block, Calculated in Phase 1.
88 
89  // The Status that represents the mode register settings on exit from this
90  // block. Calculated in Phase 2.
92 
93  // The Status that represents the intersection of exit Mode register settings
94  // from all predecessor blocks. Calculated in Phase 2, and used by Phase 3.
96 
97  // In Phase 1 we record the first instruction that has a mode requirement,
98  // which is used in Phase 3 if we need to insert a mode change.
100 
101  // A flag to indicate whether an Exit value has been set (we can't tell by
102  // examining the Exit value itself as all values may be valid results).
103  bool ExitSet;
104 
105  BlockData() : FirstInsertionPoint(nullptr), ExitSet(false){};
106 };
107 
108 namespace {
109 
// Machine-function pass that analyses each block's Mode register needs in
// three phases and inserts S_SETREG_IMM32_B32 instructions where required.
110 class SIModeRegister : public MachineFunctionPass {
111 public:
112  static char ID;
113 
     // Per-block analysis results, indexed by MachineBasicBlock::getNumber().
114  std::vector<std::unique_ptr<BlockData>> BlockInfo;
     // Worklist of blocks that still need (re)processing in Phase 2.
115  std::queue<MachineBasicBlock *> Phase2List;
116 
117  // The default mode register setting currently only caters for the floating
118  // point double precision rounding mode.
119  // We currently assume the default rounding mode is Round to Nearest
120  // NOTE: this should come from a per function rounding mode setting once such
121  // a setting exists.
122  unsigned DefaultMode = FP_ROUND_ROUND_TO_NEAREST;
123  Status DefaultStatus =
124  Status(FP_ROUND_MODE_DP(0x3), FP_ROUND_MODE_DP(DefaultMode));
125 
     // Set to true by insertSetreg whenever it adds an instruction; this is the
     // value runOnMachineFunction returns.
126  bool Changed = false;
127 
128 public:
129  SIModeRegister() : MachineFunctionPass(ID) {}
130 
131  bool runOnMachineFunction(MachineFunction &MF) override;
132 
     // NOTE(review): original line 135 is absent from this listing; presumably
     // it forwards to MachineFunctionPass::getAnalysisUsage(AU) — confirm
     // against upstream.
133  void getAnalysisUsage(AnalysisUsage &AU) const override {
134  AU.setPreservesCFG();
136  }
137 
     // Phase 1: per-block scan that records Require/Change and inserts
     // intra-block setregs.
138  void processBlockPhase1(MachineBasicBlock &MBB, const SIInstrInfo *TII);
139 
     // Phase 2: worklist propagation of predecessor exit states.
140  void processBlockPhase2(MachineBasicBlock &MBB, const SIInstrInfo *TII);
141 
     // Phase 3: insert the deferred block-entry setreg where predecessors do
     // not already satisfy the block's requirement.
142  void processBlockPhase3(MachineBasicBlock &MBB, const SIInstrInfo *TII);
143 
     // Mode requirement of a single instruction (empty Status if none).
144  Status getInstructionMode(MachineInstr &MI, const SIInstrInfo *TII);
145 
     // Emit the setreg instruction(s) needed to apply InstrMode before I.
146  void insertSetreg(MachineBasicBlock &MBB, MachineInstr *I,
147  const SIInstrInfo *TII, Status InstrMode);
148 };
149 } // End anonymous namespace.
150 
// Register the pass under the DEBUG_TYPE name; the two trailing 'false'
// arguments are the macro's cfg and analysis parameters.
151 INITIALIZE_PASS(SIModeRegister, DEBUG_TYPE,
152  "Insert required mode register values", false, false)
153 
// Storage for the static pass identifier declared in the class.
154 char SIModeRegister::ID = 0;
155 
// Reference to the pass identifier, exposed in namespace llvm.
156 char &llvm::SIModeRegisterID = SIModeRegister::ID;
157 
// Factory: returns a newly allocated instance of this pass.
158 FunctionPass *llvm::createSIModeRegisterPass() { return new SIModeRegister(); }
159 
160 // Determine the Mode register setting required for this instruction.
161 // Instructions which don't use the Mode register return a null Status.
162 // Note this currently only deals with instructions that use the floating point
163 // double precision setting.
// Side effect: the two FPTRUNC_*_PSEUDO opcodes are rewritten in place (via
// MI.setDesc) into a real V_CVT_F16_F32 instruction while being classified.
164 Status SIModeRegister::getInstructionMode(MachineInstr &MI,
165  const SIInstrInfo *TII) {
     // Only instructions reported by usesFPDPRounding, plus the two FPTRUNC
     // rounding pseudos, produce a non-empty Status.
166  if (TII->usesFPDPRounding(MI) ||
167  MI.getOpcode() == AMDGPU::FPTRUNC_UPWARD_PSEUDO ||
168  MI.getOpcode() == AMDGPU::FPTRUNC_DOWNWARD_PSEUDO) {
169  switch (MI.getOpcode()) {
170  case AMDGPU::V_INTERP_P1LL_F16:
171  case AMDGPU::V_INTERP_P1LV_F16:
172  case AMDGPU::V_INTERP_P2_F16:
173  // f16 interpolation instructions need double precision round to zero
     // NOTE(review): the second argument of this Status(...) (original line
     // 175) is absent from this listing.
174  return Status(FP_ROUND_MODE_DP(3),
176  case AMDGPU::FPTRUNC_UPWARD_PSEUDO: {
177  // Replacing the pseudo by a real instruction in place
178  if (TII->getSubtarget().hasTrue16BitInsts()) {
179  MachineBasicBlock &MBB = *MI.getParent();
     // NOTE(review): the declaration of builder 'B' (original line 180) is
     // absent from this listing.
181  MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_t16_e64));
     // The src0 operand is removed and re-added so that the src0_modifiers
     // immediate can be placed in front of it.
182  MachineOperand Src0 = MI.getOperand(1);
183  MI.removeOperand(1);
184  B.addImm(0); // src0_modifiers
185  B.add(Src0); // re-add src0 operand
186  B.addImm(0); // clamp
187  B.addImm(0); // omod
188  } else
189  MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
     // NOTE(review): second Status argument (original line 191) is absent.
190  return Status(FP_ROUND_MODE_DP(3),
192  }
193  case AMDGPU::FPTRUNC_DOWNWARD_PSEUDO: {
194  // Replacing the pseudo by a real instruction in place
195  if (TII->getSubtarget().hasTrue16BitInsts()) {
196  MachineBasicBlock &MBB = *MI.getParent();
     // NOTE(review): the declaration of builder 'B' (original line 197) is
     // absent from this listing.
198  MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_t16_e64));
199  MachineOperand Src0 = MI.getOperand(1);
200  MI.removeOperand(1);
201  B.addImm(0); // src0_modifiers
202  B.add(Src0); // re-add src0 operand
203  B.addImm(0); // clamp
204  B.addImm(0); // omod
205  } else
206  MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
     // NOTE(review): second Status argument (original line 208) is absent.
207  return Status(FP_ROUND_MODE_DP(3),
209  }
210  default:
     // Every other instruction selected above gets the pass-wide default.
211  return DefaultStatus;
212  }
213  }
     // Instruction was not selected above: no requirement.
214  return Status();
215 }
216 
217 // Insert a setreg instruction to update the Mode register.
218 // It is possible (though unlikely) for an instruction to require a change to
219 // the value of disjoint parts of the Mode register when we don't know the
220 // value of the intervening bits. In that case we need to use more than one
221 // setreg instruction.
// Each loop iteration handles the lowest contiguous run of set bits in
// InstrMode.Mask, emits one S_SETREG_IMM32_B32 for it before MI, and then
// clears that run from the mask.
222 void SIModeRegister::insertSetreg(MachineBasicBlock &MBB, MachineInstr *MI,
223  const SIInstrInfo *TII, Status InstrMode) {
224  while (InstrMode.Mask) {
     // Offset = position of the lowest set mask bit; Width = length of the run
     // of set bits starting there.
225  unsigned Offset = countTrailingZeros<unsigned>(InstrMode.Mask);
226  unsigned Width = countTrailingOnes<unsigned>(InstrMode.Mask >> Offset);
     // NOTE(review): (1 << Width) is an int shift; presumably Width never
     // reaches 32 because the mask only covers a few Mode bits — confirm.
227  unsigned Value = (InstrMode.Mode >> Offset) & ((1 << Width) - 1);
228  BuildMI(MBB, MI, nullptr, TII->get(AMDGPU::S_SETREG_IMM32_B32))
229  .addImm(Value)
     // NOTE(review): original lines 230 and 232 (the rest of the second
     // immediate operand) are absent from this listing.
231  (Offset << AMDGPU::Hwreg::OFFSET_SHIFT_) |
233  ++NumSetregInserted;
234  Changed = true;
     // Drop the bits just handled so the loop terminates.
235  InstrMode.Mask &= ~(((1 << Width) - 1) << Offset);
236  }
237 }
238 
239 // In Phase 1 we iterate through the instructions of the block and for each
240 // instruction we get its mode usage. If the instruction uses the Mode register
241 // we:
242 // - update the Change status, which tracks the changes to the Mode register
243 // made by this block
244 // - if this instruction's requirements are compatible with the current setting
245 // of the Mode register we merge the modes
246 // - if it isn't compatible and an InsertionPoint isn't set, then we set the
247 // InsertionPoint to the current instruction, and we remember the current
248 // mode
249 // - if it isn't compatible and InsertionPoint is set we insert a setreg before
250 // that instruction (unless this instruction forms part of the block's
251 // entry requirements in which case the insertion is deferred until Phase 3
252 // when predecessor exit values are known), and move the insertion point to
253 // this instruction
254 // - if this is a setreg instruction we treat it as an incompatible instruction.
255 // This is sub-optimal but avoids some nasty corner cases, and is expected to
256 // occur very rarely.
257 // - on exit we have set the Require, Change, and initial Exit modes.
// Builds a fresh BlockData for MBB, fills in Require, Change, Exit and
// FirstInsertionPoint, and stores it in BlockInfo[MBB.getNumber()].
258 void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB,
259  const SIInstrInfo *TII) {
260  auto NewInfo = std::make_unique<BlockData>();
     // Instruction before which the next setreg would be placed, if any.
261  MachineInstr *InsertionPoint = nullptr;
262  // RequirePending is used to indicate whether we are collecting the initial
263  // requirements for the block, and need to defer the first InsertionPoint to
264  // Phase 3. It is set to false once we have set FirstInsertionPoint, or when
265  // we discover an explicit setreg that means this block doesn't have any
266  // initial requirements.
267  bool RequirePending = true;
     // Snapshot of NewInfo->Change taken when the current InsertionPoint was
     // chosen; IPChange.delta(Change) is what a setreg placed there must apply.
268  Status IPChange;
269  for (MachineInstr &MI : MBB) {
270  Status InstrMode = getInstructionMode(MI, TII);
271  if (MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
272  MI.getOpcode() == AMDGPU::S_SETREG_B32_mode ||
273  MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
274  MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {
275  // We preserve any explicit mode register setreg instruction we encounter,
276  // as we assume it has been inserted by a higher authority (this is
277  // likely to be a very rare occurrence).
278  unsigned Dst = TII->getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm();
     // NOTE(review): original lines 279-280 (the condition guarding this
     // 'continue') are absent from this listing.
281  continue;
282 
     // NOTE(review): original lines 284 and 287 (the remainder of the Width
     // and Offset expressions) are absent from this listing.
283  unsigned Width = ((Dst & AMDGPU::Hwreg::WIDTH_M1_MASK_) >>
285  1;
286  unsigned Offset =
288  unsigned Mask = ((1 << Width) - 1) << Offset;
289 
290  // If an InsertionPoint is set we will insert a setreg there.
291  if (InsertionPoint) {
292  insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change));
293  InsertionPoint = nullptr;
294  }
295  // If this is an immediate then we know the value being set, but if it is
296  // not an immediate then we treat the modified bits of the mode register
297  // as unknown.
298  if (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
299  MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {
300  unsigned Val = TII->getNamedOperand(MI, AMDGPU::OpName::imm)->getImm();
301  unsigned Mode = (Val << Offset) & Mask;
302  Status Setreg = Status(Mask, Mode);
303  // If we haven't already set the initial requirements for the block we
304  // don't need to as the requirements start from this explicit setreg.
305  RequirePending = false;
306  NewInfo->Change = NewInfo->Change.merge(Setreg);
307  } else {
308  NewInfo->Change = NewInfo->Change.mergeUnknown(Mask);
309  }
310  } else if (!NewInfo->Change.isCompatible(InstrMode)) {
311  // This instruction uses the Mode register and its requirements aren't
312  // compatible with the current mode.
313  if (InsertionPoint) {
314  // If the required mode change cannot be included in the current
315  // InsertionPoint changes, we need a setreg and start a new
316  // InsertionPoint.
317  if (!IPChange.delta(NewInfo->Change).isCombinable(InstrMode)) {
318  if (RequirePending) {
319  // This is the first insertionPoint in the block so we will defer
320  // the insertion of the setreg to Phase 3 where we know whether or
321  // not it is actually needed.
322  NewInfo->FirstInsertionPoint = InsertionPoint;
323  NewInfo->Require = NewInfo->Change;
324  RequirePending = false;
325  } else {
326  insertSetreg(MBB, InsertionPoint, TII,
327  IPChange.delta(NewInfo->Change));
328  IPChange = NewInfo->Change;
329  }
330  // Set the new InsertionPoint
331  InsertionPoint = &MI;
332  }
     // In both cases the block's net change now includes this requirement.
333  NewInfo->Change = NewInfo->Change.merge(InstrMode);
334  } else {
335  // No InsertionPoint is currently set - this is either the first in
336  // the block or we have previously seen an explicit setreg.
337  InsertionPoint = &MI;
338  IPChange = NewInfo->Change;
339  NewInfo->Change = NewInfo->Change.merge(InstrMode);
340  }
341  }
342  }
343  if (RequirePending) {
344  // If we haven't yet set the initial requirements for the block we set them
345  // now.
346  NewInfo->FirstInsertionPoint = InsertionPoint;
347  NewInfo->Require = NewInfo->Change;
348  } else if (InsertionPoint) {
349  // We need to insert a setreg at the InsertionPoint
350  insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change));
351  }
     // Initial Exit estimate; Phase 2 refines it with predecessor information.
352  NewInfo->Exit = NewInfo->Change;
353  BlockInfo[MBB.getNumber()] = std::move(NewInfo);
354 }
355 
356 // In Phase 2 we revisit each block and calculate the common Mode register
357 // value provided by all predecessor blocks. If the Exit value for the block
358 // is changed, then we add the successor blocks to the worklist so that the
359 // exit value is propagated.
// Recomputes Pred and Exit for MBB from its predecessors' Exit values, pushing
// successors onto Phase2List when Exit changes and MBB itself when a
// predecessor had no Exit value yet.
360 void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB,
361  const SIInstrInfo *TII) {
362  bool RevisitRequired = false;
     // Local result; written back to BlockInfo[ThisBlock]->ExitSet at the end.
363  bool ExitSet = false;
364  unsigned ThisBlock = MBB.getNumber();
365  if (MBB.pred_empty()) {
366  // There are no predecessors, so use the default starting status.
367  BlockInfo[ThisBlock]->Pred = DefaultStatus;
368  ExitSet = true;
369  } else {
370  // Build a status that is common to all the predecessors by intersecting
371  // all the predecessor exit status values.
372  // Mask bits (which represent the Mode bits with a known value) can only be
373  // added by explicit SETREG instructions or the initial default value -
374  // the intersection process may remove Mask bits.
375  // If we find a predecessor that has not yet had an exit value determined
376  // (this can happen for example if a block is its own predecessor) we defer
377  // use of that value as the Mask will be all zero, and we will revisit this
378  // block again later (unless the only predecessor without an exit value is
379  // this block).
     // NOTE(review): original line 380 (the declaration of the predecessor
     // iterators P and E) is absent from this listing.
381  MachineBasicBlock &PB = *(*P);
382  unsigned PredBlock = PB.getNumber();
     // First predecessor: a block whose only predecessor is itself starts from
     // the default status.
383  if ((ThisBlock == PredBlock) && (std::next(P) == E)) {
384  BlockInfo[ThisBlock]->Pred = DefaultStatus;
385  ExitSet = true;
386  } else if (BlockInfo[PredBlock]->ExitSet) {
387  BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
388  ExitSet = true;
389  } else if (PredBlock != ThisBlock)
390  RevisitRequired = true;
391 
     // Remaining predecessors.
392  for (P = std::next(P); P != E; P = std::next(P)) {
393  MachineBasicBlock *Pred = *P;
394  unsigned PredBlock = Pred->getNumber();
395  if (BlockInfo[PredBlock]->ExitSet) {
     // NOTE(review): this tests the stored BlockInfo[ThisBlock]->ExitSet (the
     // value from a previous visit), not the local ExitSet updated above —
     // confirm that is intended.
396  if (BlockInfo[ThisBlock]->ExitSet) {
397  BlockInfo[ThisBlock]->Pred =
398  BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[PredBlock]->Exit);
399  } else {
400  BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
401  }
402  ExitSet = true;
403  } else if (PredBlock != ThisBlock)
404  RevisitRequired = true;
405  }
406  }
     // Exit = state on entry with this block's own changes applied on top.
407  Status TmpStatus =
408  BlockInfo[ThisBlock]->Pred.merge(BlockInfo[ThisBlock]->Change);
409  if (BlockInfo[ThisBlock]->Exit != TmpStatus) {
410  BlockInfo[ThisBlock]->Exit = TmpStatus;
411  // Add the successors to the work list so we can propagate the changed exit
412  // status.
413  for (MachineBasicBlock *Succ : MBB.successors())
414  Phase2List.push(Succ);
415  }
416  BlockInfo[ThisBlock]->ExitSet = ExitSet;
417  if (RevisitRequired)
418  Phase2List.push(&MBB);
419 }
420 
421 // In Phase 3 we revisit each block and if it has an insertion point defined we
422 // check whether the predecessor mode meets the block's entry requirements. If
423 // not we insert an appropriate setreg instruction to modify the Mode register.
424 void SIModeRegister::processBlockPhase3(MachineBasicBlock &MBB,
425  const SIInstrInfo *TII) {
426  unsigned ThisBlock = MBB.getNumber();
427  if (!BlockInfo[ThisBlock]->Pred.isCompatible(BlockInfo[ThisBlock]->Require)) {
428  Status Delta =
429  BlockInfo[ThisBlock]->Pred.delta(BlockInfo[ThisBlock]->Require);
430  if (BlockInfo[ThisBlock]->FirstInsertionPoint)
431  insertSetreg(MBB, BlockInfo[ThisBlock]->FirstInsertionPoint, TII, Delta);
432  else
433  insertSetreg(MBB, &MBB.instr_front(), TII, Delta);
434  }
435 }
436 
437 bool SIModeRegister::runOnMachineFunction(MachineFunction &MF) {
438  BlockInfo.resize(MF.getNumBlockIDs());
439  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
440  const SIInstrInfo *TII = ST.getInstrInfo();
441 
442  // Processing is performed in a number of phases
443 
444  // Phase 1 - determine the initial mode required by each block, and add setreg
445  // instructions for intra block requirements.
446  for (MachineBasicBlock &BB : MF)
447  processBlockPhase1(BB, TII);
448 
449  // Phase 2 - determine the exit mode from each block. We add all blocks to the
450  // list here, but will also add any that need to be revisited during Phase 2
451  // processing.
452  for (MachineBasicBlock &BB : MF)
453  Phase2List.push(&BB);
454  while (!Phase2List.empty()) {
455  processBlockPhase2(*Phase2List.front(), TII);
456  Phase2List.pop();
457  }
458 
459  // Phase 3 - add an initial setreg to each block where the required entry mode
460  // is not satisfied by the exit mode of all its predecessors.
461  for (MachineBasicBlock &BB : MF)
462  processBlockPhase3(BB, TII);
463 
464  BlockInfo.clear();
465 
466  return Changed;
467 }
FP_ROUND_ROUND_TO_NEAREST
#define FP_ROUND_ROUND_TO_NEAREST
Definition: SIDefines.h:1014
BlockData::Change
Status Change
Definition: SIModeRegister.cpp:87
llvm::MachineBasicBlock::pred_begin
pred_iterator pred_begin()
Definition: MachineBasicBlock.h:353
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:108
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
Status::mergeUnknown
Status mergeUnknown(unsigned newMask)
Definition: SIModeRegister.cpp:49
llvm::AMDGPU::Hwreg::ID_SHIFT_
@ ID_SHIFT_
Definition: SIDefines.h:411
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Statistic.h
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::MachineFunction::getNumBlockIDs
unsigned getNumBlockIDs() const
getNumBlockIDs - Return the number of MBB ID's allocated.
Definition: MachineFunction.h:799
llvm::AMDGPU::Hwreg::ID_MASK_
@ ID_MASK_
Definition: SIDefines.h:413
BlockData::BlockData
BlockData()
Definition: SIModeRegister.cpp:105
BlockData::Pred
Status Pred
Definition: SIModeRegister.cpp:95
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:167
FP_ROUND_ROUND_TO_ZERO
#define FP_ROUND_ROUND_TO_ZERO
Definition: SIDefines.h:1017
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
DEBUG_TYPE
#define DEBUG_TYPE
Definition: SIModeRegister.cpp:23
x3
In x86 we generate this spiffy xmm0 xmm0 ret in x86 we generate this which could be xmm1 movss xmm1 xmm0 ret In sse4 we could use insertps to make both better Here s another testcase that could use x3
Definition: README-SSE.txt:547
GCNSubtarget.h
llvm::AMDGPU::Hwreg::ID_MODE
@ ID_MODE
Definition: SIDefines.h:386
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
INITIALIZE_PASS
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:37
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
BlockData::FirstInsertionPoint
MachineInstr * FirstInsertionPoint
Definition: SIModeRegister.cpp:99
false
Definition: StackSlotColoring.cpp:141
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48
STATISTIC
STATISTIC(NumSetregInserted, "Number of setreg of mode register inserted.")
BlockData::Exit
Status Exit
Definition: SIModeRegister.cpp:91
Status::operator==
bool operator==(const Status &S) const
Definition: SIModeRegister.cpp:66
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
llvm::AMDGPU::Hwreg::OFFSET_MASK_
@ OFFSET_MASK_
Definition: SIDefines.h:420
llvm::AMDGPU::Hwreg::WIDTH_M1_SHIFT_
@ WIDTH_M1_SHIFT_
Definition: SIDefines.h:427
PB
PassBuilder PB(Machine, PassOpts->PTO, None, &PIC)
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:657
BlockData::Require
Status Require
Definition: SIModeRegister.cpp:83
FP_ROUND_ROUND_TO_INF
#define FP_ROUND_ROUND_TO_INF
Definition: SIDefines.h:1015
AMDGPUMCTargetDesc.h
llvm::MachineBasicBlock::pred_end
pred_iterator pred_end()
Definition: MachineBasicBlock.h:355
llvm::AMDGPU::Hwreg::Offset
Offset
Definition: SIDefines.h:416
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:69
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
I
#define I(x, y, z)
Definition: MD5.cpp:58
MachineFunctionPass.h
llvm::MachineBasicBlock::pred_iterator
std::vector< MachineBasicBlock * >::iterator pred_iterator
Definition: MachineBasicBlock.h:341
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:261
FP_ROUND_ROUND_TO_NEGINF
#define FP_ROUND_ROUND_TO_NEGINF
Definition: SIDefines.h:1016
Mode
SI Whole Quad Mode
Definition: SIWholeQuadMode.cpp:264
llvm::SIModeRegisterID
char & SIModeRegisterID
llvm::MachineBasicBlock::pred_empty
bool pred_empty() const
Definition: MachineBasicBlock.h:368
Status
Definition: SIModeRegister.cpp:29
llvm::MachineFunction
Definition: MachineFunction.h:257
Status::operator!=
bool operator!=(const Status &S) const
Definition: SIModeRegister.cpp:70
llvm::MachineBasicBlock::getNumber
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
Definition: MachineBasicBlock.h:1115
llvm::MachineBasicBlock::successors
iterator_range< succ_iterator > successors()
Definition: MachineBasicBlock.h:392
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:265
AMDGPU.h
Status::Status
Status()
Definition: SIModeRegister.cpp:35
llvm::AMDGPU::Hwreg::OFFSET_SHIFT_
@ OFFSET_SHIFT_
Definition: SIDefines.h:418
Status::Mode
unsigned Mode
Definition: SIModeRegister.cpp:33
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::MachineBasicBlock::instr_front
MachineInstr & instr_front()
Definition: MachineBasicBlock.h:279
FP_ROUND_MODE_DP
#define FP_ROUND_MODE_DP(x)
Definition: SIDefines.h:1022
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
Status::merge
Status merge(const Status &S) const
Definition: SIModeRegister.cpp:43
Status::delta
Status delta(const Status &S) const
Definition: SIModeRegister.cpp:62
Status::intersect
Status intersect(const Status &S) const
Definition: SIModeRegister.cpp:55
llvm::createSIModeRegisterPass
FunctionPass * createSIModeRegisterPass()
Definition: SIModeRegister.cpp:158
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:433
llvm::AMDGPU::Hwreg::WIDTH_M1_MASK_
@ WIDTH_M1_MASK_
Definition: SIDefines.h:429
llvm::SIInstrInfo
Definition: SIInstrInfo.h:44
Status::Status
Status(unsigned NewMask, unsigned NewMode)
Definition: SIModeRegister.cpp:37
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:357
Status::isCombinable
bool isCombinable(Status &S)
Definition: SIModeRegister.cpp:76
Status::isCompatible
bool isCompatible(Status &S)
Definition: SIModeRegister.cpp:72
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
Status::Mask
unsigned Mask
Definition: SIModeRegister.cpp:32
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
BlockData::ExitSet
bool ExitSet
Definition: SIModeRegister.cpp:103