LLVM  15.0.0git
SIModeRegister.cpp
Go to the documentation of this file.
1 //===-- SIModeRegister.cpp - Mode Register --------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This pass inserts changes to the Mode register settings as required.
10 /// Note that currently it only deals with the Double Precision Floating Point
11 /// rounding mode setting, but is intended to be generic enough to be easily
12 /// expanded.
13 ///
14 //===----------------------------------------------------------------------===//
15 //
16 #include "AMDGPU.h"
17 #include "GCNSubtarget.h"
19 #include "llvm/ADT/Statistic.h"
21 #include <queue>
22 
// Name of this pass; it is also handed to INITIALIZE_PASS below as the pass
// argument.
23 #define DEBUG_TYPE "si-mode-register"
24 
// Counter incremented once for every setreg emitted by insertSetreg.
25 STATISTIC(NumSetregInserted, "Number of setreg of mode register inserted.");
26 
// The rest of the file refers to LLVM names without the llvm:: qualifier.
27 using namespace llvm;
28 
// Describes what is known about the Mode register: Mask selects the bits
// whose value is known and Mode holds the value of those bits.
struct Status {
  // Mask is a bitmask where a '1' indicates the corresponding Mode bit has a
  // known value.
  unsigned Mask;
  // Values of the known bits.
  unsigned Mode;

  // A Status in which no bit is known.
  Status() : Mask(0), Mode(0) {}

  // Builds a Status from NewMask/NewMode; any NewMode bit that lies outside
  // NewMask is cleared.
  Status(unsigned NewMask, unsigned NewMode)
      : Mask(NewMask), Mode(NewMode & NewMask) {}

  // Merge two status values. The result knows every bit known by either
  // operand; where both know a bit, the value from S is the one kept.
  Status merge(const Status &S) const {
    return Status((Mask | S.Mask), ((Mode & ~S.Mask) | (S.Mode & S.Mask)));
  }

  // Merge an unknown value by using the unknown value's mask to remove bits
  // from the result.
  Status mergeUnknown(unsigned newMask) const {
    return Status(Mask & ~newMask, Mode & ~newMask);
  }

  // Intersect two Status values to produce a mode and mask that is a subset
  // of both values: a bit survives only if both operands know it and agree on
  // its value.
  Status intersect(const Status &S) const {
    unsigned NewMask = (Mask & S.Mask) & (Mode ^ ~S.Mode);
    unsigned NewMode = (Mode & NewMask);
    return Status(NewMask, NewMode);
  }

  // Produce the delta required to change the Mode to the required Mode: the
  // bits of S that either differ from this Status or are not known here.
  Status delta(const Status &S) const {
    return Status((S.Mask & (Mode ^ S.Mode)) | (~Mask & S.Mask), S.Mode);
  }

  bool operator==(const Status &S) const {
    return (Mask == S.Mask) && (Mode == S.Mode);
  }

  bool operator!=(const Status &S) const { return !(*this == S); }

  // True when every bit known by S is also known here with the same value.
  bool isCompatible(const Status &S) const {
    return ((Mask & S.Mask) == S.Mask) && ((Mode & S.Mask) == S.Mode);
  }

  // True when S can be folded into this Status: either the two share no known
  // bits, or S is compatible with this Status.
  bool isCombinable(const Status &S) const {
    return !(Mask & S.Mask) || isCompatible(S);
  }
};
78 
79 class BlockData {
80 public:
81  // The Status that represents the mode register settings required by the
82  // FirstInsertionPoint (if any) in this block. Calculated in Phase 1.
84 
85  // The Status that represents the net changes to the Mode register made by
86  // this block, Calculated in Phase 1.
88 
89  // The Status that represents the mode register settings on exit from this
90  // block. Calculated in Phase 2.
92 
93  // The Status that represents the intersection of exit Mode register settings
94  // from all predecessor blocks. Calculated in Phase 2, and used by Phase 3.
96 
97  // In Phase 1 we record the first instruction that has a mode requirement,
98  // which is used in Phase 3 if we need to insert a mode change.
100 
101  // A flag to indicate whether an Exit value has been set (we can't tell by
102  // examining the Exit value itself as all values may be valid results).
103  bool ExitSet;
104 
105  BlockData() : FirstInsertionPoint(nullptr), ExitSet(false){};
106 };
107 
108 namespace {
109 
110 class SIModeRegister : public MachineFunctionPass {
111 public:
112  static char ID;
113 
114  std::vector<std::unique_ptr<BlockData>> BlockInfo;
115  std::queue<MachineBasicBlock *> Phase2List;
116 
117  // The default mode register setting currently only caters for the floating
118  // point double precision rounding mode.
119  // We currently assume the default rounding mode is Round to Nearest
120  // NOTE: this should come from a per function rounding mode setting once such
121  // a setting exists.
122  unsigned DefaultMode = FP_ROUND_ROUND_TO_NEAREST;
123  Status DefaultStatus =
124  Status(FP_ROUND_MODE_DP(0x3), FP_ROUND_MODE_DP(DefaultMode));
125 
126  bool Changed = false;
127 
128 public:
129  SIModeRegister() : MachineFunctionPass(ID) {}
130 
131  bool runOnMachineFunction(MachineFunction &MF) override;
132 
133  void getAnalysisUsage(AnalysisUsage &AU) const override {
134  AU.setPreservesCFG();
136  }
137 
138  void processBlockPhase1(MachineBasicBlock &MBB, const SIInstrInfo *TII);
139 
140  void processBlockPhase2(MachineBasicBlock &MBB, const SIInstrInfo *TII);
141 
142  void processBlockPhase3(MachineBasicBlock &MBB, const SIInstrInfo *TII);
143 
144  Status getInstructionMode(MachineInstr &MI, const SIInstrInfo *TII);
145 
146  void insertSetreg(MachineBasicBlock &MBB, MachineInstr *I,
147  const SIInstrInfo *TII, Status InstrMode);
148 };
149 } // End anonymous namespace.
150 
// Registers the pass under DEBUG_TYPE with its description; the two trailing
// arguments are the macro's cfg and analysis flags, both false here.
151 INITIALIZE_PASS(SIModeRegister, DEBUG_TYPE,
152  "Insert required mode register values", false, false)
153 
// Definition of the static ID member that the constructor passes to the
// MachineFunctionPass base.
154 char SIModeRegister::ID = 0;
155 
// Reference to the pass ID made available in the llvm namespace.
156 char &llvm::SIModeRegisterID = SIModeRegister::ID;
157 
// Factory returning a newly allocated SIModeRegister pass.
// NOTE(review): presumably the caller (pass manager) takes ownership - confirm.
158 FunctionPass *llvm::createSIModeRegisterPass() { return new SIModeRegister(); }
159 
160 // Determine the Mode register setting required for this instruction.
161 // Instructions which don't use the Mode register return a null Status.
162 // Note this currently only deals with instructions that use the floating point
163 // double precision setting.
164 Status SIModeRegister::getInstructionMode(MachineInstr &MI,
165  const SIInstrInfo *TII) {
166  if (TII->usesFPDPRounding(MI) ||
167  MI.getOpcode() == AMDGPU::FPTRUNC_UPWARD_PSEUDO ||
168  MI.getOpcode() == AMDGPU::FPTRUNC_DOWNWARD_PSEUDO) {
169  switch (MI.getOpcode()) {
170  case AMDGPU::V_INTERP_P1LL_F16:
171  case AMDGPU::V_INTERP_P1LV_F16:
172  case AMDGPU::V_INTERP_P2_F16:
173  // f16 interpolation instructions need double precision round to zero
174  return Status(FP_ROUND_MODE_DP(3),
176  case AMDGPU::FPTRUNC_UPWARD_PSEUDO: {
177  // Replacing the pseudo by a real instruction
178  MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
179  return Status(FP_ROUND_MODE_DP(3),
181  }
182  case AMDGPU::FPTRUNC_DOWNWARD_PSEUDO: {
183  // Replacing the pseudo by a real instruction
184  MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
185  return Status(FP_ROUND_MODE_DP(3),
187  }
188  default:
189  return DefaultStatus;
190  }
191  }
192  return Status();
193 }
194 
195 // Insert a setreg instruction to update the Mode register.
196 // It is possible (though unlikely) for an instruction to require a change to
197 // the value of disjoint parts of the Mode register when we don't know the
198 // value of the intervening bits. In that case we need to use more than one
199 // setreg instruction.
200 void SIModeRegister::insertSetreg(MachineBasicBlock &MBB, MachineInstr *MI,
201  const SIInstrInfo *TII, Status InstrMode) {
202  while (InstrMode.Mask) {
203  unsigned Offset = countTrailingZeros<unsigned>(InstrMode.Mask);
204  unsigned Width = countTrailingOnes<unsigned>(InstrMode.Mask >> Offset);
205  unsigned Value = (InstrMode.Mode >> Offset) & ((1 << Width) - 1);
206  BuildMI(MBB, MI, nullptr, TII->get(AMDGPU::S_SETREG_IMM32_B32))
207  .addImm(Value)
209  (Offset << AMDGPU::Hwreg::OFFSET_SHIFT_) |
211  ++NumSetregInserted;
212  Changed = true;
213  InstrMode.Mask &= ~(((1 << Width) - 1) << Offset);
214  }
215 }
216 
217 // In Phase 1 we iterate through the instructions of the block and for each
218 // instruction we get its mode usage. If the instruction uses the Mode register
219 // we:
220 // - update the Change status, which tracks the changes to the Mode register
221 // made by this block
222 // - if this instruction's requirements are compatible with the current setting
223 // of the Mode register we merge the modes
224 // - if it isn't compatible and an InsertionPoint isn't set, then we set the
225 // InsertionPoint to the current instruction, and we remember the current
226 // mode
227 // - if it isn't compatible and InsertionPoint is set we insert a seteg before
228 // that instruction (unless this instruction forms part of the block's
229 // entry requirements in which case the insertion is deferred until Phase 3
230 // when predecessor exit values are known), and move the insertion point to
231 // this instruction
232 // - if this is a setreg instruction we treat it as an incompatible instruction.
233 // This is sub-optimal but avoids some nasty corner cases, and is expected to
234 // occur very rarely.
235 // - on exit we have set the Require, Change, and initial Exit modes.
236 void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB,
237  const SIInstrInfo *TII) {
238  auto NewInfo = std::make_unique<BlockData>();
239  MachineInstr *InsertionPoint = nullptr;
240  // RequirePending is used to indicate whether we are collecting the initial
241  // requirements for the block, and need to defer the first InsertionPoint to
242  // Phase 3. It is set to false once we have set FirstInsertionPoint, or when
243  // we discover an explicit setreg that means this block doesn't have any
244  // initial requirements.
245  bool RequirePending = true;
246  Status IPChange;
247  for (MachineInstr &MI : MBB) {
248  Status InstrMode = getInstructionMode(MI, TII);
249  if (MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
250  MI.getOpcode() == AMDGPU::S_SETREG_B32_mode ||
251  MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
252  MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {
253  // We preserve any explicit mode register setreg instruction we encounter,
254  // as we assume it has been inserted by a higher authority (this is
255  // likely to be a very rare occurrence).
256  unsigned Dst = TII->getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm();
259  continue;
260 
261  unsigned Width = ((Dst & AMDGPU::Hwreg::WIDTH_M1_MASK_) >>
263  1;
264  unsigned Offset =
266  unsigned Mask = ((1 << Width) - 1) << Offset;
267 
268  // If an InsertionPoint is set we will insert a setreg there.
269  if (InsertionPoint) {
270  insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change));
271  InsertionPoint = nullptr;
272  }
273  // If this is an immediate then we know the value being set, but if it is
274  // not an immediate then we treat the modified bits of the mode register
275  // as unknown.
276  if (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
277  MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {
278  unsigned Val = TII->getNamedOperand(MI, AMDGPU::OpName::imm)->getImm();
279  unsigned Mode = (Val << Offset) & Mask;
280  Status Setreg = Status(Mask, Mode);
281  // If we haven't already set the initial requirements for the block we
282  // don't need to as the requirements start from this explicit setreg.
283  RequirePending = false;
284  NewInfo->Change = NewInfo->Change.merge(Setreg);
285  } else {
286  NewInfo->Change = NewInfo->Change.mergeUnknown(Mask);
287  }
288  } else if (!NewInfo->Change.isCompatible(InstrMode)) {
289  // This instruction uses the Mode register and its requirements aren't
290  // compatible with the current mode.
291  if (InsertionPoint) {
292  // If the required mode change cannot be included in the current
293  // InsertionPoint changes, we need a setreg and start a new
294  // InsertionPoint.
295  if (!IPChange.delta(NewInfo->Change).isCombinable(InstrMode)) {
296  if (RequirePending) {
297  // This is the first insertionPoint in the block so we will defer
298  // the insertion of the setreg to Phase 3 where we know whether or
299  // not it is actually needed.
300  NewInfo->FirstInsertionPoint = InsertionPoint;
301  NewInfo->Require = NewInfo->Change;
302  RequirePending = false;
303  } else {
304  insertSetreg(MBB, InsertionPoint, TII,
305  IPChange.delta(NewInfo->Change));
306  IPChange = NewInfo->Change;
307  }
308  // Set the new InsertionPoint
309  InsertionPoint = &MI;
310  }
311  NewInfo->Change = NewInfo->Change.merge(InstrMode);
312  } else {
313  // No InsertionPoint is currently set - this is either the first in
314  // the block or we have previously seen an explicit setreg.
315  InsertionPoint = &MI;
316  IPChange = NewInfo->Change;
317  NewInfo->Change = NewInfo->Change.merge(InstrMode);
318  }
319  }
320  }
321  if (RequirePending) {
322  // If we haven't yet set the initial requirements for the block we set them
323  // now.
324  NewInfo->FirstInsertionPoint = InsertionPoint;
325  NewInfo->Require = NewInfo->Change;
326  } else if (InsertionPoint) {
327  // We need to insert a setreg at the InsertionPoint
328  insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change));
329  }
330  NewInfo->Exit = NewInfo->Change;
331  BlockInfo[MBB.getNumber()] = std::move(NewInfo);
332 }
333 
334 // In Phase 2 we revisit each block and calculate the common Mode register
335 // value provided by all predecessor blocks. If the Exit value for the block
336 // is changed, then we add the successor blocks to the worklist so that the
337 // exit value is propagated.
338 void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB,
339  const SIInstrInfo *TII) {
340  bool RevisitRequired = false;
341  bool ExitSet = false;
342  unsigned ThisBlock = MBB.getNumber();
343  if (MBB.pred_empty()) {
344  // There are no predecessors, so use the default starting status.
345  BlockInfo[ThisBlock]->Pred = DefaultStatus;
346  ExitSet = true;
347  } else {
348  // Build a status that is common to all the predecessors by intersecting
349  // all the predecessor exit status values.
350  // Mask bits (which represent the Mode bits with a known value) can only be
351  // added by explicit SETREG instructions or the initial default value -
352  // the intersection process may remove Mask bits.
353  // If we find a predecessor that has not yet had an exit value determined
354  // (this can happen for example if a block is its own predecessor) we defer
355  // use of that value as the Mask will be all zero, and we will revisit this
356  // block again later (unless the only predecessor without an exit value is
357  // this block).
359  MachineBasicBlock &PB = *(*P);
360  unsigned PredBlock = PB.getNumber();
361  if ((ThisBlock == PredBlock) && (std::next(P) == E)) {
362  BlockInfo[ThisBlock]->Pred = DefaultStatus;
363  ExitSet = true;
364  } else if (BlockInfo[PredBlock]->ExitSet) {
365  BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
366  ExitSet = true;
367  } else if (PredBlock != ThisBlock)
368  RevisitRequired = true;
369 
370  for (P = std::next(P); P != E; P = std::next(P)) {
371  MachineBasicBlock *Pred = *P;
372  unsigned PredBlock = Pred->getNumber();
373  if (BlockInfo[PredBlock]->ExitSet) {
374  if (BlockInfo[ThisBlock]->ExitSet) {
375  BlockInfo[ThisBlock]->Pred =
376  BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[PredBlock]->Exit);
377  } else {
378  BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
379  }
380  ExitSet = true;
381  } else if (PredBlock != ThisBlock)
382  RevisitRequired = true;
383  }
384  }
385  Status TmpStatus =
386  BlockInfo[ThisBlock]->Pred.merge(BlockInfo[ThisBlock]->Change);
387  if (BlockInfo[ThisBlock]->Exit != TmpStatus) {
388  BlockInfo[ThisBlock]->Exit = TmpStatus;
389  // Add the successors to the work list so we can propagate the changed exit
390  // status.
391  for (MachineBasicBlock *Succ : MBB.successors())
392  Phase2List.push(Succ);
393  }
394  BlockInfo[ThisBlock]->ExitSet = ExitSet;
395  if (RevisitRequired)
396  Phase2List.push(&MBB);
397 }
398 
399 // In Phase 3 we revisit each block and if it has an insertion point defined we
400 // check whether the predecessor mode meets the block's entry requirements. If
401 // not we insert an appropriate setreg instruction to modify the Mode register.
402 void SIModeRegister::processBlockPhase3(MachineBasicBlock &MBB,
403  const SIInstrInfo *TII) {
404  unsigned ThisBlock = MBB.getNumber();
405  if (!BlockInfo[ThisBlock]->Pred.isCompatible(BlockInfo[ThisBlock]->Require)) {
406  Status Delta =
407  BlockInfo[ThisBlock]->Pred.delta(BlockInfo[ThisBlock]->Require);
408  if (BlockInfo[ThisBlock]->FirstInsertionPoint)
409  insertSetreg(MBB, BlockInfo[ThisBlock]->FirstInsertionPoint, TII, Delta);
410  else
411  insertSetreg(MBB, &MBB.instr_front(), TII, Delta);
412  }
413 }
414 
415 bool SIModeRegister::runOnMachineFunction(MachineFunction &MF) {
416  BlockInfo.resize(MF.getNumBlockIDs());
417  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
418  const SIInstrInfo *TII = ST.getInstrInfo();
419 
420  // Processing is performed in a number of phases
421 
422  // Phase 1 - determine the initial mode required by each block, and add setreg
423  // instructions for intra block requirements.
424  for (MachineBasicBlock &BB : MF)
425  processBlockPhase1(BB, TII);
426 
427  // Phase 2 - determine the exit mode from each block. We add all blocks to the
428  // list here, but will also add any that need to be revisited during Phase 2
429  // processing.
430  for (MachineBasicBlock &BB : MF)
431  Phase2List.push(&BB);
432  while (!Phase2List.empty()) {
433  processBlockPhase2(*Phase2List.front(), TII);
434  Phase2List.pop();
435  }
436 
437  // Phase 3 - add an initial setreg to each block where the required entry mode
438  // is not satisfied by the exit mode of all its predecessors.
439  for (MachineBasicBlock &BB : MF)
440  processBlockPhase3(BB, TII);
441 
442  BlockInfo.clear();
443 
444  return Changed;
445 }
FP_ROUND_ROUND_TO_NEAREST
#define FP_ROUND_ROUND_TO_NEAREST
Definition: SIDefines.h:1020
BlockData::Change
Status Change
Definition: SIModeRegister.cpp:87
llvm::MachineBasicBlock::pred_begin
pred_iterator pred_begin()
Definition: MachineBasicBlock.h:326
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:104
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
Status::mergeUnknown
Status mergeUnknown(unsigned newMask)
Definition: SIModeRegister.cpp:49
llvm::AMDGPU::Hwreg::ID_SHIFT_
@ ID_SHIFT_
Definition: SIDefines.h:411
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Statistic.h
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::MachineFunction::getNumBlockIDs
unsigned getNumBlockIDs() const
getNumBlockIDs - Return the number of MBB ID's allocated.
Definition: MachineFunction.h:798
llvm::AMDGPU::Hwreg::ID_MASK_
@ ID_MASK_
Definition: SIDefines.h:413
BlockData::BlockData
BlockData()
Definition: SIModeRegister.cpp:105
BlockData::Pred
Status Pred
Definition: SIModeRegister.cpp:95
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:103
FP_ROUND_ROUND_TO_ZERO
#define FP_ROUND_ROUND_TO_ZERO
Definition: SIDefines.h:1023
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
DEBUG_TYPE
#define DEBUG_TYPE
Definition: SIModeRegister.cpp:23
x3
In x86 we generate this spiffy xmm0 xmm0 ret in x86 we generate this which could be xmm1 movss xmm1 xmm0 ret In sse4 we could use insertps to make both better Here s another testcase that could use x3
Definition: README-SSE.txt:547
GCNSubtarget.h
llvm::AMDGPU::Hwreg::ID_MODE
@ ID_MODE
Definition: SIDefines.h:386
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
INITIALIZE_PASS
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:37
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
BlockData::FirstInsertionPoint
MachineInstr * FirstInsertionPoint
Definition: SIModeRegister.cpp:99
false
Definition: StackSlotColoring.cpp:141
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125
STATISTIC
STATISTIC(NumSetregInserted, "Number of setreg of mode register inserted.")
BlockData::Exit
Status Exit
Definition: SIModeRegister.cpp:91
Status::operator==
bool operator==(const Status &S) const
Definition: SIModeRegister.cpp:66
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
llvm::AMDGPU::Hwreg::OFFSET_MASK_
@ OFFSET_MASK_
Definition: SIDefines.h:420
llvm::AMDGPU::Hwreg::WIDTH_M1_SHIFT_
@ WIDTH_M1_SHIFT_
Definition: SIDefines.h:430
PB
PassBuilder PB(Machine, PassOpts->PTO, None, &PIC)
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:656
BlockData::Require
Status Require
Definition: SIModeRegister.cpp:83
FP_ROUND_ROUND_TO_INF
#define FP_ROUND_ROUND_TO_INF
Definition: SIDefines.h:1021
AMDGPUMCTargetDesc.h
llvm::MachineBasicBlock::pred_end
pred_iterator pred_end()
Definition: MachineBasicBlock.h:328
llvm::AMDGPU::Hwreg::Offset
Offset
Definition: SIDefines.h:416
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
I
#define I(x, y, z)
Definition: MD5.cpp:58
MachineFunctionPass.h
llvm::MachineBasicBlock::pred_iterator
std::vector< MachineBasicBlock * >::iterator pred_iterator
Definition: MachineBasicBlock.h:314
FP_ROUND_ROUND_TO_NEGINF
#define FP_ROUND_ROUND_TO_NEGINF
Definition: SIDefines.h:1022
Mode
SI Whole Quad Mode
Definition: SIWholeQuadMode.cpp:262
llvm::SIModeRegisterID
char & SIModeRegisterID
llvm::MachineBasicBlock::pred_empty
bool pred_empty() const
Definition: MachineBasicBlock.h:341
Status
Definition: SIModeRegister.cpp:29
llvm::MachineFunction
Definition: MachineFunction.h:257
Status::operator!=
bool operator!=(const Status &S) const
Definition: SIModeRegister.cpp:70
llvm::MachineBasicBlock::getNumber
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
Definition: MachineBasicBlock.h:1088
llvm::MachineBasicBlock::successors
iterator_range< succ_iterator > successors()
Definition: MachineBasicBlock.h:365
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:263
AMDGPU.h
Status::Status
Status()
Definition: SIModeRegister.cpp:35
llvm::AMDGPU::Hwreg::OFFSET_SHIFT_
@ OFFSET_SHIFT_
Definition: SIDefines.h:418
Status::Mode
unsigned Mode
Definition: SIModeRegister.cpp:33
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::MachineBasicBlock::instr_front
MachineInstr & instr_front()
Definition: MachineBasicBlock.h:252
FP_ROUND_MODE_DP
#define FP_ROUND_MODE_DP(x)
Definition: SIDefines.h:1028
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
Status::merge
Status merge(const Status &S) const
Definition: SIModeRegister.cpp:43
Status::delta
Status delta(const Status &S) const
Definition: SIModeRegister.cpp:62
Status::intersect
Status intersect(const Status &S) const
Definition: SIModeRegister.cpp:55
llvm::createSIModeRegisterPass
FunctionPass * createSIModeRegisterPass()
Definition: SIModeRegister.cpp:158
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:439
llvm::AMDGPU::Hwreg::WIDTH_M1_MASK_
@ WIDTH_M1_MASK_
Definition: SIDefines.h:432
llvm::SIInstrInfo
Definition: SIInstrInfo.h:44
Status::Status
Status(unsigned NewMask, unsigned NewMode)
Definition: SIModeRegister.cpp:37
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
Status::isCombinable
bool isCombinable(Status &S)
Definition: SIModeRegister.cpp:76
Status::isCompatible
bool isCompatible(Status &S)
Definition: SIModeRegister.cpp:72
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
Status::Mask
unsigned Mask
Definition: SIModeRegister.cpp:32
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
BlockData::ExitSet
bool ExitSet
Definition: SIModeRegister.cpp:103
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38