LLVM  9.0.0svn
SIAddIMGInit.cpp
Go to the documentation of this file.
//===-- SIAddIMGInit.cpp - Add any required IMG inits ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Any MIMG instructions that use tfe or lwe require an initialization of the
/// result register that will be written in the case of a memory access failure.
/// The required code is also added to tie this init code to the result of the
/// img instruction.
///
//===----------------------------------------------------------------------===//
//
17 
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
28 
29 #define DEBUG_TYPE "si-img-init"
30 
31 using namespace llvm;
32 
33 namespace {
34 
35 class SIAddIMGInit : public MachineFunctionPass {
36 public:
37  static char ID;
38 
39 public:
40  SIAddIMGInit() : MachineFunctionPass(ID) {
42  }
43 
44  bool runOnMachineFunction(MachineFunction &MF) override;
45 
46  void getAnalysisUsage(AnalysisUsage &AU) const override {
47  AU.setPreservesCFG();
49  }
50 };
51 
52 } // End anonymous namespace.
53 
54 INITIALIZE_PASS(SIAddIMGInit, DEBUG_TYPE, "SI Add IMG Init", false, false)
55 
56 char SIAddIMGInit::ID = 0;
57 
59 
60 FunctionPass *llvm::createSIAddIMGInitPass() { return new SIAddIMGInit(); }
61 
62 bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) {
65  const SIInstrInfo *TII = ST.getInstrInfo();
66  const SIRegisterInfo *RI = ST.getRegisterInfo();
67  bool Changed = false;
68 
69  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
70  ++BI) {
71  MachineBasicBlock &MBB = *BI;
73  for (I = MBB.begin(); I != MBB.end(); I = Next) {
74  Next = std::next(I);
75  MachineInstr &MI = *I;
76 
77  auto Opcode = MI.getOpcode();
78  if (TII->isMIMG(Opcode) && !MI.mayStore()) {
79  MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe);
80  MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe);
81  MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16);
82 
83  // Check for instructions that don't have tfe or lwe fields
84  // There shouldn't be any at this point.
85  assert( (TFE && LWE) && "Expected tfe and lwe operands in instruction");
86 
87  unsigned TFEVal = TFE->getImm();
88  unsigned LWEVal = LWE->getImm();
89  unsigned D16Val = D16 ? D16->getImm() : 0;
90 
91  if (TFEVal || LWEVal) {
92  // At least one of TFE or LWE are non-zero
93  // We have to insert a suitable initialization of the result value and
94  // tie this to the dest of the image instruction.
95 
96  const DebugLoc &DL = MI.getDebugLoc();
97 
98  int DstIdx =
99  AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata);
100 
101  // Calculate which dword we have to initialize to 0.
102  MachineOperand *MO_Dmask =
103  TII->getNamedOperand(MI, AMDGPU::OpName::dmask);
104 
105  // check that dmask operand is found.
106  assert(MO_Dmask && "Expected dmask operand in instruction");
107 
108  unsigned dmask = MO_Dmask->getImm();
109  // Determine the number of active lanes taking into account the
110  // Gather4 special case
111  unsigned ActiveLanes =
112  TII->isGather4(Opcode) ? 4 : countPopulation(dmask);
113 
114  // Subreg indices are counted from 1
115  // When D16 then we want next whole VGPR after write data.
116  static_assert(AMDGPU::sub0 == 1 && AMDGPU::sub4 == 5, "Subreg indices different from expected");
117 
118  bool Packed = !ST.hasUnpackedD16VMem();
119 
120  unsigned InitIdx =
121  D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1;
122 
123  // Abandon attempt if the dst size isn't large enough
124  // - this is in fact an error but this is picked up elsewhere and
125  // reported correctly.
126  uint32_t DstSize =
127  RI->getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32;
128  if (DstSize < InitIdx)
129  continue;
130 
131  // Create a register for the intialization value.
132  unsigned PrevDst =
133  MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
134  unsigned NewDst = 0; // Final initialized value will be in here
135 
136  // If PRTStrictNull feature is enabled (the default) then initialize
137  // all the result registers to 0, otherwise just the error indication
138  // register (VGPRn+1)
139  unsigned SizeLeft = ST.usePRTStrictNull() ? InitIdx : 1;
140  unsigned CurrIdx = ST.usePRTStrictNull() ? 1 : InitIdx;
141 
142  if (DstSize == 1) {
143  // In this case we can just initialize the result directly
144  BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), PrevDst)
145  .addImm(0);
146  NewDst = PrevDst;
147  } else {
148  BuildMI(MBB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), PrevDst);
149  for (; SizeLeft; SizeLeft--, CurrIdx++) {
150  NewDst =
151  MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
152  // Initialize dword
153  unsigned SubReg =
154  MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
155  BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg)
156  .addImm(0);
157  // Insert into the super-reg
158  BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst)
159  .addReg(PrevDst)
160  .addReg(SubReg)
161  .addImm(CurrIdx);
162 
163  PrevDst = NewDst;
164  }
165  }
166 
167  // Add as an implicit operand
169 
170  // Tie the just added implicit operand to the dst
171  MI.tieOperands(DstIdx, MI.getNumOperands() - 1);
172 
173  Changed = true;
174  }
175  }
176  }
177  }
178 
179  return Changed;
180 }
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
AMDGPU specific subclass of TargetSubtarget.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:382
const SIInstrInfo * getInstrInfo() const override
A debug info location.
Definition: DebugLoc.h:33
FunctionPass * createSIAddIMGInitPass()
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:411
unsigned SubReg
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:408
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1165
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:819
unsigned const MachineRegisterInfo * MRI
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
bool usePRTStrictNull() const
Represent the analysis usage information of a pass.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
void initializeSIAddIMGInitPass(PassRegistry &)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:33
bool hasUnpackedD16VMem() const
Iterator for intrusive lists based on ilist_node.
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:519
MachineOperand class - Representation of each machine instruction operand.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:285
int64_t getImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
Definition: MachineInstr.h:63
Interface definition for SIInstrInfo.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
#define I(x, y, z)
Definition: MD5.cpp:58
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
IRTranslator LLVM IR MI
#define DEBUG_TYPE
char & SIAddIMGInitID
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
const SIRegisterInfo * getRegisterInfo() const override