LLVM  6.0.0svn
NVPTXPeephole.cpp
Go to the documentation of this file.
1 //===-- NVPTXPeephole.cpp - NVPTX Peephole Optimizations ------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // In NVPTX, NVPTXFrameLowering will emit the following instructions at the
11 // beginning of a MachineFunction.
12 //
13 // mov %SPL, %depot
14 // cvta.local %SP, %SPL
15 //
16 // Because a frame index is a generic address and alloca can only return a
17 // generic pointer, without this pass the instructions producing an alloca'ed
18 // address will be based on %SP. NVPTXLowerAlloca tends to help replace stores
19 // and loads on this address with their .local versions, but this may introduce
20 // a lot of cvta.to.local instructions. Performance can be improved if we avoid
21 // casting the address back and forth and directly calculate the local address
22 // based on %SPL. This peephole pass optimizes these cases. For example:
23 //
24 // It will transform the following pattern
25 // %vreg0<def> = LEA_ADDRi64 %VRFrame, 4
26 // %vreg1<def> = cvta_to_local_yes_64 %vreg0
27 //
28 // into
29 // %vreg1<def> = LEA_ADDRi64 %VRFrameLocal, 4
30 //
31 // %VRFrameLocal is the virtual register name of %SPL
32 //
33 //===----------------------------------------------------------------------===//
34 
35 #include "NVPTX.h"
41 
42 using namespace llvm;
43 
44 #define DEBUG_TYPE "nvptx-peephole"
45 
46 namespace llvm {
48 }
49 
50 namespace {
51 struct NVPTXPeephole : public MachineFunctionPass {
52  public:
53  static char ID;
54  NVPTXPeephole() : MachineFunctionPass(ID) {
56  }
57 
58  bool runOnMachineFunction(MachineFunction &MF) override;
59 
60  StringRef getPassName() const override {
61  return "NVPTX optimize redundant cvta.to.local instruction";
62  }
63 
64  void getAnalysisUsage(AnalysisUsage &AU) const override {
66  }
67 };
68 }
69 
70 char NVPTXPeephole::ID = 0;
71 
72 INITIALIZE_PASS(NVPTXPeephole, "nvptx-peephole", "NVPTX Peephole", false, false)
73 
75  auto &MBB = *Root.getParent();
76  auto &MF = *MBB.getParent();
77  // Check current instruction is cvta.to.local
78  if (Root.getOpcode() != NVPTX::cvta_to_local_yes_64 &&
79  Root.getOpcode() != NVPTX::cvta_to_local_yes)
80  return false;
81 
82  auto &Op = Root.getOperand(1);
83  const auto &MRI = MF.getRegInfo();
84  MachineInstr *GenericAddrDef = nullptr;
85  if (Op.isReg() && TargetRegisterInfo::isVirtualRegister(Op.getReg())) {
86  GenericAddrDef = MRI.getUniqueVRegDef(Op.getReg());
87  }
88 
89  // Check the register operand is uniquely defined by LEA_ADDRi instruction
90  if (!GenericAddrDef || GenericAddrDef->getParent() != &MBB ||
91  (GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi64 &&
92  GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi)) {
93  return false;
94  }
95 
96  // Check the LEA_ADDRi operand is Frame index
97  auto &BaseAddrOp = GenericAddrDef->getOperand(1);
98  if (BaseAddrOp.isReg() && BaseAddrOp.getReg() == NVPTX::VRFrame) {
99  return true;
100  }
101 
102  return false;
103 }
104 
105 static void CombineCVTAToLocal(MachineInstr &Root) {
106  auto &MBB = *Root.getParent();
107  auto &MF = *MBB.getParent();
108  const auto &MRI = MF.getRegInfo();
109  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
110  auto &Prev = *MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
111 
112  MachineInstrBuilder MIB =
113  BuildMI(MF, Root.getDebugLoc(), TII->get(Prev.getOpcode()),
114  Root.getOperand(0).getReg())
115  .addReg(NVPTX::VRFrameLocal)
116  .add(Prev.getOperand(2));
117 
118  MBB.insert((MachineBasicBlock::iterator)&Root, MIB);
119 
120  // Check if MRI has only one non dbg use, which is Root
121  if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg())) {
122  Prev.eraseFromParentAndMarkDBGValuesForRemoval();
123  }
124  Root.eraseFromParentAndMarkDBGValuesForRemoval();
125 }
126 
127 bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) {
128  if (skipFunction(*MF.getFunction()))
129  return false;
130 
131  bool Changed = false;
132  // Loop over all of the basic blocks.
133  for (auto &MBB : MF) {
134  // Traverse the basic block.
135  auto BlockIter = MBB.begin();
136 
137  while (BlockIter != MBB.end()) {
138  auto &MI = *BlockIter++;
141  Changed = true;
142  }
143  } // Instruction
144  } // Basic Block
145 
146  // Remove unnecessary %VRFrame = cvta.local %VRFrameLocal
147  const auto &MRI = MF.getRegInfo();
148  if (MRI.use_empty(NVPTX::VRFrame)) {
149  if (auto MI = MRI.getUniqueVRegDef(NVPTX::VRFrame)) {
150  MI->eraseFromParentAndMarkDBGValuesForRemoval();
151  }
152  }
153 
154  return Changed;
155 }
156 
157 MachineFunctionPass *llvm::createNVPTXPeephole() { return new NVPTXPeephole(); }
const MachineInstrBuilder & add(const MachineOperand &MO) const
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static void CombineCVTAToLocal(MachineInstr &Root)
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:268
unsigned getReg() const
getReg - Returns the register number.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:290
TargetInstrInfo - Interface to description of machine instruction set.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root)
unsigned const MachineRegisterInfo * MRI
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Represent the analysis usage information of a pass.
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:34
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:139
Representation of each machine instruction.
Definition: MachineInstr.h:59
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineFunctionPass * createNVPTXPeephole()
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:45
void initializeNVPTXPeepholePass(PassRegistry &)
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
Definition: PassRegistry.h:39
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:295