LLVM 20.0.0git
NVPTXPeephole.cpp
Go to the documentation of this file.
1//===-- NVPTXPeephole.cpp - NVPTX Peephole Optimizations ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// In NVPTX, NVPTXFrameLowering emits the following instructions at the
10// beginning of a MachineFunction.
11//
12// mov %SPL, %depot
13// cvta.local %SP, %SPL
14//
15// Because a Frame Index is a generic address and alloca can only return a
16// generic pointer, without this pass the instructions producing alloca'ed
17// addresses will be based on %SP. NVPTXLowerAlloca tends to help replace
18// stores and loads on these addresses with their .local versions, but this
19// may introduce a lot of cvta.to.local instructions. Performance can be
20// improved if we avoid casting addresses back and forth and directly
21// calculate local addresses based on %SPL, which is what this pass does.
22//
23// For example, it will transform the following pattern
24// %0 = LEA_ADDRi64 %VRFrame64, 4
25// %1 = cvta_to_local_64 %0
26//
27// into
28// %1 = LEA_ADDRi64 %VRFrameLocal64, 4
29//
30// %VRFrameLocal64 is the virtual register name of %SPL
31//
32//===----------------------------------------------------------------------===//
33
34#include "NVPTX.h"
35#include "NVPTXRegisterInfo.h"
36#include "NVPTXSubtarget.h"
42
43using namespace llvm;
44
45#define DEBUG_TYPE "nvptx-peephole"
46
47namespace llvm {
49}
50
// File-local (anonymous namespace) definition of the peephole pass.
51namespace {
// MachineFunctionPass subclass that implements the cvta.to.local peephole
// described in the file header. The transformation itself is declared here
// as runOnMachineFunction and defined out of line.
52struct NVPTXPeephole : public MachineFunctionPass {
53 public:
// Pass identifier; defined after the class and passed to the
// MachineFunctionPass base constructor below.
54 static char ID;
55 NVPTXPeephole() : MachineFunctionPass(ID) {
// NOTE(review): the listing numbering jumps from 55 to 57, so a constructor
// body line appears to be missing from this copy - confirm against upstream.
57 }
58
// Entry point invoked per MachineFunction; returns whether anything changed.
59 bool runOnMachineFunction(MachineFunction &MF) override;
60
// Human-readable pass name.
61 StringRef getPassName() const override {
62 return "NVPTX optimize redundant cvta.to.local instruction";
63 }
64
// Analysis requirements of this pass.
// NOTE(review): the listing numbering jumps from 65 to 67, so the body line
// is not visible here; do not assume the override is really empty.
65 void getAnalysisUsage(AnalysisUsage &AU) const override {
67 }
68};
69}
70
// Out-of-line definition of the static pass identifier, initialized to 0.
71char NVPTXPeephole::ID = 0;
72
// Expands INITIALIZE_PASS for NVPTXPeephole with arg "nvptx-peephole", name
// "NVPTX Peephole", and both the cfg and analysis flags set to false
// (parameter names as given by the macro signature
// INITIALIZE_PASS(passName, arg, name, cfg, analysis)).
73INITIALIZE_PASS(NVPTXPeephole, "nvptx-peephole", "NVPTX Peephole", false, false)
74
// NOTE(review): the signature line (listing line 75) is missing from this
// copy. The reference index at the end of this file lists it as
//   static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root)
// - confirm against upstream.
//
// Returns true only when all of the following hold for Root:
//   * its opcode is cvta_to_local or cvta_to_local_64;
//   * its operand 1 is a virtual register with a unique defining instruction;
//   * that defining instruction is in the same basic block as Root and is a
//     LEA_ADDRi or LEA_ADDRi64;
//   * operand 1 of that LEA_ADDRi* is the function frame register.
// Otherwise returns false. Nothing is modified.
76 auto &MBB = *Root.getParent();
77 auto &MF = *MBB.getParent();
78 // Bail out unless Root is one of the two cvta.to.local opcodes.
79 if (Root.getOpcode() != NVPTX::cvta_to_local_64 &&
80 Root.getOpcode() != NVPTX::cvta_to_local)
81 return false;
82
// Operand 1 is the address being converted; look up its unique definition,
// but only when it is a virtual register. GenericAddrDef stays null otherwise.
83 auto &Op = Root.getOperand(1);
84 const auto &MRI = MF.getRegInfo();
85 MachineInstr *GenericAddrDef = nullptr;
86 if (Op.isReg() && Op.getReg().isVirtual()) {
87 GenericAddrDef = MRI.getUniqueVRegDef(Op.getReg());
88 }
89
90 // Require a unique defining instruction that lives in the same basic block
// as Root and is a LEA_ADDRi / LEA_ADDRi64.
91 if (!GenericAddrDef || GenericAddrDef->getParent() != &MBB ||
92 (GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi64 &&
93 GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi)) {
94 return false;
95 }
96
97 const NVPTXRegisterInfo *NRI =
98 MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo();
99
100 // Require operand 1 of the LEA_ADDRi* to be the frame register.
101 auto &BaseAddrOp = GenericAddrDef->getOperand(1);
102 if (BaseAddrOp.isReg() && BaseAddrOp.getReg() == NRI->getFrameRegister(MF)) {
103 return true;
104 }
105
106 return false;
107}
108
// NOTE(review): the signature line (listing line 109) is missing from this
// copy. The reference index at the end of this file lists it as
//   static void CombineCVTAToLocal(MachineInstr &Root)
// Listing lines 119, 122 and 125 are also absent, so the instruction-building
// sequence below is incomplete as shown - confirm against upstream.
//
// Builds a replacement instruction from Root and the instruction defining its
// operand 1, then erases Root (and that defining instruction when it has no
// other non-debug use).
110 auto &MBB = *Root.getParent();
111 auto &MF = *MBB.getParent();
112 const auto &MRI = MF.getRegInfo();
113 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
// Prev is the unique definition of Root operand 1. The result of
// getUniqueVRegDef is dereferenced without a null check; presumably callers
// only pass instructions accepted by the candidate check - TODO confirm.
114 auto &Prev = *MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
115
// NRI is not referenced in the lines visible here; presumably it is used on
// one of the missing listing lines - TODO confirm.
116 const NVPTXRegisterInfo *NRI =
117 MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo();
118
// Build an instruction with the same opcode as Prev, carrying Root's debug
// location, passing the register of Root operand 0 to BuildMI, and appending
// a copy of Prev operand 2.
120 BuildMI(MF, Root.getDebugLoc(), TII->get(Prev.getOpcode()),
121 Root.getOperand(0).getReg())
123 .add(Prev.getOperand(2));
124
126
127 // Erase Prev only when the register it defines (operand 0) has a single
// non-debug use; presumably that use is Root.
128 if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg())) {
129 Prev.eraseFromParent();
130 }
// Root is always removed.
131 Root.eraseFromParent();
132}
133
// Runs the peephole over every instruction of every basic block in MF, then
// removes the definition of the frame register if that register has no uses
// left. Returns the Changed flag.
134bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) {
// Return early, reporting no change, when skipFunction says to skip.
135 if (skipFunction(MF.getFunction()))
136 return false;
137
138 bool Changed = false;
139 // Visit every basic block of the function.
140 for (auto &MBB : MF) {
141 // Walk the instructions of this block with an explicit iterator.
142 auto BlockIter = MBB.begin();
143
144 while (BlockIter != MBB.end()) {
// The iterator is advanced before MI is used, presumably so that erasing MI
// in the loop body does not invalidate it - TODO confirm.
145 auto &MI = *BlockIter++;
// NOTE(review): listing lines 146-147 are missing from this copy; the
// assignment and closing brace below belong to a conditional whose header is
// not visible here.
148 Changed = true;
149 }
150 } // Instruction
151 } // Basic Block
152
153 const NVPTXRegisterInfo *NRI =
154 MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo();
155
156 // Remove the now-unnecessary %VRFrame = cvta.local %VRFrameLocal: if the
// frame register has no uses, erase its unique defining instruction.
// NOTE(review): this erase modifies the function but does not set Changed,
// so the pass can return false after deleting an instruction - confirm
// whether that is intended.
157 const auto &MRI = MF.getRegInfo();
158 if (MRI.use_empty(NRI->getFrameRegister(MF))) {
159 if (auto MI = MRI.getUniqueVRegDef(NRI->getFrameRegister(MF))) {
160 MI->eraseFromParent();
161 }
162 }
163
164 return Changed;
165}
166
// Factory function: returns a pointer to a newly allocated NVPTXPeephole pass.
167MachineFunctionPass *llvm::createNVPTXPeephole() { return new NVPTXPeephole(); }
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock & MBB
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root)
static void CombineCVTAToLocal(MachineInstr &Root)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
Represent the analysis usage information of a pass.
This class represents an Operation in the Expression.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:575
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:347
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:499
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
Register getReg() const
getReg - Returns the register number.
Register getFrameLocalRegister(const MachineFunction &MF) const
Register getFrameRegister(const MachineFunction &MF) const override
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
Definition: PassRegistry.h:37
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
TargetInstrInfo - Interface to description of machine instruction set.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
MachineFunctionPass * createNVPTXPeephole()
void initializeNVPTXPeepholePass(PassRegistry &)