LLVM 20.0.0git
MIRCanonicalizerPass.cpp
Go to the documentation of this file.
1//===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The purpose of this pass is to employ a canonical code transformation so
10// that code compiled with slightly different IR passes can be diffed more
11// effectively than otherwise. This is done by renaming vregs in a given
12// LiveRange in a canonical way. This pass also does a pseudo-scheduling to
13// move defs closer to their use inorder to reduce diffs caused by slightly
14// different schedules.
15//
16// Basic Usage:
17//
18// llc -o - -run-pass mir-canonicalizer example.mir
19//
20// Reorders instructions canonically.
21// Renames virtual register operands canonically.
22// Strips certain MIR artifacts (optionally).
23//
24//===----------------------------------------------------------------------===//
25
26#include "MIRVRegNamerUtils.h"
28#include "llvm/ADT/STLExtras.h"
32#include "llvm/Pass.h"
33#include "llvm/Support/Debug.h"
35
36using namespace llvm;
37
38#define DEBUG_TYPE "mir-canonicalizer"
39
41 CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u),
42 cl::value_desc("N"),
43 cl::desc("Function number to canonicalize."));
44
45namespace {
46
47class MIRCanonicalizer : public MachineFunctionPass {
48public:
49 static char ID;
50 MIRCanonicalizer() : MachineFunctionPass(ID) {}
51
52 StringRef getPassName() const override {
53 return "Rename register operands in a canonical ordering.";
54 }
55
56 void getAnalysisUsage(AnalysisUsage &AU) const override {
57 AU.setPreservesCFG();
59 }
60
61 bool runOnMachineFunction(MachineFunction &MF) override;
62};
63
64} // end anonymous namespace
65
66char MIRCanonicalizer::ID;
67
68char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID;
69
70INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer",
71 "Rename Register Operands Canonically", false, false)
72
73INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer",
75
77 if (MF.empty())
78 return {};
80 std::vector<MachineBasicBlock *> RPOList;
81 append_range(RPOList, RPOT);
82
83 return RPOList;
84}
85
86static bool
87rescheduleLexographically(std::vector<MachineInstr *> instructions,
89 std::function<MachineBasicBlock::iterator()> getPos) {
90
91 bool Changed = false;
92 using StringInstrPair = std::pair<std::string, MachineInstr *>;
93 std::vector<StringInstrPair> StringInstrMap;
94
95 for (auto *II : instructions) {
96 std::string S;
98 II->print(OS);
99 OS.flush();
100
101 // Trim the assignment, or start from the beginning in the case of a store.
102 const size_t i = S.find('=');
103 StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II});
104 }
105
106 llvm::sort(StringInstrMap, llvm::less_first());
107
108 for (auto &II : StringInstrMap) {
109
110 LLVM_DEBUG({
111 dbgs() << "Splicing ";
112 II.second->dump();
113 dbgs() << " right before: ";
114 getPos()->dump();
115 });
116
117 Changed = true;
118 MBB->splice(getPos(), MBB, II.second);
119 }
120
121 return Changed;
122}
123
124static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
126
127 bool Changed = false;
128
129 // Calculates the distance of MI from the beginning of its parent BB.
130 auto getInstrIdx = [](const MachineInstr &MI) {
131 unsigned i = 0;
132 for (const auto &CurMI : *MI.getParent()) {
133 if (&CurMI == &MI)
134 return i;
135 i++;
136 }
137 return ~0U;
138 };
139
140 // Pre-Populate vector of instructions to reschedule so that we don't
141 // clobber the iterator.
142 std::vector<MachineInstr *> Instructions;
143 for (auto &MI : *MBB) {
144 Instructions.push_back(&MI);
145 }
146
147 std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers;
148 std::map<unsigned, MachineInstr *> MultiUserLookup;
149 unsigned UseToBringDefCloserToCount = 0;
150 std::vector<MachineInstr *> PseudoIdempotentInstructions;
151 std::vector<unsigned> PhysRegDefs;
152 for (auto *II : Instructions) {
153 for (unsigned i = 1; i < II->getNumOperands(); i++) {
154 MachineOperand &MO = II->getOperand(i);
155 if (!MO.isReg())
156 continue;
157
158 if (MO.getReg().isVirtual())
159 continue;
160
161 if (!MO.isDef())
162 continue;
163
164 PhysRegDefs.push_back(MO.getReg());
165 }
166 }
167
168 for (auto *II : Instructions) {
169 if (II->getNumOperands() == 0)
170 continue;
171 if (II->mayLoadOrStore())
172 continue;
173
174 MachineOperand &MO = II->getOperand(0);
175 if (!MO.isReg() || !MO.getReg().isVirtual())
176 continue;
177 if (!MO.isDef())
178 continue;
179
180 bool IsPseudoIdempotent = true;
181 for (unsigned i = 1; i < II->getNumOperands(); i++) {
182
183 if (II->getOperand(i).isImm()) {
184 continue;
185 }
186
187 if (II->getOperand(i).isReg()) {
188 if (!II->getOperand(i).getReg().isVirtual())
189 if (!llvm::is_contained(PhysRegDefs, II->getOperand(i).getReg())) {
190 continue;
191 }
192 }
193
194 IsPseudoIdempotent = false;
195 break;
196 }
197
198 if (IsPseudoIdempotent) {
199 PseudoIdempotentInstructions.push_back(II);
200 continue;
201 }
202
203 LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump(););
204
205 MachineInstr *Def = II;
206 unsigned Distance = ~0U;
207 MachineInstr *UseToBringDefCloserTo = nullptr;
209 for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) {
210 MachineInstr *UseInst = UO.getParent();
211
212 const unsigned DefLoc = getInstrIdx(*Def);
213 const unsigned UseLoc = getInstrIdx(*UseInst);
214 const unsigned Delta = (UseLoc - DefLoc);
215
216 if (UseInst->getParent() != Def->getParent())
217 continue;
218 if (DefLoc >= UseLoc)
219 continue;
220
221 if (Delta < Distance) {
222 Distance = Delta;
223 UseToBringDefCloserTo = UseInst;
224 MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo;
225 }
226 }
227
228 const auto BBE = MBB->instr_end();
231
232 for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) {
233
234 if (DefI != BBE && UseI != BBE)
235 break;
236
237 if (&*BBI == Def) {
238 DefI = BBI;
239 continue;
240 }
241
242 if (&*BBI == UseToBringDefCloserTo) {
243 UseI = BBI;
244 continue;
245 }
246 }
247
248 if (DefI == BBE || UseI == BBE)
249 continue;
250
251 LLVM_DEBUG({
252 dbgs() << "Splicing ";
253 DefI->dump();
254 dbgs() << " right before: ";
255 UseI->dump();
256 });
257
258 MultiUsers[UseToBringDefCloserTo].push_back(Def);
259 Changed = true;
260 MBB->splice(UseI, MBB, DefI);
261 }
262
263 // Sort the defs for users of multiple defs lexographically.
264 for (const auto &E : MultiUserLookup) {
265
266 auto UseI = llvm::find_if(MBB->instrs(), [&](MachineInstr &MI) -> bool {
267 return &MI == E.second;
268 });
269
270 if (UseI == MBB->instr_end())
271 continue;
272
274 dbgs() << "Rescheduling Multi-Use Instructions Lexographically.");
275 Changed |= rescheduleLexographically(
276 MultiUsers[E.second], MBB,
277 [&]() -> MachineBasicBlock::iterator { return UseI; });
278 }
279
280 PseudoIdempotentInstCount = PseudoIdempotentInstructions.size();
281 LLVM_DEBUG(dbgs() << "Rescheduling Idempotent Instructions Lexographically.");
282 Changed |= rescheduleLexographically(
283 PseudoIdempotentInstructions, MBB,
284 [&]() -> MachineBasicBlock::iterator { return MBB->begin(); });
285
286 return Changed;
287}
288
290 bool Changed = false;
292
293 std::vector<MachineInstr *> Copies;
294 for (MachineInstr &MI : MBB->instrs()) {
295 if (MI.isCopy())
296 Copies.push_back(&MI);
297 }
298
299 for (MachineInstr *MI : Copies) {
300
301 if (!MI->getOperand(0).isReg())
302 continue;
303 if (!MI->getOperand(1).isReg())
304 continue;
305
306 const Register Dst = MI->getOperand(0).getReg();
307 const Register Src = MI->getOperand(1).getReg();
308
309 if (!Dst.isVirtual())
310 continue;
311 if (!Src.isVirtual())
312 continue;
313 // Not folding COPY instructions if regbankselect has not set the RCs.
314 // Why are we only considering Register Classes? Because the verifier
315 // sometimes gets upset if the register classes don't match even if the
316 // types do. A future patch might add COPY folding for matching types in
317 // pre-registerbankselect code.
318 if (!MRI.getRegClassOrNull(Dst))
319 continue;
320 if (MRI.getRegClass(Dst) != MRI.getRegClass(Src))
321 continue;
322
323 std::vector<MachineOperand *> Uses;
324 for (MachineOperand &MO : MRI.use_operands(Dst))
325 Uses.push_back(&MO);
326 for (auto *MO : Uses)
327 MO->setReg(Src);
328
329 Changed = true;
330 MI->eraseFromParent();
331 }
332
333 return Changed;
334}
335
337 bool Changed = false;
338
339 for (auto &MI : *MBB) {
340 for (auto &MO : MI.operands()) {
341 if (!MO.isReg())
342 continue;
343 if (!MO.isDef() && MO.isKill()) {
344 Changed = true;
345 MO.setIsKill(false);
346 }
347
348 if (MO.isDef() && MO.isDead()) {
349 Changed = true;
350 MO.setIsDead(false);
351 }
352 }
353 }
354
355 return Changed;
356}
357
359 unsigned BasicBlockNum, VRegRenamer &Renamer) {
360 LLVM_DEBUG({
361 dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n";
362 dbgs() << "\n\n================================================\n\n";
363 });
364
365 bool Changed = false;
366
367 LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n");
368
369 LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n";
370 MBB->dump(););
371 Changed |= propagateLocalCopies(MBB);
372 LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB->dump(););
373
374 LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump(););
375 unsigned IdempotentInstCount = 0;
376 Changed |= rescheduleCanonically(IdempotentInstCount, MBB);
377 LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
378
379 Changed |= Renamer.renameVRegs(MBB, BasicBlockNum);
380
381 // TODO: Consider dropping this. Dropping kill defs is probably not
382 // semantically sound.
383 Changed |= doDefKillClear(MBB);
384
385 LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump();
386 dbgs() << "\n");
388 dbgs() << "\n\n================================================\n\n");
389 return Changed;
390}
391
392bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
393
394 static unsigned functionNum = 0;
395 if (CanonicalizeFunctionNumber != ~0U) {
396 if (CanonicalizeFunctionNumber != functionNum++)
397 return false;
398 LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName()
399 << "\n";);
400 }
401
402 // we need a valid vreg to create a vreg type for skipping all those
403 // stray vreg numbers so reach alignment/canonical vreg values.
404 std::vector<MachineBasicBlock *> RPOList = GetRPOList(MF);
405
407 dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF.getName() << " \n\n";
408 dbgs() << "\n\n================================================\n\n";
409 dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n";
410 for (auto MBB
411 : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs()
412 << "\n\n================================================\n\n";);
413
414 unsigned BBNum = 0;
415 bool Changed = false;
417 VRegRenamer Renamer(MRI);
418 for (auto *MBB : RPOList)
419 Changed |= runOnBasicBlock(MBB, BBNum++, Renamer);
420
421 return Changed;
422}
unsigned const MachineRegisterInfo * MRI
AMDGPU promote alloca to vector or false DEBUG_TYPE to vector
MachineBasicBlock & MBB
Expand Atomic instructions
#define LLVM_DEBUG(...)
Definition: Debug.h:106
IRTranslator LLVM IR MI
static bool runOnBasicBlock(MachineBasicBlock *MBB, unsigned BasicBlockNum, VRegRenamer &Renamer)
static bool rescheduleLexographically(std::vector< MachineInstr * > instructions, MachineBasicBlock *MBB, std::function< MachineBasicBlock::iterator()> getPos)
static cl::opt< unsigned > CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u), cl::value_desc("N"), cl::desc("Function number to canonicalize."))
static bool doDefKillClear(MachineBasicBlock *MBB)
static bool propagateLocalCopies(MachineBasicBlock *MBB)
mir canonicalizer
mir Rename Register Operands Canonically
static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount, MachineBasicBlock *MBB)
mir Rename Register Operands static false std::vector< MachineBasicBlock * > GetRPOList(MachineFunction &MF)
mir Rename Register Operands
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
Remove Loads Into Fake Uses
SI Lower i1 Copies
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
Represent the analysis usage information of a pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256
instr_iterator instr_begin()
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Representation of each machine instruction.
Definition: MachineInstr.h:69
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:347
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
VRegRenamer - This class is used for renaming vregs in a machine basic block according to semantics o...
bool renameVRegs(MachineBasicBlock *MBB, unsigned BBNum)
Same as the above, but sets a BBNum depending on BB traversal that will be used as prefix for the vre...
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2115
char & MIRCanonicalizerID
MIRCanonicalizer - This pass canonicalizes MIR by renaming vregs according to the semantics of the in...
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1664
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1766
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
Function object to check whether the first component of a container supported by std::get (like std::...
Definition: STLExtras.h:1467