LLVM  10.0.0svn
MIRCanonicalizerPass.cpp
Go to the documentation of this file.
1 //===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The purpose of this pass is to employ a canonical code transformation so
10 // that code compiled with slightly different IR passes can be diffed more
11 // effectively than otherwise. This is done by renaming vregs in a given
12 // LiveRange in a canonical way. This pass also does a pseudo-scheduling to
13 // move defs closer to their use in order to reduce diffs caused by slightly
14 // different schedules.
15 //
16 // Basic Usage:
17 //
18 // llc -o - -run-pass mir-canonicalizer example.mir
19 //
20 // Reorders instructions canonically.
21 // Renames virtual register operands canonically.
22 // Strips certain MIR artifacts (optionally).
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "MIRVRegNamerUtils.h"
28 #include "llvm/ADT/STLExtras.h"
32 #include "llvm/CodeGen/Passes.h"
33 #include "llvm/Support/Debug.h"
35 
36 #include <queue>
37 
38 using namespace llvm;
39 
40 namespace llvm {
41 extern char &MIRCanonicalizerID;
42 } // namespace llvm
43 
44 #define DEBUG_TYPE "mir-canonicalizer"
45 
// Hidden command-line option "canon-nth-function" (value placeholder "N").
// The default ~0u acts as a "no filter" sentinel: runOnMachineFunction only
// compares this option against its running function counter when the value
// differs from ~0U.
46 static cl::opt<unsigned>
47  CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u),
48  cl::value_desc("N"),
49  cl::desc("Function number to canonicalize."));
50 
// Arguments of the hidden "canon-nth-basicblock" option (value placeholder
// "N"). NOTE(review): the declarator line (listing line 51) is absent from this
// listing. The default ~0u acts as a "no filter" sentinel: runOnBasicBlock only
// compares CanonicalizeBasicBlockNumber against its running block counter when
// the value differs from ~0U.
52  "canon-nth-basicblock", cl::Hidden, cl::init(~0u), cl::value_desc("N"),
53  cl::desc("BasicBlock number to canonicalize."));
54 
55 namespace {
56 
// Machine-function pass implementing the canonicalization described in the
// file header. The per-function work lives in runOnMachineFunction, which is
// defined out of line.
57 class MIRCanonicalizer : public MachineFunctionPass {
58 public:
 // Pass identifier handed to the MachineFunctionPass base constructor below.
59  static char ID;
60  MIRCanonicalizer() : MachineFunctionPass(ID) {}
61 
 // Human-readable name of the pass.
62  StringRef getPassName() const override {
63  return "Rename register operands in a canonical ordering.";
64  }
65 
 // Declares that this pass preserves the CFG.
 // NOTE(review): listing line 68 is absent from this listing, so anything else
 // this method does cannot be seen here.
66  void getAnalysisUsage(AnalysisUsage &AU) const override {
67  AU.setPreservesCFG();
69  }
70 
 // Canonicalizes MF; returns whether anything was changed.
71  bool runOnMachineFunction(MachineFunction &MF) override;
72 };
73 
74 } // end anonymous namespace
75 
77 
79 
// Pass registration boilerplate: ties MIRCanonicalizer to the argument string
// "mir-canonicalizer" (the name used with -run-pass in the file header) and to
// the description "Rename Register Operands Canonically".
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only and is-analysis flags -- confirm against PassSupport.h.
80 INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer",
81  "Rename Register Operands Canonically", false, false)
82 
83 INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer",
84  "Rename Register Operands Canonically", false, false)
85 
// Body of what is presumably GetRPOList(MachineFunction &MF), the helper that
// runOnMachineFunction calls to obtain its block list.
// NOTE(review): the signature line (listing line 86) and the declaration of
// RPOT (listing line 89) are absent from this listing.
//
// Returns an empty vector when MF has no blocks. Otherwise every element
// produced by iterating RPOT is appended to the result, so the returned vector
// has RPOT's iteration order.
// NOTE(review): RPOT is presumably a reverse post-order traversal of MF --
// confirm against the missing declaration.
87  if (MF.empty())
88  return {};
90  std::vector<MachineBasicBlock *> RPOList;
91  for (auto MBB : RPOT) {
92  RPOList.push_back(MBB);
93  }
94 
95  return RPOList;
96 }
97 
// Reorders `instructions` inside MBB by the text of their printed form.
//
// Each instruction is printed to a string; the sort key is that string from
// its first '=' onwards (the '=' itself is kept), or the whole string when it
// contains no '='. The instructions are then spliced, in ascending key order,
// immediately before the position returned by getPos().
//
// NOTE(review): the parameter line declaring getPos (listing line 101) is
// absent from this listing; the body calls it with no arguments and uses the
// result as a splice position.
//
// Returns true when at least one instruction was spliced, i.e. whenever
// `instructions` is non-empty; the flag is set unconditionally per splice.
98 static bool
99 rescheduleLexographically(std::vector<MachineInstr *> instructions,
100  MachineBasicBlock *MBB,
102 
103  bool Changed = false;
104  using StringInstrPair = std::pair<std::string, MachineInstr *>;
105  std::vector<StringInstrPair> StringInstrMap;
106 
107  for (auto *II : instructions) {
108  std::string S;
109  raw_string_ostream OS(S);
110  II->print(OS);
111  OS.flush();
112 
113  // Trim the assignment, or start from the beginning in the case of a store.
114  const size_t i = S.find("=");
115  StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II});
116  }
117 
 // Order by the textual key only; the instruction pointer is not compared.
118  llvm::sort(StringInstrMap,
119  [](const StringInstrPair &a, const StringInstrPair &b) -> bool {
120  return (a.first < b.first);
121  });
122 
 // getPos() is re-evaluated for every splice.
123  for (auto &II : StringInstrMap) {
124 
125  LLVM_DEBUG({
126  dbgs() << "Splicing ";
127  II.second->dump();
128  dbgs() << " right before: ";
129  getPos()->dump();
130  });
131 
132  Changed = true;
133  MBB->splice(getPos(), MBB, II.second);
134  }
135 
136  return Changed;
137 }
138 
// Pseudo-schedules the instructions of MBB into a canonical order.
//
// Steps, in order:
//  1. Snapshot pointers to every instruction of MBB.
//  2. Collect into PhysRegDefs the registers of def operands at operand index
//     >= 1 (subject to a filter whose condition is missing from this listing).
//  3. For every instruction that has operands, does not load or store, and
//     whose operand 0 is a virtual-register def:
//       - if all remaining operands are immediates or non-virtual registers
//         not present in PhysRegDefs, set it aside as "pseudo-idempotent";
//       - otherwise find the closest later use of the def in the same block
//         and splice the def right before that use.
//  4. For every use recorded in step 3, sort the defs that were moved in front
//     of it lexicographically.
//  5. Sort the pseudo-idempotent instructions lexicographically at the
//     beginning of the block.
//
// PseudoIdempotentInstCount is an out-parameter that receives the number of
// instructions set aside in step 3. Returns true if any splice was performed.
139 static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
140  MachineBasicBlock *MBB) {
141 
142  bool Changed = false;
143 
144  // Calculates the distance of MI from the beginning of its parent BB.
 // This is a linear walk of the parent block; ~0U is returned if MI is not
 // found.
145  auto getInstrIdx = [](const MachineInstr &MI) {
146  unsigned i = 0;
147  for (auto &CurMI : *MI.getParent()) {
148  if (&CurMI == &MI)
149  return i;
150  i++;
151  }
152  return ~0U;
153  };
154 
155  // Pre-Populate vector of instructions to reschedule so that we don't
156  // clobber the iterator.
157  std::vector<MachineInstr *> Instructions;
158  for (auto &MI : *MBB) {
159  Instructions.push_back(&MI);
160  }
161 
 // MultiUsers: use instruction -> defs that were spliced in front of it.
 // MultiUserLookup: running counter -> use instruction, so that the later
 // iteration over it follows insertion order.
162  std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers;
163  std::map<unsigned, MachineInstr *> MultiUserLookup;
164  unsigned UseToBringDefCloserToCount = 0;
165  std::vector<MachineInstr *> PseudoIdempotentInstructions;
166  std::vector<unsigned> PhysRegDefs;
 // Operand 0 is not inspected by this loop (the index starts at 1).
167  for (auto *II : Instructions) {
168  for (unsigned i = 1; i < II->getNumOperands(); i++) {
169  MachineOperand &MO = II->getOperand(i);
170  if (!MO.isReg())
171  continue;
172 
 // NOTE(review): the condition guarding the next `continue` (listing line
 // 173) is absent from this listing; given the vector's name it presumably
 // skips virtual registers -- confirm against the upstream file.
174  continue;
175 
176  if (!MO.isDef())
177  continue;
178 
179  PhysRegDefs.push_back(MO.getReg());
180  }
181  }
182 
183  for (auto *II : Instructions) {
184  if (II->getNumOperands() == 0)
185  continue;
186  if (II->mayLoadOrStore())
187  continue;
188 
 // Only instructions whose first operand defines a virtual register are
 // considered for rescheduling.
189  MachineOperand &MO = II->getOperand(0);
190  if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
191  continue;
192  if (!MO.isDef())
193  continue;
194 
 // Stays true only if every operand after the def is either an immediate
 // or a non-virtual register that does not appear in PhysRegDefs. Any other
 // operand (virtual register, listed register, other operand kind) clears
 // the flag.
195  bool IsPseudoIdempotent = true;
196  for (unsigned i = 1; i < II->getNumOperands(); i++) {
197 
198  if (II->getOperand(i).isImm()) {
199  continue;
200  }
201 
202  if (II->getOperand(i).isReg()) {
203  if (!Register::isVirtualRegister(II->getOperand(i).getReg()))
204  if (llvm::find(PhysRegDefs, II->getOperand(i).getReg()) ==
205  PhysRegDefs.end()) {
206  continue;
207  }
208  }
209 
210  IsPseudoIdempotent = false;
211  break;
212  }
213 
 // Pseudo-idempotent instructions are not moved here; they are sorted to
 // the top of the block at the end of this function.
214  if (IsPseudoIdempotent) {
215  PseudoIdempotentInstructions.push_back(II);
216  continue;
217  }
218 
219  LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump(););
220 
 // Search the non-debug uses of the defined register for the one that
 // follows the def most closely within the same block.
221  MachineInstr *Def = II;
222  unsigned Distance = ~0U;
223  MachineInstr *UseToBringDefCloserTo = nullptr;
224  MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
225  for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) {
226  MachineInstr *UseInst = UO.getParent();
227 
228  const unsigned DefLoc = getInstrIdx(*Def);
229  const unsigned UseLoc = getInstrIdx(*UseInst);
 // Unsigned subtraction: wraps when the use does not follow the def, but
 // Delta is only consulted after the guards below have passed.
230  const unsigned Delta = (UseLoc - DefLoc);
231 
232  if (UseInst->getParent() != Def->getParent())
233  continue;
234  if (DefLoc >= UseLoc)
235  continue;
236 
 // Every use that is the closest seen so far is recorded in
 // MultiUserLookup, not only the final winner.
237  if (Delta < Distance) {
238  Distance = Delta;
239  UseToBringDefCloserTo = UseInst;
240  MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo;
241  }
242  }
243 
 // Locate iterators for the def and the chosen use by scanning the block.
 // If no use was chosen, UseI stays at the end iterator and the
 // instruction is skipped below.
244  const auto BBE = MBB->instr_end();
245  MachineBasicBlock::iterator DefI = BBE;
246  MachineBasicBlock::iterator UseI = BBE;
247 
248  for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) {
249 
250  if (DefI != BBE && UseI != BBE)
251  break;
252 
253  if (&*BBI == Def) {
254  DefI = BBI;
255  continue;
256  }
257 
258  if (&*BBI == UseToBringDefCloserTo) {
259  UseI = BBI;
260  continue;
261  }
262  }
263 
264  if (DefI == BBE || UseI == BBE)
265  continue;
266 
267  LLVM_DEBUG({
268  dbgs() << "Splicing ";
269  DefI->dump();
270  dbgs() << " right before: ";
271  UseI->dump();
272  });
273 
 // Remember which defs were moved in front of this use, then move the def.
274  MultiUsers[UseToBringDefCloserTo].push_back(Def);
275  Changed = true;
276  MBB->splice(UseI, MBB, DefI);
277  }
278 
279  // Sort the defs for users of multiple defs lexographically.
 // MultiUsers[E.second] uses std::map::operator[], so a recorded use that
 // never had a def moved in front of it yields an empty vector here.
280  for (const auto &E : MultiUserLookup) {
281 
282  auto UseI =
283  std::find_if(MBB->instr_begin(), MBB->instr_end(),
284  [&](MachineInstr &MI) -> bool { return &MI == E.second; });
285 
286  if (UseI == MBB->instr_end())
287  continue;
288 
289  LLVM_DEBUG(
290  dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";);
291  Changed |= rescheduleLexographically(
292  MultiUsers[E.second], MBB,
293  [&]() -> MachineBasicBlock::iterator { return UseI; });
294  }
295 
 // Report the count to the caller, then sort the pseudo-idempotent
 // instructions to the beginning of the block.
296  PseudoIdempotentInstCount = PseudoIdempotentInstructions.size();
297  LLVM_DEBUG(
298  dbgs() << "Rescheduling Idempotent Instructions Lexographically.";);
299  Changed |= rescheduleLexographically(
300  PseudoIdempotentInstructions, MBB,
301  [&]() -> MachineBasicBlock::iterator { return MBB->begin(); });
302 
303  return Changed;
304 }
305 
// Body of what is presumably propagateLocalCopies(MachineBasicBlock *MBB),
// the helper called from runOnBasicBlock.
// NOTE(review): the signature line (listing line 306) and the declaration of
// MRI (listing line 308) are absent from this listing.
//
// For every COPY in MBB whose destination and source are both virtual
// registers with the same register class, all uses of the destination are
// rewritten to the source and the COPY is erased. Returns true if at least one
// COPY was erased.
307  bool Changed = false;
309 
 // Collect the copies before the loop that erases instructions.
310  std::vector<MachineInstr *> Copies;
311  for (MachineInstr &MI : MBB->instrs()) {
312  if (MI.isCopy())
313  Copies.push_back(&MI);
314  }
315 
316  for (MachineInstr *MI : Copies) {
317 
318  if (!MI->getOperand(0).isReg())
319  continue;
320  if (!MI->getOperand(1).isReg())
321  continue;
322 
323  const Register Dst = MI->getOperand(0).getReg();
324  const Register Src = MI->getOperand(1).getReg();
325 
326  if (!Register::isVirtualRegister(Dst))
327  continue;
328  if (!Register::isVirtualRegister(Src))
329  continue;
330  // Not folding COPY instructions if regbankselect has not set the RCs.
331  // Why are we only considering Register Classes? Because the verifier
332  // sometimes gets upset if the register classes don't match even if the
333  // types do. A future patch might add COPY folding for matching types in
334  // pre-registerbankselect code.
 // NOTE(review): only Dst is checked with getRegClassOrNull; getRegClass(Src)
 // is called without an equivalent check -- confirm Src always has a register
 // class whenever Dst does.
335  if (!MRI.getRegClassOrNull(Dst))
336  continue;
337  if (MRI.getRegClass(Dst) != MRI.getRegClass(Src))
338  continue;
339 
 // Gather the use operands of Dst first, then rewrite them to Src.
340  std::vector<MachineOperand *> Uses;
341  for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI)
342  Uses.push_back(&*UI);
343  for (auto *MO : Uses)
344  MO->setReg(Src);
345 
346  Changed = true;
347  MI->eraseFromParent();
348  }
349 
350  return Changed;
351 }
352 
// Body of what is presumably doDefKillClear(MachineBasicBlock *MBB), the
// helper called from runOnBasicBlock.
// NOTE(review): the signature line (listing line 353) is absent from this
// listing.
//
// Walks every register operand of every instruction in MBB, clearing the kill
// flag on non-def operands and the dead flag on def operands. Returns true if
// any flag was cleared.
354  bool Changed = false;
355 
356  for (auto &MI : *MBB) {
357  for (auto &MO : MI.operands()) {
358  if (!MO.isReg())
359  continue;
360  if (!MO.isDef() && MO.isKill()) {
361  Changed = true;
362  MO.setIsKill(false);
363  }
364 
365  if (MO.isDef() && MO.isDead()) {
366  Changed = true;
367  MO.setIsDead(false);
368  }
369  }
370  }
371 
372  return Changed;
373 }
374 
// Remainder of what is presumably runOnBasicBlock(MachineBasicBlock *MBB, ...).
// NOTE(review): the start of the signature (listing line 375) and the
// declaration of MRI (listing line 401) are absent from this listing.
//
// Canonicalizes a single basic block: local COPY propagation, canonical
// rescheduling, vreg renaming through NVC, renaming of the defs of the
// leading pseudo-idempotent instructions, and clearing of kill/dead flags.
//
// bbNames       - names of blocks already processed; a block whose name is
//                 already present is skipped.
// basicBlockNum - running counter compared against the
//                 CanonicalizeBasicBlockNumber option; only advanced while
//                 that option is set.
// NVC           - cursor used for renaming virtual registers.
//
// Returns true if the block was modified, false if it was skipped or left
// unchanged.
376  std::vector<StringRef> &bbNames,
377  unsigned &basicBlockNum, NamedVRegCursor &NVC) {
378 
 // Optional filter: when the option is set, only the block whose running
 // index equals it is processed.
379  if (CanonicalizeBasicBlockNumber != ~0U) {
380  if (CanonicalizeBasicBlockNumber != basicBlockNum++)
381  return false;
382  LLVM_DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB->getName()
383  << "\n";);
384  }
385 
 // Blocks are identified by name; a repeated name is treated as a potential
 // duplicate and left untouched.
386  if (llvm::find(bbNames, MBB->getName()) != bbNames.end()) {
387  LLVM_DEBUG({
388  dbgs() << "Found potentially duplicate BasicBlocks: " << MBB->getName()
389  << "\n";
390  });
391  return false;
392  }
393 
394  LLVM_DEBUG({
395  dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n";
396  dbgs() << "\n\n================================================\n\n";
397  });
398 
399  bool Changed = false;
400  MachineFunction &MF = *MBB->getParent();
402 
403  bbNames.push_back(MBB->getName());
 // NOTE(review): this repeats the "NEW BASIC BLOCK" banner already printed
 // a few lines above.
404  LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";);
405 
406  LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n";
407  MBB->dump(););
408  Changed |= propagateLocalCopies(MBB);
409  LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB->dump(););
410 
411  LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump(););
 // rescheduleCanonically reports through IdempotentInstCount how many
 // pseudo-idempotent instructions it sorted to the beginning of the block.
412  unsigned IdempotentInstCount = 0;
413  Changed |= rescheduleCanonically(IdempotentInstCount, MBB);
414  LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
415 
416  Changed |= NVC.renameVRegs(MBB);
417 
418  // Here we renumber the def vregs for the idempotent instructions from the top
419  // of the MachineBasicBlock so that they are named in the order that we sorted
420  // them alphabetically. Eventually we won't need SkipVRegs because we will use
421  // named vregs instead.
422  if (IdempotentInstCount)
423  NVC.skipVRegs();
424 
 // For each of the first IdempotentInstCount instructions, create a
 // replacement vreg for the register in operand 0 and repoint every operand
 // that references the old register.
425  auto MII = MBB->begin();
426  for (unsigned i = 0; i < IdempotentInstCount && MII != MBB->end(); ++i) {
427  MachineInstr &MI = *MII++;
428  Changed = true;
429  Register vRegToRename = MI.getOperand(0).getReg();
430  auto Rename = NVC.createVirtualRegister(vRegToRename);
431 
 // Collect the operands first, then rewrite them.
432  std::vector<MachineOperand *> RenameMOs;
433  for (auto &MO : MRI.reg_operands(vRegToRename)) {
434  RenameMOs.push_back(&MO);
435  }
436 
437  for (auto *MO : RenameMOs) {
438  MO->setReg(Rename);
439  }
440  }
441 
442  Changed |= doDefKillClear(MBB);
443 
444  LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump();
445  dbgs() << "\n";);
446  LLVM_DEBUG(
447  dbgs() << "\n\n================================================\n\n");
448  return Changed;
449 }
450 
// Pass entry point: canonicalizes every basic block of MF in the order
// returned by GetRPOList, sharing one NamedVRegCursor across the blocks.
// Returns true if any block was changed.
// NOTE(review): the declaration of MRI (listing line 479) is absent from this
// listing.
451 bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
452 
 // Function-local static counter: it persists across invocations of this
 // method and only advances while the canon-nth-function option is set, in
 // which case every function except the one with the matching index is left
 // untouched.
453  static unsigned functionNum = 0;
454  if (CanonicalizeFunctionNumber != ~0U) {
455  if (CanonicalizeFunctionNumber != functionNum++)
456  return false;
457  LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName()
458  << "\n";);
459  }
460 
461  // we need a valid vreg to create a vreg type for skipping all those
462  // stray vreg numbers so we reach alignment/canonical vreg values.
463  std::vector<MachineBasicBlock *> RPOList = GetRPOList(MF);
464 
465  LLVM_DEBUG(
466  dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF.getName() << " \n\n";
467  dbgs() << "\n\n================================================\n\n";
468  dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n";
469  for (auto MBB
470  : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs()
471  << "\n\n================================================\n\n";);
472 
 // Names of blocks already processed; runOnBasicBlock uses this to skip
 // blocks with a repeated name.
473  std::vector<StringRef> BBNames;
474 
 // Running block index consumed by the canon-nth-basicblock filter.
475  unsigned BBNum = 0;
476 
477  bool Changed = false;
478 
480  NamedVRegCursor NVC(MRI);
481  for (auto MBB : RPOList)
482  Changed |= runOnBasicBlock(MBB, BBNames, BBNum, NVC);
483 
484  return Changed;
485 }
char & MIRCanonicalizerID
This class represents lattice values for constants.
Definition: AllocatorList.h:23
bool renameVRegs(MachineBasicBlock *MBB)
renameVRegs - For a given MachineBasicBlock, scan for side-effecting instructions, walk the def-use from each side-effecting root (in sorted root order) and rename the encountered vregs in the def-use graph in a canonical ordering.
iterator_range< use_nodbg_iterator > use_nodbg_operands(unsigned Reg) const
static bool rescheduleLexographically(std::vector< MachineInstr *> instructions, MachineBasicBlock *MBB, std::function< MachineBasicBlock::iterator()> getPos)
iterator_range< reg_iterator > reg_operands(unsigned Reg) const
unsigned createVirtualRegister(unsigned VReg)
createVirtualRegister - Given an existing vreg, create a named vreg to take its place.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: BitVector.h:937
static use_iterator use_end()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
mir Rename Register Operands Canonically
mir Rename Register Operands
static bool doDefKillClear(MachineBasicBlock *MBB)
static cl::opt< unsigned > CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u), cl::value_desc("N"), cl::desc("Function number to canonicalize."))
static cl::opt< unsigned > CanonicalizeBasicBlockNumber("canon-nth-basicblock", cl::Hidden, cl::init(~0u), cl::value_desc("N"), cl::desc("BasicBlock number to canonicalize."))
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
unsigned const MachineRegisterInfo * MRI
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
mir Rename Register Operands static false std::vector< MachineBasicBlock * > GetRPOList(MachineFunction &MF)
Represent the analysis usage information of a pass.
void skipVRegs()
SkipGapSize - Skips modulo a gap value of indices.
static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount, MachineBasicBlock *MBB)
mir canonicalizer
auto find_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1193
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1186
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1095
static bool runOnBasicBlock(MachineBasicBlock *MBB, std::vector< StringRef > &bbNames, unsigned &basicBlockNum, NamedVRegCursor &NVC)
MachineOperand class - Representation of each machine instruction operand.
SI Lower i1 Copies
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:301
NamedVRegCursor - The cursor is an object that keeps track of what the next vreg name should be...
static bool propagateLocalCopies(MachineBasicBlock *MBB)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer", "Rename Register Operands Canonically", false, false) INITIALIZE_PASS_END(MIRCanonicalizer
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:255
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Representation of each machine instruction.
Definition: MachineInstr.h:63
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
use_iterator use_begin(unsigned RegNo) const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
const TargetRegisterClass * getRegClassOrNull(unsigned Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet...
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:503
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:69
print Print MemDeps of function
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
inst_range instructions(Function *F)
Definition: InstIterator.h:133
Register getReg() const
getReg - Returns the register number.
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:415
Wrapper class representing virtual and physical registers.
Definition: Register.h:19