//====- X86CmovConversion.cpp - Convert Cmov to Branch --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements a pass that converts X86 cmov instructions into
/// branches when profitable. This pass is conservative: it transforms if and
/// only if it is highly confident that the conversion is a gain.
///
/// Thus, the optimization applies under the following conditions:
///   1. Consider as candidates only CMOVs in innermost loops (assume that
///      most hotspots are represented by these loops).
///   2. Given a group of CMOV instructions that use the same EFLAGS def
///      instruction:
///      a. Consider them as candidates only if all have the same condition
///         code or the opposite one, to prevent generating more than one
///         conditional jump per EFLAGS def instruction.
///      b. Consider them as candidates only if all are profitable to convert
///         (assume that one bad conversion may cause a degradation).
///   3. Apply the conversion only to loops that are found profitable and only
///      to CMOV candidates that were found profitable.
///      a. A loop is considered profitable only if the conversion reduces its
///         depth cost by some threshold.
///      b. A CMOV is considered profitable if the cost of its condition is
///         higher than the average cost of its true-value and false-value by
///         25% of the branch-misprediction penalty. This ensures no
///         degradation even with 25% branch misprediction.
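///         For example, assuming a hypothetical 20-cycle misprediction
///         penalty, a CMOV whose true/false values average 6 cycles of depth
///         is converted only if its condition is at least 6 + 20/4 = 11
///         cycles deep.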
///
/// Note: This pass is assumed to run on SSA machine code.
//
//===----------------------------------------------------------------------===//
//
//  External interfaces:
//      FunctionPass *llvm::createX86CmovConverterPass();
//      bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF);
//
//===----------------------------------------------------------------------===//
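//
// A minimal usage sketch (illustrative only; in-tree, the X86 target adds the
// pass from its TargetPassConfig, so the exact scheduling point is up to the
// target; "PM" here is a hypothetical legacy PassManagerBase):
//
//   PM.add(createX86CmovConverterPass());
//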

#include "X86.h"
#include "X86InstrInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "x86-cmov-conversion"

STATISTIC(NumOfSkippedCmovGroups, "Number of unsupported CMOV-groups");
STATISTIC(NumOfCmovGroupCandidate, "Number of CMOV-group candidates");
STATISTIC(NumOfLoopCandidate, "Number of CMOV-conversion profitable loops");
STATISTIC(NumOfOptimizedCmovGroups, "Number of optimized CMOV-groups");

// This internal switch can be used to turn off the cmov/branch optimization.
static cl::opt<bool>
    EnableCmovConverter("x86-cmov-converter",
                        cl::desc("Enable the X86 cmov-to-branch optimization."),
                        cl::init(true), cl::Hidden);

static cl::opt<unsigned>
    GainCycleThreshold("x86-cmov-converter-threshold",
                       cl::desc("Minimum gain per loop (in cycles) threshold."),
                       cl::init(4), cl::Hidden);

static cl::opt<bool> ForceMemOperand(
    "x86-cmov-converter-force-mem-operand",
    cl::desc("Convert cmovs to branches whenever they have memory operands."),
    cl::init(true), cl::Hidden);

namespace {

/// Converts X86 cmov instructions into branches when profitable.
class X86CmovConverterPass : public MachineFunctionPass {
public:
  X86CmovConverterPass() : MachineFunctionPass(ID) { }

  StringRef getPassName() const override { return "X86 cmov Conversion"; }
  bool runOnMachineFunction(MachineFunction &MF) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;

  /// Pass identification, replacement for typeid.
  static char ID;

private:
  MachineRegisterInfo *MRI = nullptr;
  const TargetInstrInfo *TII = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  MachineLoopInfo *MLI = nullptr;
  TargetSchedModel TSchedModel;

  /// List of consecutive CMOV instructions.
  using CmovGroup = SmallVector<MachineInstr *, 2>;
  using CmovGroups = SmallVector<CmovGroup, 2>;

  /// Collect all CMOV-group-candidates in \p CurrLoop and update \p
  /// CmovInstGroups accordingly.
  ///
  /// \param Blocks List of blocks to process.
  /// \param CmovInstGroups List of consecutive CMOV instructions in CurrLoop.
  /// \returns true iff it found any CMOV-group-candidate.
  bool collectCmovCandidates(ArrayRef<MachineBasicBlock *> Blocks,
                             CmovGroups &CmovInstGroups,
                             bool IncludeLoads = false);

  /// Check if it is profitable to transform each CMOV-group-candidate into a
  /// branch. Remove all groups that are not profitable from \p CmovInstGroups.
  ///
  /// \param Blocks List of blocks to process.
  /// \param CmovInstGroups List of consecutive CMOV instructions in CurrLoop.
  /// \returns true iff any CMOV-group-candidate remains.
  bool checkForProfitableCmovCandidates(ArrayRef<MachineBasicBlock *> Blocks,
                                        CmovGroups &CmovInstGroups);

  /// Convert the given list of consecutive CMOV instructions into a branch.
  ///
  /// \param Group Consecutive CMOV instructions to be converted into branch.
  void convertCmovInstsToBranches(SmallVectorImpl<MachineInstr *> &Group) const;
};

} // end anonymous namespace

char X86CmovConverterPass::ID = 0;

void X86CmovConverterPass::getAnalysisUsage(AnalysisUsage &AU) const {
  MachineFunctionPass::getAnalysisUsage(AU);
  AU.addRequired<MachineLoopInfo>();
}

bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;
  if (!EnableCmovConverter)
    return false;

  LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
                    << "**********\n");

  bool Changed = false;
  MLI = &getAnalysis<MachineLoopInfo>();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  MRI = &MF.getRegInfo();
  TII = STI.getInstrInfo();
  TRI = STI.getRegisterInfo();
  TSchedModel.init(&STI);

  // Before we handle the more subtle cases of register-register CMOVs inside
  // of potentially hot loops, we want to quickly remove all CMOVs with
  // a memory operand. The CMOV risks a stall waiting for the load to
  // complete, a stall that speculative execution behind a branch is better
  // suited to handle on modern x86 chips.
  if (ForceMemOperand) {
    CmovGroups AllCmovGroups;
    SmallVector<MachineBasicBlock *, 4> Blocks;
    for (auto &MBB : MF)
      Blocks.push_back(&MBB);
    if (collectCmovCandidates(Blocks, AllCmovGroups, /*IncludeLoads*/ true)) {
      for (auto &Group : AllCmovGroups) {
        // Skip any group that doesn't do at least one memory operand cmov.
        if (!llvm::any_of(Group, [&](MachineInstr *I) { return I->mayLoad(); }))
          continue;

        // For CMOV groups which we can rewrite and which contain a memory
        // load, always rewrite them. On x86, a CMOV will dramatically amplify
        // any memory latency by blocking speculative execution.
        Changed = true;
        convertCmovInstsToBranches(Group);
      }
    }
  }

  //===--------------------------------------------------------------------===//
  // Register-operand Conversion Algorithm
  // ---------
  //   For each innermost loop
  //     collectCmovCandidates() {
  //       Find all CMOV-group-candidates.
  //     }
  //
  //     checkForProfitableCmovCandidates() {
  //       * Calculate both loop-depth and optimized-loop-depth.
  //       * Use these depths to check for loop transformation profitability.
  //       * Check for CMOV-group-candidate transformation profitability.
  //     }
  //
  //     For each profitable CMOV-group-candidate
  //       convertCmovInstsToBranches() {
  //           * Create FalseBB, SinkBB, Conditional branch to SinkBB.
  //           * Replace each CMOV instruction with a PHI instruction in
  //             SinkBB.
  //       }
  //
  // Note: For more details, see each function description.
  //===--------------------------------------------------------------------===//

  // Build up the loops in pre-order.
  SmallVector<MachineLoop *, 4> Loops(MLI->begin(), MLI->end());
  // Note that we need to check size on each iteration as we accumulate child
  // loops.
  for (int i = 0; i < (int)Loops.size(); ++i)
    for (MachineLoop *Child : Loops[i]->getSubLoops())
      Loops.push_back(Child);

  for (MachineLoop *CurrLoop : Loops) {
    // Optimize only innermost loops.
    if (!CurrLoop->getSubLoops().empty())
      continue;

    // List of consecutive CMOV instructions to be processed.
    CmovGroups CmovInstGroups;

    if (!collectCmovCandidates(CurrLoop->getBlocks(), CmovInstGroups))
      continue;

    if (!checkForProfitableCmovCandidates(CurrLoop->getBlocks(),
                                          CmovInstGroups))
      continue;

    Changed = true;
    for (auto &Group : CmovInstGroups)
      convertCmovInstsToBranches(Group);
  }

  return Changed;
}

bool X86CmovConverterPass::collectCmovCandidates(
    ArrayRef<MachineBasicBlock *> Blocks, CmovGroups &CmovInstGroups,
    bool IncludeLoads) {
  //===--------------------------------------------------------------------===//
  // Collect all CMOV-group-candidates and add them into CmovInstGroups.
  //
  // CMOV-group:
  //   CMOV instructions in the same MBB that use the same EFLAGS def
  //   instruction.
  //
  // CMOV-group-candidate:
  //   CMOV-group where all the CMOV instructions are
  //     1. consecutive.
  //     2. have the same condition code or the opposite one.
  //     3. have only register operands (X86::CMOVrr).
  //===--------------------------------------------------------------------===//
  // List of possible improvements (TODOs):
  // --------------------------------------
  // TODO: Add support for X86::CMOVrm instructions.
  // TODO: Add support for X86::SETcc instructions.
  // TODO: Add support for CMOV-groups with non-consecutive CMOV instructions.
  //===--------------------------------------------------------------------===//
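
  // For illustration, a qualifying group might look like this in pseudo-MIR
  // (registers and values are made up; condition codes 4/5 stand for
  // COND_E/COND_NE, a condition and its opposite):
  //
  //   CMP32rr %a, %b, implicit-def $eflags
  //   %v1:gr32 = CMOV32rr %t1, %f1, 4, implicit $eflags
  //   %v2:gr32 = CMOV32rr %t2, %f2, 5, implicit $eflags
  //
  // Both CMOVs use the same EFLAGS def, are consecutive, use a condition and
  // its opposite, and have register operands only.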

  // Current processed CMOV-Group.
  CmovGroup Group;
  for (auto *MBB : Blocks) {
    Group.clear();
    // Condition code of the first CMOV instruction in the currently processed
    // range, and its opposite condition code.
    X86::CondCode FirstCC = X86::COND_INVALID, FirstOppCC = X86::COND_INVALID,
                  MemOpCC = X86::COND_INVALID;
    // Indicator of a non-CMOVrr instruction in the current processed range.
    bool FoundNonCMOVInst = false;
    // Indicator for the current processed CMOV-group if it should be skipped.
    bool SkipGroup = false;

    for (auto &I : *MBB) {
      // Skip debug instructions.
      if (I.isDebugInstr())
        continue;
      X86::CondCode CC = X86::getCondFromCMov(I);
      // Check if we found an X86::CMOVrr instruction.
      if (CC != X86::COND_INVALID && (IncludeLoads || !I.mayLoad())) {
        if (Group.empty()) {
          // We found the first CMOV in the range; reset flags.
          FirstCC = CC;
          FirstOppCC = X86::GetOppositeBranchCondition(CC);
          // Clear out the prior group's memory operand CC.
          MemOpCC = X86::COND_INVALID;
          FoundNonCMOVInst = false;
          SkipGroup = false;
        }
        Group.push_back(&I);
        // Check if it is a non-consecutive CMOV instruction or has a
        // different condition code from FirstCC or FirstOppCC.
        if (FoundNonCMOVInst || (CC != FirstCC && CC != FirstOppCC))
          // Mark the SkipGroup indicator to skip the current processed
          // CMOV-Group.
          SkipGroup = true;
        if (I.mayLoad()) {
          if (MemOpCC == X86::COND_INVALID)
            // The first memory operand CMOV.
            MemOpCC = CC;
          else if (CC != MemOpCC)
            // Can't handle mixed conditions with memory operands.
            SkipGroup = true;
        }
        // Check if we were relying on zero-extending behavior of the CMOV.
        if (!SkipGroup &&
            llvm::any_of(
                MRI->use_nodbg_instructions(I.defs().begin()->getReg()),
                [&](MachineInstr &UseI) {
                  return UseI.getOpcode() == X86::SUBREG_TO_REG;
                }))
          // FIXME: We should model the cost of using an explicit MOV to handle
          // the zero-extension rather than just refusing to handle this.
          SkipGroup = true;
        continue;
      }
      // If Group is empty, keep looking for the first CMOV in the range.
      if (Group.empty())
        continue;

      // We found a non-X86::CMOVrr instruction.
      FoundNonCMOVInst = true;
      // Check if this instruction defines EFLAGS to determine the end of the
      // processed range, as there would be no more instructions using the
      // current EFLAGS def.
      if (I.definesRegister(X86::EFLAGS)) {
        // Check if the current processed CMOV-group should not be skipped and
        // add it as a CMOV-group-candidate.
        if (!SkipGroup)
          CmovInstGroups.push_back(Group);
        else
          ++NumOfSkippedCmovGroups;
        Group.clear();
      }
    }
    // End of basic block is considered end of range; check if the current
    // processed CMOV-group should not be skipped and add it as a
    // CMOV-group-candidate.
    if (Group.empty())
      continue;
    if (!SkipGroup)
      CmovInstGroups.push_back(Group);
    else
      ++NumOfSkippedCmovGroups;
  }

  NumOfCmovGroupCandidate += CmovInstGroups.size();
  return !CmovInstGroups.empty();
}

/// \returns Depth of CMOV instruction as if it were converted into a branch.
/// \param TrueOpDepth depth cost of CMOV true-value operand.
/// \param FalseOpDepth depth cost of CMOV false-value operand.
static unsigned getDepthOfOptCmov(unsigned TrueOpDepth, unsigned FalseOpDepth) {
  // The depth of the result after branch conversion is
  //   TrueOpDepth * TrueOpProbability + FalseOpDepth * FalseOpProbability.
  // As we have no info about branch weight, we assume 75% for one and 25% for
  // the other, and pick the result with the largest resulting depth.
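  //
  // A worked instance with made-up depths: for TrueOpDepth = 8 and
  // FalseOpDepth = 2 the two weightings are ceil((8*3 + 2) / 4) = 7 and
  // ceil((2*3 + 8) / 4) = 4, so the conservative (larger) estimate of 7
  // cycles is returned.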
  return std::max(
      divideCeil(TrueOpDepth * 3 + FalseOpDepth, 4),
      divideCeil(FalseOpDepth * 3 + TrueOpDepth, 4));
}

bool X86CmovConverterPass::checkForProfitableCmovCandidates(
    ArrayRef<MachineBasicBlock *> Blocks, CmovGroups &CmovInstGroups) {
  struct DepthInfo {
    /// Depth of original loop.
    unsigned Depth;
    /// Depth of optimized loop.
    unsigned OptDepth;
  };
  /// Number of loop iterations for which instruction depth is calculated
  /// (two iterations are enough; see the Loop-Depth[i] note below).
  static const unsigned LoopIterations = 2;
  DenseMap<MachineInstr *, DepthInfo> DepthMap;
  DepthInfo LoopDepth[LoopIterations] = {{0, 0}, {0, 0}};
  enum { PhyRegType = 0, VirRegType = 1, RegTypeNum = 2 };
  /// For each register type, maps the register to its last def instruction.
  DenseMap<unsigned, MachineInstr *> RegDefMaps[RegTypeNum];
  /// Maps register operand to its def instruction, which can be nullptr if it
  /// is unknown (e.g., operand is defined outside the loop).
  DenseMap<MachineOperand *, MachineInstr *> OperandToDefMap;

  // Set depth of unknown instruction (i.e., nullptr) to zero.
  DepthMap[nullptr] = {0, 0};

  SmallPtrSet<MachineInstr *, 4> CmovInstructions;
  for (auto &Group : CmovInstGroups)
    CmovInstructions.insert(Group.begin(), Group.end());

  //===--------------------------------------------------------------------===//
  // Step 1: Calculate instruction depth and loop depth.
  // Optimized-Loop:
  //   loop with CMOV-group-candidates converted into branches.
  //
  // Instruction-Depth:
  //   instruction latency + max operand depth.
  //   * For a CMOV instruction in the optimized loop the depth is calculated
  //     as:
  //       CMOV latency + getDepthOfOptCmov(True-Op-Depth, False-Op-Depth)
  //   TODO: Find a better way to estimate the latency of the branch
  //   instruction rather than using the CMOV latency.
  //
  // Loop-Depth:
  //   max instruction depth of all instructions in the loop.
  //   Note: the instruction with max depth represents the critical path in
  //   the loop.
  //
  // Loop-Depth[i]:
  //   Loop-Depth calculated for the first `i` iterations.
  //   Note: it is enough to calculate depth for up to two iterations.
  //
  // Depth-Diff[i]:
  //   Number of cycles saved in the first `i` iterations by optimizing the
  //   loop.
  //===--------------------------------------------------------------------===//
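  //
  // As a concrete instance of the recurrence (latency is made up): for
  //   %c = ADD32rr %a, %b
  // with an assumed ADD latency of 1 cycle,
  //   Depth(%c) = 1 + max(Depth(%a), Depth(%b)),
  // and Loop-Depth is the maximum of such depths over the whole loop body.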
  for (unsigned I = 0; I < LoopIterations; ++I) {
    DepthInfo &MaxDepth = LoopDepth[I];
    for (auto *MBB : Blocks) {
      // Clear physical registers Def map.
      RegDefMaps[PhyRegType].clear();
      for (MachineInstr &MI : *MBB) {
        // Skip debug instructions.
        if (MI.isDebugInstr())
          continue;
        unsigned MIDepth = 0;
        unsigned MIDepthOpt = 0;
        bool IsCMOV = CmovInstructions.count(&MI);
        for (auto &MO : MI.uses()) {
          // Check for "isUse()" because "uses()" also returns implicit
          // definitions.
          if (!MO.isReg() || !MO.isUse())
            continue;
          Register Reg = MO.getReg();
          auto &RDM = RegDefMaps[Reg.isVirtual()];
          if (MachineInstr *DefMI = RDM.lookup(Reg)) {
            OperandToDefMap[&MO] = DefMI;
            DepthInfo Info = DepthMap.lookup(DefMI);
            MIDepth = std::max(MIDepth, Info.Depth);
            if (!IsCMOV)
              MIDepthOpt = std::max(MIDepthOpt, Info.OptDepth);
          }
        }

        if (IsCMOV)
          MIDepthOpt = getDepthOfOptCmov(
              DepthMap[OperandToDefMap.lookup(&MI.getOperand(1))].OptDepth,
              DepthMap[OperandToDefMap.lookup(&MI.getOperand(2))].OptDepth);

        // Iterate over all operands to handle implicit definitions as well.
        for (auto &MO : MI.operands()) {
          if (!MO.isReg() || !MO.isDef())
            continue;
          Register Reg = MO.getReg();
          RegDefMaps[Reg.isVirtual()][Reg] = &MI;
        }

        unsigned Latency = TSchedModel.computeInstrLatency(&MI);
        DepthMap[&MI] = {MIDepth += Latency, MIDepthOpt += Latency};
        MaxDepth.Depth = std::max(MaxDepth.Depth, MIDepth);
        MaxDepth.OptDepth = std::max(MaxDepth.OptDepth, MIDepthOpt);
      }
    }
  }

  unsigned Diff[LoopIterations] = {LoopDepth[0].Depth - LoopDepth[0].OptDepth,
                                   LoopDepth[1].Depth - LoopDepth[1].OptDepth};

  //===--------------------------------------------------------------------===//
  // Step 2: Check if the loop is worth optimizing.
  // Worth-Optimize-Loop:
  //   case 1: Diff[1] == Diff[0]
  //           The critical path is iteration independent - there is no
  //           dependency of critical-path instructions on critical-path
  //           instructions of the previous iteration.
  //           Thus, it is enough to check the gain percent of the 1st
  //           iteration - to be conservative, the optimized loop needs to
  //           have a depth 12.5% fewer cycles than the original loop, per
  //           iteration.
  //
  //   case 2: Diff[1] > Diff[0]
  //           The critical path is iteration dependent - there is a
  //           dependency of critical-path instructions on critical-path
  //           instructions of the previous iteration.
  //           Thus, check the gain percent of the 2nd iteration (similar to
  //           the previous case), but it is also required to check the
  //           gradient of the gain - the change in Depth-Diff compared to
  //           the change in Loop-Depth between the 1st and 2nd iterations.
  //           To be conservative, the gradient needs to be at least 50%.
  //
  //   In addition, in order not to optimize loops with a very small gain, the
  //   gain (in cycles) after the 2nd iteration should not be less than a
  //   given threshold. Thus, the check (Diff[1] >= GainCycleThreshold) must
  //   apply.
  //
  // If the loop is not worth optimizing, remove all CMOV-group-candidates.
  //===--------------------------------------------------------------------===//
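  //
  // A worked example with made-up numbers (case 2): suppose
  // LoopDepth[0] = {24, 18} and LoopDepth[1] = {40, 26}, so Diff = {6, 14}.
  // Then Diff[1] = 14 >= GainCycleThreshold (default 4), the gradient check
  // (14 - 6) * 2 = 16 >= 40 - 24 = 16 holds, and 14 * 8 = 112 >= 40 holds,
  // so the loop is considered worth optimizing.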
  if (Diff[1] < GainCycleThreshold)
    return false;

  bool WorthOptLoop = false;
  if (Diff[1] == Diff[0])
    WorthOptLoop = Diff[0] * 8 >= LoopDepth[0].Depth;
  else if (Diff[1] > Diff[0])
    WorthOptLoop =
        (Diff[1] - Diff[0]) * 2 >= (LoopDepth[1].Depth - LoopDepth[0].Depth) &&
        (Diff[1] * 8 >= LoopDepth[1].Depth);

  if (!WorthOptLoop)
    return false;

  ++NumOfLoopCandidate;

  //===--------------------------------------------------------------------===//
  // Step 3: Check for each CMOV-group-candidate if it is worth optimizing.
  // Worth-Optimize-Group:
  //   Iff it is worth optimizing all CMOV instructions in the group.
  //
  // Worth-Optimize-CMOV:
  //   The predicted branch is faster than CMOV by the difference between the
  //   depth of the condition operand and the depth of the taken (predicted)
  //   value operand.
  //   To be conservative, the gain of such a CMOV transformation should cover
  //   at least 25% of the branch-misprediction penalty.
  //===--------------------------------------------------------------------===//
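  //
  // For instance, assuming a hypothetical MispredictPenalty of 20 cycles,
  // a CMOV with condition depth CondCost and value-depth estimate ValCost
  // passes only if (CondCost - ValCost) * 4 >= 20, i.e. the condition is at
  // least 5 cycles deeper than the predicted value.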
  unsigned MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
  CmovGroups TempGroups;
  std::swap(TempGroups, CmovInstGroups);
  for (auto &Group : TempGroups) {
    bool WorthOpGroup = true;
    for (auto *MI : Group) {
      // Avoid CMOV instructions whose value is used as a pointer to load
      // from. This is another conservative check, to avoid converting a CMOV
      // instruction used in a tree-search-like algorithm, where the branch is
      // unpredictable.
      auto UIs = MRI->use_instructions(MI->defs().begin()->getReg());
      if (!UIs.empty() && ++UIs.begin() == UIs.end()) {
        unsigned Op = UIs.begin()->getOpcode();
        if (Op == X86::MOV64rm || Op == X86::MOV32rm) {
          WorthOpGroup = false;
          break;
        }
      }

      unsigned CondCost =
          DepthMap[OperandToDefMap.lookup(&MI->getOperand(4))].Depth;
      unsigned ValCost = getDepthOfOptCmov(
          DepthMap[OperandToDefMap.lookup(&MI->getOperand(1))].Depth,
          DepthMap[OperandToDefMap.lookup(&MI->getOperand(2))].Depth);
      if (ValCost > CondCost || (CondCost - ValCost) * 4 < MispredictPenalty) {
        WorthOpGroup = false;
        break;
      }
    }

    if (WorthOpGroup)
      CmovInstGroups.push_back(Group);
  }

  return !CmovInstGroups.empty();
}

static bool checkEFLAGSLive(MachineInstr *MI) {
  if (MI->killsRegister(X86::EFLAGS))
    return false;

  // The EFLAGS operand of MI might be missing a kill marker.
  // Figure out whether the EFLAGS operand should be live after MI.
  MachineBasicBlock *BB = MI->getParent();
  MachineBasicBlock::iterator ItrMI = MI;

  // Scan forward through BB for a use/def of EFLAGS.
  for (auto I = std::next(ItrMI), E = BB->end(); I != E; ++I) {
    if (I->readsRegister(X86::EFLAGS))
      return true;
    if (I->definesRegister(X86::EFLAGS))
      return false;
  }

  // We hit the end of the block; check whether EFLAGS is live into a
  // successor.
  for (MachineBasicBlock *Succ : BB->successors())
    if (Succ->isLiveIn(X86::EFLAGS))
      return true;

  return false;
}

/// Given \p First CMOV instruction and \p Last CMOV instruction representing a
/// group of CMOV instructions, which may contain debug instructions in
/// between, move all debug instructions to after the last CMOV instruction,
/// making the CMOV group consecutive.
static void packCmovGroup(MachineInstr *First, MachineInstr *Last) {
  assert(X86::getCondFromCMov(*Last) != X86::COND_INVALID &&
         "Last instruction in a CMOV group must be a CMOV instruction");

  SmallVector<MachineInstr *, 2> DBGInstructions;
  for (auto I = First->getIterator(), E = Last->getIterator(); I != E; I++) {
    if (I->isDebugInstr())
      DBGInstructions.push_back(&*I);
  }

  // Splice the debug instructions after the cmov group.
  MachineBasicBlock *MBB = First->getParent();
  for (auto *MI : DBGInstructions)
    MBB->insertAfter(Last, MI->removeFromParent());
}

void X86CmovConverterPass::convertCmovInstsToBranches(
    SmallVectorImpl<MachineInstr *> &Group) const {
  assert(!Group.empty() && "No CMOV instructions to convert");
  ++NumOfOptimizedCmovGroups;

  // If the CMOV group is not packed, e.g., there are debug instructions
  // between the first CMOV and the last CMOV, then pack the group and make
  // the CMOV instructions consecutive by moving the debug instructions to
  // after the last CMOV.
  packCmovGroup(Group.front(), Group.back());

  // To convert a CMOVcc instruction, we actually have to insert the diamond
  // control-flow pattern. The incoming instruction knows the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and a branch opcode to use.

  // Before
  // -----
  // MBB:
  //   cond = cmp ...
  //   v1 = CMOVge t1, f1, cond
  //   v2 = CMOVlt t2, f2, cond
  //   v3 = CMOVge v1, f3, cond
  //
  // After
  // -----
  // MBB:
  //   cond = cmp ...
  //   jge %SinkMBB
  //
  // FalseMBB:
  //   jmp %SinkMBB
  //
  // SinkMBB:
  //   %v1 = phi[%f1, %FalseMBB], [%t1, %MBB]
  //   %v2 = phi[%t2, %FalseMBB], [%f2, %MBB] ; For CMOV with OppCC switch
  //                                          ; true-value with false-value
  //   %v3 = phi[%f3, %FalseMBB], [%t1, %MBB] ; Phi instruction cannot use
  //                                          ; previous Phi instruction result

  MachineInstr &MI = *Group.front();
  MachineInstr *LastCMOV = Group.back();
  DebugLoc DL = MI.getDebugLoc();

  X86::CondCode CC = X86::getCondFromCMov(MI);
  X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC);
  // Potentially swap the condition codes so that any memory operand to a CMOV
  // is in the *false* position instead of the *true* position. We can invert
  // any non-memory operand CMOV instructions to cope with this and we ensure
  // memory operand CMOVs are only included with a single condition code.
  if (llvm::any_of(Group, [&](MachineInstr *I) {
        return I->mayLoad() && X86::getCondFromCMov(*I) == CC;
      }))
    std::swap(CC, OppCC);

  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction::iterator It = ++MBB->getIterator();
  MachineFunction *F = MBB->getParent();
  const BasicBlock *BB = MBB->getBasicBlock();

  MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(BB);
  MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(BB);
  F->insert(It, FalseMBB);
  F->insert(It, SinkMBB);

  // If the EFLAGS register isn't dead in the terminator, then claim that it's
  // live into the false and sink blocks.
  if (checkEFLAGSLive(LastCMOV)) {
    FalseMBB->addLiveIn(X86::EFLAGS);
    SinkMBB->addLiveIn(X86::EFLAGS);
  }

  // Transfer the remainder of MBB and its successor edges to SinkMBB.
  SinkMBB->splice(SinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(LastCMOV)), MBB->end());
  SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // Add the false and sink blocks as its successors.
  MBB->addSuccessor(FalseMBB);
  MBB->addSuccessor(SinkMBB);

  // Create the conditional branch instruction.
  BuildMI(MBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(CC);

  // Add the sink block to the false block successors.
  FalseMBB->addSuccessor(SinkMBB);

  MachineInstrBuilder MIB;
  MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI);
  MachineBasicBlock::iterator MIItEnd =
      std::next(MachineBasicBlock::iterator(LastCMOV));
  MachineBasicBlock::iterator FalseInsertionPoint = FalseMBB->begin();
  MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();

  // First we need to insert an explicit load on the false path for any memory
  // operand. We also need to potentially do register rewriting here, but it is
  // simpler as the memory operands are always on the false path so we can
  // simply take that input, whatever it is.
  DenseMap<unsigned, unsigned> FalseBBRegRewriteTable;
  for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd;) {
    auto &MI = *MIIt++;
    // Skip any CMOVs in this group which don't load from memory.
    if (!MI.mayLoad()) {
      // Remember the false-side register input.
      Register FalseReg =
          MI.getOperand(X86::getCondFromCMov(MI) == CC ? 1 : 2).getReg();
      // Walk back through any intermediate cmovs referenced.
      while (true) {
        auto FRIt = FalseBBRegRewriteTable.find(FalseReg);
        if (FRIt == FalseBBRegRewriteTable.end())
          break;
        FalseReg = FRIt->second;
      }
      FalseBBRegRewriteTable[MI.getOperand(0).getReg()] = FalseReg;
      continue;
    }

    // The condition must be the *opposite* of the one we've decided to branch
    // on as the branch will go *around* the load and the load should happen
    // when the CMOV condition is false.
    assert(X86::getCondFromCMov(MI) == OppCC &&
           "Can only handle memory-operand cmov instructions with a condition "
           "opposite to the selected branch direction.");

    // The goal is to rewrite the cmov from:
    //
    //   MBB:
    //     %A = CMOVcc %B (tied), (mem)
    //
    // to
    //
    //   MBB:
    //     %A = CMOVcc %B (tied), %C
    //   FalseMBB:
    //     %C = MOV (mem)
    //
    // Which will allow the next loop to rewrite the CMOV in terms of a PHI:
    //
    //   MBB:
    //     JMP!cc SinkMBB
    //   FalseMBB:
    //     %C = MOV (mem)
    //   SinkMBB:
    //     %A = PHI [ %C, FalseMBB ], [ %B, MBB]

    // Get a fresh register to use as the destination of the MOV.
    const TargetRegisterClass *RC = MRI->getRegClass(MI.getOperand(0).getReg());
    Register TmpReg = MRI->createVirtualRegister(RC);

    SmallVector<MachineInstr *, 4> NewMIs;
    bool Unfolded = TII->unfoldMemoryOperand(*MBB->getParent(), MI, TmpReg,
                                             /*UnfoldLoad*/ true,
                                             /*UnfoldStore*/ false, NewMIs);
    (void)Unfolded;
    assert(Unfolded && "Should never fail to unfold a loading cmov!");

    // Move the new CMOV to just before the old one and reset any impacted
    // iterator.
    auto *NewCMOV = NewMIs.pop_back_val();
    assert(X86::getCondFromCMov(*NewCMOV) == OppCC &&
           "Last new instruction isn't the expected CMOV!");
    LLVM_DEBUG(dbgs() << "\tRewritten cmov: "; NewCMOV->dump());
    MBB->insert(MachineBasicBlock::iterator(MI), NewCMOV);
    if (&*MIItBegin == &MI)
      MIItBegin = MachineBasicBlock::iterator(NewCMOV);

    // Sink whatever instructions were needed to produce the unfolded operand
    // into the false block.
    for (auto *NewMI : NewMIs) {
      LLVM_DEBUG(dbgs() << "\tRewritten load instr: "; NewMI->dump());
      FalseMBB->insert(FalseInsertionPoint, NewMI);
      // Re-map any operands that are from other cmovs to the inputs for this
      // block.
      for (auto &MOp : NewMI->uses()) {
        if (!MOp.isReg())
          continue;
        auto It = FalseBBRegRewriteTable.find(MOp.getReg());
        if (It == FalseBBRegRewriteTable.end())
          continue;

        MOp.setReg(It->second);
        // This might have been a kill when it referenced the cmov result, but
        // it won't necessarily be once rewritten.
        // FIXME: We could potentially improve this by tracking whether the
        // operand to the cmov was also a kill, and then skipping the PHI node
        // construction below.
        MOp.setIsKill(false);
      }
    }
    MBB->erase(&MI);

    // Add this PHI to the rewrite table.
    FalseBBRegRewriteTable[NewCMOV->getOperand(0).getReg()] = TmpReg;
  }

  // As we are creating the PHIs, we have to be careful if there is more than
  // one. Later CMOVs may reference the results of earlier CMOVs, but later
  // PHIs have to reference the individual true/false inputs from earlier PHIs.
  // That also means that PHI construction must work forward from earlier to
  // later, and that the code must maintain a mapping from each earlier PHI's
  // destination register to the registers that went into the PHI.
  DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;

  for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) {
    Register DestReg = MIIt->getOperand(0).getReg();
    Register Op1Reg = MIIt->getOperand(1).getReg();
    Register Op2Reg = MIIt->getOperand(2).getReg();

    // If the CMOV we are processing has the opposite condition from the jump
    // we generated, then we have to swap the operands for the PHI that is
    // going to be generated.
    if (X86::getCondFromCMov(*MIIt) == OppCC)
      std::swap(Op1Reg, Op2Reg);

    auto Op1Itr = RegRewriteTable.find(Op1Reg);
    if (Op1Itr != RegRewriteTable.end())
      Op1Reg = Op1Itr->second.first;

    auto Op2Itr = RegRewriteTable.find(Op2Reg);
    if (Op2Itr != RegRewriteTable.end())
      Op2Reg = Op2Itr->second.second;

    // SinkMBB:
    //   %Result = phi [ %FalseValue, FalseMBB ], [ %TrueValue, MBB ]
    // ...
    MIB = BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(X86::PHI), DestReg)
              .addReg(Op1Reg)
              .addMBB(FalseMBB)
              .addReg(Op2Reg)
              .addMBB(MBB);
    (void)MIB;
    LLVM_DEBUG(dbgs() << "\tFrom: "; MIIt->dump());
    LLVM_DEBUG(dbgs() << "\tTo: "; MIB->dump());

    // Add this PHI to the rewrite table.
    RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg);
  }

  // Now remove the CMOV(s).
  MBB->erase(MIItBegin, MIItEnd);

  // Add new basic blocks to MachineLoopInfo.
  if (MachineLoop *L = MLI->getLoopFor(MBB)) {
    L->addBasicBlockToLoop(FalseMBB, MLI->getBase());
    L->addBasicBlockToLoop(SinkMBB, MLI->getBase());
  }
}

INITIALIZE_PASS_BEGIN(X86CmovConverterPass, DEBUG_TYPE, "X86 cmov Conversion",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(X86CmovConverterPass, DEBUG_TYPE, "X86 cmov Conversion",
                    false, false)

FunctionPass *llvm::createX86CmovConverterPass() {
  return new X86CmovConverterPass();
}