//====- X86CmovConversion.cpp - Convert Cmov to Branch --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements a pass that converts X86 cmov instructions into
/// branches when profitable. This pass is conservative. It transforms if and
/// only if it can guarantee a gain with high confidence.
///
/// Thus, the optimization applies under the following conditions:
///   1. Consider as candidates only CMOVs in innermost loops (assume that
///      most hotspots are represented by these loops).
///   2. Given a group of CMOV instructions that are using the same EFLAGS def
///      instruction:
///      a. Consider them as candidates only if all have the same condition
///         code, or the opposite one, to prevent generating more than one
///         conditional jump per EFLAGS def instruction.
///      b. Consider them as candidates only if all are profitable to be
///         converted (assume that one bad conversion may cause a degradation).
///   3. Apply the conversion only for loops that are found profitable and only
///      for CMOV candidates that were found profitable.
///      a. A loop is considered profitable only if the conversion will reduce
///         its depth cost by some threshold.
///      b. A CMOV is considered profitable if the cost of its condition is
///         higher than the average cost of its true-value and false-value by
///         25% of the branch-misprediction-penalty. This assures no
///         degradation even with 25% branch misprediction.
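///
/// A quick worked example of condition 3b (illustrative numbers, not tied to
/// any particular CPU model): if the scheduling model reports a 20-cycle
/// misprediction penalty, a CMOV whose condition depth exceeds the weighted
/// depth of its value operands by at least 20 / 4 = 5 cycles passes the
/// (CondCost - ValCost) * 4 >= MispredictPenalty test applied in
/// checkForProfitableCmovCandidates() below.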
///
/// Note: This pass is assumed to run on SSA machine code.
//
//===----------------------------------------------------------------------===//
//
//  External interfaces:
//      FunctionPass *llvm::createX86CmovConverterPass();
//      bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF);
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/CGPassBuilderOption.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "x86-cmov-conversion"

STATISTIC(NumOfSkippedCmovGroups, "Number of unsupported CMOV-groups");
STATISTIC(NumOfCmovGroupCandidate, "Number of CMOV-group candidates");
STATISTIC(NumOfLoopCandidate, "Number of CMOV-conversion profitable loops");
STATISTIC(NumOfOptimizedCmovGroups, "Number of optimized CMOV-groups");

// This internal switch can be used to turn off the cmov/branch optimization.
static cl::opt<bool>
    EnableCmovConverter("x86-cmov-converter",
                        cl::desc("Enable the X86 cmov-to-branch optimization."),
                        cl::init(true), cl::Hidden);

static cl::opt<unsigned>
    GainCycleThreshold("x86-cmov-converter-threshold",
                       cl::desc("Minimum gain per loop (in cycles) threshold."),
                       cl::init(4), cl::Hidden);

static cl::opt<bool> ForceMemOperand(
    "x86-cmov-converter-force-mem-operand",
    cl::desc("Convert cmovs to branches whenever they have memory operands."),
    cl::init(true), cl::Hidden);

static cl::opt<bool> ForceAll(
    "x86-cmov-converter-force-all",
    cl::desc("Convert all cmovs to branches."),
    cl::init(false), cl::Hidden);
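
// A hypothetical invocation exercising these hidden options (sketch; the
// flags are the ones defined above, but the input file name is just an
// example):
//   llc -mtriple=x86_64-- -x86-cmov-converter-force-all loop.ll
//   llc -mtriple=x86_64-- -x86-cmov-converter=false loop.ll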

namespace {

/// Converts X86 cmov instructions into branches when profitable.
class X86CmovConverterPass : public MachineFunctionPass {
public:
  X86CmovConverterPass() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override { return "X86 cmov Conversion"; }
  bool runOnMachineFunction(MachineFunction &MF) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;

  /// Pass identification, replacement for typeid.
  static char ID;

private:
  MachineRegisterInfo *MRI = nullptr;
  const TargetInstrInfo *TII = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  MachineLoopInfo *MLI = nullptr;
  TargetSchedModel TSchedModel;

  /// List of consecutive CMOV instructions.
  using CmovGroup = SmallVector<MachineInstr *, 2>;
  using CmovGroups = SmallVector<CmovGroup, 2>;

  /// Collect all CMOV-group-candidates in \p Blocks and update \p
  /// CmovInstGroups accordingly.
  ///
  /// \param Blocks List of blocks to process.
  /// \param CmovInstGroups List of consecutive CMOV instructions in the loop.
  /// \returns true iff it found any CMOV-group-candidate.
  bool collectCmovCandidates(ArrayRef<MachineBasicBlock *> Blocks,
                             CmovGroups &CmovInstGroups,
                             bool IncludeLoads = false);

  /// Check if it is profitable to transform each CMOV-group-candidate into a
  /// branch. Remove all groups that are not profitable from \p CmovInstGroups.
  ///
  /// \param Blocks List of blocks to process.
  /// \param CmovInstGroups List of consecutive CMOV instructions in the loop.
  /// \returns true iff any CMOV-group-candidate remains.
  bool checkForProfitableCmovCandidates(ArrayRef<MachineBasicBlock *> Blocks,
                                        CmovGroups &CmovInstGroups);

  /// Convert the given list of consecutive CMOV instructions into a branch.
  ///
  /// \param Group Consecutive CMOV instructions to be converted into a branch.
  void convertCmovInstsToBranches(SmallVectorImpl<MachineInstr *> &Group) const;
};

} // end anonymous namespace

char X86CmovConverterPass::ID = 0;

void X86CmovConverterPass::getAnalysisUsage(AnalysisUsage &AU) const {
  MachineFunctionPass::getAnalysisUsage(AU);
  AU.addRequired<MachineLoopInfo>();
}

bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;
  if (!EnableCmovConverter)
    return false;

  // If the SelectOptimize pass is enabled, cmovs have already been optimized.
  if (!getCGPassBuilderOption().DisableSelectOptimize)
    return false;

  LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
                    << "**********\n");

  bool Changed = false;
  MLI = &getAnalysis<MachineLoopInfo>();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  MRI = &MF.getRegInfo();
  TII = STI.getInstrInfo();
  TRI = STI.getRegisterInfo();
  TSchedModel.init(&STI);

  // Before we handle the more subtle cases of register-register CMOVs inside
  // of potentially hot loops, we want to quickly remove all CMOVs (ForceAll)
  // or the ones with a memory operand (ForceMemOperand option). Such a CMOV
  // risks a stall waiting for the load to complete, a situation that
  // speculative execution behind a branch is better suited to handle on
  // modern x86 chips.
  if (ForceMemOperand || ForceAll) {
    CmovGroups AllCmovGroups;
    SmallVector<MachineBasicBlock *, 4> Blocks;
    for (auto &MBB : MF)
      Blocks.push_back(&MBB);
    if (collectCmovCandidates(Blocks, AllCmovGroups, /*IncludeLoads*/ true)) {
      for (auto &Group : AllCmovGroups) {
        // Skip any group that doesn't contain at least one memory-operand
        // CMOV.
        if (ForceMemOperand && !ForceAll &&
            llvm::none_of(Group, [&](MachineInstr *I) { return I->mayLoad(); }))
          continue;

        // For CMOV groups which we can rewrite and which contain a memory
        // load, always rewrite them. On x86, a CMOV will dramatically amplify
        // any memory latency by blocking speculative execution.
        Changed = true;
        convertCmovInstsToBranches(Group);
      }
    }
    // Early return as ForceAll converts all CmovGroups.
    if (ForceAll)
      return Changed;
  }

  //===--------------------------------------------------------------------===//
  // Register-operand Conversion Algorithm
  // ---------
  //   For each innermost loop
  //     collectCmovCandidates() {
  //       Find all CMOV-group-candidates.
  //     }
  //
  //     checkForProfitableCmovCandidates() {
  //       * Calculate both loop-depth and optimized-loop-depth.
  //       * Use these depths to check for loop transformation profitability.
  //       * Check for CMOV-group-candidate transformation profitability.
  //     }
  //
  //     For each profitable CMOV-group-candidate
  //       convertCmovInstsToBranches() {
  //         * Create FalseBB, SinkBB, Conditional branch to SinkBB.
  //         * Replace each CMOV instruction with a PHI instruction in SinkBB.
  //       }
  //
  // Note: For more details, see each function description.
  //===--------------------------------------------------------------------===//

  // Build up the loops in pre-order.
  SmallVector<MachineLoop *, 4> Loops(MLI->begin(), MLI->end());
  // Note that we need to check size on each iteration as we accumulate child
  // loops.
  for (int i = 0; i < (int)Loops.size(); ++i)
    for (MachineLoop *Child : Loops[i]->getSubLoops())
      Loops.push_back(Child);

  for (MachineLoop *CurrLoop : Loops) {
    // Optimize only innermost loops.
    if (!CurrLoop->getSubLoops().empty())
      continue;

    // List of consecutive CMOV instructions to be processed.
    CmovGroups CmovInstGroups;

    if (!collectCmovCandidates(CurrLoop->getBlocks(), CmovInstGroups))
      continue;

    if (!checkForProfitableCmovCandidates(CurrLoop->getBlocks(),
                                          CmovInstGroups))
      continue;

    Changed = true;
    for (auto &Group : CmovInstGroups)
      convertCmovInstsToBranches(Group);
  }

  return Changed;
}

bool X86CmovConverterPass::collectCmovCandidates(
    ArrayRef<MachineBasicBlock *> Blocks, CmovGroups &CmovInstGroups,
    bool IncludeLoads) {
  //===--------------------------------------------------------------------===//
  // Collect all CMOV-group-candidates and add them into CmovInstGroups.
  //
  // CMOV-group:
  //   CMOV instructions in the same MBB that use the same EFLAGS def
  //   instruction.
  //
  // CMOV-group-candidate:
  //   CMOV-group where all the CMOV instructions are
  //     1. consecutive.
  //     2. have the same condition code or the opposite one.
  //     3. have only register operands (X86::CMOVrr).
  //===--------------------------------------------------------------------===//
  // List of possible improvements (TODOs):
  // --------------------------------------
  // TODO: Add support for X86::CMOVrm instructions.
  // TODO: Add support for X86::SETcc instructions.
  // TODO: Add support for CMOV-groups with non-consecutive CMOV instructions.
  //===--------------------------------------------------------------------===//
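
  // As a concrete illustration (hypothetical source, shown only to make the
  // grouping rule tangible), a loop body computing both a min and a max, e.g.
  //   lo = a < b ? a : b;
  //   hi = a < b ? b : a;
  // typically lowers to a single CMP (the EFLAGS def) followed by two
  // register-register CMOVs with opposite condition codes; that is exactly
  // one CMOV-group-candidate under rules 1-3 above.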

  // Currently processed CMOV-Group.
  CmovGroup Group;
  for (auto *MBB : Blocks) {
    Group.clear();
    // Condition code of the first CMOV instruction in the currently processed
    // range, and its opposite condition code.
    X86::CondCode FirstCC = X86::COND_INVALID, FirstOppCC = X86::COND_INVALID,
                  MemOpCC = X86::COND_INVALID;
    // Indicator of a non-CMOVrr instruction in the currently processed range.
    bool FoundNonCMOVInst = false;
    // Indicator for whether the currently processed CMOV-group should be
    // skipped.
    bool SkipGroup = false;

    for (auto &I : *MBB) {
      // Skip debug instructions.
      if (I.isDebugInstr())
        continue;
      X86::CondCode CC = X86::getCondFromCMov(I);
      // Check if we found a X86::CMOVrr instruction.
      if (CC != X86::COND_INVALID && (IncludeLoads || !I.mayLoad())) {
        if (Group.empty()) {
          // We found the first CMOV in the range; reset the flags.
          FirstCC = CC;
          FirstOppCC = X86::GetOppositeBranchCondition(CC);
          // Clear out the prior group's memory operand CC.
          MemOpCC = X86::COND_INVALID;
          FoundNonCMOVInst = false;
          SkipGroup = false;
        }
        Group.push_back(&I);
        // Check if it is a non-consecutive CMOV instruction or it has a
        // different condition code than FirstCC or FirstOppCC.
        if (FoundNonCMOVInst || (CC != FirstCC && CC != FirstOppCC))
          // Set the SkipGroup indicator to skip the currently processed
          // CMOV-Group.
          SkipGroup = true;
        if (I.mayLoad()) {
          if (MemOpCC == X86::COND_INVALID)
            // The first memory operand CMOV.
            MemOpCC = CC;
          else if (CC != MemOpCC)
            // Can't handle mixed conditions with memory operands.
            SkipGroup = true;
        }
        // Check if we were relying on zero-extending behavior of the CMOV.
        if (!SkipGroup &&
            llvm::any_of(
                MRI->use_nodbg_instructions(I.defs().begin()->getReg()),
                [&](MachineInstr &UseI) {
                  return UseI.getOpcode() == X86::SUBREG_TO_REG;
                }))
          // FIXME: We should model the cost of using an explicit MOV to handle
          // the zero-extension rather than just refusing to handle this.
          SkipGroup = true;
        continue;
      }
      // If Group is empty, keep looking for the first CMOV in the range.
      if (Group.empty())
        continue;

      // We found a non X86::CMOVrr instruction.
      FoundNonCMOVInst = true;
      // Check if this instruction defines EFLAGS to determine the end of the
      // processed range, as there would be no more instructions using the
      // current EFLAGS def.
      if (I.definesRegister(X86::EFLAGS)) {
        // Check if the currently processed CMOV-group should not be skipped
        // and add it as a CMOV-group-candidate.
        if (!SkipGroup)
          CmovInstGroups.push_back(Group);
        else
          ++NumOfSkippedCmovGroups;
        Group.clear();
      }
    }
    // The end of the basic block is considered the end of the range; check if
    // the currently processed CMOV-group should not be skipped and add it as
    // a CMOV-group-candidate.
    if (Group.empty())
      continue;
    if (!SkipGroup)
      CmovInstGroups.push_back(Group);
    else
      ++NumOfSkippedCmovGroups;
  }

  NumOfCmovGroupCandidate += CmovInstGroups.size();
  return !CmovInstGroups.empty();
}

/// \returns Depth of CMOV instruction as if it was converted into branch.
/// \param TrueOpDepth depth cost of CMOV true value operand.
/// \param FalseOpDepth depth cost of CMOV false value operand.
static unsigned getDepthOfOptCmov(unsigned TrueOpDepth, unsigned FalseOpDepth) {
  // The depth of the result after branch conversion is
  // TrueOpDepth * TrueOpProbability + FalseOpDepth * FalseOpProbability.
  // As we have no info about branch weight, we assume 75% for one and 25% for
  // the other, and pick the result with the largest resulting depth.
  return std::max(
      divideCeil(TrueOpDepth * 3 + FalseOpDepth, 4),
      divideCeil(FalseOpDepth * 3 + TrueOpDepth, 4));
}
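
// Worked example of the estimate above: for TrueOpDepth = 8 and
// FalseOpDepth = 2 it returns
//   max(ceil((8 * 3 + 2) / 4), ceil((2 * 3 + 8) / 4)) = max(7, 4) = 7,
// i.e. the branch-converted result is assumed to be at most 7 cycles deep.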

bool X86CmovConverterPass::checkForProfitableCmovCandidates(
    ArrayRef<MachineBasicBlock *> Blocks, CmovGroups &CmovInstGroups) {
  struct DepthInfo {
    /// Depth of original loop.
    unsigned Depth;
    /// Depth of optimized loop.
    unsigned OptDepth;
  };
  /// Number of loop iterations for which instruction depth is calculated.
  static const unsigned LoopIterations = 2;
  DenseMap<MachineInstr *, DepthInfo> DepthMap;
  DepthInfo LoopDepth[LoopIterations] = {{0, 0}, {0, 0}};
  enum { PhyRegType = 0, VirRegType = 1, RegTypeNum = 2 };
  /// For each register type maps the register to its last def instruction.
  DenseMap<unsigned, MachineInstr *> RegDefMaps[RegTypeNum];
  /// Maps a register operand to its def instruction, which can be nullptr if
  /// it is unknown (e.g., the operand is defined outside the loop).
  DenseMap<MachineOperand *, MachineInstr *> OperandToDefMap;

  // Set the depth of an unknown instruction (i.e., nullptr) to zero.
  DepthMap[nullptr] = {0, 0};

  SmallPtrSet<MachineInstr *, 4> CmovInstructions;
  for (auto &Group : CmovInstGroups)
    CmovInstructions.insert(Group.begin(), Group.end());

  //===--------------------------------------------------------------------===//
  // Step 1: Calculate instruction depth and loop depth.
  // Optimized-Loop:
  //   loop with CMOV-group-candidates converted into branches.
  //
  // Instruction-Depth:
  //   instruction latency + max operand depth.
  //   * For a CMOV instruction in the optimized loop the depth is calculated
  //     as: CMOV latency + getDepthOfOptCmov(True-Op-Depth, False-Op-Depth)
  // TODO: Find a better way to estimate the latency of the branch instruction
  //       rather than using the CMOV latency.
  //
  // Loop-Depth:
  //   max instruction depth of all instructions in the loop.
  // Note: the instruction with max depth represents the critical-path in the
  // loop.
  //
  // Loop-Depth[i]:
  //   Loop-Depth calculated for the first `i` iterations.
  // Note: it is enough to calculate depth for up to two iterations.
  //
  // Depth-Diff[i]:
  //   Number of cycles saved in the first `i` iterations by optimizing the
  //   loop.
  //===--------------------------------------------------------------------===//
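  // For instance, if %a has depth 3 and %b has depth 5, and the scheduling
  // model reports a 1-cycle latency for an ADD, then "%c = ADD %a, %b" gets
  // depth max(3, 5) + 1 = 6 (illustrative numbers; real latencies come from
  // TSchedModel below).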
  for (DepthInfo &MaxDepth : LoopDepth) {
    for (auto *MBB : Blocks) {
      // Clear physical registers Def map.
      RegDefMaps[PhyRegType].clear();
      for (MachineInstr &MI : *MBB) {
        // Skip debug instructions.
        if (MI.isDebugInstr())
          continue;
        unsigned MIDepth = 0;
        unsigned MIDepthOpt = 0;
        bool IsCMOV = CmovInstructions.count(&MI);
        for (auto &MO : MI.uses()) {
          // Check for "isUse()" because "uses()" also returns implicit
          // definitions.
          if (!MO.isReg() || !MO.isUse())
            continue;
          Register Reg = MO.getReg();
          auto &RDM = RegDefMaps[Reg.isVirtual()];
          if (MachineInstr *DefMI = RDM.lookup(Reg)) {
            OperandToDefMap[&MO] = DefMI;
            DepthInfo Info = DepthMap.lookup(DefMI);
            MIDepth = std::max(MIDepth, Info.Depth);
            if (!IsCMOV)
              MIDepthOpt = std::max(MIDepthOpt, Info.OptDepth);
          }
        }

        if (IsCMOV)
          MIDepthOpt = getDepthOfOptCmov(
              DepthMap[OperandToDefMap.lookup(&MI.getOperand(1))].OptDepth,
              DepthMap[OperandToDefMap.lookup(&MI.getOperand(2))].OptDepth);

        // Iterate over all operands to handle implicit definitions as well.
        for (auto &MO : MI.operands()) {
          if (!MO.isReg() || !MO.isDef())
            continue;
          Register Reg = MO.getReg();
          RegDefMaps[Reg.isVirtual()][Reg] = &MI;
        }

        unsigned Latency = TSchedModel.computeInstrLatency(&MI);
        DepthMap[&MI] = {MIDepth += Latency, MIDepthOpt += Latency};
        MaxDepth.Depth = std::max(MaxDepth.Depth, MIDepth);
        MaxDepth.OptDepth = std::max(MaxDepth.OptDepth, MIDepthOpt);
      }
    }
  }

  unsigned Diff[LoopIterations] = {LoopDepth[0].Depth - LoopDepth[0].OptDepth,
                                   LoopDepth[1].Depth - LoopDepth[1].OptDepth};

  //===--------------------------------------------------------------------===//
  // Step 2: Check if the loop is worth optimizing.
  // Worth-Optimize-Loop:
  //   case 1: Diff[1] == Diff[0]
  //           The critical-path is iteration independent: there is no
  //           dependency of critical-path instructions on critical-path
  //           instructions of the previous iteration.
  //           Thus, it is enough to check the gain percent of the 1st
  //           iteration. To be conservative, the optimized loop needs to have
  //           a depth of 12.5% fewer cycles than the original loop, per
  //           iteration.
  //
  //   case 2: Diff[1] > Diff[0]
  //           The critical-path is iteration dependent: there is a dependency
  //           of critical-path instructions on critical-path instructions of
  //           the previous iteration.
  //           Thus, check the gain percent of the 2nd iteration (similar to
  //           the previous case), but it is also required to check the
  //           gradient of the gain, i.e., the change in Depth-Diff compared
  //           to the change in Loop-Depth between the 1st and 2nd iterations.
  //           To be conservative, the gradient needs to be at least 50%.
  //
  //   In addition, in order not to optimize loops with a very small gain, the
  //   gain (in cycles) after the 2nd iteration should not be less than a
  //   given threshold. Thus, the check (Diff[1] >= GainCycleThreshold) must
  //   apply.
  //
  // If the loop is not worth optimizing, remove all CMOV-group-candidates.
  //===--------------------------------------------------------------------===//
  if (Diff[1] < GainCycleThreshold)
    return false;

  bool WorthOptLoop = false;
  if (Diff[1] == Diff[0])
    WorthOptLoop = Diff[0] * 8 >= LoopDepth[0].Depth;
  else if (Diff[1] > Diff[0])
    WorthOptLoop =
        (Diff[1] - Diff[0]) * 2 >= (LoopDepth[1].Depth - LoopDepth[0].Depth) &&
        (Diff[1] * 8 >= LoopDepth[1].Depth);

  if (!WorthOptLoop)
    return false;
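
  // Worked example with illustrative numbers: suppose LoopDepth is
  // {Depth: 16, OptDepth: 12} after one iteration and {Depth: 24, OptDepth:
  // 16} after two, so Diff = {4, 8}. Then Diff[1] (8) >= GainCycleThreshold
  // (4), the gradient check (8 - 4) * 2 >= (24 - 16) holds, and 8 * 8 >= 24
  // holds, so the loop is kept as a candidate.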

  ++NumOfLoopCandidate;

  //===--------------------------------------------------------------------===//
  // Step 3: Check for each CMOV-group-candidate if it is worth optimizing.
  // Worth-Optimize-Group:
  //   Iff it is worth optimizing all CMOV instructions in the group.
  //
  // Worth-Optimize-CMOV:
  //   A predicted branch is faster than CMOV by the difference between the
  //   depth of the condition operand and the depth of the taken (predicted)
  //   value operand.
  //   To be conservative, the gain of such a CMOV transformation should cover
  //   at least 25% of the branch-misprediction-penalty.
  //===--------------------------------------------------------------------===//
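  // For example (illustrative numbers): with MispredictPenalty = 20, a CMOV
  // whose condition depth is 10 and whose weighted value depth is 4 yields
  // (10 - 4) * 4 = 24 >= 20, so it is considered worth converting; a gap of
  // only 3 cycles would fail the test below, since 3 * 4 = 12 < 20.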
  unsigned MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
  CmovGroups TempGroups;
  std::swap(TempGroups, CmovInstGroups);
  for (auto &Group : TempGroups) {
    bool WorthOpGroup = true;
    for (auto *MI : Group) {
      // Avoid CMOV instructions whose value is used as a pointer to load
      // from. This is another conservative check to avoid converting a CMOV
      // instruction used with a tree-search-like algorithm, where the branch
      // is unpredictable.
      auto UIs = MRI->use_instructions(MI->defs().begin()->getReg());
      if (!UIs.empty() && ++UIs.begin() == UIs.end()) {
        unsigned Op = UIs.begin()->getOpcode();
        if (Op == X86::MOV64rm || Op == X86::MOV32rm) {
          WorthOpGroup = false;
          break;
        }
      }

      unsigned CondCost =
          DepthMap[OperandToDefMap.lookup(&MI->getOperand(4))].Depth;
      unsigned ValCost = getDepthOfOptCmov(
          DepthMap[OperandToDefMap.lookup(&MI->getOperand(1))].Depth,
          DepthMap[OperandToDefMap.lookup(&MI->getOperand(2))].Depth);
      if (ValCost > CondCost || (CondCost - ValCost) * 4 < MispredictPenalty) {
        WorthOpGroup = false;
        break;
      }
    }

    if (WorthOpGroup)
      CmovInstGroups.push_back(Group);
  }

  return !CmovInstGroups.empty();
}

static bool checkEFLAGSLive(MachineInstr *MI) {
  if (MI->killsRegister(X86::EFLAGS))
    return false;

  // The EFLAGS operand of MI might be missing a kill marker.
  // Figure out whether the EFLAGS operand should be LIVE after MI.
  MachineBasicBlock *BB = MI->getParent();
  MachineBasicBlock::iterator ItrMI = MI;

  // Scan forward through BB for a use/def of EFLAGS.
  for (auto I = std::next(ItrMI), E = BB->end(); I != E; ++I) {
    if (I->readsRegister(X86::EFLAGS))
      return true;
    if (I->definesRegister(X86::EFLAGS))
      return false;
  }

  // We hit the end of the block, check whether EFLAGS is live into a successor.
  for (MachineBasicBlock *Succ : BB->successors())
    if (Succ->isLiveIn(X86::EFLAGS))
      return true;

  return false;
}

/// Given \p First CMOV instruction and \p Last CMOV instruction representing a
/// group of CMOV instructions, which may contain debug instructions in
/// between, move all debug instructions to after the last CMOV instruction,
/// making the CMOV group consecutive.
static void packCmovGroup(MachineInstr *First, MachineInstr *Last) {
  assert(X86::getCondFromCMov(*Last) != X86::COND_INVALID &&
         "Last instruction in a CMOV group must be a CMOV instruction");

  SmallVector<MachineInstr *, 2> DBGInstructions;
  for (auto I = First->getIterator(), E = Last->getIterator(); I != E; I++) {
    if (I->isDebugInstr())
      DBGInstructions.push_back(&*I);
  }

  // Splice the debug instructions after the cmov group.
  MachineBasicBlock *MBB = First->getParent();
  for (auto *MI : DBGInstructions)
    MBB->insertAfter(Last, MI->removeFromParent());
}
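
// For instance, a range of "CMOV, DBG_VALUE, CMOV" is repacked into
// "CMOV, CMOV, DBG_VALUE", so the group can then be handled as one
// consecutive run of CMOVs.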

void X86CmovConverterPass::convertCmovInstsToBranches(
    SmallVectorImpl<MachineInstr *> &Group) const {
  assert(!Group.empty() && "No CMOV instructions to convert");
  ++NumOfOptimizedCmovGroups;

  // If the CMOV group is not packed, e.g., there are debug instructions
  // between the first CMOV and the last CMOV, then pack the group and make
  // the CMOV instructions consecutive by moving the debug instructions to
  // after the last CMOV.
  packCmovGroup(Group.front(), Group.back());

  // To convert a CMOVcc instruction, we actually have to insert the diamond
  // control-flow pattern. The incoming instruction knows the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and a branch opcode to use.

  // Before
  // -----
  // MBB:
  //   cond = cmp ...
  //   v1 = CMOVge t1, f1, cond
  //   v2 = CMOVlt t2, f2, cond
  //   v3 = CMOVge v1, f3, cond
  //
  // After
  // -----
  // MBB:
  //   cond = cmp ...
  //   jge %SinkMBB
  //
  // FalseMBB:
  //   jmp %SinkMBB
  //
  // SinkMBB:
  //   %v1 = phi[%f1, %FalseMBB], [%t1, %MBB]
  //   %v2 = phi[%t2, %FalseMBB], [%f2, %MBB] ; For CMOV with OppCC switch
  //                                          ; true-value with false-value
  //   %v3 = phi[%f3, %FalseMBB], [%t1, %MBB] ; Phi instruction cannot use
  //                                          ; previous Phi instruction result

  MachineInstr &MI = *Group.front();
  MachineInstr *LastCMOV = Group.back();
  DebugLoc DL = MI.getDebugLoc();

  X86::CondCode CC = X86::CondCode(X86::getCondFromCMov(MI));
  X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC);
  // Potentially swap the condition codes so that any memory operand to a CMOV
  // is in the *false* position instead of the *true* position. We can invert
  // any non-memory operand CMOV instructions to cope with this and we ensure
  // memory operand CMOVs are only included with a single condition code.
  if (llvm::any_of(Group, [&](MachineInstr *I) {
        return I->mayLoad() && X86::getCondFromCMov(*I) == CC;
      }))
    std::swap(CC, OppCC);

  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction::iterator It = ++MBB->getIterator();
  MachineFunction *F = MBB->getParent();
  const BasicBlock *BB = MBB->getBasicBlock();

  MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(BB);
  MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(BB);
  F->insert(It, FalseMBB);
  F->insert(It, SinkMBB);

  // If the EFLAGS register isn't dead in the terminator, then claim that it's
  // live into the sink and copy blocks.
  if (checkEFLAGSLive(LastCMOV)) {
    FalseMBB->addLiveIn(X86::EFLAGS);
    SinkMBB->addLiveIn(X86::EFLAGS);
  }

  // Transfer the remainder of BB and its successor edges to SinkMBB.
  SinkMBB->splice(SinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(LastCMOV)), MBB->end());
  SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // Add the false and sink blocks as its successors.
  MBB->addSuccessor(FalseMBB);
  MBB->addSuccessor(SinkMBB);

  // Create the conditional branch instruction.
  BuildMI(MBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(CC);

  // Add the sink block to the false block successors.
  FalseMBB->addSuccessor(SinkMBB);

  MachineInstrBuilder MIB;
  MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI);
  MachineBasicBlock::iterator MIItEnd =
      std::next(MachineBasicBlock::iterator(LastCMOV));
  MachineBasicBlock::iterator FalseInsertionPoint = FalseMBB->begin();
  MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();

  // First we need to insert an explicit load on the false path for any memory
  // operand. We also need to potentially do register rewriting here, but it is
  // simpler as the memory operands are always on the false path so we can
  // simply take that input, whatever it is.
  DenseMap<unsigned, unsigned> FalseBBRegRewriteTable;
  for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd;) {
    auto &MI = *MIIt++;
    // Skip any CMOVs in this group which don't load from memory.
    if (!MI.mayLoad()) {
      // Remember the false-side register input.
      Register FalseReg =
          MI.getOperand(X86::getCondFromCMov(MI) == CC ? 1 : 2).getReg();
      // Walk back through any intermediate cmovs referenced.
      while (true) {
        auto FRIt = FalseBBRegRewriteTable.find(FalseReg);
        if (FRIt == FalseBBRegRewriteTable.end())
          break;
        FalseReg = FRIt->second;
      }
      FalseBBRegRewriteTable[MI.getOperand(0).getReg()] = FalseReg;
      continue;
    }

    // The condition must be the *opposite* of the one we've decided to branch
    // on as the branch will go *around* the load and the load should happen
    // when the CMOV condition is false.
    assert(X86::getCondFromCMov(MI) == OppCC &&
           "Can only handle memory-operand cmov instructions with a condition "
           "opposite to the selected branch direction.");

    // The goal is to rewrite the cmov from:
    //
    //   MBB:
    //     %A = CMOVcc %B (tied), (mem)
    //
    // to
    //
    //   MBB:
    //     %A = CMOVcc %B (tied), %C
    //   FalseMBB:
    //     %C = MOV (mem)
    //
    // Which will allow the next loop to rewrite the CMOV in terms of a PHI:
    //
    //   MBB:
    //     JMP!cc SinkMBB
    //   FalseMBB:
    //     %C = MOV (mem)
    //   SinkMBB:
    //     %A = PHI [ %C, FalseMBB ], [ %B, MBB]

    // Get a fresh register to use as the destination of the MOV.
    const TargetRegisterClass *RC = MRI->getRegClass(MI.getOperand(0).getReg());
    Register TmpReg = MRI->createVirtualRegister(RC);

    SmallVector<MachineInstr *, 4> NewMIs;
    bool Unfolded = TII->unfoldMemoryOperand(*MBB->getParent(), MI, TmpReg,
                                             /*UnfoldLoad*/ true,
                                             /*UnfoldStore*/ false, NewMIs);
    (void)Unfolded;
    assert(Unfolded && "Should never fail to unfold a loading cmov!");

    // Move the new CMOV to just before the old one and reset any impacted
    // iterator.
    auto *NewCMOV = NewMIs.pop_back_val();
    assert(X86::getCondFromCMov(*NewCMOV) == OppCC &&
           "Last new instruction isn't the expected CMOV!");
    LLVM_DEBUG(dbgs() << "\tRewritten cmov: "; NewCMOV->dump());
    MBB->insert(MachineBasicBlock::iterator(MI), NewCMOV);
    if (&*MIItBegin == &MI)
      MIItBegin = MachineBasicBlock::iterator(NewCMOV);

    // Sink whatever instructions were needed to produce the unfolded operand
    // into the false block.
    for (auto *NewMI : NewMIs) {
      LLVM_DEBUG(dbgs() << "\tRewritten load instr: "; NewMI->dump());
      FalseMBB->insert(FalseInsertionPoint, NewMI);
      // Re-map any operands that are from other cmovs to the inputs for this
      // block.
      for (auto &MOp : NewMI->uses()) {
        if (!MOp.isReg())
          continue;
        auto It = FalseBBRegRewriteTable.find(MOp.getReg());
        if (It == FalseBBRegRewriteTable.end())
          continue;

        MOp.setReg(It->second);
        // This might have been a kill when it referenced the cmov result, but
        // it won't necessarily be once rewritten.
        // FIXME: We could potentially improve this by tracking whether the
        // operand to the cmov was also a kill, and then skipping the PHI node
        // construction below.
        MOp.setIsKill(false);
      }
    }
    MBB->erase(&MI);

    // Add this rewrite to the rewrite table.
    FalseBBRegRewriteTable[NewCMOV->getOperand(0).getReg()] = TmpReg;
  }

  // As we are creating the PHIs, we have to be careful if there is more than
  // one. Later CMOVs may reference the results of earlier CMOVs, but later
  // PHIs have to reference the individual true/false inputs from earlier
  // PHIs. That also means that PHI construction must work forward from
  // earlier to later, and that the code must maintain a mapping from an
  // earlier PHI's destination register to the registers that went into it.
  DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;
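  // For example, once "%v1 = PHI [%f1, FalseMBB], [%t1, MBB]" is emitted,
  // RegRewriteTable[%v1] = {%f1, %t1}; a later CMOV that used %v1 as an input
  // will have its PHI reference %f1 or %t1 directly instead of %v1.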

  for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) {
    Register DestReg = MIIt->getOperand(0).getReg();
    Register Op1Reg = MIIt->getOperand(1).getReg();
    Register Op2Reg = MIIt->getOperand(2).getReg();

    // If this CMOV we are processing is the opposite condition from the jump
    // we generated, then we have to swap the operands for the PHI that is
    // going to be generated.
    if (X86::getCondFromCMov(*MIIt) == OppCC)
      std::swap(Op1Reg, Op2Reg);

    auto Op1Itr = RegRewriteTable.find(Op1Reg);
    if (Op1Itr != RegRewriteTable.end())
      Op1Reg = Op1Itr->second.first;

    auto Op2Itr = RegRewriteTable.find(Op2Reg);
    if (Op2Itr != RegRewriteTable.end())
      Op2Reg = Op2Itr->second.second;

    // SinkMBB:
    //   %Result = phi [ %FalseValue, FalseMBB ], [ %TrueValue, MBB ]
    // ...
    MIB = BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(X86::PHI), DestReg)
              .addReg(Op1Reg)
              .addMBB(FalseMBB)
              .addReg(Op2Reg)
              .addMBB(MBB);
    (void)MIB;
    LLVM_DEBUG(dbgs() << "\tFrom: "; MIIt->dump());
    LLVM_DEBUG(dbgs() << "\tTo: "; MIB->dump());

    // Add this PHI to the rewrite table.
    RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg);
  }

  // Now remove the CMOV(s).
  MBB->erase(MIItBegin, MIItEnd);

  // Add new basic blocks to MachineLoopInfo.
  if (MachineLoop *L = MLI->getLoopFor(MBB)) {
    L->addBasicBlockToLoop(FalseMBB, MLI->getBase());
    L->addBasicBlockToLoop(SinkMBB, MLI->getBase());
  }
}

INITIALIZE_PASS_BEGIN(X86CmovConverterPass, DEBUG_TYPE, "X86 cmov Conversion",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(X86CmovConverterPass, DEBUG_TYPE, "X86 cmov Conversion",
                    false, false)

FunctionPass *llvm::createX86CmovConverterPass() {
  return new X86CmovConverterPass();
}