LLVM 19.0.0git
IfConversion.cpp
Go to the documentation of this file.
1//===- IfConversion.cpp - Machine code if conversion pass -----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the machine instruction level if-conversion pass, which
10// tries to convert conditional branches into predicated instructions.
11//
12//===----------------------------------------------------------------------===//
13
14#include "BranchFolding.h"
15#include "llvm/ADT/STLExtras.h"
16#include "llvm/ADT/ScopeExit.h"
17#include "llvm/ADT/SmallSet.h"
19#include "llvm/ADT/SparseSet.h"
20#include "llvm/ADT/Statistic.h"
39#include "llvm/IR/DebugLoc.h"
42#include "llvm/Pass.h"
45#include "llvm/Support/Debug.h"
48#include <algorithm>
49#include <cassert>
50#include <functional>
51#include <iterator>
52#include <memory>
53#include <utility>
54#include <vector>
55
56using namespace llvm;
57
58#define DEBUG_TYPE "if-converter"
59
60// Hidden options for help debugging.
61static cl::opt<int> IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden);
62static cl::opt<int> IfCvtFnStop("ifcvt-fn-stop", cl::init(-1), cl::Hidden);
63static cl::opt<int> IfCvtLimit("ifcvt-limit", cl::init(-1), cl::Hidden);
64static cl::opt<bool> DisableSimple("disable-ifcvt-simple",
65 cl::init(false), cl::Hidden);
66static cl::opt<bool> DisableSimpleF("disable-ifcvt-simple-false",
67 cl::init(false), cl::Hidden);
68static cl::opt<bool> DisableTriangle("disable-ifcvt-triangle",
69 cl::init(false), cl::Hidden);
70static cl::opt<bool> DisableTriangleR("disable-ifcvt-triangle-rev",
71 cl::init(false), cl::Hidden);
72static cl::opt<bool> DisableTriangleF("disable-ifcvt-triangle-false",
73 cl::init(false), cl::Hidden);
74static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond",
75 cl::init(false), cl::Hidden);
76static cl::opt<bool> DisableForkedDiamond("disable-ifcvt-forked-diamond",
77 cl::init(false), cl::Hidden);
78static cl::opt<bool> IfCvtBranchFold("ifcvt-branch-fold",
79 cl::init(true), cl::Hidden);
80
81STATISTIC(NumSimple, "Number of simple if-conversions performed");
82STATISTIC(NumSimpleFalse, "Number of simple (F) if-conversions performed");
83STATISTIC(NumTriangle, "Number of triangle if-conversions performed");
84STATISTIC(NumTriangleRev, "Number of triangle (R) if-conversions performed");
85STATISTIC(NumTriangleFalse,"Number of triangle (F) if-conversions performed");
86STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed");
87STATISTIC(NumDiamonds, "Number of diamond if-conversions performed");
88STATISTIC(NumForkedDiamonds, "Number of forked-diamond if-conversions performed");
89STATISTIC(NumIfConvBBs, "Number of if-converted blocks");
90STATISTIC(NumDupBBs, "Number of duplicated blocks");
91STATISTIC(NumUnpred, "Number of true blocks of diamonds unpredicated");
92
93namespace {
94
95 class IfConverter : public MachineFunctionPass {
96 enum IfcvtKind {
97 ICNotClassfied, // BB data valid, but not classified.
98 ICSimpleFalse, // Same as ICSimple, but on the false path.
99 ICSimple, // BB is entry of an one split, no rejoin sub-CFG.
100 ICTriangleFRev, // Same as ICTriangleFalse, but false path rev condition.
101 ICTriangleRev, // Same as ICTriangle, but true path rev condition.
102 ICTriangleFalse, // Same as ICTriangle, but on the false path.
103 ICTriangle, // BB is entry of a triangle sub-CFG.
104 ICDiamond, // BB is entry of a diamond sub-CFG.
105 ICForkedDiamond // BB is entry of an almost diamond sub-CFG, with a
106 // common tail that can be shared.
107 };
108
109 /// One per MachineBasicBlock, this is used to cache the result
110 /// if-conversion feasibility analysis. This includes results from
111 /// TargetInstrInfo::analyzeBranch() (i.e. TBB, FBB, and Cond), and its
112 /// classification, and common tail block of its successors (if it's a
113 /// diamond shape), its size, whether it's predicable, and whether any
114 /// instruction can clobber the 'would-be' predicate.
115 ///
116 /// IsDone - True if BB is not to be considered for ifcvt.
117 /// IsBeingAnalyzed - True if BB is currently being analyzed.
118 /// IsAnalyzed - True if BB has been analyzed (info is still valid).
119 /// IsEnqueued - True if BB has been enqueued to be ifcvt'ed.
120 /// IsBrAnalyzable - True if analyzeBranch() returns false.
121 /// HasFallThrough - True if BB may fallthrough to the following BB.
122 /// IsUnpredicable - True if BB is known to be unpredicable.
123 /// ClobbersPred - True if BB could modify predicates (e.g. has
124 /// cmp, call, etc.)
125 /// NonPredSize - Number of non-predicated instructions.
126 /// ExtraCost - Extra cost for multi-cycle instructions.
127 /// ExtraCost2 - Some instructions are slower when predicated
128 /// BB - Corresponding MachineBasicBlock.
129 /// TrueBB / FalseBB- See analyzeBranch().
130 /// BrCond - Conditions for end of block conditional branches.
131 /// Predicate - Predicate used in the BB.
132 struct BBInfo {
133 bool IsDone : 1;
134 bool IsBeingAnalyzed : 1;
135 bool IsAnalyzed : 1;
136 bool IsEnqueued : 1;
137 bool IsBrAnalyzable : 1;
138 bool IsBrReversible : 1;
139 bool HasFallThrough : 1;
140 bool IsUnpredicable : 1;
141 bool CannotBeCopied : 1;
142 bool ClobbersPred : 1;
143 unsigned NonPredSize = 0;
144 unsigned ExtraCost = 0;
145 unsigned ExtraCost2 = 0;
146 MachineBasicBlock *BB = nullptr;
147 MachineBasicBlock *TrueBB = nullptr;
148 MachineBasicBlock *FalseBB = nullptr;
151
152 BBInfo() : IsDone(false), IsBeingAnalyzed(false),
153 IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false),
154 IsBrReversible(false), HasFallThrough(false),
155 IsUnpredicable(false), CannotBeCopied(false),
156 ClobbersPred(false) {}
157 };
158
159 /// Record information about pending if-conversions to attempt:
160 /// BBI - Corresponding BBInfo.
161 /// Kind - Type of block. See IfcvtKind.
162 /// NeedSubsumption - True if the to-be-predicated BB has already been
163 /// predicated.
164 /// NumDups - Number of instructions that would be duplicated due
165 /// to this if-conversion. (For diamonds, the number of
166 /// identical instructions at the beginnings of both
167 /// paths).
168 /// NumDups2 - For diamonds, the number of identical instructions
169 /// at the ends of both paths.
170 struct IfcvtToken {
171 BBInfo &BBI;
172 IfcvtKind Kind;
173 unsigned NumDups;
174 unsigned NumDups2;
175 bool NeedSubsumption : 1;
176 bool TClobbersPred : 1;
177 bool FClobbersPred : 1;
178
179 IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0,
180 bool tc = false, bool fc = false)
181 : BBI(b), Kind(k), NumDups(d), NumDups2(d2), NeedSubsumption(s),
182 TClobbersPred(tc), FClobbersPred(fc) {}
183 };
184
185 /// Results of if-conversion feasibility analysis indexed by basic block
186 /// number.
187 std::vector<BBInfo> BBAnalysis;
188 TargetSchedModel SchedModel;
189
190 const TargetLoweringBase *TLI = nullptr;
191 const TargetInstrInfo *TII = nullptr;
192 const TargetRegisterInfo *TRI = nullptr;
193 const MachineBranchProbabilityInfo *MBPI = nullptr;
194 MachineRegisterInfo *MRI = nullptr;
195
196 LivePhysRegs Redefs;
197
198 bool PreRegAlloc = true;
199 bool MadeChange = false;
200 int FnNum = -1;
201 std::function<bool(const MachineFunction &)> PredicateFtor;
202
203 public:
204 static char ID;
205
206 IfConverter(std::function<bool(const MachineFunction &)> Ftor = nullptr)
207 : MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) {
209 }
210
211 void getAnalysisUsage(AnalysisUsage &AU) const override {
216 }
217
218 bool runOnMachineFunction(MachineFunction &MF) override;
219
222 MachineFunctionProperties::Property::NoVRegs);
223 }
224
225 private:
226 bool reverseBranchCondition(BBInfo &BBI) const;
227 bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
228 BranchProbability Prediction) const;
229 bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
230 bool FalseBranch, unsigned &Dups,
231 BranchProbability Prediction) const;
232 bool CountDuplicatedInstructions(
235 unsigned &Dups1, unsigned &Dups2,
237 bool SkipUnconditionalBranches) const;
238 bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
239 unsigned &Dups1, unsigned &Dups2,
240 BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const;
241 bool ValidForkedDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
242 unsigned &Dups1, unsigned &Dups2,
243 BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const;
244 void AnalyzeBranches(BBInfo &BBI);
245 void ScanInstructions(BBInfo &BBI,
248 bool BranchUnpredicable = false) const;
249 bool RescanInstructions(
252 BBInfo &TrueBBI, BBInfo &FalseBBI) const;
253 void AnalyzeBlock(MachineBasicBlock &MBB,
254 std::vector<std::unique_ptr<IfcvtToken>> &Tokens);
255 bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Pred,
256 bool isTriangle = false, bool RevBranch = false,
257 bool hasCommonTail = false);
258 void AnalyzeBlocks(MachineFunction &MF,
259 std::vector<std::unique_ptr<IfcvtToken>> &Tokens);
260 void InvalidatePreds(MachineBasicBlock &MBB);
261 bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind);
262 bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind);
263 bool IfConvertDiamondCommon(BBInfo &BBI, BBInfo &TrueBBI, BBInfo &FalseBBI,
264 unsigned NumDups1, unsigned NumDups2,
265 bool TClobbersPred, bool FClobbersPred,
266 bool RemoveBranch, bool MergeAddEdges);
267 bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
268 unsigned NumDups1, unsigned NumDups2,
269 bool TClobbers, bool FClobbers);
270 bool IfConvertForkedDiamond(BBInfo &BBI, IfcvtKind Kind,
271 unsigned NumDups1, unsigned NumDups2,
272 bool TClobbers, bool FClobbers);
273 void PredicateBlock(BBInfo &BBI,
276 SmallSet<MCPhysReg, 4> *LaterRedefs = nullptr);
277 void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
279 bool IgnoreBr = false);
280 void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true);
281
282 bool MeetIfcvtSizeLimit(MachineBasicBlock &BB,
283 unsigned Cycle, unsigned Extra,
284 BranchProbability Prediction) const {
285 return Cycle > 0 && TII->isProfitableToIfCvt(BB, Cycle, Extra,
286 Prediction);
287 }
288
289 bool MeetIfcvtSizeLimit(BBInfo &TBBInfo, BBInfo &FBBInfo,
290 MachineBasicBlock &CommBB, unsigned Dups,
291 BranchProbability Prediction, bool Forked) const {
292 const MachineFunction &MF = *TBBInfo.BB->getParent();
293 if (MF.getFunction().hasMinSize()) {
294 MachineBasicBlock::iterator TIB = TBBInfo.BB->begin();
295 MachineBasicBlock::iterator FIB = FBBInfo.BB->begin();
296 MachineBasicBlock::iterator TIE = TBBInfo.BB->end();
297 MachineBasicBlock::iterator FIE = FBBInfo.BB->end();
298
299 unsigned Dups1 = 0, Dups2 = 0;
300 if (!CountDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2,
301 *TBBInfo.BB, *FBBInfo.BB,
302 /*SkipUnconditionalBranches*/ true))
303 llvm_unreachable("should already have been checked by ValidDiamond");
304
305 unsigned BranchBytes = 0;
306 unsigned CommonBytes = 0;
307
308 // Count common instructions at the start of the true and false blocks.
309 for (auto &I : make_range(TBBInfo.BB->begin(), TIB)) {
310 LLVM_DEBUG(dbgs() << "Common inst: " << I);
311 CommonBytes += TII->getInstSizeInBytes(I);
312 }
313 for (auto &I : make_range(FBBInfo.BB->begin(), FIB)) {
314 LLVM_DEBUG(dbgs() << "Common inst: " << I);
315 CommonBytes += TII->getInstSizeInBytes(I);
316 }
317
318 // Count instructions at the end of the true and false blocks, after
319 // the ones we plan to predicate. Analyzable branches will be removed
320 // (unless this is a forked diamond), and all other instructions are
321 // common between the two blocks.
322 for (auto &I : make_range(TIE, TBBInfo.BB->end())) {
323 if (I.isBranch() && TBBInfo.IsBrAnalyzable && !Forked) {
324 LLVM_DEBUG(dbgs() << "Saving branch: " << I);
325 BranchBytes += TII->predictBranchSizeForIfCvt(I);
326 } else {
327 LLVM_DEBUG(dbgs() << "Common inst: " << I);
328 CommonBytes += TII->getInstSizeInBytes(I);
329 }
330 }
331 for (auto &I : make_range(FIE, FBBInfo.BB->end())) {
332 if (I.isBranch() && FBBInfo.IsBrAnalyzable && !Forked) {
333 LLVM_DEBUG(dbgs() << "Saving branch: " << I);
334 BranchBytes += TII->predictBranchSizeForIfCvt(I);
335 } else {
336 LLVM_DEBUG(dbgs() << "Common inst: " << I);
337 CommonBytes += TII->getInstSizeInBytes(I);
338 }
339 }
340 for (auto &I : CommBB.terminators()) {
341 if (I.isBranch()) {
342 LLVM_DEBUG(dbgs() << "Saving branch: " << I);
343 BranchBytes += TII->predictBranchSizeForIfCvt(I);
344 }
345 }
346
347 // The common instructions in one branch will be eliminated, halving
348 // their code size.
349 CommonBytes /= 2;
350
351 // Count the instructions which we need to predicate.
352 unsigned NumPredicatedInstructions = 0;
353 for (auto &I : make_range(TIB, TIE)) {
354 if (!I.isDebugInstr()) {
355 LLVM_DEBUG(dbgs() << "Predicating: " << I);
356 NumPredicatedInstructions++;
357 }
358 }
359 for (auto &I : make_range(FIB, FIE)) {
360 if (!I.isDebugInstr()) {
361 LLVM_DEBUG(dbgs() << "Predicating: " << I);
362 NumPredicatedInstructions++;
363 }
364 }
365
366 // Even though we're optimising for size at the expense of performance,
367 // avoid creating really long predicated blocks.
368 if (NumPredicatedInstructions > 15)
369 return false;
370
371 // Some targets (e.g. Thumb2) need to insert extra instructions to
372 // start predicated blocks.
373 unsigned ExtraPredicateBytes = TII->extraSizeToPredicateInstructions(
374 MF, NumPredicatedInstructions);
375
376 LLVM_DEBUG(dbgs() << "MeetIfcvtSizeLimit(BranchBytes=" << BranchBytes
377 << ", CommonBytes=" << CommonBytes
378 << ", NumPredicatedInstructions="
379 << NumPredicatedInstructions
380 << ", ExtraPredicateBytes=" << ExtraPredicateBytes
381 << ")\n");
382 return (BranchBytes + CommonBytes) > ExtraPredicateBytes;
383 } else {
384 unsigned TCycle = TBBInfo.NonPredSize + TBBInfo.ExtraCost - Dups;
385 unsigned FCycle = FBBInfo.NonPredSize + FBBInfo.ExtraCost - Dups;
386 bool Res = TCycle > 0 && FCycle > 0 &&
388 *TBBInfo.BB, TCycle, TBBInfo.ExtraCost2, *FBBInfo.BB,
389 FCycle, FBBInfo.ExtraCost2, Prediction);
390 LLVM_DEBUG(dbgs() << "MeetIfcvtSizeLimit(TCycle=" << TCycle
391 << ", FCycle=" << FCycle
392 << ", TExtra=" << TBBInfo.ExtraCost2 << ", FExtra="
393 << FBBInfo.ExtraCost2 << ") = " << Res << "\n");
394 return Res;
395 }
396 }
397
398 /// Returns true if Block ends without a terminator.
399 bool blockAlwaysFallThrough(BBInfo &BBI) const {
400 return BBI.IsBrAnalyzable && BBI.TrueBB == nullptr;
401 }
402
403 /// Used to sort if-conversion candidates.
404 static bool IfcvtTokenCmp(const std::unique_ptr<IfcvtToken> &C1,
405 const std::unique_ptr<IfcvtToken> &C2) {
406 int Incr1 = (C1->Kind == ICDiamond)
407 ? -(int)(C1->NumDups + C1->NumDups2) : (int)C1->NumDups;
408 int Incr2 = (C2->Kind == ICDiamond)
409 ? -(int)(C2->NumDups + C2->NumDups2) : (int)C2->NumDups;
410 if (Incr1 > Incr2)
411 return true;
412 else if (Incr1 == Incr2) {
413 // Favors subsumption.
414 if (!C1->NeedSubsumption && C2->NeedSubsumption)
415 return true;
416 else if (C1->NeedSubsumption == C2->NeedSubsumption) {
417 // Favors diamond over triangle, etc.
418 if ((unsigned)C1->Kind < (unsigned)C2->Kind)
419 return true;
420 else if (C1->Kind == C2->Kind)
421 return C1->BBI.BB->getNumber() < C2->BBI.BB->getNumber();
422 }
423 }
424 return false;
425 }
426 };
427
428} // end anonymous namespace
429
430char IfConverter::ID = 0;
431
432char &llvm::IfConverterID = IfConverter::ID;
433
434INITIALIZE_PASS_BEGIN(IfConverter, DEBUG_TYPE, "If Converter", false, false)
438
439bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
440 if (skipFunction(MF.getFunction()) || (PredicateFtor && !PredicateFtor(MF)))
441 return false;
442
443 const TargetSubtargetInfo &ST = MF.getSubtarget();
444 TLI = ST.getTargetLowering();
445 TII = ST.getInstrInfo();
446 TRI = ST.getRegisterInfo();
447 MBFIWrapper MBFI(getAnalysis<MachineBlockFrequencyInfo>());
448 MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
449 ProfileSummaryInfo *PSI =
450 &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
451 MRI = &MF.getRegInfo();
452 SchedModel.init(&ST);
453
454 if (!TII) return false;
455
456 PreRegAlloc = MRI->isSSA();
457
458 bool BFChange = false;
459 if (!PreRegAlloc) {
460 // Tail merge tend to expose more if-conversion opportunities.
461 BranchFolder BF(true, false, MBFI, *MBPI, PSI);
462 BFChange = BF.OptimizeFunction(MF, TII, ST.getRegisterInfo());
463 }
464
465 LLVM_DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'"
466 << MF.getName() << "\'");
467
468 if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) {
469 LLVM_DEBUG(dbgs() << " skipped\n");
470 return false;
471 }
472 LLVM_DEBUG(dbgs() << "\n");
473
474 MF.RenumberBlocks();
475 BBAnalysis.resize(MF.getNumBlockIDs());
476
477 std::vector<std::unique_ptr<IfcvtToken>> Tokens;
478 MadeChange = false;
479 unsigned NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle +
480 NumTriangleRev + NumTriangleFalse + NumTriangleFRev + NumDiamonds;
481 while (IfCvtLimit == -1 || (int)NumIfCvts < IfCvtLimit) {
482 // Do an initial analysis for each basic block and find all the potential
483 // candidates to perform if-conversion.
484 bool Change = false;
485 AnalyzeBlocks(MF, Tokens);
486 while (!Tokens.empty()) {
487 std::unique_ptr<IfcvtToken> Token = std::move(Tokens.back());
488 Tokens.pop_back();
489 BBInfo &BBI = Token->BBI;
490 IfcvtKind Kind = Token->Kind;
491 unsigned NumDups = Token->NumDups;
492 unsigned NumDups2 = Token->NumDups2;
493
494 // If the block has been evicted out of the queue or it has already been
495 // marked dead (due to it being predicated), then skip it.
496 if (BBI.IsDone)
497 BBI.IsEnqueued = false;
498 if (!BBI.IsEnqueued)
499 continue;
500
501 BBI.IsEnqueued = false;
502
503 bool RetVal = false;
504 switch (Kind) {
505 default: llvm_unreachable("Unexpected!");
506 case ICSimple:
507 case ICSimpleFalse: {
508 bool isFalse = Kind == ICSimpleFalse;
509 if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break;
510 LLVM_DEBUG(dbgs() << "Ifcvt (Simple"
511 << (Kind == ICSimpleFalse ? " false" : "")
512 << "): " << printMBBReference(*BBI.BB) << " ("
513 << ((Kind == ICSimpleFalse) ? BBI.FalseBB->getNumber()
514 : BBI.TrueBB->getNumber())
515 << ") ");
516 RetVal = IfConvertSimple(BBI, Kind);
517 LLVM_DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
518 if (RetVal) {
519 if (isFalse) ++NumSimpleFalse;
520 else ++NumSimple;
521 }
522 break;
523 }
524 case ICTriangle:
525 case ICTriangleRev:
526 case ICTriangleFalse:
527 case ICTriangleFRev: {
528 bool isFalse = Kind == ICTriangleFalse;
529 bool isRev = (Kind == ICTriangleRev || Kind == ICTriangleFRev);
530 if (DisableTriangle && !isFalse && !isRev) break;
531 if (DisableTriangleR && !isFalse && isRev) break;
532 if (DisableTriangleF && isFalse && !isRev) break;
533 LLVM_DEBUG(dbgs() << "Ifcvt (Triangle");
534 if (isFalse)
535 LLVM_DEBUG(dbgs() << " false");
536 if (isRev)
537 LLVM_DEBUG(dbgs() << " rev");
538 LLVM_DEBUG(dbgs() << "): " << printMBBReference(*BBI.BB)
539 << " (T:" << BBI.TrueBB->getNumber()
540 << ",F:" << BBI.FalseBB->getNumber() << ") ");
541 RetVal = IfConvertTriangle(BBI, Kind);
542 LLVM_DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
543 if (RetVal) {
544 if (isFalse)
545 ++NumTriangleFalse;
546 else if (isRev)
547 ++NumTriangleRev;
548 else
549 ++NumTriangle;
550 }
551 break;
552 }
553 case ICDiamond:
554 if (DisableDiamond) break;
555 LLVM_DEBUG(dbgs() << "Ifcvt (Diamond): " << printMBBReference(*BBI.BB)
556 << " (T:" << BBI.TrueBB->getNumber()
557 << ",F:" << BBI.FalseBB->getNumber() << ") ");
558 RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2,
559 Token->TClobbersPred,
560 Token->FClobbersPred);
561 LLVM_DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
562 if (RetVal) ++NumDiamonds;
563 break;
564 case ICForkedDiamond:
565 if (DisableForkedDiamond) break;
566 LLVM_DEBUG(dbgs() << "Ifcvt (Forked Diamond): "
567 << printMBBReference(*BBI.BB)
568 << " (T:" << BBI.TrueBB->getNumber()
569 << ",F:" << BBI.FalseBB->getNumber() << ") ");
570 RetVal = IfConvertForkedDiamond(BBI, Kind, NumDups, NumDups2,
571 Token->TClobbersPred,
572 Token->FClobbersPred);
573 LLVM_DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
574 if (RetVal) ++NumForkedDiamonds;
575 break;
576 }
577
578 if (RetVal && MRI->tracksLiveness())
579 recomputeLivenessFlags(*BBI.BB);
580
581 Change |= RetVal;
582
583 NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + NumTriangleRev +
584 NumTriangleFalse + NumTriangleFRev + NumDiamonds;
585 if (IfCvtLimit != -1 && (int)NumIfCvts >= IfCvtLimit)
586 break;
587 }
588
589 if (!Change)
590 break;
591 MadeChange |= Change;
592 }
593
594 Tokens.clear();
595 BBAnalysis.clear();
596
597 if (MadeChange && IfCvtBranchFold) {
598 BranchFolder BF(false, false, MBFI, *MBPI, PSI);
599 BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo());
600 }
601
602 MadeChange |= BFChange;
603 return MadeChange;
604}
605
606/// BB has a fallthrough. Find its 'false' successor given its 'true' successor.
608 MachineBasicBlock *TrueBB) {
609 for (MachineBasicBlock *SuccBB : BB->successors()) {
610 if (SuccBB != TrueBB)
611 return SuccBB;
612 }
613 return nullptr;
614}
615
616/// Reverse the condition of the end of the block branch. Swap block's 'true'
617/// and 'false' successors.
618bool IfConverter::reverseBranchCondition(BBInfo &BBI) const {
619 DebugLoc dl; // FIXME: this is nowhere
620 if (!TII->reverseBranchCondition(BBI.BrCond)) {
621 TII->removeBranch(*BBI.BB);
622 TII->insertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond, dl);
623 std::swap(BBI.TrueBB, BBI.FalseBB);
624 return true;
625 }
626 return false;
627}
628
629/// Returns the next block in the function blocks ordering. If it is the end,
630/// returns NULL.
634 if (++I == E)
635 return nullptr;
636 return &*I;
637}
638
639/// Returns true if the 'true' block (along with its predecessor) forms a valid
640/// simple shape for ifcvt. It also returns the number of instructions that the
641/// ifcvt would need to duplicate if performed in Dups.
642bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
643 BranchProbability Prediction) const {
644 Dups = 0;
645 if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
646 return false;
647
648 if (TrueBBI.IsBrAnalyzable)
649 return false;
650
651 if (TrueBBI.BB->pred_size() > 1) {
652 if (TrueBBI.CannotBeCopied ||
653 !TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize,
654 Prediction))
655 return false;
656 Dups = TrueBBI.NonPredSize;
657 }
658
659 return true;
660}
661
662/// Returns true if the 'true' and 'false' blocks (along with their common
663/// predecessor) forms a valid triangle shape for ifcvt. If 'FalseBranch' is
664/// true, it checks if 'true' block's false branch branches to the 'false' block
665/// rather than the other way around. It also returns the number of instructions
666/// that the ifcvt would need to duplicate if performed in 'Dups'.
667bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
668 bool FalseBranch, unsigned &Dups,
669 BranchProbability Prediction) const {
670 Dups = 0;
671 if (TrueBBI.BB == FalseBBI.BB)
672 return false;
673
674 if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
675 return false;
676
677 if (TrueBBI.BB->pred_size() > 1) {
678 if (TrueBBI.CannotBeCopied)
679 return false;
680
681 unsigned Size = TrueBBI.NonPredSize;
682 if (TrueBBI.IsBrAnalyzable) {
683 if (TrueBBI.TrueBB && TrueBBI.BrCond.empty())
684 // Ends with an unconditional branch. It will be removed.
685 --Size;
686 else {
687 MachineBasicBlock *FExit = FalseBranch
688 ? TrueBBI.TrueBB : TrueBBI.FalseBB;
689 if (FExit)
690 // Require a conditional branch
691 ++Size;
692 }
693 }
694 if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size, Prediction))
695 return false;
696 Dups = Size;
697 }
698
699 MachineBasicBlock *TExit = FalseBranch ? TrueBBI.FalseBB : TrueBBI.TrueBB;
700 if (!TExit && blockAlwaysFallThrough(TrueBBI)) {
701 MachineFunction::iterator I = TrueBBI.BB->getIterator();
702 if (++I == TrueBBI.BB->getParent()->end())
703 return false;
704 TExit = &*I;
705 }
706 return TExit && TExit == FalseBBI.BB;
707}
708
709/// Count duplicated instructions and move the iterators to show where they
710/// are.
711/// @param TIB True Iterator Begin
712/// @param FIB False Iterator Begin
713/// These two iterators initially point to the first instruction of the two
714/// blocks, and finally point to the first non-shared instruction.
715/// @param TIE True Iterator End
716/// @param FIE False Iterator End
717/// These two iterators initially point to End() for the two blocks() and
718/// finally point to the first shared instruction in the tail.
719/// Upon return [TIB, TIE), and [FIB, FIE) mark the un-duplicated portions of
720/// two blocks.
721/// @param Dups1 count of duplicated instructions at the beginning of the 2
722/// blocks.
723/// @param Dups2 count of duplicated instructions at the end of the 2 blocks.
724/// @param SkipUnconditionalBranches if true, Don't make sure that
725/// unconditional branches at the end of the blocks are the same. True is
726/// passed when the blocks are analyzable to allow for fallthrough to be
727/// handled.
728/// @return false if the shared portion prevents if conversion.
729bool IfConverter::CountDuplicatedInstructions(
734 unsigned &Dups1, unsigned &Dups2,
736 bool SkipUnconditionalBranches) const {
737 while (TIB != TIE && FIB != FIE) {
738 // Skip dbg_value instructions. These do not count.
739 TIB = skipDebugInstructionsForward(TIB, TIE, false);
740 FIB = skipDebugInstructionsForward(FIB, FIE, false);
741 if (TIB == TIE || FIB == FIE)
742 break;
743 if (!TIB->isIdenticalTo(*FIB))
744 break;
745 // A pred-clobbering instruction in the shared portion prevents
746 // if-conversion.
747 std::vector<MachineOperand> PredDefs;
748 if (TII->ClobbersPredicate(*TIB, PredDefs, false))
749 return false;
750 // If we get all the way to the branch instructions, don't count them.
751 if (!TIB->isBranch())
752 ++Dups1;
753 ++TIB;
754 ++FIB;
755 }
756
757 // Check for already containing all of the block.
758 if (TIB == TIE || FIB == FIE)
759 return true;
760 // Now, in preparation for counting duplicate instructions at the ends of the
761 // blocks, switch to reverse_iterators. Note that getReverse() returns an
762 // iterator that points to the same instruction, unlike std::reverse_iterator.
763 // We have to do our own shifting so that we get the same range.
764 MachineBasicBlock::reverse_iterator RTIE = std::next(TIE.getReverse());
765 MachineBasicBlock::reverse_iterator RFIE = std::next(FIE.getReverse());
766 const MachineBasicBlock::reverse_iterator RTIB = std::next(TIB.getReverse());
767 const MachineBasicBlock::reverse_iterator RFIB = std::next(FIB.getReverse());
768
769 if (!TBB.succ_empty() || !FBB.succ_empty()) {
770 if (SkipUnconditionalBranches) {
771 while (RTIE != RTIB && RTIE->isUnconditionalBranch())
772 ++RTIE;
773 while (RFIE != RFIB && RFIE->isUnconditionalBranch())
774 ++RFIE;
775 }
776 }
777
778 // Count duplicate instructions at the ends of the blocks.
779 while (RTIE != RTIB && RFIE != RFIB) {
780 // Skip dbg_value instructions. These do not count.
781 // Note that these are reverse iterators going forward.
782 RTIE = skipDebugInstructionsForward(RTIE, RTIB, false);
783 RFIE = skipDebugInstructionsForward(RFIE, RFIB, false);
784 if (RTIE == RTIB || RFIE == RFIB)
785 break;
786 if (!RTIE->isIdenticalTo(*RFIE))
787 break;
788 // We have to verify that any branch instructions are the same, and then we
789 // don't count them toward the # of duplicate instructions.
790 if (!RTIE->isBranch())
791 ++Dups2;
792 ++RTIE;
793 ++RFIE;
794 }
795 TIE = std::next(RTIE.getReverse());
796 FIE = std::next(RFIE.getReverse());
797 return true;
798}
799
800/// RescanInstructions - Run ScanInstructions on a pair of blocks.
801/// @param TIB - True Iterator Begin, points to first non-shared instruction
802/// @param FIB - False Iterator Begin, points to first non-shared instruction
803/// @param TIE - True Iterator End, points past last non-shared instruction
804/// @param FIE - False Iterator End, points past last non-shared instruction
805/// @param TrueBBI - BBInfo to update for the true block.
806/// @param FalseBBI - BBInfo to update for the false block.
807/// @returns - false if either block cannot be predicated or if both blocks end
808/// with a predicate-clobbering instruction.
809bool IfConverter::RescanInstructions(
812 BBInfo &TrueBBI, BBInfo &FalseBBI) const {
813 bool BranchUnpredicable = true;
814 TrueBBI.IsUnpredicable = FalseBBI.IsUnpredicable = false;
815 ScanInstructions(TrueBBI, TIB, TIE, BranchUnpredicable);
816 if (TrueBBI.IsUnpredicable)
817 return false;
818 ScanInstructions(FalseBBI, FIB, FIE, BranchUnpredicable);
819 if (FalseBBI.IsUnpredicable)
820 return false;
821 if (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred)
822 return false;
823 return true;
824}
825
826#ifndef NDEBUG
828 MachineBasicBlock *MBB1,
829 MachineBasicBlock *MBB2) {
830 const MachineBasicBlock::reverse_iterator B1 = MBB1->rend();
831 const MachineBasicBlock::reverse_iterator B2 = MBB2->rend();
834 while (E1 != B1 && E2 != B2) {
835 skipDebugInstructionsForward(E1, B1, false);
836 skipDebugInstructionsForward(E2, B2, false);
837 if (E1 == B1 && E2 == B2)
838 break;
839
840 if (E1 == B1) {
841 assert(!E2->isBranch() && "Branch mis-match, one block is empty.");
842 break;
843 }
844 if (E2 == B2) {
845 assert(!E1->isBranch() && "Branch mis-match, one block is empty.");
846 break;
847 }
848
849 if (E1->isBranch() || E2->isBranch())
850 assert(E1->isIdenticalTo(*E2) &&
851 "Branch mis-match, branch instructions don't match.");
852 else
853 break;
854 ++E1;
855 ++E2;
856 }
857}
858#endif
859
860/// ValidForkedDiamond - Returns true if the 'true' and 'false' blocks (along
861/// with their common predecessor) form a diamond if a common tail block is
862/// extracted.
863/// While not strictly a diamond, this pattern would form a diamond if
864/// tail-merging had merged the shared tails.
865/// EBB
866/// _/ \_
867/// | |
868/// TBB FBB
869/// / \ / \
870/// FalseBB TrueBB FalseBB
871/// Currently only handles analyzable branches.
872/// Specifically excludes actual diamonds to avoid overlap.
873bool IfConverter::ValidForkedDiamond(
874 BBInfo &TrueBBI, BBInfo &FalseBBI,
875 unsigned &Dups1, unsigned &Dups2,
876 BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const {
877 Dups1 = Dups2 = 0;
878 if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone ||
879 FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone)
880 return false;
881
882 if (!TrueBBI.IsBrAnalyzable || !FalseBBI.IsBrAnalyzable)
883 return false;
884 // Don't IfConvert blocks that can't be folded into their predecessor.
885 if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1)
886 return false;
887
888 // This function is specifically looking for conditional tails, as
889 // unconditional tails are already handled by the standard diamond case.
890 if (TrueBBI.BrCond.size() == 0 ||
891 FalseBBI.BrCond.size() == 0)
892 return false;
893
894 MachineBasicBlock *TT = TrueBBI.TrueBB;
895 MachineBasicBlock *TF = TrueBBI.FalseBB;
896 MachineBasicBlock *FT = FalseBBI.TrueBB;
897 MachineBasicBlock *FF = FalseBBI.FalseBB;
898
899 if (!TT)
900 TT = getNextBlock(*TrueBBI.BB);
901 if (!TF)
902 TF = getNextBlock(*TrueBBI.BB);
903 if (!FT)
904 FT = getNextBlock(*FalseBBI.BB);
905 if (!FF)
906 FF = getNextBlock(*FalseBBI.BB);
907
908 if (!TT || !TF)
909 return false;
910
911 // Check successors. If they don't match, bail.
912 if (!((TT == FT && TF == FF) || (TF == FT && TT == FF)))
913 return false;
914
915 bool FalseReversed = false;
916 if (TF == FT && TT == FF) {
917 // If the branches are opposing, but we can't reverse, don't do it.
918 if (!FalseBBI.IsBrReversible)
919 return false;
920 FalseReversed = true;
921 reverseBranchCondition(FalseBBI);
922 }
923 auto UnReverseOnExit = make_scope_exit([&]() {
924 if (FalseReversed)
925 reverseBranchCondition(FalseBBI);
926 });
927
928 // Count duplicate instructions at the beginning of the true and false blocks.
929 MachineBasicBlock::iterator TIB = TrueBBI.BB->begin();
930 MachineBasicBlock::iterator FIB = FalseBBI.BB->begin();
931 MachineBasicBlock::iterator TIE = TrueBBI.BB->end();
932 MachineBasicBlock::iterator FIE = FalseBBI.BB->end();
933 if(!CountDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2,
934 *TrueBBI.BB, *FalseBBI.BB,
935 /* SkipUnconditionalBranches */ true))
936 return false;
937
938 TrueBBICalc.BB = TrueBBI.BB;
939 FalseBBICalc.BB = FalseBBI.BB;
940 TrueBBICalc.IsBrAnalyzable = TrueBBI.IsBrAnalyzable;
941 FalseBBICalc.IsBrAnalyzable = FalseBBI.IsBrAnalyzable;
942 if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc))
943 return false;
944
945 // The size is used to decide whether to if-convert, and the shared portions
946 // are subtracted off. Because of the subtraction, we just use the size that
947 // was calculated by the original ScanInstructions, as it is correct.
948 TrueBBICalc.NonPredSize = TrueBBI.NonPredSize;
949 FalseBBICalc.NonPredSize = FalseBBI.NonPredSize;
950 return true;
951}
952
953/// ValidDiamond - Returns true if the 'true' and 'false' blocks (along
954/// with their common predecessor) forms a valid diamond shape for ifcvt.
955bool IfConverter::ValidDiamond(
956 BBInfo &TrueBBI, BBInfo &FalseBBI,
957 unsigned &Dups1, unsigned &Dups2,
958 BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const {
959 Dups1 = Dups2 = 0;
960 if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone ||
961 FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone)
962 return false;
963
964 // If the True and False BBs are equal we're dealing with a degenerate case
965 // that we don't treat as a diamond.
966 if (TrueBBI.BB == FalseBBI.BB)
967 return false;
968
969 MachineBasicBlock *TT = TrueBBI.TrueBB;
970 MachineBasicBlock *FT = FalseBBI.TrueBB;
971
972 if (!TT && blockAlwaysFallThrough(TrueBBI))
973 TT = getNextBlock(*TrueBBI.BB);
974 if (!FT && blockAlwaysFallThrough(FalseBBI))
975 FT = getNextBlock(*FalseBBI.BB);
976 if (TT != FT)
977 return false;
978 if (!TT && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable))
979 return false;
980 if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1)
981 return false;
982
983 // FIXME: Allow true block to have an early exit?
984 if (TrueBBI.FalseBB || FalseBBI.FalseBB)
985 return false;
986
987 // Count duplicate instructions at the beginning and end of the true and
988 // false blocks.
989 // Skip unconditional branches only if we are considering an analyzable
990 // diamond. Otherwise the branches must be the same.
991 bool SkipUnconditionalBranches =
992 TrueBBI.IsBrAnalyzable && FalseBBI.IsBrAnalyzable;
993 MachineBasicBlock::iterator TIB = TrueBBI.BB->begin();
994 MachineBasicBlock::iterator FIB = FalseBBI.BB->begin();
995 MachineBasicBlock::iterator TIE = TrueBBI.BB->end();
996 MachineBasicBlock::iterator FIE = FalseBBI.BB->end();
997 if(!CountDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2,
998 *TrueBBI.BB, *FalseBBI.BB,
999 SkipUnconditionalBranches))
1000 return false;
1001
1002 TrueBBICalc.BB = TrueBBI.BB;
1003 FalseBBICalc.BB = FalseBBI.BB;
1004 TrueBBICalc.IsBrAnalyzable = TrueBBI.IsBrAnalyzable;
1005 FalseBBICalc.IsBrAnalyzable = FalseBBI.IsBrAnalyzable;
1006 if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc))
1007 return false;
1008 // The size is used to decide whether to if-convert, and the shared portions
1009 // are subtracted off. Because of the subtraction, we just use the size that
1010 // was calculated by the original ScanInstructions, as it is correct.
1011 TrueBBICalc.NonPredSize = TrueBBI.NonPredSize;
1012 FalseBBICalc.NonPredSize = FalseBBI.NonPredSize;
1013 return true;
1014}
1015
1016/// AnalyzeBranches - Look at the branches at the end of a block to determine if
1017/// the block is predicable.
1018void IfConverter::AnalyzeBranches(BBInfo &BBI) {
1019 if (BBI.IsDone)
1020 return;
1021
1022 BBI.TrueBB = BBI.FalseBB = nullptr;
1023 BBI.BrCond.clear();
1024 BBI.IsBrAnalyzable =
1025 !TII->analyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond);
1026 if (!BBI.IsBrAnalyzable) {
1027 BBI.TrueBB = nullptr;
1028 BBI.FalseBB = nullptr;
1029 BBI.BrCond.clear();
1030 }
1031
1032 SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
1033 BBI.IsBrReversible = (RevCond.size() == 0) ||
1034 !TII->reverseBranchCondition(RevCond);
1035 BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == nullptr;
1036
1037 if (BBI.BrCond.size()) {
1038 // No false branch. This BB must end with a conditional branch and a
1039 // fallthrough.
1040 if (!BBI.FalseBB)
1041 BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB);
1042 if (!BBI.FalseBB) {
1043 // Malformed bcc? True and false blocks are the same?
1044 BBI.IsUnpredicable = true;
1045 }
1046 }
1047}
1048
1049/// ScanInstructions - Scan all the instructions in the block to determine if
1050/// the block is predicable. In most cases, that means all the instructions
1051/// in the block are isPredicable(). Also checks if the block contains any
1052/// instruction which can clobber a predicate (e.g. condition code register).
1053/// If so, the block is not predicable unless it's the last instruction.
1054void IfConverter::ScanInstructions(BBInfo &BBI,
1057 bool BranchUnpredicable) const {
1058 if (BBI.IsDone || BBI.IsUnpredicable)
1059 return;
1060
1061 bool AlreadyPredicated = !BBI.Predicate.empty();
1062
1063 BBI.NonPredSize = 0;
1064 BBI.ExtraCost = 0;
1065 BBI.ExtraCost2 = 0;
1066 BBI.ClobbersPred = false;
1067 for (MachineInstr &MI : make_range(Begin, End)) {
1068 if (MI.isDebugInstr())
1069 continue;
1070
1071 // It's unsafe to duplicate convergent instructions in this context, so set
1072 // BBI.CannotBeCopied to true if MI is convergent. To see why, consider the
1073 // following CFG, which is subject to our "simple" transformation.
1074 //
1075 // BB0 // if (c1) goto BB1; else goto BB2;
1076 // / \
1077 // BB1 |
1078 // | BB2 // if (c2) goto TBB; else goto FBB;
1079 // | / |
1080 // | / |
1081 // TBB |
1082 // | |
1083 // | FBB
1084 // |
1085 // exit
1086 //
1087 // Suppose we want to move TBB's contents up into BB1 and BB2 (in BB1 they'd
1088 // be unconditional, and in BB2, they'd be predicated upon c2), and suppose
1089 // TBB contains a convergent instruction. This is safe iff doing so does
1090 // not add a control-flow dependency to the convergent instruction -- i.e.,
1091 // it's safe iff the set of control flows that leads us to the convergent
1092 // instruction does not get smaller after the transformation.
1093 //
1094 // Originally we executed TBB if c1 || c2. After the transformation, there
1095 // are two copies of TBB's instructions. We get to the first if c1, and we
1096 // get to the second if !c1 && c2.
1097 //
1098 // There are clearly fewer ways to satisfy the condition "c1" than
1099 // "c1 || c2". Since we've shrunk the set of control flows which lead to
1100 // our convergent instruction, the transformation is unsafe.
1101 if (MI.isNotDuplicable() || MI.isConvergent())
1102 BBI.CannotBeCopied = true;
1103
1105 bool isCondBr = BBI.IsBrAnalyzable && MI.isConditionalBranch();
1106
1107 if (BranchUnpredicable && MI.isBranch()) {
1108 BBI.IsUnpredicable = true;
1109 return;
1110 }
1111
1112 // A conditional branch is not predicable, but it may be eliminated.
1113 if (isCondBr)
1114 continue;
1115
1116 if (!isPredicated) {
1117 BBI.NonPredSize++;
1118 unsigned ExtraPredCost = TII->getPredicationCost(MI);
1119 unsigned NumCycles = SchedModel.computeInstrLatency(&MI, false);
1120 if (NumCycles > 1)
1121 BBI.ExtraCost += NumCycles-1;
1122 BBI.ExtraCost2 += ExtraPredCost;
1123 } else if (!AlreadyPredicated) {
1124 // FIXME: This instruction is already predicated before the
1125 // if-conversion pass. It's probably something like a conditional move.
1126 // Mark this block unpredicable for now.
1127 BBI.IsUnpredicable = true;
1128 return;
1129 }
1130
1131 if (BBI.ClobbersPred && !isPredicated) {
1132 // Predicate modification instruction should end the block (except for
1133 // already predicated instructions and end of block branches).
1134 // Predicate may have been modified, the subsequent (currently)
1135 // unpredicated instructions cannot be correctly predicated.
1136 BBI.IsUnpredicable = true;
1137 return;
1138 }
1139
1140 // FIXME: Make use of PredDefs? e.g. ADDC, SUBC sets predicates but are
1141 // still potentially predicable.
1142 std::vector<MachineOperand> PredDefs;
1143 if (TII->ClobbersPredicate(MI, PredDefs, true))
1144 BBI.ClobbersPred = true;
1145
1146 if (!TII->isPredicable(MI)) {
1147 BBI.IsUnpredicable = true;
1148 return;
1149 }
1150 }
1151}
1152
1153/// Determine if the block is a suitable candidate to be predicated by the
1154/// specified predicate.
1155/// @param BBI BBInfo for the block to check
1156/// @param Pred Predicate array for the branch that leads to BBI
1157/// @param isTriangle true if the Analysis is for a triangle
1158/// @param RevBranch true if Reverse(Pred) leads to BBI (e.g. BBI is the false
1159/// case
1160/// @param hasCommonTail true if BBI shares a tail with a sibling block that
1161/// contains any instruction that would make the block unpredicable.
1162bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
1164 bool isTriangle, bool RevBranch,
1165 bool hasCommonTail) {
1166 // If the block is dead or unpredicable, then it cannot be predicated.
1167 // Two blocks may share a common unpredicable tail, but this doesn't prevent
1168 // them from being if-converted. The non-shared portion is assumed to have
1169 // been checked
1170 if (BBI.IsDone || (BBI.IsUnpredicable && !hasCommonTail))
1171 return false;
1172
1173 // If it is already predicated but we couldn't analyze its terminator, the
1174 // latter might fallthrough, but we can't determine where to.
1175 // Conservatively avoid if-converting again.
1176 if (BBI.Predicate.size() && !BBI.IsBrAnalyzable)
1177 return false;
1178
1179 // If it is already predicated, check if the new predicate subsumes
1180 // its predicate.
1181 if (BBI.Predicate.size() && !TII->SubsumesPredicate(Pred, BBI.Predicate))
1182 return false;
1183
1184 if (!hasCommonTail && BBI.BrCond.size()) {
1185 if (!isTriangle)
1186 return false;
1187
1188 // Test predicate subsumption.
1189 SmallVector<MachineOperand, 4> RevPred(Pred.begin(), Pred.end());
1190 SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
1191 if (RevBranch) {
1193 return false;
1194 }
1195 if (TII->reverseBranchCondition(RevPred) ||
1196 !TII->SubsumesPredicate(Cond, RevPred))
1197 return false;
1198 }
1199
1200 return true;
1201}
1202
1203/// Analyze the structure of the sub-CFG starting from the specified block.
1204/// Record its successors and whether it looks like an if-conversion candidate.
1205void IfConverter::AnalyzeBlock(
1206 MachineBasicBlock &MBB, std::vector<std::unique_ptr<IfcvtToken>> &Tokens) {
1207 struct BBState {
1208 BBState(MachineBasicBlock &MBB) : MBB(&MBB) {}
1210
1211 /// This flag is true if MBB's successors have been analyzed.
1212 bool SuccsAnalyzed = false;
1213 };
1214
1215 // Push MBB to the stack.
1216 SmallVector<BBState, 16> BBStack(1, MBB);
1217
1218 while (!BBStack.empty()) {
1219 BBState &State = BBStack.back();
1220 MachineBasicBlock *BB = State.MBB;
1221 BBInfo &BBI = BBAnalysis[BB->getNumber()];
1222
1223 if (!State.SuccsAnalyzed) {
1224 if (BBI.IsAnalyzed || BBI.IsBeingAnalyzed) {
1225 BBStack.pop_back();
1226 continue;
1227 }
1228
1229 BBI.BB = BB;
1230 BBI.IsBeingAnalyzed = true;
1231
1232 AnalyzeBranches(BBI);
1233 MachineBasicBlock::iterator Begin = BBI.BB->begin();
1234 MachineBasicBlock::iterator End = BBI.BB->end();
1235 ScanInstructions(BBI, Begin, End);
1236
1237 // Unanalyzable or ends with fallthrough or unconditional branch, or if is
1238 // not considered for ifcvt anymore.
1239 if (!BBI.IsBrAnalyzable || BBI.BrCond.empty() || BBI.IsDone) {
1240 BBI.IsBeingAnalyzed = false;
1241 BBI.IsAnalyzed = true;
1242 BBStack.pop_back();
1243 continue;
1244 }
1245
1246 // Do not ifcvt if either path is a back edge to the entry block.
1247 if (BBI.TrueBB == BB || BBI.FalseBB == BB) {
1248 BBI.IsBeingAnalyzed = false;
1249 BBI.IsAnalyzed = true;
1250 BBStack.pop_back();
1251 continue;
1252 }
1253
1254 // Do not ifcvt if true and false fallthrough blocks are the same.
1255 if (!BBI.FalseBB) {
1256 BBI.IsBeingAnalyzed = false;
1257 BBI.IsAnalyzed = true;
1258 BBStack.pop_back();
1259 continue;
1260 }
1261
1262 // Push the False and True blocks to the stack.
1263 State.SuccsAnalyzed = true;
1264 BBStack.push_back(*BBI.FalseBB);
1265 BBStack.push_back(*BBI.TrueBB);
1266 continue;
1267 }
1268
1269 BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
1270 BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
1271
1272 if (TrueBBI.IsDone && FalseBBI.IsDone) {
1273 BBI.IsBeingAnalyzed = false;
1274 BBI.IsAnalyzed = true;
1275 BBStack.pop_back();
1276 continue;
1277 }
1278
1280 RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
1281 bool CanRevCond = !TII->reverseBranchCondition(RevCond);
1282
1283 unsigned Dups = 0;
1284 unsigned Dups2 = 0;
1285 bool TNeedSub = !TrueBBI.Predicate.empty();
1286 bool FNeedSub = !FalseBBI.Predicate.empty();
1287 bool Enqueued = false;
1288
1289 BranchProbability Prediction = MBPI->getEdgeProbability(BB, TrueBBI.BB);
1290
1291 if (CanRevCond) {
1292 BBInfo TrueBBICalc, FalseBBICalc;
1293 auto feasibleDiamond = [&](bool Forked) {
1294 bool MeetsSize = MeetIfcvtSizeLimit(TrueBBICalc, FalseBBICalc, *BB,
1295 Dups + Dups2, Prediction, Forked);
1296 bool TrueFeasible = FeasibilityAnalysis(TrueBBI, BBI.BrCond,
1297 /* IsTriangle */ false, /* RevCond */ false,
1298 /* hasCommonTail */ true);
1299 bool FalseFeasible = FeasibilityAnalysis(FalseBBI, RevCond,
1300 /* IsTriangle */ false, /* RevCond */ false,
1301 /* hasCommonTail */ true);
1302 return MeetsSize && TrueFeasible && FalseFeasible;
1303 };
1304
1305 if (ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2,
1306 TrueBBICalc, FalseBBICalc)) {
1307 if (feasibleDiamond(false)) {
1308 // Diamond:
1309 // EBB
1310 // / \_
1311 // | |
1312 // TBB FBB
1313 // \ /
1314 // TailBB
1315 // Note TailBB can be empty.
1316 Tokens.push_back(std::make_unique<IfcvtToken>(
1317 BBI, ICDiamond, TNeedSub | FNeedSub, Dups, Dups2,
1318 (bool) TrueBBICalc.ClobbersPred, (bool) FalseBBICalc.ClobbersPred));
1319 Enqueued = true;
1320 }
1321 } else if (ValidForkedDiamond(TrueBBI, FalseBBI, Dups, Dups2,
1322 TrueBBICalc, FalseBBICalc)) {
1323 if (feasibleDiamond(true)) {
1324 // ForkedDiamond:
1325 // if TBB and FBB have a common tail that includes their conditional
1326 // branch instructions, then we can If Convert this pattern.
1327 // EBB
1328 // _/ \_
1329 // | |
1330 // TBB FBB
1331 // / \ / \
1332 // FalseBB TrueBB FalseBB
1333 //
1334 Tokens.push_back(std::make_unique<IfcvtToken>(
1335 BBI, ICForkedDiamond, TNeedSub | FNeedSub, Dups, Dups2,
1336 (bool) TrueBBICalc.ClobbersPred, (bool) FalseBBICalc.ClobbersPred));
1337 Enqueued = true;
1338 }
1339 }
1340 }
1341
1342 if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction) &&
1343 MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
1344 TrueBBI.ExtraCost2, Prediction) &&
1345 FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) {
1346 // Triangle:
1347 // EBB
1348 // | \_
1349 // | |
1350 // | TBB
1351 // | /
1352 // FBB
1353 Tokens.push_back(
1354 std::make_unique<IfcvtToken>(BBI, ICTriangle, TNeedSub, Dups));
1355 Enqueued = true;
1356 }
1357
1358 if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction) &&
1359 MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
1360 TrueBBI.ExtraCost2, Prediction) &&
1361 FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) {
1362 Tokens.push_back(
1363 std::make_unique<IfcvtToken>(BBI, ICTriangleRev, TNeedSub, Dups));
1364 Enqueued = true;
1365 }
1366
1367 if (ValidSimple(TrueBBI, Dups, Prediction) &&
1368 MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
1369 TrueBBI.ExtraCost2, Prediction) &&
1370 FeasibilityAnalysis(TrueBBI, BBI.BrCond)) {
1371 // Simple (split, no rejoin):
1372 // EBB
1373 // | \_
1374 // | |
1375 // | TBB---> exit
1376 // |
1377 // FBB
1378 Tokens.push_back(
1379 std::make_unique<IfcvtToken>(BBI, ICSimple, TNeedSub, Dups));
1380 Enqueued = true;
1381 }
1382
1383 if (CanRevCond) {
1384 // Try the other path...
1385 if (ValidTriangle(FalseBBI, TrueBBI, false, Dups,
1386 Prediction.getCompl()) &&
1387 MeetIfcvtSizeLimit(*FalseBBI.BB,
1388 FalseBBI.NonPredSize + FalseBBI.ExtraCost,
1389 FalseBBI.ExtraCost2, Prediction.getCompl()) &&
1390 FeasibilityAnalysis(FalseBBI, RevCond, true)) {
1391 Tokens.push_back(std::make_unique<IfcvtToken>(BBI, ICTriangleFalse,
1392 FNeedSub, Dups));
1393 Enqueued = true;
1394 }
1395
1396 if (ValidTriangle(FalseBBI, TrueBBI, true, Dups,
1397 Prediction.getCompl()) &&
1398 MeetIfcvtSizeLimit(*FalseBBI.BB,
1399 FalseBBI.NonPredSize + FalseBBI.ExtraCost,
1400 FalseBBI.ExtraCost2, Prediction.getCompl()) &&
1401 FeasibilityAnalysis(FalseBBI, RevCond, true, true)) {
1402 Tokens.push_back(
1403 std::make_unique<IfcvtToken>(BBI, ICTriangleFRev, FNeedSub, Dups));
1404 Enqueued = true;
1405 }
1406
1407 if (ValidSimple(FalseBBI, Dups, Prediction.getCompl()) &&
1408 MeetIfcvtSizeLimit(*FalseBBI.BB,
1409 FalseBBI.NonPredSize + FalseBBI.ExtraCost,
1410 FalseBBI.ExtraCost2, Prediction.getCompl()) &&
1411 FeasibilityAnalysis(FalseBBI, RevCond)) {
1412 Tokens.push_back(
1413 std::make_unique<IfcvtToken>(BBI, ICSimpleFalse, FNeedSub, Dups));
1414 Enqueued = true;
1415 }
1416 }
1417
1418 BBI.IsEnqueued = Enqueued;
1419 BBI.IsBeingAnalyzed = false;
1420 BBI.IsAnalyzed = true;
1421 BBStack.pop_back();
1422 }
1423}
1424
1425/// Analyze all blocks and find entries for all if-conversion candidates.
1426void IfConverter::AnalyzeBlocks(
1427 MachineFunction &MF, std::vector<std::unique_ptr<IfcvtToken>> &Tokens) {
1428 for (MachineBasicBlock &MBB : MF)
1429 AnalyzeBlock(MBB, Tokens);
1430
1431 // Sort to favor more complex ifcvt scheme.
1432 llvm::stable_sort(Tokens, IfcvtTokenCmp);
1433}
1434
1435/// Returns true either if ToMBB is the next block after MBB or that all the
1436/// intervening blocks are empty (given MBB can fall through to its next block).
1439 MachineFunction::iterator I = std::next(PI);
1442 while (I != TI) {
1443 // Check isSuccessor to avoid case where the next block is empty, but
1444 // it's not a successor.
1445 if (I == E || !I->empty() || !PI->isSuccessor(&*I))
1446 return false;
1447 PI = I++;
1448 }
1449 // Finally see if the last I is indeed a successor to PI.
1450 return PI->isSuccessor(&*I);
1451}
1452
1453/// Invalidate predecessor BB info so it would be re-analyzed to determine if it
1454/// can be if-converted. If predecessor is already enqueued, dequeue it!
1455void IfConverter::InvalidatePreds(MachineBasicBlock &MBB) {
1456 for (const MachineBasicBlock *Predecessor : MBB.predecessors()) {
1457 BBInfo &PBBI = BBAnalysis[Predecessor->getNumber()];
1458 if (PBBI.IsDone || PBBI.BB == &MBB)
1459 continue;
1460 PBBI.IsAnalyzed = false;
1461 PBBI.IsEnqueued = false;
1462 }
1463}
1464
1465/// Inserts an unconditional branch from \p MBB to \p ToMBB.
1467 const TargetInstrInfo *TII) {
1468 DebugLoc dl; // FIXME: this is nowhere
1470 TII->insertBranch(MBB, &ToMBB, nullptr, NoCond, dl);
1471}
1472
1473/// Behaves like LiveRegUnits::StepForward() but also adds implicit uses to all
1474/// values defined in MI which are also live/used by MI.
1476 const TargetRegisterInfo *TRI = MI.getMF()->getSubtarget().getRegisterInfo();
1477
1478 // Before stepping forward past MI, remember which regs were live
1479 // before MI. This is needed to set the Undef flag only when reg is
1480 // dead.
1482 LiveBeforeMI.setUniverse(TRI->getNumRegs());
1483 for (unsigned Reg : Redefs)
1484 LiveBeforeMI.insert(Reg);
1485
1487 Redefs.stepForward(MI, Clobbers);
1488
1489 // Now add the implicit uses for each of the clobbered values.
1490 for (auto Clobber : Clobbers) {
1491 // FIXME: Const cast here is nasty, but better than making StepForward
1492 // take a mutable instruction instead of const.
1493 unsigned Reg = Clobber.first;
1494 MachineOperand &Op = const_cast<MachineOperand&>(*Clobber.second);
1495 MachineInstr *OpMI = Op.getParent();
1496 MachineInstrBuilder MIB(*OpMI->getMF(), OpMI);
1497 if (Op.isRegMask()) {
1498 // First handle regmasks. They clobber any entries in the mask which
1499 // means that we need a def for those registers.
1500 if (LiveBeforeMI.count(Reg))
1501 MIB.addReg(Reg, RegState::Implicit);
1502
1503 // We also need to add an implicit def of this register for the later
1504 // use to read from.
1505 // For the register allocator to have allocated a register clobbered
1506 // by the call which is used later, it must be the case that
1507 // the call doesn't return.
1509 continue;
1510 }
1511 if (any_of(TRI->subregs_inclusive(Reg),
1512 [&](MCPhysReg S) { return LiveBeforeMI.count(S); }))
1513 MIB.addReg(Reg, RegState::Implicit);
1514 }
1515}
1516
1517/// If convert a simple (split, no rejoin) sub-CFG.
1518bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
1519 BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
1520 BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
1521 BBInfo *CvtBBI = &TrueBBI;
1522 BBInfo *NextBBI = &FalseBBI;
1523
1524 SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
1525 if (Kind == ICSimpleFalse)
1526 std::swap(CvtBBI, NextBBI);
1527
1528 MachineBasicBlock &CvtMBB = *CvtBBI->BB;
1529 MachineBasicBlock &NextMBB = *NextBBI->BB;
1530 if (CvtBBI->IsDone ||
1531 (CvtBBI->CannotBeCopied && CvtMBB.pred_size() > 1)) {
1532 // Something has changed. It's no longer safe to predicate this block.
1533 BBI.IsAnalyzed = false;
1534 CvtBBI->IsAnalyzed = false;
1535 return false;
1536 }
1537
1538 if (CvtMBB.hasAddressTaken())
1539 // Conservatively abort if-conversion if BB's address is taken.
1540 return false;
1541
1542 if (Kind == ICSimpleFalse)
1544 llvm_unreachable("Unable to reverse branch condition!");
1545
1546 Redefs.init(*TRI);
1547
1548 if (MRI->tracksLiveness()) {
1549 // Initialize liveins to the first BB. These are potentially redefined by
1550 // predicated instructions.
1551 Redefs.addLiveInsNoPristines(CvtMBB);
1552 Redefs.addLiveInsNoPristines(NextMBB);
1553 }
1554
1555 // Remove the branches from the entry so we can add the contents of the true
1556 // block to it.
1557 BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
1558
1559 if (CvtMBB.pred_size() > 1) {
1560 // Copy instructions in the true block, predicate them, and add them to
1561 // the entry block.
1562 CopyAndPredicateBlock(BBI, *CvtBBI, Cond);
1563
1564 // Keep the CFG updated.
1565 BBI.BB->removeSuccessor(&CvtMBB, true);
1566 } else {
1567 // Predicate the instructions in the true block.
1568 PredicateBlock(*CvtBBI, CvtMBB.end(), Cond);
1569
1570 // Merge converted block into entry block. The BB to Cvt edge is removed
1571 // by MergeBlocks.
1572 MergeBlocks(BBI, *CvtBBI);
1573 }
1574
1575 bool IterIfcvt = true;
1576 if (!canFallThroughTo(*BBI.BB, NextMBB)) {
1577 InsertUncondBranch(*BBI.BB, NextMBB, TII);
1578 BBI.HasFallThrough = false;
1579 // Now ifcvt'd block will look like this:
1580 // BB:
1581 // ...
1582 // t, f = cmp
1583 // if t op
1584 // b BBf
1585 //
1586 // We cannot further ifcvt this block because the unconditional branch
1587 // will have to be predicated on the new condition, that will not be
1588 // available if cmp executes.
1589 IterIfcvt = false;
1590 }
1591
1592 // Update block info. BB can be iteratively if-converted.
1593 if (!IterIfcvt)
1594 BBI.IsDone = true;
1595 InvalidatePreds(*BBI.BB);
1596 CvtBBI->IsDone = true;
1597
1598 // FIXME: Must maintain LiveIns.
1599 return true;
1600}
1601
1602/// If convert a triangle sub-CFG.
1603bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
1604 BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
1605 BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
1606 BBInfo *CvtBBI = &TrueBBI;
1607 BBInfo *NextBBI = &FalseBBI;
1608 DebugLoc dl; // FIXME: this is nowhere
1609
1610 SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
1611 if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
1612 std::swap(CvtBBI, NextBBI);
1613
1614 MachineBasicBlock &CvtMBB = *CvtBBI->BB;
1615 MachineBasicBlock &NextMBB = *NextBBI->BB;
1616 if (CvtBBI->IsDone ||
1617 (CvtBBI->CannotBeCopied && CvtMBB.pred_size() > 1)) {
1618 // Something has changed. It's no longer safe to predicate this block.
1619 BBI.IsAnalyzed = false;
1620 CvtBBI->IsAnalyzed = false;
1621 return false;
1622 }
1623
1624 if (CvtMBB.hasAddressTaken())
1625 // Conservatively abort if-conversion if BB's address is taken.
1626 return false;
1627
1628 if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
1630 llvm_unreachable("Unable to reverse branch condition!");
1631
1632 if (Kind == ICTriangleRev || Kind == ICTriangleFRev) {
1633 if (reverseBranchCondition(*CvtBBI)) {
1634 // BB has been changed, modify its predecessors (except for this
1635 // one) so they don't get ifcvt'ed based on bad intel.
1636 for (MachineBasicBlock *PBB : CvtMBB.predecessors()) {
1637 if (PBB == BBI.BB)
1638 continue;
1639 BBInfo &PBBI = BBAnalysis[PBB->getNumber()];
1640 if (PBBI.IsEnqueued) {
1641 PBBI.IsAnalyzed = false;
1642 PBBI.IsEnqueued = false;
1643 }
1644 }
1645 }
1646 }
1647
1648 // Initialize liveins to the first BB. These are potentially redefined by
1649 // predicated instructions.
1650 Redefs.init(*TRI);
1651 if (MRI->tracksLiveness()) {
1652 Redefs.addLiveInsNoPristines(CvtMBB);
1653 Redefs.addLiveInsNoPristines(NextMBB);
1654 }
1655
1656 bool HasEarlyExit = CvtBBI->FalseBB != nullptr;
1657 BranchProbability CvtNext, CvtFalse, BBNext, BBCvt;
1658
1659 if (HasEarlyExit) {
1660 // Get probabilities before modifying CvtMBB and BBI.BB.
1661 CvtNext = MBPI->getEdgeProbability(&CvtMBB, &NextMBB);
1662 CvtFalse = MBPI->getEdgeProbability(&CvtMBB, CvtBBI->FalseBB);
1663 BBNext = MBPI->getEdgeProbability(BBI.BB, &NextMBB);
1664 BBCvt = MBPI->getEdgeProbability(BBI.BB, &CvtMBB);
1665 }
1666
1667 // Remove the branches from the entry so we can add the contents of the true
1668 // block to it.
1669 BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
1670
1671 if (CvtMBB.pred_size() > 1) {
1672 // Copy instructions in the true block, predicate them, and add them to
1673 // the entry block.
1674 CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true);
1675 } else {
1676 // Predicate the 'true' block after removing its branch.
1677 CvtBBI->NonPredSize -= TII->removeBranch(CvtMBB);
1678 PredicateBlock(*CvtBBI, CvtMBB.end(), Cond);
1679
1680 // Now merge the entry of the triangle with the true block.
1681 MergeBlocks(BBI, *CvtBBI, false);
1682 }
1683
1684 // Keep the CFG updated.
1685 BBI.BB->removeSuccessor(&CvtMBB, true);
1686
1687 // If 'true' block has a 'false' successor, add an exit branch to it.
1688 if (HasEarlyExit) {
1689 SmallVector<MachineOperand, 4> RevCond(CvtBBI->BrCond.begin(),
1690 CvtBBI->BrCond.end());
1691 if (TII->reverseBranchCondition(RevCond))
1692 llvm_unreachable("Unable to reverse branch condition!");
1693
1694 // Update the edge probability for both CvtBBI->FalseBB and NextBBI.
1695 // NewNext = New_Prob(BBI.BB, NextMBB) =
1696 // Prob(BBI.BB, NextMBB) +
1697 // Prob(BBI.BB, CvtMBB) * Prob(CvtMBB, NextMBB)
1698 // NewFalse = New_Prob(BBI.BB, CvtBBI->FalseBB) =
1699 // Prob(BBI.BB, CvtMBB) * Prob(CvtMBB, CvtBBI->FalseBB)
1700 auto NewTrueBB = getNextBlock(*BBI.BB);
1701 auto NewNext = BBNext + BBCvt * CvtNext;
1702 auto NewTrueBBIter = find(BBI.BB->successors(), NewTrueBB);
1703 if (NewTrueBBIter != BBI.BB->succ_end())
1704 BBI.BB->setSuccProbability(NewTrueBBIter, NewNext);
1705
1706 auto NewFalse = BBCvt * CvtFalse;
1707 TII->insertBranch(*BBI.BB, CvtBBI->FalseBB, nullptr, RevCond, dl);
1708 BBI.BB->addSuccessor(CvtBBI->FalseBB, NewFalse);
1709 }
1710
1711 // Merge in the 'false' block if the 'false' block has no other
1712 // predecessors. Otherwise, add an unconditional branch to 'false'.
1713 bool FalseBBDead = false;
1714 bool IterIfcvt = true;
1715 bool isFallThrough = canFallThroughTo(*BBI.BB, NextMBB);
1716 if (!isFallThrough) {
1717 // Only merge them if the true block does not fallthrough to the false
1718 // block. By not merging them, we make it possible to iteratively
1719 // ifcvt the blocks.
1720 if (!HasEarlyExit &&
1721 NextMBB.pred_size() == 1 && !NextBBI->HasFallThrough &&
1722 !NextMBB.hasAddressTaken()) {
1723 MergeBlocks(BBI, *NextBBI);
1724 FalseBBDead = true;
1725 } else {
1726 InsertUncondBranch(*BBI.BB, NextMBB, TII);
1727 BBI.HasFallThrough = false;
1728 }
1729 // Mixed predicated and unpredicated code. This cannot be iteratively
1730 // predicated.
1731 IterIfcvt = false;
1732 }
1733
1734 // Update block info. BB can be iteratively if-converted.
1735 if (!IterIfcvt)
1736 BBI.IsDone = true;
1737 InvalidatePreds(*BBI.BB);
1738 CvtBBI->IsDone = true;
1739 if (FalseBBDead)
1740 NextBBI->IsDone = true;
1741
1742 // FIXME: Must maintain LiveIns.
1743 return true;
1744}
1745
1746/// Common code shared between diamond conversions.
1747/// \p BBI, \p TrueBBI, and \p FalseBBI form the diamond shape.
1748/// \p NumDups1 - number of shared instructions at the beginning of \p TrueBBI
1749/// and FalseBBI
1750/// \p NumDups2 - number of shared instructions at the end of \p TrueBBI
1751/// and \p FalseBBI
1752/// \p RemoveBranch - Remove the common branch of the two blocks before
1753/// predicating. Only false for unanalyzable fallthrough
1754/// cases. The caller will replace the branch if necessary.
1755/// \p MergeAddEdges - Add successor edges when merging blocks. Only false for
1756/// unanalyzable fallthrough
1757bool IfConverter::IfConvertDiamondCommon(
1758 BBInfo &BBI, BBInfo &TrueBBI, BBInfo &FalseBBI,
1759 unsigned NumDups1, unsigned NumDups2,
1760 bool TClobbersPred, bool FClobbersPred,
1761 bool RemoveBranch, bool MergeAddEdges) {
1762
1763 if (TrueBBI.IsDone || FalseBBI.IsDone ||
1764 TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1) {
1765 // Something has changed. It's no longer safe to predicate these blocks.
1766 BBI.IsAnalyzed = false;
1767 TrueBBI.IsAnalyzed = false;
1768 FalseBBI.IsAnalyzed = false;
1769 return false;
1770 }
1771
1772 if (TrueBBI.BB->hasAddressTaken() || FalseBBI.BB->hasAddressTaken())
1773 // Conservatively abort if-conversion if either BB has its address taken.
1774 return false;
1775
1776 // Put the predicated instructions from the 'true' block before the
1777 // instructions from the 'false' block, unless the true block would clobber
1778 // the predicate, in which case, do the opposite.
1779 BBInfo *BBI1 = &TrueBBI;
1780 BBInfo *BBI2 = &FalseBBI;
1781 SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
1782 if (TII->reverseBranchCondition(RevCond))
1783 llvm_unreachable("Unable to reverse branch condition!");
1784 SmallVector<MachineOperand, 4> *Cond1 = &BBI.BrCond;
1785 SmallVector<MachineOperand, 4> *Cond2 = &RevCond;
1786
1787 // Figure out the more profitable ordering.
1788 bool DoSwap = false;
1789 if (TClobbersPred && !FClobbersPred)
1790 DoSwap = true;
1791 else if (!TClobbersPred && !FClobbersPred) {
1792 if (TrueBBI.NonPredSize > FalseBBI.NonPredSize)
1793 DoSwap = true;
1794 } else if (TClobbersPred && FClobbersPred)
1795 llvm_unreachable("Predicate info cannot be clobbered by both sides.");
1796 if (DoSwap) {
1797 std::swap(BBI1, BBI2);
1798 std::swap(Cond1, Cond2);
1799 }
1800
1801 // Remove the conditional branch from entry to the blocks.
1802 BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
1803
1804 MachineBasicBlock &MBB1 = *BBI1->BB;
1805 MachineBasicBlock &MBB2 = *BBI2->BB;
1806
1807 // Initialize the Redefs:
1808 // - BB2 live-in regs need implicit uses before being redefined by BB1
1809 // instructions.
1810 // - BB1 live-out regs need implicit uses before being redefined by BB2
1811 // instructions. We start with BB1 live-ins so we have the live-out regs
1812 // after tracking the BB1 instructions.
1813 Redefs.init(*TRI);
1814 if (MRI->tracksLiveness()) {
1815 Redefs.addLiveInsNoPristines(MBB1);
1816 Redefs.addLiveInsNoPristines(MBB2);
1817 }
1818
1819 // Remove the duplicated instructions at the beginnings of both paths.
1820 // Skip dbg_value instructions.
1823 BBI1->NonPredSize -= NumDups1;
1824 BBI2->NonPredSize -= NumDups1;
1825
1826 // Skip past the dups on each side separately since there may be
1827 // differing dbg_value entries. NumDups1 can include a "return"
1828 // instruction, if it's not marked as "branch".
1829 for (unsigned i = 0; i < NumDups1; ++DI1) {
1830 if (DI1 == MBB1.end())
1831 break;
1832 if (!DI1->isDebugInstr())
1833 ++i;
1834 }
1835 while (NumDups1 != 0) {
1836 // Since this instruction is going to be deleted, update call
1837 // site info state if the instruction is call instruction.
1838 if (DI2->shouldUpdateCallSiteInfo())
1839 MBB2.getParent()->eraseCallSiteInfo(&*DI2);
1840
1841 ++DI2;
1842 if (DI2 == MBB2.end())
1843 break;
1844 if (!DI2->isDebugInstr())
1845 --NumDups1;
1846 }
1847
1848 if (MRI->tracksLiveness()) {
1849 for (const MachineInstr &MI : make_range(MBB1.begin(), DI1)) {
1851 Redefs.stepForward(MI, Dummy);
1852 }
1853 }
1854
1855 BBI.BB->splice(BBI.BB->end(), &MBB1, MBB1.begin(), DI1);
1856 MBB2.erase(MBB2.begin(), DI2);
1857
1858 // The branches have been checked to match, so it is safe to remove the
1859 // branch in BB1 and rely on the copy in BB2. The complication is that
1860 // the blocks may end with a return instruction, which may or may not
1861 // be marked as "branch". If it's not, then it could be included in
1862 // "dups1", leaving the blocks potentially empty after moving the common
1863 // duplicates.
1864#ifndef NDEBUG
1865 // Unanalyzable branches must match exactly. Check that now.
1866 if (!BBI1->IsBrAnalyzable)
1867 verifySameBranchInstructions(&MBB1, &MBB2);
1868#endif
1869 // Remove duplicated instructions from the tail of MBB1: any branch
1870 // instructions, and the common instructions counted by NumDups2.
1871 DI1 = MBB1.end();
1872 while (DI1 != MBB1.begin()) {
1873 MachineBasicBlock::iterator Prev = std::prev(DI1);
1874 if (!Prev->isBranch() && !Prev->isDebugInstr())
1875 break;
1876 DI1 = Prev;
1877 }
1878 for (unsigned i = 0; i != NumDups2; ) {
1879 // NumDups2 only counted non-dbg_value instructions, so this won't
1880 // run off the head of the list.
1881 assert(DI1 != MBB1.begin());
1882
1883 --DI1;
1884
1885 // Since this instruction is going to be deleted, update call
1886 // site info state if the instruction is call instruction.
1887 if (DI1->shouldUpdateCallSiteInfo())
1888 MBB1.getParent()->eraseCallSiteInfo(&*DI1);
1889
1890 // skip dbg_value instructions
1891 if (!DI1->isDebugInstr())
1892 ++i;
1893 }
1894 MBB1.erase(DI1, MBB1.end());
1895
1896 DI2 = BBI2->BB->end();
1897 // The branches have been checked to match. Skip over the branch in the false
1898 // block so that we don't try to predicate it.
1899 if (RemoveBranch)
1900 BBI2->NonPredSize -= TII->removeBranch(*BBI2->BB);
1901 else {
1902 // Make DI2 point to the end of the range where the common "tail"
1903 // instructions could be found.
1904 while (DI2 != MBB2.begin()) {
1905 MachineBasicBlock::iterator Prev = std::prev(DI2);
1906 if (!Prev->isBranch() && !Prev->isDebugInstr())
1907 break;
1908 DI2 = Prev;
1909 }
1910 }
1911 while (NumDups2 != 0) {
1912 // NumDups2 only counted non-dbg_value instructions, so this won't
1913 // run off the head of the list.
1914 assert(DI2 != MBB2.begin());
1915 --DI2;
1916 // skip dbg_value instructions
1917 if (!DI2->isDebugInstr())
1918 --NumDups2;
1919 }
1920
1921 // Remember which registers would later be defined by the false block.
1922 // This allows us not to predicate instructions in the true block that would
1923 // later be re-defined. That is, rather than
1924 // subeq r0, r1, #1
1925 // addne r0, r1, #1
1926 // generate:
1927 // sub r0, r1, #1
1928 // addne r0, r1, #1
1929 SmallSet<MCPhysReg, 4> RedefsByFalse;
1930 SmallSet<MCPhysReg, 4> ExtUses;
1931 if (TII->isProfitableToUnpredicate(MBB1, MBB2)) {
1932 for (const MachineInstr &FI : make_range(MBB2.begin(), DI2)) {
1933 if (FI.isDebugInstr())
1934 continue;
1936 for (const MachineOperand &MO : FI.operands()) {
1937 if (!MO.isReg())
1938 continue;
1939 Register Reg = MO.getReg();
1940 if (!Reg)
1941 continue;
1942 if (MO.isDef()) {
1943 Defs.push_back(Reg);
1944 } else if (!RedefsByFalse.count(Reg)) {
1945 // These are defined before ctrl flow reach the 'false' instructions.
1946 // They cannot be modified by the 'true' instructions.
1947 for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg))
1948 ExtUses.insert(SubReg);
1949 }
1950 }
1951
1952 for (MCPhysReg Reg : Defs) {
1953 if (!ExtUses.count(Reg)) {
1954 for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg))
1955 RedefsByFalse.insert(SubReg);
1956 }
1957 }
1958 }
1959 }
1960
1961 // Predicate the 'true' block.
1962 PredicateBlock(*BBI1, MBB1.end(), *Cond1, &RedefsByFalse);
1963
1964 // After predicating BBI1, if there is a predicated terminator in BBI1 and
1965 // a non-predicated in BBI2, then we don't want to predicate the one from
1966 // BBI2. The reason is that if we merged these blocks, we would end up with
1967 // two predicated terminators in the same block.
1968 // Also, if the branches in MBB1 and MBB2 were non-analyzable, then don't
1969 // predicate them either. They were checked to be identical, and so the
1970 // same branch would happen regardless of which path was taken.
1971 if (!MBB2.empty() && (DI2 == MBB2.end())) {
1974 bool BB1Predicated = BBI1T != MBB1.end() && TII->isPredicated(*BBI1T);
1975 bool BB2NonPredicated = BBI2T != MBB2.end() && !TII->isPredicated(*BBI2T);
1976 if (BB2NonPredicated && (BB1Predicated || !BBI2->IsBrAnalyzable))
1977 --DI2;
1978 }
1979
1980 // Predicate the 'false' block.
1981 PredicateBlock(*BBI2, DI2, *Cond2);
1982
1983 // Merge the true block into the entry of the diamond.
1984 MergeBlocks(BBI, *BBI1, MergeAddEdges);
1985 MergeBlocks(BBI, *BBI2, MergeAddEdges);
1986 return true;
1987}
1988
1989/// If convert an almost-diamond sub-CFG where the true
1990/// and false blocks share a common tail.
1991bool IfConverter::IfConvertForkedDiamond(
1992 BBInfo &BBI, IfcvtKind Kind,
1993 unsigned NumDups1, unsigned NumDups2,
1994 bool TClobbersPred, bool FClobbersPred) {
1995 BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
1996 BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
1997
1998 // Save the debug location for later.
1999 DebugLoc dl;
2000 MachineBasicBlock::iterator TIE = TrueBBI.BB->getFirstTerminator();
2001 if (TIE != TrueBBI.BB->end())
2002 dl = TIE->getDebugLoc();
2003 // Removing branches from both blocks is safe, because we have already
2004 // determined that both blocks have the same branch instructions. The branch
2005 // will be added back at the end, unpredicated.
2006 if (!IfConvertDiamondCommon(
2007 BBI, TrueBBI, FalseBBI,
2008 NumDups1, NumDups2,
2009 TClobbersPred, FClobbersPred,
2010 /* RemoveBranch */ true, /* MergeAddEdges */ true))
2011 return false;
2012
2013 // Add back the branch.
2014 // Debug location saved above when removing the branch from BBI2
2015 TII->insertBranch(*BBI.BB, TrueBBI.TrueBB, TrueBBI.FalseBB,
2016 TrueBBI.BrCond, dl);
2017
2018 // Update block info.
2019 BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true;
2020 InvalidatePreds(*BBI.BB);
2021
2022 // FIXME: Must maintain LiveIns.
2023 return true;
2024}
2025
2026/// If convert a diamond sub-CFG.
2027bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
2028 unsigned NumDups1, unsigned NumDups2,
2029 bool TClobbersPred, bool FClobbersPred) {
2030 BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
2031 BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
2032 MachineBasicBlock *TailBB = TrueBBI.TrueBB;
2033
2034 // True block must fall through or end with an unanalyzable terminator.
2035 if (!TailBB) {
2036 if (blockAlwaysFallThrough(TrueBBI))
2037 TailBB = FalseBBI.TrueBB;
2038 assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!");
2039 }
2040
2041 if (!IfConvertDiamondCommon(
2042 BBI, TrueBBI, FalseBBI,
2043 NumDups1, NumDups2,
2044 TClobbersPred, FClobbersPred,
2045 /* RemoveBranch */ TrueBBI.IsBrAnalyzable,
2046 /* MergeAddEdges */ TailBB == nullptr))
2047 return false;
2048
2049 // If the if-converted block falls through or unconditionally branches into
2050 // the tail block, and the tail block does not have other predecessors, then
2051 // fold the tail block in as well. Otherwise, unless it falls through to the
2052 // tail, add a unconditional branch to it.
2053 if (TailBB) {
2054 // We need to remove the edges to the true and false blocks manually since
2055 // we didn't let IfConvertDiamondCommon update the CFG.
2056 BBI.BB->removeSuccessor(TrueBBI.BB);
2057 BBI.BB->removeSuccessor(FalseBBI.BB, true);
2058
2059 BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()];
2060 bool CanMergeTail = !TailBBI.HasFallThrough &&
2061 !TailBBI.BB->hasAddressTaken();
2062 // The if-converted block can still have a predicated terminator
2063 // (e.g. a predicated return). If that is the case, we cannot merge
2064 // it with the tail block.
2065 MachineBasicBlock::const_iterator TI = BBI.BB->getFirstTerminator();
2066 if (TI != BBI.BB->end() && TII->isPredicated(*TI))
2067 CanMergeTail = false;
2068 // There may still be a fall-through edge from BBI1 or BBI2 to TailBB;
2069 // check if there are any other predecessors besides those.
2070 unsigned NumPreds = TailBB->pred_size();
2071 if (NumPreds > 1)
2072 CanMergeTail = false;
2073 else if (NumPreds == 1 && CanMergeTail) {
2075 if (*PI != TrueBBI.BB && *PI != FalseBBI.BB)
2076 CanMergeTail = false;
2077 }
2078 if (CanMergeTail) {
2079 MergeBlocks(BBI, TailBBI);
2080 TailBBI.IsDone = true;
2081 } else {
2082 BBI.BB->addSuccessor(TailBB, BranchProbability::getOne());
2083 InsertUncondBranch(*BBI.BB, *TailBB, TII);
2084 BBI.HasFallThrough = false;
2085 }
2086 }
2087
2088 // Update block info.
2089 BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true;
2090 InvalidatePreds(*BBI.BB);
2091
2092 // FIXME: Must maintain LiveIns.
2093 return true;
2094}
2095
2096static bool MaySpeculate(const MachineInstr &MI,
2097 SmallSet<MCPhysReg, 4> &LaterRedefs) {
2098 bool SawStore = true;
2099 if (!MI.isSafeToMove(nullptr, SawStore))
2100 return false;
2101
2102 for (const MachineOperand &MO : MI.operands()) {
2103 if (!MO.isReg())
2104 continue;
2105 Register Reg = MO.getReg();
2106 if (!Reg)
2107 continue;
2108 if (MO.isDef() && !LaterRedefs.count(Reg))
2109 return false;
2110 }
2111
2112 return true;
2113}
2114
2115/// Predicate instructions from the start of the block to the specified end with
2116/// the specified condition.
2117void IfConverter::PredicateBlock(BBInfo &BBI,
2120 SmallSet<MCPhysReg, 4> *LaterRedefs) {
2121 bool AnyUnpred = false;
2122 bool MaySpec = LaterRedefs != nullptr;
2123 for (MachineInstr &I : make_range(BBI.BB->begin(), E)) {
2124 if (I.isDebugInstr() || TII->isPredicated(I))
2125 continue;
2126 // It may be possible not to predicate an instruction if it's the 'true'
2127 // side of a diamond and the 'false' side may re-define the instruction's
2128 // defs.
2129 if (MaySpec && MaySpeculate(I, *LaterRedefs)) {
2130 AnyUnpred = true;
2131 continue;
2132 }
2133 // If any instruction is predicated, then every instruction after it must
2134 // be predicated.
2135 MaySpec = false;
2136 if (!TII->PredicateInstruction(I, Cond)) {
2137#ifndef NDEBUG
2138 dbgs() << "Unable to predicate " << I << "!\n";
2139#endif
2140 llvm_unreachable(nullptr);
2141 }
2142
2143 // If the predicated instruction now redefines a register as the result of
2144 // if-conversion, add an implicit kill.
2145 UpdatePredRedefs(I, Redefs);
2146 }
2147
2148 BBI.Predicate.append(Cond.begin(), Cond.end());
2149
2150 BBI.IsAnalyzed = false;
2151 BBI.NonPredSize = 0;
2152
2153 ++NumIfConvBBs;
2154 if (AnyUnpred)
2155 ++NumUnpred;
2156}
2157
2158/// Copy and predicate instructions from source BB to the destination block.
2159/// Skip end of block branches if IgnoreBr is true.
2160void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
2162 bool IgnoreBr) {
2163 MachineFunction &MF = *ToBBI.BB->getParent();
2164
2165 MachineBasicBlock &FromMBB = *FromBBI.BB;
2166 for (MachineInstr &I : FromMBB) {
2167 // Do not copy the end of the block branches.
2168 if (IgnoreBr && I.isBranch())
2169 break;
2170
2172 // Make a copy of the call site info.
2173 if (I.isCandidateForCallSiteEntry())
2174 MF.copyCallSiteInfo(&I, MI);
2175
2176 ToBBI.BB->insert(ToBBI.BB->end(), MI);
2177 ToBBI.NonPredSize++;
2178 unsigned ExtraPredCost = TII->getPredicationCost(I);
2179 unsigned NumCycles = SchedModel.computeInstrLatency(&I, false);
2180 if (NumCycles > 1)
2181 ToBBI.ExtraCost += NumCycles-1;
2182 ToBBI.ExtraCost2 += ExtraPredCost;
2183
2184 if (!TII->isPredicated(I) && !MI->isDebugInstr()) {
2185 if (!TII->PredicateInstruction(*MI, Cond)) {
2186#ifndef NDEBUG
2187 dbgs() << "Unable to predicate " << I << "!\n";
2188#endif
2189 llvm_unreachable(nullptr);
2190 }
2191 }
2192
2193 // If the predicated instruction now redefines a register as the result of
2194 // if-conversion, add an implicit kill.
2195 UpdatePredRedefs(*MI, Redefs);
2196 }
2197
2198 if (!IgnoreBr) {
2199 std::vector<MachineBasicBlock *> Succs(FromMBB.succ_begin(),
2200 FromMBB.succ_end());
2201 MachineBasicBlock *NBB = getNextBlock(FromMBB);
2202 MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : nullptr;
2203
2204 for (MachineBasicBlock *Succ : Succs) {
2205 // Fallthrough edge can't be transferred.
2206 if (Succ == FallThrough)
2207 continue;
2208 ToBBI.BB->addSuccessor(Succ);
2209 }
2210 }
2211
2212 ToBBI.Predicate.append(FromBBI.Predicate.begin(), FromBBI.Predicate.end());
2213 ToBBI.Predicate.append(Cond.begin(), Cond.end());
2214
2215 ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
2216 ToBBI.IsAnalyzed = false;
2217
2218 ++NumDupBBs;
2219}
2220
2221/// Move all instructions from FromBB to the end of ToBB. This will leave
2222/// FromBB as an empty block, so remove all of its successor edges and move it
2223/// to the end of the function. If AddEdges is true, i.e., when FromBBI's
2224/// branch is being moved, add those successor edges to ToBBI and remove the old
2225/// edge from ToBBI to FromBBI.
2226void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
2227 MachineBasicBlock &FromMBB = *FromBBI.BB;
2228 assert(!FromMBB.hasAddressTaken() &&
2229 "Removing a BB whose address is taken!");
2230
2231 // If we're about to splice an INLINEASM_BR from FromBBI, we need to update
2232 // ToBBI's successor list accordingly.
2233 if (FromMBB.mayHaveInlineAsmBr())
2234 for (MachineInstr &MI : FromMBB)
2235 if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
2236 for (MachineOperand &MO : MI.operands())
2237 if (MO.isMBB() && !ToBBI.BB->isSuccessor(MO.getMBB()))
2238 ToBBI.BB->addSuccessor(MO.getMBB(), BranchProbability::getZero());
2239
2240 // In case FromMBB contains terminators (e.g. return instruction),
2241 // first move the non-terminator instructions, then the terminators.
2242 MachineBasicBlock::iterator FromTI = FromMBB.getFirstTerminator();
2243 MachineBasicBlock::iterator ToTI = ToBBI.BB->getFirstTerminator();
2244 ToBBI.BB->splice(ToTI, &FromMBB, FromMBB.begin(), FromTI);
2245
2246 // If FromBB has non-predicated terminator we should copy it at the end.
2247 if (FromTI != FromMBB.end() && !TII->isPredicated(*FromTI))
2248 ToTI = ToBBI.BB->end();
2249 ToBBI.BB->splice(ToTI, &FromMBB, FromTI, FromMBB.end());
2250
2251 // Force normalizing the successors' probabilities of ToBBI.BB to convert all
2252 // unknown probabilities into known ones.
2253 // FIXME: This usage is too tricky and in the future we would like to
2254 // eliminate all unknown probabilities in MBB.
2255 if (ToBBI.IsBrAnalyzable)
2256 ToBBI.BB->normalizeSuccProbs();
2257
2258 SmallVector<MachineBasicBlock *, 4> FromSuccs(FromMBB.successors());
2259 MachineBasicBlock *NBB = getNextBlock(FromMBB);
2260 MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : nullptr;
2261 // The edge probability from ToBBI.BB to FromMBB, which is only needed when
2262 // AddEdges is true and FromMBB is a successor of ToBBI.BB.
2263 auto To2FromProb = BranchProbability::getZero();
2264 if (AddEdges && ToBBI.BB->isSuccessor(&FromMBB)) {
2265 // Remove the old edge but remember the edge probability so we can calculate
2266 // the correct weights on the new edges being added further down.
2267 To2FromProb = MBPI->getEdgeProbability(ToBBI.BB, &FromMBB);
2268 ToBBI.BB->removeSuccessor(&FromMBB);
2269 }
2270
2271 for (MachineBasicBlock *Succ : FromSuccs) {
2272 // Fallthrough edge can't be transferred.
2273 if (Succ == FallThrough) {
2274 FromMBB.removeSuccessor(Succ);
2275 continue;
2276 }
2277
2278 auto NewProb = BranchProbability::getZero();
2279 if (AddEdges) {
2280 // Calculate the edge probability for the edge from ToBBI.BB to Succ,
2281 // which is a portion of the edge probability from FromMBB to Succ. The
2282 // portion ratio is the edge probability from ToBBI.BB to FromMBB (if
2283 // FromBBI is a successor of ToBBI.BB. See comment below for exception).
2284 NewProb = MBPI->getEdgeProbability(&FromMBB, Succ);
2285
2286 // To2FromProb is 0 when FromMBB is not a successor of ToBBI.BB. This
2287 // only happens when if-converting a diamond CFG and FromMBB is the
2288 // tail BB. In this case FromMBB post-dominates ToBBI.BB and hence we
2289 // could just use the probabilities on FromMBB's out-edges when adding
2290 // new successors.
2291 if (!To2FromProb.isZero())
2292 NewProb *= To2FromProb;
2293 }
2294
2295 FromMBB.removeSuccessor(Succ);
2296
2297 if (AddEdges) {
2298 // If the edge from ToBBI.BB to Succ already exists, update the
2299 // probability of this edge by adding NewProb to it. An example is shown
2300 // below, in which A is ToBBI.BB and B is FromMBB. In this case we
2301 // don't have to set C as A's successor as it already is. We only need to
2302 // update the edge probability on A->C. Note that B will not be
2303 // immediately removed from A's successors. It is possible that B->D is
2304 // not removed either if D is a fallthrough of B. Later the edge A->D
2305 // (generated here) and B->D will be combined into one edge. To maintain
2306 // correct edge probability of this combined edge, we need to set the edge
2307 // probability of A->B to zero, which is already done above. The edge
2308 // probability on A->D is calculated by scaling the original probability
2309 // on A->B by the probability of B->D.
2310 //
2311 // Before ifcvt: After ifcvt (assume B->D is kept):
2312 //
2313 // A A
2314 // /| /|\
2315 // / B / B|
2316 // | /| | ||
2317 // |/ | | |/
2318 // C D C D
2319 //
2320 if (ToBBI.BB->isSuccessor(Succ))
2321 ToBBI.BB->setSuccProbability(
2322 find(ToBBI.BB->successors(), Succ),
2323 MBPI->getEdgeProbability(ToBBI.BB, Succ) + NewProb);
2324 else
2325 ToBBI.BB->addSuccessor(Succ, NewProb);
2326 }
2327 }
2328
2329 // Move the now empty FromMBB out of the way to the end of the function so
2330 // it doesn't interfere with fallthrough checks done by canFallThroughTo().
2331 MachineBasicBlock *Last = &*FromMBB.getParent()->rbegin();
2332 if (Last != &FromMBB)
2333 FromMBB.moveAfter(Last);
2334
2335 // Normalize the probabilities of ToBBI.BB's successors with all adjustment
2336 // we've done above.
2337 if (ToBBI.IsBrAnalyzable && FromBBI.IsBrAnalyzable)
2338 ToBBI.BB->normalizeSuccProbs();
2339
2340 ToBBI.Predicate.append(FromBBI.Predicate.begin(), FromBBI.Predicate.end());
2341 FromBBI.Predicate.clear();
2342
2343 ToBBI.NonPredSize += FromBBI.NonPredSize;
2344 ToBBI.ExtraCost += FromBBI.ExtraCost;
2345 ToBBI.ExtraCost2 += FromBBI.ExtraCost2;
2346 FromBBI.NonPredSize = 0;
2347 FromBBI.ExtraCost = 0;
2348 FromBBI.ExtraCost2 = 0;
2349
2350 ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
2351 ToBBI.HasFallThrough = FromBBI.HasFallThrough;
2352 ToBBI.IsAnalyzed = false;
2353 FromBBI.IsAnalyzed = false;
2354}
2355
2357llvm::createIfConverter(std::function<bool(const MachineFunction &)> Ftor) {
2358 return new IfConverter(std::move(Ftor));
2359}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock & MBB
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
Early If Converter
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static cl::opt< bool > DisableTriangle("disable-ifcvt-triangle", cl::init(false), cl::Hidden)
static MachineBasicBlock * findFalseBlock(MachineBasicBlock *BB, MachineBasicBlock *TrueBB)
BB has a fallthrough. Find its 'false' successor given its 'true' successor.
static cl::opt< int > IfCvtFnStop("ifcvt-fn-stop", cl::init(-1), cl::Hidden)
static cl::opt< bool > IfCvtBranchFold("ifcvt-branch-fold", cl::init(true), cl::Hidden)
static cl::opt< bool > DisableTriangleF("disable-ifcvt-triangle-false", cl::init(false), cl::Hidden)
static cl::opt< int > IfCvtLimit("ifcvt-limit", cl::init(-1), cl::Hidden)
static void InsertUncondBranch(MachineBasicBlock &MBB, MachineBasicBlock &ToMBB, const TargetInstrInfo *TII)
Inserts an unconditional branch from MBB to ToMBB.
static void verifySameBranchInstructions(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2)
static cl::opt< bool > DisableDiamond("disable-ifcvt-diamond", cl::init(false), cl::Hidden)
static cl::opt< bool > DisableTriangleR("disable-ifcvt-triangle-rev", cl::init(false), cl::Hidden)
static cl::opt< bool > DisableForkedDiamond("disable-ifcvt-forked-diamond", cl::init(false), cl::Hidden)
static MachineBasicBlock * getNextBlock(MachineBasicBlock &MBB)
Returns the next block in the function blocks ordering.
static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs)
Behaves like LiveRegUnits::StepForward() but also adds implicit uses to all values defined in MI whic...
static cl::opt< bool > DisableSimpleF("disable-ifcvt-simple-false", cl::init(false), cl::Hidden)
static cl::opt< bool > DisableSimple("disable-ifcvt-simple", cl::init(false), cl::Hidden)
#define DEBUG_TYPE
static bool canFallThroughTo(MachineBasicBlock &MBB, MachineBasicBlock &ToMBB)
Returns true either if ToMBB is the next block after MBB or that all the intervening blocks are empty...
static cl::opt< int > IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden)
static bool MaySpeculate(const MachineInstr &MI, SmallSet< MCPhysReg, 4 > &LaterRedefs)
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the SparseSet class derived from the version described in Briggs,...
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This file describes how to lower LLVM code to machine code.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
bool OptimizeFunction(MachineFunction &MF, const TargetInstrInfo *tii, const TargetRegisterInfo *tri, MachineLoopInfo *mli=nullptr, bool AfterPlacement=false)
Perhaps branch folding, tail merging and other CFG optimizations on the given function.
static BranchProbability getOne()
BranchProbability getCompl() const
static BranchProbability getZero()
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:678
A possibly irreducible generalization of a Loop.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
bool isPredicated(const MachineInstr &MI) const override
Returns true if the instruction is already predicated.
bool ClobbersPredicate(MachineInstr &MI, std::vector< MachineOperand > &Pred, bool SkipDead) const override
If the specified instruction defines any predicate or condition code register(s) used for predication...
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
Reverses the branch condition of the specified condition list, returning false on success and true if...
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, BranchProbability Probability) const override
Return true if it's profitable to predicate instructions with accumulated instruction latency of "Num...
bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, BranchProbability Probability) const override
Return true if it's profitable for if-converter to duplicate instructions of specified accumulated in...
bool SubsumesPredicate(ArrayRef< MachineOperand > Pred1, ArrayRef< MachineOperand > Pred2) const override
Returns true if the first specified predicate subsumes the second, e.g.
bool PredicateInstruction(MachineInstr &MI, ArrayRef< MachineOperand > Cond) const override
Convert the instruction into a predicated instruction.
bool isPredicable(const MachineInstr &MI) const override
Return true if the specified instruction can be predicated.
A set of physical registers with utility functions to track liveness when walking backward/forward th...
Definition: LivePhysRegs.h:52
void stepForward(const MachineInstr &MI, SmallVectorImpl< std::pair< MCPhysReg, const MachineOperand * > > &Clobbers)
Simulates liveness when stepping forward over an instruction(bundle).
void init(const TargetRegisterInfo &TRI)
(re-)initializes and clears the set.
Definition: LivePhysRegs.h:70
void addLiveInsNoPristines(const MachineBasicBlock &MBB)
Adds all live-in registers of basic block MBB but skips pristine registers.
unsigned pred_size() const
reverse_iterator rend()
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
iterator getFirstNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the first non-debug instruction in the basic block, or end().
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
bool hasAddressTaken() const
Test whether this block is used as something other than the target of a terminator,...
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< iterator > terminators()
iterator_range< succ_iterator > successors()
reverse_iterator rbegin()
iterator_range< pred_iterator > predecessors()
bool mayHaveInlineAsmBr() const
Returns true if this block may have an INLINEASM_BR (overestimate, by checking if any of the successo...
std::vector< MachineBasicBlock * >::iterator pred_iterator
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
void copyCallSiteInfo(const MachineInstr *Old, const MachineInstr *New)
Copy the call site info from Old to \ New.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
void eraseCallSiteInfo(const MachineInstr *MI)
Following functions update call site info.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
reverse_iterator getReverse() const
Get a reverse iterator to the same node.
Representation of each machine instruction.
Definition: MachineInstr.h:69
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
MachineOperand class - Representation of each machine instruction operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
SparseSet - Fast set implementation for objects that can be identified by small unsigned keys.
Definition: SparseSet.h:124
size_type count(const KeyT &Key) const
count - Returns 1 if this set contains an element identified by Key, 0 otherwise.
Definition: SparseSet.h:241
std::pair< iterator, bool > insert(const ValueT &Val)
insert - Attempts to insert a new element.
Definition: SparseSet.h:253
void setUniverse(unsigned U)
setUniverse - Set the universe size which determines the largest key the set can hold.
Definition: SparseSet.h:155
TargetInstrInfo - Interface to description of machine instruction set.
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Provide an instruction scheduling machine model to CodeGen passes.
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
TargetSubtargetInfo - Generic base class for all target subtargets.
self_iterator getIterator()
Definition: ilist_node.h:109
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool isPredicated(const MCInst &MI, const MCInstrInfo *MCII)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void stable_sort(R &&Range)
Definition: STLExtras.h:1995
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
FunctionPass * createIfConverter(std::function< bool(const MachineFunction &)> Ftor)
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
Definition: ScopeExit.h:59
void initializeIfConverterPass(PassRegistry &)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void recomputeLivenessFlags(MachineBasicBlock &MBB)
Recomputes dead and kill flags in MBB.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1849
char & IfConverterID
IfConverter - This pass performs machine code if conversion.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860