LLVM  13.0.0git
SpeculateAroundPHIs.cpp
Go to the documentation of this file.
1 //===- SpeculateAroundPHIs.cpp --------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
11 #include "llvm/ADT/Sequence.h"
12 #include "llvm/ADT/SetVector.h"
13 #include "llvm/ADT/Statistic.h"
16 #include "llvm/IR/BasicBlock.h"
17 #include "llvm/IR/IRBuilder.h"
18 #include "llvm/IR/Instructions.h"
19 #include "llvm/IR/IntrinsicInst.h"
20 #include "llvm/Support/Debug.h"
22 
23 using namespace llvm;
24 
25 #define DEBUG_TYPE "spec-phis"
26 
27 STATISTIC(NumPHIsSpeculated, "Number of PHI nodes we speculated around");
28 STATISTIC(NumEdgesSplit,
29  "Number of critical edges which were split for speculation");
30 STATISTIC(NumSpeculatedInstructions,
31  "Number of instructions we speculated around the PHI nodes");
32 STATISTIC(NumNewRedundantInstructions,
33  "Number of new, redundant instructions inserted");
34 
35 /// Check whether speculating the users of a PHI node around the PHI
36 /// will be safe.
37 ///
38 /// This checks both that all of the users are safe and also that all of their
39 /// operands are either recursively safe or already available along an incoming
40 /// edge to the PHI.
41 ///
42 /// This routine caches both all the safe nodes explored in `PotentialSpecSet`
43 /// and the chain of nodes that definitively reach any unsafe node in
44 /// `UnsafeSet`. By preserving these between repeated calls to this routine for
45 /// PHIs in the same basic block, the exploration here can be reused. However,
46 /// these caches must no be reused for PHIs in a different basic block as they
47 /// reflect what is available along incoming edges.
48 static bool
50  SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
51  SmallPtrSetImpl<Instruction *> &UnsafeSet) {
52  auto *PhiBB = PN.getParent();
55 
56  // Walk each user of the PHI node.
57  for (Use &U : PN.uses()) {
58  auto *UI = cast<Instruction>(U.getUser());
59 
60  // Ensure the use post-dominates the PHI node. This ensures that, in the
61  // absence of unwinding, the use will actually be reached.
62  // FIXME: We use a blunt hammer of requiring them to be in the same basic
63  // block. We should consider using actual post-dominance here in the
64  // future.
65  if (UI->getParent() != PhiBB) {
66  LLVM_DEBUG(dbgs() << " Unsafe: use in a different BB: " << *UI << "\n");
67  return false;
68  }
69 
70  if (const auto *CS = dyn_cast<CallBase>(UI)) {
71  if (CS->isConvergent() || CS->cannotDuplicate()) {
72  LLVM_DEBUG(dbgs() << " Unsafe: convergent "
73  "callsite cannot de duplicated: " << *UI << '\n');
74  return false;
75  }
76  }
77 
78  // FIXME: This check is much too conservative. We're not going to move these
79  // instructions onto new dynamic paths through the program unless there is
80  // a call instruction between the use and the PHI node. And memory isn't
81  // changing unless there is a store in that same sequence. We should
82  // probably change this to do at least a limited scan of the intervening
83  // instructions and allow handling stores in easily proven safe cases.
84  if (mayBeMemoryDependent(*UI)) {
85  LLVM_DEBUG(dbgs() << " Unsafe: can't speculate use: " << *UI << "\n");
86  return false;
87  }
88 
89  // Now do a depth-first search of everything these users depend on to make
90  // sure they are transitively safe. This is a depth-first search, but we
91  // check nodes in preorder to minimize the amount of checking.
92  Visited.insert(UI);
93  DFSStack.push_back({UI, UI->value_op_begin()});
94  do {
96  std::tie(UI, OpIt) = DFSStack.pop_back_val();
97 
98  while (OpIt != UI->value_op_end()) {
99  auto *OpI = dyn_cast<Instruction>(*OpIt);
100  // Increment to the next operand for whenever we continue.
101  ++OpIt;
102  // No need to visit non-instructions, which can't form dependencies.
103  if (!OpI)
104  continue;
105 
106  // Now do the main pre-order checks that this operand is a viable
107  // dependency of something we want to speculate.
108 
109  // First do a few checks for instructions that won't require
110  // speculation at all because they are trivially available on the
111  // incoming edge (either through dominance or through an incoming value
112  // to a PHI).
113  //
114  // The cases in the current block will be trivially dominated by the
115  // edge.
116  auto *ParentBB = OpI->getParent();
117  if (ParentBB == PhiBB) {
118  if (isa<PHINode>(OpI)) {
119  // We can trivially map through phi nodes in the same block.
120  continue;
121  }
122  } else if (DT.dominates(ParentBB, PhiBB)) {
123  // Instructions from dominating blocks are already available.
124  continue;
125  }
126 
127  // Once we know that we're considering speculating the operand, check
128  // if we've already explored this subgraph and found it to be safe.
129  if (PotentialSpecSet.count(OpI))
130  continue;
131 
132  // If we've already explored this subgraph and found it unsafe, bail.
133  // If when we directly test whether this is safe it fails, bail.
134  if (UnsafeSet.count(OpI) || ParentBB != PhiBB ||
135  mayBeMemoryDependent(*OpI)) {
136  LLVM_DEBUG(dbgs() << " Unsafe: can't speculate transitive use: "
137  << *OpI << "\n");
138  // Record the stack of instructions which reach this node as unsafe
139  // so we prune subsequent searches.
140  UnsafeSet.insert(OpI);
141  for (auto &StackPair : DFSStack) {
142  Instruction *I = StackPair.first;
143  UnsafeSet.insert(I);
144  }
145  return false;
146  }
147 
148  // Skip any operands we're already recursively checking.
149  if (!Visited.insert(OpI).second)
150  continue;
151 
152  // Push onto the stack and descend. We can directly continue this
153  // loop when ascending.
154  DFSStack.push_back({UI, OpIt});
155  UI = OpI;
156  OpIt = OpI->value_op_begin();
157  }
158 
159  // This node and all its operands are safe. Go ahead and cache that for
160  // reuse later.
161  PotentialSpecSet.insert(UI);
162 
163  // Continue with the next node on the stack.
164  } while (!DFSStack.empty());
165  }
166 
167 #ifndef NDEBUG
168  // Every visited operand should have been marked as safe for speculation at
169  // this point. Verify this and return success.
170  for (auto *I : Visited)
171  assert(PotentialSpecSet.count(I) &&
172  "Failed to mark a visited instruction as safe!");
173 #endif
174  return true;
175 }
176 
177 /// Check whether, in isolation, a given PHI node is both safe and profitable
178 /// to speculate users around.
179 ///
180 /// This handles checking whether there are any constant operands to a PHI
181 /// which could represent a useful speculation candidate, whether the users of
182 /// the PHI are safe to speculate including all their transitive dependencies,
183 /// and whether after speculation there will be some cost savings (profit) to
184 /// folding the operands into the users of the PHI node. Returns true if both
185 /// safe and profitable with relevant cost savings updated in the map and with
186 /// an update to the `PotentialSpecSet`. Returns false if either safety or
187 /// profitability are absent. Some new entries may be made to the
188 /// `PotentialSpecSet` even when this routine returns false, but they remain
189 /// conservatively correct.
190 ///
191 /// The profitability check here is a local one, but it checks this in an
192 /// interesting way. Beyond checking that the total cost of materializing the
193 /// constants will be less than the cost of folding them into their users, it
194 /// also checks that no one incoming constant will have a higher cost when
195 /// folded into its users rather than materialized. This higher cost could
196 /// result in a dynamic *path* that is more expensive even when the total cost
197 /// is lower. Currently, all of the interesting cases where this optimization
198 /// should fire are ones where it is a no-loss operation in this sense. If we
199 /// ever want to be more aggressive here, we would need to balance the
200 /// different incoming edges' cost by looking at their respective
201 /// probabilities.
204  SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
207  // First see whether there is any cost savings to speculating around this
208  // PHI, and build up a map of the constant inputs to how many times they
209  // occur.
210  bool NonFreeMat = false;
211  struct CostsAndCount {
214  int Count = 0;
215  };
217  SmallPtrSet<BasicBlock *, 16> IncomingConstantBlocks;
218  for (int i : llvm::seq<int>(0, PN.getNumIncomingValues())) {
219  auto *IncomingC = dyn_cast<ConstantInt>(PN.getIncomingValue(i));
220  if (!IncomingC)
221  continue;
222 
223  // Only visit each incoming edge with a constant input once.
224  if (!IncomingConstantBlocks.insert(PN.getIncomingBlock(i)).second)
225  continue;
226 
227  auto InsertResult = CostsAndCounts.insert({IncomingC, {}});
228  // Count how many edges share a given incoming costant.
229  ++InsertResult.first->second.Count;
230  // Only compute the cost the first time we see a particular constant.
231  if (!InsertResult.second)
232  continue;
233 
234  InstructionCost &MatCost = InsertResult.first->second.MatCost;
235  MatCost = TTI.getIntImmCost(IncomingC->getValue(), IncomingC->getType(),
237  NonFreeMat |= MatCost != TTI.TCC_Free;
238  }
239  if (!NonFreeMat) {
240  LLVM_DEBUG(dbgs() << " Free: " << PN << "\n");
241  // No profit in free materialization.
242  return false;
243  }
244 
245  // Now check that the uses of this PHI can actually be speculated,
246  // otherwise we'll still have to materialize the PHI value.
247  if (!isSafeToSpeculatePHIUsers(PN, DT, PotentialSpecSet, UnsafeSet)) {
248  LLVM_DEBUG(dbgs() << " Unsafe PHI: " << PN << "\n");
249  return false;
250  }
251 
252  // Compute how much (if any) savings are available by speculating around this
253  // PHI.
254  for (Use &U : PN.uses()) {
255  auto *UserI = cast<Instruction>(U.getUser());
256  // Now check whether there is any savings to folding the incoming constants
257  // into this use.
258  unsigned Idx = U.getOperandNo();
259 
260  // If we have a binary operator that is commutative, an actual constant
261  // operand would end up on the RHS, so pretend the use of the PHI is on the
262  // RHS.
263  //
264  // Technically, this is a bit weird if *both* operands are PHIs we're
265  // speculating. But if that is the case, giving an "optimistic" cost isn't
266  // a bad thing because after speculation it will constant fold. And
267  // moreover, such cases should likely have been constant folded already by
268  // some other pass, so we shouldn't worry about "modeling" them terribly
269  // accurately here. Similarly, if the other operand is a constant, it still
270  // seems fine to be "optimistic" in our cost modeling, because when the
271  // incoming operand from the PHI node is also a constant, we will end up
272  // constant folding.
273  if (UserI->isBinaryOp() && UserI->isCommutative() && Idx != 1)
274  // Assume we will commute the constant to the RHS to be canonical.
275  Idx = 1;
276 
277  // Get the intrinsic ID if this user is an intrinsic.
279  if (auto *UserII = dyn_cast<IntrinsicInst>(UserI))
280  IID = UserII->getIntrinsicID();
281 
282  for (auto &IncomingConstantAndCostsAndCount : CostsAndCounts) {
283  ConstantInt *IncomingC = IncomingConstantAndCostsAndCount.first;
284  InstructionCost MatCost = IncomingConstantAndCostsAndCount.second.MatCost;
285  InstructionCost &FoldedCost =
286  IncomingConstantAndCostsAndCount.second.FoldedCost;
287  if (IID)
288  FoldedCost +=
289  TTI.getIntImmCostIntrin(IID, Idx, IncomingC->getValue(),
290  IncomingC->getType(),
292  else
293  FoldedCost +=
294  TTI.getIntImmCostInst(UserI->getOpcode(), Idx,
295  IncomingC->getValue(), IncomingC->getType(),
297 
298  // If we accumulate more folded cost for this incoming constant than
299  // materialized cost, then we'll regress any edge with this constant so
300  // just bail. We're only interested in cases where folding the incoming
301  // constants is at least break-even on all paths.
302  if (FoldedCost > MatCost) {
303  LLVM_DEBUG(dbgs() << " Not profitable to fold imm: " << *IncomingC
304  << "\n"
305  " Materializing cost: "
306  << MatCost
307  << "\n"
308  " Accumulated folded cost: "
309  << FoldedCost << "\n");
310  return false;
311  }
312  }
313  }
314 
315  // Compute the total cost savings afforded by this PHI node.
316  InstructionCost TotalMatCost = TTI.TCC_Free, TotalFoldedCost = TTI.TCC_Free;
317  for (auto IncomingConstantAndCostsAndCount : CostsAndCounts) {
318  InstructionCost MatCost = IncomingConstantAndCostsAndCount.second.MatCost;
319  InstructionCost FoldedCost =
320  IncomingConstantAndCostsAndCount.second.FoldedCost;
321  int Count = IncomingConstantAndCostsAndCount.second.Count;
322 
323  TotalMatCost += MatCost * Count;
324  TotalFoldedCost += FoldedCost * Count;
325  }
326  assert(TotalMatCost.isValid() && "Constants must be materializable");
327  assert(TotalFoldedCost <= TotalMatCost && "If each constant's folded cost is "
328  "less that its materialized cost, "
329  "the sum must be as well.");
330  LLVM_DEBUG(dbgs() << " Cost savings " << (TotalMatCost - TotalFoldedCost)
331  << ": " << PN << "\n");
332  CostSavingsMap[&PN] = TotalMatCost - TotalFoldedCost;
333  return true;
334 }
335 
336 /// Simple helper to walk all the users of a list of phis depth first, and call
337 /// a visit function on each one in post-order.
338 ///
339 /// All of the PHIs should be in the same basic block, and this is primarily
340 /// used to make a single depth-first walk across their collective users
341 /// without revisiting any subgraphs. Callers should provide a fast, idempotent
342 /// callable to test whether a node has been visited and the more important
343 /// callable to actually visit a particular node.
344 ///
345 /// Depth-first and postorder here refer to the *operand* graph -- we start
346 /// from a collection of users of PHI nodes and walk "up" the operands
347 /// depth-first.
348 template <typename IsVisitedT, typename VisitT>
350  IsVisitedT IsVisited,
351  VisitT Visit) {
353  for (auto *PN : PNs)
354  for (Use &U : PN->uses()) {
355  auto *UI = cast<Instruction>(U.getUser());
356  if (IsVisited(UI))
357  // Already visited this user, continue across the roots.
358  continue;
359 
360  // Otherwise, walk the operand graph depth-first and visit each
361  // dependency in postorder.
362  DFSStack.push_back({UI, UI->value_op_begin()});
363  do {
365  std::tie(UI, OpIt) = DFSStack.pop_back_val();
366  while (OpIt != UI->value_op_end()) {
367  auto *OpI = dyn_cast<Instruction>(*OpIt);
368  // Increment to the next operand for whenever we continue.
369  ++OpIt;
370  // No need to visit non-instructions, which can't form dependencies,
371  // or instructions outside of our potential dependency set that we
372  // were given. Finally, if we've already visited the node, continue
373  // to the next.
374  if (!OpI || IsVisited(OpI))
375  continue;
376 
377  // Push onto the stack and descend. We can directly continue this
378  // loop when ascending.
379  DFSStack.push_back({UI, OpIt});
380  UI = OpI;
381  OpIt = OpI->value_op_begin();
382  }
383 
384  // Finished visiting children, visit this node.
385  assert(!IsVisited(UI) && "Should not have already visited a node!");
386  Visit(UI);
387  } while (!DFSStack.empty());
388  }
389 }
390 
391 /// Find profitable PHIs to speculate.
392 ///
393 /// For a PHI node to be profitable, we need the cost of speculating its users
394 /// (and their dependencies) to not exceed the savings of folding the PHI's
395 /// constant operands into the speculated users.
396 ///
397 /// Computing this is surprisingly challenging. Because users of two different
398 /// PHI nodes can depend on each other or on common other instructions, it may
399 /// be profitable to speculate two PHI nodes together even though neither one
400 /// in isolation is profitable. The straightforward way to find all the
401 /// profitable PHIs would be to check each combination of PHIs' cost, but this
402 /// is exponential in complexity.
403 ///
404 /// Even if we assume that we only care about cases where we can consider each
405 /// PHI node in isolation (rather than considering cases where none are
406 /// profitable in isolation but some subset are profitable as a set), we still
407 /// have a challenge. The obvious way to find all individually profitable PHIs
408 /// is to iterate until reaching a fixed point, but this will be quadratic in
409 /// complexity. =/
410 ///
411 /// This code currently uses a linear-to-compute order for a greedy approach.
412 /// It won't find cases where a set of PHIs must be considered together, but it
413 /// handles most cases of order dependence without quadratic iteration. The
414 /// specific order used is the post-order across the operand DAG. When the last
415 /// user of a PHI is visited in this postorder walk, we check it for
416 /// profitability.
417 ///
418 /// There is an orthogonal extra complexity to all of this: computing the cost
419 /// itself can easily become a linear computation making everything again (at
420 /// best) quadratic. Using a postorder over the operand graph makes it
421 /// particularly easy to avoid this through dynamic programming. As we do the
422 /// postorder walk, we build the transitive cost of that subgraph. It is also
423 /// straightforward to then update these costs when we mark a PHI for
424 /// speculation so that subsequent PHIs don't re-pay the cost of already
425 /// speculated instructions.
428  const SmallDenseMap<PHINode *, InstructionCost, 16> &CostSavingsMap,
429  const SmallPtrSetImpl<Instruction *> &PotentialSpecSet, int NumPreds,
432 
433  // First, establish a reverse mapping from immediate users of the PHI nodes
434  // to the nodes themselves, and count how many users each PHI node has in
435  // a way we can update while processing them.
437  SmallDenseMap<PHINode *, int, 16> PNUserCountMap;
439  for (auto *PN : PNs) {
440  assert(UserSet.empty() && "Must start with an empty user set!");
441  for (Use &U : PN->uses())
442  UserSet.insert(cast<Instruction>(U.getUser()));
443  PNUserCountMap[PN] = UserSet.size();
444  for (auto *UI : UserSet)
445  UserToPNMap.insert({UI, {}}).first->second.push_back(PN);
446  UserSet.clear();
447  }
448 
449  // Now do a DFS across the operand graph of the users, computing cost as we
450  // go and when all costs for a given PHI are known, checking that PHI for
451  // profitability.
454  PNs,
455  /*IsVisited*/
456  [&](Instruction *I) {
457  // We consider anything that isn't potentially speculated to be
458  // "visited" as it is already handled. Similarly, anything that *is*
459  // potentially speculated but for which we have an entry in our cost
460  // map, we're done.
461  return !PotentialSpecSet.count(I) || SpecCostMap.count(I);
462  },
463  /*Visit*/
464  [&](Instruction *I) {
465  // We've fully visited the operands, so sum their cost with this node
466  // and update the cost map.
468  for (Value *OpV : I->operand_values())
469  if (auto *OpI = dyn_cast<Instruction>(OpV)) {
470  auto CostMapIt = SpecCostMap.find(OpI);
471  if (CostMapIt != SpecCostMap.end())
472  Cost += CostMapIt->second;
473  }
475  bool Inserted = SpecCostMap.insert({I, Cost}).second;
476  (void)Inserted;
477  assert(Inserted && "Must not re-insert a cost during the DFS!");
478 
479  // Now check if this node had a corresponding PHI node using it. If so,
480  // we need to decrement the outstanding user count for it.
481  auto UserPNsIt = UserToPNMap.find(I);
482  if (UserPNsIt == UserToPNMap.end())
483  return;
484  auto &UserPNs = UserPNsIt->second;
485  auto UserPNsSplitIt = std::stable_partition(
486  UserPNs.begin(), UserPNs.end(), [&](PHINode *UserPN) {
487  int &PNUserCount = PNUserCountMap.find(UserPN)->second;
488  assert(
489  PNUserCount > 0 &&
490  "Should never re-visit a PN after its user count hits zero!");
491  --PNUserCount;
492  return PNUserCount != 0;
493  });
494 
495  // FIXME: Rather than one at a time, we should sum the savings as the
496  // cost will be completely shared.
497  SmallVector<Instruction *, 16> SpecWorklist;
498  for (auto *PN : llvm::make_range(UserPNsSplitIt, UserPNs.end())) {
499  InstructionCost SpecCost = TTI.TCC_Free;
500  for (Use &U : PN->uses())
501  SpecCost +=
502  SpecCostMap.find(cast<Instruction>(U.getUser()))->second;
503  SpecCost *= (NumPreds - 1);
504  // When the user count of a PHI node hits zero, we should check its
505  // profitability. If profitable, we should mark it for speculation
506  // and zero out the cost of everything it depends on.
507  InstructionCost CostSavings = CostSavingsMap.find(PN)->second;
508  if (SpecCost > CostSavings) {
509  LLVM_DEBUG(dbgs() << " Not profitable, speculation cost: " << *PN
510  << "\n"
511  " Cost savings: "
512  << CostSavings
513  << "\n"
514  " Speculation cost: "
515  << SpecCost << "\n");
516  continue;
517  }
518 
519  // We're going to speculate this user-associated PHI. Copy it out and
520  // add its users to the worklist to update their cost.
521  SpecPNs.push_back(PN);
522  for (Use &U : PN->uses()) {
523  auto *UI = cast<Instruction>(U.getUser());
524  auto CostMapIt = SpecCostMap.find(UI);
525  if (CostMapIt->second == 0)
526  continue;
527  // Zero out this cost entry to avoid duplicates.
528  CostMapIt->second = 0;
529  SpecWorklist.push_back(UI);
530  }
531  }
532 
533  // Now walk all the operands of the users in the worklist transitively
534  // to zero out all the memoized costs.
535  while (!SpecWorklist.empty()) {
536  Instruction *SpecI = SpecWorklist.pop_back_val();
537  assert(SpecCostMap.find(SpecI)->second == 0 &&
538  "Didn't zero out a cost!");
539 
540  // Walk the operands recursively to zero out their cost as well.
541  for (auto *OpV : SpecI->operand_values()) {
542  auto *OpI = dyn_cast<Instruction>(OpV);
543  if (!OpI)
544  continue;
545  auto CostMapIt = SpecCostMap.find(OpI);
546  if (CostMapIt == SpecCostMap.end() || CostMapIt->second == 0)
547  continue;
548  CostMapIt->second = 0;
549  SpecWorklist.push_back(OpI);
550  }
551  }
552  });
553 
554  return SpecPNs;
555 }
556 
557 /// Speculate users around a set of PHI nodes.
558 ///
559 /// This routine does the actual speculation around a set of PHI nodes where we
560 /// have determined this to be both safe and profitable.
561 ///
562 /// This routine handles any spliting of critical edges necessary to create
563 /// a safe block to speculate into as well as cloning the instructions and
564 /// rewriting all uses.
565 static void speculatePHIs(ArrayRef<PHINode *> SpecPNs,
566  SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
568  DominatorTree &DT) {
569  LLVM_DEBUG(dbgs() << " Speculating around " << SpecPNs.size() << " PHIs!\n");
570  NumPHIsSpeculated += SpecPNs.size();
571 
572  // Split any critical edges so that we have a block to hoist into.
573  auto *ParentBB = SpecPNs[0]->getParent();
575  SpecPreds.reserve(PredSet.size());
576  for (auto *PredBB : PredSet) {
577  auto *NewPredBB = SplitCriticalEdge(
578  PredBB, ParentBB,
579  CriticalEdgeSplittingOptions(&DT).setMergeIdenticalEdges());
580  if (NewPredBB) {
581  ++NumEdgesSplit;
582  LLVM_DEBUG(dbgs() << " Split critical edge from: " << PredBB->getName()
583  << "\n");
584  SpecPreds.push_back(NewPredBB);
585  } else {
586  assert(PredBB->getSingleSuccessor() == ParentBB &&
587  "We need a non-critical predecessor to speculate into.");
588  assert(!isa<InvokeInst>(PredBB->getTerminator()) &&
589  "Cannot have a non-critical invoke!");
590 
591  // Already non-critical, use existing pred.
592  SpecPreds.push_back(PredBB);
593  }
594  }
595 
599  /*IsVisited*/
600  [&](Instruction *I) {
601  // This is visited if we don't need to
602  // speculate it or we already have
603  // speculated it.
604  return !PotentialSpecSet.count(I) ||
605  SpecSet.count(I);
606  },
607  /*Visit*/
608  [&](Instruction *I) {
609  // All operands scheduled, schedule this
610  // node.
611  SpecSet.insert(I);
612  SpecList.push_back(I);
613  });
614 
615  int NumSpecInsts = SpecList.size() * SpecPreds.size();
616  int NumRedundantInsts = NumSpecInsts - SpecList.size();
617  LLVM_DEBUG(dbgs() << " Inserting " << NumSpecInsts
618  << " speculated instructions, " << NumRedundantInsts
619  << " redundancies\n");
620  NumSpeculatedInstructions += NumSpecInsts;
621  NumNewRedundantInstructions += NumRedundantInsts;
622 
623  // Each predecessor is numbered by its index in `SpecPreds`, so for each
624  // instruction we speculate, the speculated instruction is stored in that
625  // index of the vector associated with the original instruction. We also
626  // store the incoming values for each predecessor from any PHIs used.
628 
629  // Inject the synthetic mappings to rewrite PHIs to the appropriate incoming
630  // value. This handles both the PHIs we are speculating around and any other
631  // PHIs that happen to be used.
632  for (auto *OrigI : SpecList)
633  for (auto *OpV : OrigI->operand_values()) {
634  auto *OpPN = dyn_cast<PHINode>(OpV);
635  if (!OpPN || OpPN->getParent() != ParentBB)
636  continue;
637 
638  auto InsertResult = SpeculatedValueMap.insert({OpPN, {}});
639  if (!InsertResult.second)
640  continue;
641 
642  auto &SpeculatedVals = InsertResult.first->second;
643 
644  // Populating our structure for mapping is particularly annoying because
645  // finding an incoming value for a particular predecessor block in a PHI
646  // node is a linear time operation! To avoid quadratic behavior, we build
647  // a map for this PHI node's incoming values and then translate it into
648  // the more compact representation used below.
650  for (int i : llvm::seq<int>(0, OpPN->getNumIncomingValues()))
651  IncomingValueMap[OpPN->getIncomingBlock(i)] = OpPN->getIncomingValue(i);
652 
653  for (auto *PredBB : SpecPreds)
654  SpeculatedVals.push_back(IncomingValueMap.find(PredBB)->second);
655  }
656 
657  // Speculate into each predecessor.
658  for (int PredIdx : llvm::seq<int>(0, SpecPreds.size())) {
659  auto *PredBB = SpecPreds[PredIdx];
660  assert(PredBB->getSingleSuccessor() == ParentBB &&
661  "We need a non-critical predecessor to speculate into.");
662 
663  for (auto *OrigI : SpecList) {
664  auto *NewI = OrigI->clone();
665  NewI->setName(Twine(OrigI->getName()) + "." + Twine(PredIdx));
666  NewI->insertBefore(PredBB->getTerminator());
667 
668  // Rewrite all the operands to the previously speculated instructions.
669  // Because we're walking in-order, the defs must precede the uses and we
670  // should already have these mappings.
671  for (Use &U : NewI->operands()) {
672  auto *OpI = dyn_cast<Instruction>(U.get());
673  if (!OpI)
674  continue;
675  auto MapIt = SpeculatedValueMap.find(OpI);
676  if (MapIt == SpeculatedValueMap.end())
677  continue;
678  const auto &SpeculatedVals = MapIt->second;
679  assert(SpeculatedVals[PredIdx] &&
680  "Must have a speculated value for this predecessor!");
681  assert(SpeculatedVals[PredIdx]->getType() == OpI->getType() &&
682  "Speculated value has the wrong type!");
683 
684  // Rewrite the use to this predecessor's speculated instruction.
685  U.set(SpeculatedVals[PredIdx]);
686  }
687 
688  // Commute instructions which now have a constant in the LHS but not the
689  // RHS.
690  if (NewI->isBinaryOp() && NewI->isCommutative() &&
691  isa<Constant>(NewI->getOperand(0)) &&
692  !isa<Constant>(NewI->getOperand(1)))
693  NewI->getOperandUse(0).swap(NewI->getOperandUse(1));
694 
695  SpeculatedValueMap[OrigI].push_back(NewI);
696  assert(SpeculatedValueMap[OrigI][PredIdx] == NewI &&
697  "Mismatched speculated instruction index!");
698  }
699  }
700 
701  // Walk the speculated instruction list and if they have uses, insert a PHI
702  // for them from the speculated versions, and replace the uses with the PHI.
703  // Then erase the instructions as they have been fully speculated. The walk
704  // needs to be in reverse so that we don't think there are users when we'll
705  // actually eventually remove them later.
706  IRBuilder<> IRB(SpecPNs[0]);
707  for (auto *OrigI : llvm::reverse(SpecList)) {
708  // Check if we need a PHI for any remaining users and if so, insert it.
709  if (!OrigI->use_empty()) {
710  auto *SpecIPN = IRB.CreatePHI(OrigI->getType(), SpecPreds.size(),
711  Twine(OrigI->getName()) + ".phi");
712  // Add the incoming values we speculated.
713  auto &SpeculatedVals = SpeculatedValueMap.find(OrigI)->second;
714  for (int PredIdx : llvm::seq<int>(0, SpecPreds.size()))
715  SpecIPN->addIncoming(SpeculatedVals[PredIdx], SpecPreds[PredIdx]);
716 
717  // And replace the uses with the PHI node.
718  OrigI->replaceAllUsesWith(SpecIPN);
719  }
720 
721  // It is important to immediately erase this so that it stops using other
722  // instructions. This avoids inserting needless PHIs of them.
723  OrigI->eraseFromParent();
724  }
725 
726  // All of the uses of the speculated phi nodes should be removed at this
727  // point, so erase them.
728  for (auto *SpecPN : SpecPNs) {
729  assert(SpecPN->use_empty() && "All users should have been speculated!");
730  SpecPN->eraseFromParent();
731  }
732 }
733 
734 /// Try to speculate around a series of PHIs from a single basic block.
735 ///
736 /// This routine checks whether any of these PHIs are profitable to speculate
737 /// users around. If safe and profitable, it does the speculation. It returns
738 /// true when at least some speculation occurs.
741  LLVM_DEBUG(dbgs() << "Evaluating phi nodes for speculation:\n");
742 
743  // Savings in cost from speculating around a PHI node.
745 
746  // Remember the set of instructions that are candidates for speculation so
747  // that we can quickly walk things within that space. This prunes out
748  // instructions already available along edges, etc.
749  SmallPtrSet<Instruction *, 16> PotentialSpecSet;
750 
751  // Remember the set of instructions that are (transitively) unsafe to
752  // speculate into the incoming edges of this basic block. This avoids
753  // recomputing them for each PHI node we check. This set is specific to this
754  // block though as things are pruned out of it based on what is available
755  // along incoming edges.
757 
758  // For each PHI node in this block, check whether there are immediate folding
759  // opportunities from speculation, and whether that speculation will be
760  // valid. This determise the set of safe PHIs to speculate.
761  llvm::erase_if(PNs, [&](PHINode *PN) {
763  *PN, CostSavingsMap, PotentialSpecSet, UnsafeSet, DT, TTI);
764  });
765  // If no PHIs were profitable, skip.
766  if (PNs.empty()) {
767  LLVM_DEBUG(dbgs() << " No safe and profitable PHIs found!\n");
768  return false;
769  }
770 
771  // We need to know how much speculation will cost which is determined by how
772  // many incoming edges will need a copy of each speculated instruction.
774  for (auto *PredBB : PNs[0]->blocks()) {
775  if (!PredSet.insert(PredBB))
776  continue;
777 
778  // We cannot speculate when a predecessor is an indirect branch.
779  // FIXME: We also can't reliably create a non-critical edge block for
780  // speculation if the predecessor is an invoke. This doesn't seem
781  // fundamental and we should probably be splitting critical edges
782  // differently.
783  const auto *TermInst = PredBB->getTerminator();
784  if (isa<IndirectBrInst>(TermInst) ||
785  isa<InvokeInst>(TermInst) ||
786  isa<CallBrInst>(TermInst)) {
787  LLVM_DEBUG(dbgs() << " Invalid: predecessor terminator: "
788  << PredBB->getName() << "\n");
789  return false;
790  }
791  }
792  if (PredSet.size() < 2) {
793  LLVM_DEBUG(dbgs() << " Unimportant: phi with only one predecessor\n");
794  return false;
795  }
796 
798  PNs, CostSavingsMap, PotentialSpecSet, PredSet.size(), DT, TTI);
799  if (SpecPNs.empty())
800  // Nothing to do.
801  return false;
802 
803  speculatePHIs(SpecPNs, PotentialSpecSet, PredSet, DT);
804  return true;
805 }
806 
809  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
810  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
811 
812  bool Changed = false;
813  for (auto *BB : ReversePostOrderTraversal<Function *>(&F)) {
815  auto BBI = BB->begin();
816  while (auto *PN = dyn_cast<PHINode>(&*BBI)) {
817  PNs.push_back(PN);
818  ++BBI;
819  }
820 
821  if (PNs.empty())
822  continue;
823 
824  Changed |= tryToSpeculatePHIs(PNs, DT, TTI);
825  }
826 
827  if (!Changed)
828  return PreservedAnalyses::all();
829 
831  return PA;
832 }
i
i
Definition: README.txt:29
llvm::InstructionCost
Definition: InstructionCost.h:26
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2320
llvm
Definition: AllocatorList.h:23
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::ConstantInt::getType
IntegerType * getType() const
getType - Specialize the getType() method to always return an IntegerType, which reduces the amount o...
Definition: Constants.h:171
IntrinsicInst.h
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:769
llvm::Function
Definition: Function.h:61
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::size
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
llvm::ConstantInt::getValue
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:131
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1167
Statistic.h
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:167
llvm::IRBuilder<>
llvm::erase_if
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:1656
llvm::SmallDenseMap
Definition: DenseMap.h:880
ValueTracking.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
findProfitablePHIs
static SmallVector< PHINode *, 16 > findProfitablePHIs(ArrayRef< PHINode * > PNs, const SmallDenseMap< PHINode *, InstructionCost, 16 > &CostSavingsMap, const SmallPtrSetImpl< Instruction * > &PotentialSpecSet, int NumPreds, DominatorTree &DT, TargetTransformInfo &TTI)
Find profitable PHIs to speculate.
Definition: SpeculateAroundPHIs.cpp:426
llvm::SplitCriticalEdge
BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
Definition: BreakCriticalEdges.cpp:103
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:329
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
llvm::SmallPtrSet< Instruction *, 4 >
llvm::TargetTransformInfo::getIntImmCostIntrin
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Definition: TargetTransformInfo.cpp:566
llvm::Intrinsic::not_intrinsic
@ not_intrinsic
Definition: Intrinsics.h:45
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:634
llvm::TargetTransformInfo::getIntImmCostInst
InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind, Instruction *Inst=nullptr) const
Return the expected cost of materialization for the given integer immediate of the specified type for...
Definition: TargetTransformInfo.cpp:556
Sequence.h
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
llvm::DominatorTree::dominates
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:115
llvm::User::value_op_iterator
Iterator for directly iterating over the operand Values.
Definition: User.h:250
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:77
llvm::Intrinsic::getType
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
Return the function type for an intrinsic.
Definition: Function.cpp:1285
llvm::PHINode::getIncomingValue
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
Definition: Instructions.h:2696
visitPHIUsersAndDepsInPostOrder
static void visitPHIUsersAndDepsInPostOrder(ArrayRef< PHINode * > PNs, IsVisitedT IsVisited, VisitT Visit)
Simple helper to walk all the users of a list of phis depth first, and call a visit function on each ...
Definition: SpeculateAroundPHIs.cpp:349
llvm::TargetTransformInfo::getIntImmCost
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Return the expected cost of materializing for the given integer immediate of the specified type.
Definition: TargetTransformInfo.cpp:549
llvm::CriticalEdgeSplittingOptions
Option class for critical edge splitting.
Definition: BasicBlockUtils.h:136
llvm::Value::uses
iterator_range< use_iterator > uses()
Definition: Value.h:377
llvm::Instruction
Definition: Instruction.h:45
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::PHINode::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition: Instructions.h:2692
llvm::mayBeMemoryDependent
bool mayBeMemoryDependent(const Instruction &I)
Returns true if the result or effects of the given instructions I depend on or influence global memor...
Definition: ValueTracking.cpp:4626
BasicBlock.h
IncomingValueMap
DenseMap< BasicBlock *, Value * > IncomingValueMap
Definition: Local.cpp:874
llvm::DenseMap
Definition: DenseMap.h:714
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::SpeculateAroundPHIsPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
Definition: SpeculateAroundPHIs.cpp:807
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::TargetTransformInfo::TCC_Free
@ TCC_Free
Expected to fold away in lowering.
Definition: TargetTransformInfo.h:261
llvm::IRBuilderBase::CreatePHI
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2344
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
speculatePHIs
static void speculatePHIs(ArrayRef< PHINode * > SpecPNs, SmallPtrSetImpl< Instruction * > &PotentialSpecSet, SmallSetVector< BasicBlock *, 16 > &PredSet, DominatorTree &DT)
Speculate users around a set of PHI nodes.
Definition: SpeculateAroundPHIs.cpp:565
llvm::TargetTransformInfo::TCK_SizeAndLatency
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Definition: TargetTransformInfo.h:214
llvm::TargetTransformInfo::getUserCost
InstructionCost getUserCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
Definition: TargetTransformInfo.cpp:223
SpeculateAroundPHIs.h
llvm::InstructionCost::isValid
bool isValid() const
Definition: InstructionCost.h:60
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::SmallPtrSetImplBase::size
size_type size() const
Definition: SmallPtrSet.h:92
isSafeToSpeculatePHIUsers
static bool isSafeToSpeculatePHIUsers(PHINode &PN, DominatorTree &DT, SmallPtrSetImpl< Instruction * > &PotentialSpecSet, SmallPtrSetImpl< Instruction * > &UnsafeSet)
Check whether speculating the users of a PHI node around the PHI will be safe.
Definition: SpeculateAroundPHIs.cpp:49
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:83
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::User::operand_values
iterator_range< value_op_iterator > operand_values()
Definition: User.h:266
llvm::ReversePostOrderTraversal
Definition: PostOrderIterator.h:290
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:252
Instructions.h
PostOrderIterator.h
llvm::SmallPtrSetImplBase::empty
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:91
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::PHINode::getIncomingBlock
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Definition: Instructions.h:2716
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
TargetTransformInfo.h
llvm::PHINode
Definition: Instructions.h:2600
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
llvm::SmallPtrSetImpl< Instruction * >
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition: SmallVector.h:623
BasicBlockUtils.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
isSafeAndProfitableToSpeculateAroundPHI
static bool isSafeAndProfitableToSpeculateAroundPHI(PHINode &PN, SmallDenseMap< PHINode *, InstructionCost, 16 > &CostSavingsMap, SmallPtrSetImpl< Instruction * > &PotentialSpecSet, SmallPtrSetImpl< Instruction * > &UnsafeSet, DominatorTree &DT, TargetTransformInfo &TTI)
Check whether, in isolation, a given PHI node is both safe and profitable to speculate users around.
Definition: SpeculateAroundPHIs.cpp:202
Debug.h
SetVector.h
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
tryToSpeculatePHIs
static bool tryToSpeculatePHIs(SmallVectorImpl< PHINode * > &PNs, DominatorTree &DT, TargetTransformInfo &TTI)
Try to speculate around a series of PHIs from a single basic block.
Definition: SpeculateAroundPHIs.cpp:739