LLVM  9.0.0svn
SpeculateAroundPHIs.cpp
Go to the documentation of this file.
1 //===- SpeculateAroundPHIs.cpp --------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
11 #include "llvm/ADT/Sequence.h"
12 #include "llvm/ADT/SetVector.h"
13 #include "llvm/ADT/Statistic.h"
16 #include "llvm/IR/BasicBlock.h"
17 #include "llvm/IR/IRBuilder.h"
18 #include "llvm/IR/Instructions.h"
19 #include "llvm/IR/IntrinsicInst.h"
20 #include "llvm/Support/Debug.h"
22 
23 using namespace llvm;
24 
25 #define DEBUG_TYPE "spec-phis"
26 
27 STATISTIC(NumPHIsSpeculated, "Number of PHI nodes we speculated around");
28 STATISTIC(NumEdgesSplit,
29  "Number of critical edges which were split for speculation");
30 STATISTIC(NumSpeculatedInstructions,
31  "Number of instructions we speculated around the PHI nodes");
32 STATISTIC(NumNewRedundantInstructions,
33  "Number of new, redundant instructions inserted");
34 
35 /// Check whether speculating the users of a PHI node around the PHI
36 /// will be safe.
37 ///
38 /// This checks both that all of the users are safe and also that all of their
39 /// operands are either recursively safe or already available along an incoming
40 /// edge to the PHI.
41 ///
42 /// This routine caches both all the safe nodes explored in `PotentialSpecSet`
43 /// and the chain of nodes that definitively reach any unsafe node in
44 /// `UnsafeSet`. By preserving these between repeated calls to this routine for
45 /// PHIs in the same basic block, the exploration here can be reused. However,
46 /// these caches must no be reused for PHIs in a different basic block as they
47 /// reflect what is available along incoming edges.
48 static bool
50  SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
51  SmallPtrSetImpl<Instruction *> &UnsafeSet) {
52  auto *PhiBB = PN.getParent();
55 
56  // Walk each user of the PHI node.
57  for (Use &U : PN.uses()) {
58  auto *UI = cast<Instruction>(U.getUser());
59 
60  // Ensure the use post-dominates the PHI node. This ensures that, in the
61  // absence of unwinding, the use will actually be reached.
62  // FIXME: We use a blunt hammer of requiring them to be in the same basic
63  // block. We should consider using actual post-dominance here in the
64  // future.
65  if (UI->getParent() != PhiBB) {
66  LLVM_DEBUG(dbgs() << " Unsafe: use in a different BB: " << *UI << "\n");
67  return false;
68  }
69 
70  if (auto CS = ImmutableCallSite(UI)) {
71  if (CS.isConvergent() || CS.cannotDuplicate()) {
72  LLVM_DEBUG(dbgs() << " Unsafe: convergent "
73  "callsite cannot de duplicated: " << *UI << '\n');
74  return false;
75  }
76  }
77 
78  // FIXME: This check is much too conservative. We're not going to move these
79  // instructions onto new dynamic paths through the program unless there is
80  // a call instruction between the use and the PHI node. And memory isn't
81  // changing unless there is a store in that same sequence. We should
82  // probably change this to do at least a limited scan of the intervening
83  // instructions and allow handling stores in easily proven safe cases.
84  if (mayBeMemoryDependent(*UI)) {
85  LLVM_DEBUG(dbgs() << " Unsafe: can't speculate use: " << *UI << "\n");
86  return false;
87  }
88 
89  // Now do a depth-first search of everything these users depend on to make
90  // sure they are transitively safe. This is a depth-first search, but we
91  // check nodes in preorder to minimize the amount of checking.
92  Visited.insert(UI);
93  DFSStack.push_back({UI, UI->value_op_begin()});
94  do {
96  std::tie(UI, OpIt) = DFSStack.pop_back_val();
97 
98  while (OpIt != UI->value_op_end()) {
99  auto *OpI = dyn_cast<Instruction>(*OpIt);
100  // Increment to the next operand for whenever we continue.
101  ++OpIt;
102  // No need to visit non-instructions, which can't form dependencies.
103  if (!OpI)
104  continue;
105 
106  // Now do the main pre-order checks that this operand is a viable
107  // dependency of something we want to speculate.
108 
109  // First do a few checks for instructions that won't require
110  // speculation at all because they are trivially available on the
111  // incoming edge (either through dominance or through an incoming value
112  // to a PHI).
113  //
114  // The cases in the current block will be trivially dominated by the
115  // edge.
116  auto *ParentBB = OpI->getParent();
117  if (ParentBB == PhiBB) {
118  if (isa<PHINode>(OpI)) {
119  // We can trivially map through phi nodes in the same block.
120  continue;
121  }
122  } else if (DT.dominates(ParentBB, PhiBB)) {
123  // Instructions from dominating blocks are already available.
124  continue;
125  }
126 
127  // Once we know that we're considering speculating the operand, check
128  // if we've already explored this subgraph and found it to be safe.
129  if (PotentialSpecSet.count(OpI))
130  continue;
131 
132  // If we've already explored this subgraph and found it unsafe, bail.
133  // If when we directly test whether this is safe it fails, bail.
134  if (UnsafeSet.count(OpI) || ParentBB != PhiBB ||
135  mayBeMemoryDependent(*OpI)) {
136  LLVM_DEBUG(dbgs() << " Unsafe: can't speculate transitive use: "
137  << *OpI << "\n");
138  // Record the stack of instructions which reach this node as unsafe
139  // so we prune subsequent searches.
140  UnsafeSet.insert(OpI);
141  for (auto &StackPair : DFSStack) {
142  Instruction *I = StackPair.first;
143  UnsafeSet.insert(I);
144  }
145  return false;
146  }
147 
148  // Skip any operands we're already recursively checking.
149  if (!Visited.insert(OpI).second)
150  continue;
151 
152  // Push onto the stack and descend. We can directly continue this
153  // loop when ascending.
154  DFSStack.push_back({UI, OpIt});
155  UI = OpI;
156  OpIt = OpI->value_op_begin();
157  }
158 
159  // This node and all its operands are safe. Go ahead and cache that for
160  // reuse later.
161  PotentialSpecSet.insert(UI);
162 
163  // Continue with the next node on the stack.
164  } while (!DFSStack.empty());
165  }
166 
167 #ifndef NDEBUG
168  // Every visited operand should have been marked as safe for speculation at
169  // this point. Verify this and return success.
170  for (auto *I : Visited)
171  assert(PotentialSpecSet.count(I) &&
172  "Failed to mark a visited instruction as safe!");
173 #endif
174  return true;
175 }
176 
177 /// Check whether, in isolation, a given PHI node is both safe and profitable
178 /// to speculate users around.
179 ///
180 /// This handles checking whether there are any constant operands to a PHI
181 /// which could represent a useful speculation candidate, whether the users of
182 /// the PHI are safe to speculate including all their transitive dependencies,
183 /// and whether after speculation there will be some cost savings (profit) to
184 /// folding the operands into the users of the PHI node. Returns true if both
185 /// safe and profitable with relevant cost savings updated in the map and with
186 /// an update to the `PotentialSpecSet`. Returns false if either safety or
187 /// profitability are absent. Some new entries may be made to the
188 /// `PotentialSpecSet` even when this routine returns false, but they remain
189 /// conservatively correct.
190 ///
191 /// The profitability check here is a local one, but it checks this in an
192 /// interesting way. Beyond checking that the total cost of materializing the
193 /// constants will be less than the cost of folding them into their users, it
194 /// also checks that no one incoming constant will have a higher cost when
195 /// folded into its users rather than materialized. This higher cost could
196 /// result in a dynamic *path* that is more expensive even when the total cost
197 /// is lower. Currently, all of the interesting cases where this optimization
198 /// should fire are ones where it is a no-loss operation in this sense. If we
199 /// ever want to be more aggressive here, we would need to balance the
200 /// different incoming edges' cost by looking at their respective
201 /// probabilities.
203  PHINode &PN, SmallDenseMap<PHINode *, int, 16> &CostSavingsMap,
204  SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
206  TargetTransformInfo &TTI) {
207  // First see whether there is any cost savings to speculating around this
208  // PHI, and build up a map of the constant inputs to how many times they
209  // occur.
210  bool NonFreeMat = false;
211  struct CostsAndCount {
212  int MatCost = TargetTransformInfo::TCC_Free;
213  int FoldedCost = TargetTransformInfo::TCC_Free;
214  int Count = 0;
215  };
217  SmallPtrSet<BasicBlock *, 16> IncomingConstantBlocks;
218  for (int i : llvm::seq<int>(0, PN.getNumIncomingValues())) {
219  auto *IncomingC = dyn_cast<ConstantInt>(PN.getIncomingValue(i));
220  if (!IncomingC)
221  continue;
222 
223  // Only visit each incoming edge with a constant input once.
224  if (!IncomingConstantBlocks.insert(PN.getIncomingBlock(i)).second)
225  continue;
226 
227  auto InsertResult = CostsAndCounts.insert({IncomingC, {}});
228  // Count how many edges share a given incoming costant.
229  ++InsertResult.first->second.Count;
230  // Only compute the cost the first time we see a particular constant.
231  if (!InsertResult.second)
232  continue;
233 
234  int &MatCost = InsertResult.first->second.MatCost;
235  MatCost = TTI.getIntImmCost(IncomingC->getValue(), IncomingC->getType());
236  NonFreeMat |= MatCost != TTI.TCC_Free;
237  }
238  if (!NonFreeMat) {
239  LLVM_DEBUG(dbgs() << " Free: " << PN << "\n");
240  // No profit in free materialization.
241  return false;
242  }
243 
244  // Now check that the uses of this PHI can actually be speculated,
245  // otherwise we'll still have to materialize the PHI value.
246  if (!isSafeToSpeculatePHIUsers(PN, DT, PotentialSpecSet, UnsafeSet)) {
247  LLVM_DEBUG(dbgs() << " Unsafe PHI: " << PN << "\n");
248  return false;
249  }
250 
251  // Compute how much (if any) savings are available by speculating around this
252  // PHI.
253  for (Use &U : PN.uses()) {
254  auto *UserI = cast<Instruction>(U.getUser());
255  // Now check whether there is any savings to folding the incoming constants
256  // into this use.
257  unsigned Idx = U.getOperandNo();
258 
259  // If we have a binary operator that is commutative, an actual constant
260  // operand would end up on the RHS, so pretend the use of the PHI is on the
261  // RHS.
262  //
263  // Technically, this is a bit weird if *both* operands are PHIs we're
264  // speculating. But if that is the case, giving an "optimistic" cost isn't
265  // a bad thing because after speculation it will constant fold. And
266  // moreover, such cases should likely have been constant folded already by
267  // some other pass, so we shouldn't worry about "modeling" them terribly
268  // accurately here. Similarly, if the other operand is a constant, it still
269  // seems fine to be "optimistic" in our cost modeling, because when the
270  // incoming operand from the PHI node is also a constant, we will end up
271  // constant folding.
272  if (UserI->isBinaryOp() && UserI->isCommutative() && Idx != 1)
273  // Assume we will commute the constant to the RHS to be canonical.
274  Idx = 1;
275 
276  // Get the intrinsic ID if this user is an intrinsic.
278  if (auto *UserII = dyn_cast<IntrinsicInst>(UserI))
279  IID = UserII->getIntrinsicID();
280 
281  for (auto &IncomingConstantAndCostsAndCount : CostsAndCounts) {
282  ConstantInt *IncomingC = IncomingConstantAndCostsAndCount.first;
283  int MatCost = IncomingConstantAndCostsAndCount.second.MatCost;
284  int &FoldedCost = IncomingConstantAndCostsAndCount.second.FoldedCost;
285  if (IID)
286  FoldedCost += TTI.getIntImmCost(IID, Idx, IncomingC->getValue(),
287  IncomingC->getType());
288  else
289  FoldedCost +=
290  TTI.getIntImmCost(UserI->getOpcode(), Idx, IncomingC->getValue(),
291  IncomingC->getType());
292 
293  // If we accumulate more folded cost for this incoming constant than
294  // materialized cost, then we'll regress any edge with this constant so
295  // just bail. We're only interested in cases where folding the incoming
296  // constants is at least break-even on all paths.
297  if (FoldedCost > MatCost) {
298  LLVM_DEBUG(dbgs() << " Not profitable to fold imm: " << *IncomingC
299  << "\n"
300  " Materializing cost: "
301  << MatCost
302  << "\n"
303  " Accumulated folded cost: "
304  << FoldedCost << "\n");
305  return false;
306  }
307  }
308  }
309 
310  // Compute the total cost savings afforded by this PHI node.
311  int TotalMatCost = TTI.TCC_Free, TotalFoldedCost = TTI.TCC_Free;
312  for (auto IncomingConstantAndCostsAndCount : CostsAndCounts) {
313  int MatCost = IncomingConstantAndCostsAndCount.second.MatCost;
314  int FoldedCost = IncomingConstantAndCostsAndCount.second.FoldedCost;
315  int Count = IncomingConstantAndCostsAndCount.second.Count;
316 
317  TotalMatCost += MatCost * Count;
318  TotalFoldedCost += FoldedCost * Count;
319  }
320  assert(TotalFoldedCost <= TotalMatCost && "If each constant's folded cost is "
321  "less that its materialized cost, "
322  "the sum must be as well.");
323 
324  LLVM_DEBUG(dbgs() << " Cost savings " << (TotalMatCost - TotalFoldedCost)
325  << ": " << PN << "\n");
326  CostSavingsMap[&PN] = TotalMatCost - TotalFoldedCost;
327  return true;
328 }
329 
330 /// Simple helper to walk all the users of a list of phis depth first, and call
331 /// a visit function on each one in post-order.
332 ///
333 /// All of the PHIs should be in the same basic block, and this is primarily
334 /// used to make a single depth-first walk across their collective users
335 /// without revisiting any subgraphs. Callers should provide a fast, idempotent
336 /// callable to test whether a node has been visited and the more important
337 /// callable to actually visit a particular node.
338 ///
339 /// Depth-first and postorder here refer to the *operand* graph -- we start
340 /// from a collection of users of PHI nodes and walk "up" the operands
341 /// depth-first.
342 template <typename IsVisitedT, typename VisitT>
344  IsVisitedT IsVisited,
345  VisitT Visit) {
347  for (auto *PN : PNs)
348  for (Use &U : PN->uses()) {
349  auto *UI = cast<Instruction>(U.getUser());
350  if (IsVisited(UI))
351  // Already visited this user, continue across the roots.
352  continue;
353 
354  // Otherwise, walk the operand graph depth-first and visit each
355  // dependency in postorder.
356  DFSStack.push_back({UI, UI->value_op_begin()});
357  do {
359  std::tie(UI, OpIt) = DFSStack.pop_back_val();
360  while (OpIt != UI->value_op_end()) {
361  auto *OpI = dyn_cast<Instruction>(*OpIt);
362  // Increment to the next operand for whenever we continue.
363  ++OpIt;
364  // No need to visit non-instructions, which can't form dependencies,
365  // or instructions outside of our potential dependency set that we
366  // were given. Finally, if we've already visited the node, continue
367  // to the next.
368  if (!OpI || IsVisited(OpI))
369  continue;
370 
371  // Push onto the stack and descend. We can directly continue this
372  // loop when ascending.
373  DFSStack.push_back({UI, OpIt});
374  UI = OpI;
375  OpIt = OpI->value_op_begin();
376  }
377 
378  // Finished visiting children, visit this node.
379  assert(!IsVisited(UI) && "Should not have already visited a node!");
380  Visit(UI);
381  } while (!DFSStack.empty());
382  }
383 }
384 
385 /// Find profitable PHIs to speculate.
386 ///
387 /// For a PHI node to be profitable, we need the cost of speculating its users
388 /// (and their dependencies) to not exceed the savings of folding the PHI's
389 /// constant operands into the speculated users.
390 ///
391 /// Computing this is surprisingly challenging. Because users of two different
392 /// PHI nodes can depend on each other or on common other instructions, it may
393 /// be profitable to speculate two PHI nodes together even though neither one
394 /// in isolation is profitable. The straightforward way to find all the
395 /// profitable PHIs would be to check each combination of PHIs' cost, but this
396 /// is exponential in complexity.
397 ///
398 /// Even if we assume that we only care about cases where we can consider each
399 /// PHI node in isolation (rather than considering cases where none are
400 /// profitable in isolation but some subset are profitable as a set), we still
401 /// have a challenge. The obvious way to find all individually profitable PHIs
402 /// is to iterate until reaching a fixed point, but this will be quadratic in
403 /// complexity. =/
404 ///
405 /// This code currently uses a linear-to-compute order for a greedy approach.
406 /// It won't find cases where a set of PHIs must be considered together, but it
407 /// handles most cases of order dependence without quadratic iteration. The
408 /// specific order used is the post-order across the operand DAG. When the last
409 /// user of a PHI is visited in this postorder walk, we check it for
410 /// profitability.
411 ///
412 /// There is an orthogonal extra complexity to all of this: computing the cost
413 /// itself can easily become a linear computation making everything again (at
414 /// best) quadratic. Using a postorder over the operand graph makes it
415 /// particularly easy to avoid this through dynamic programming. As we do the
416 /// postorder walk, we build the transitive cost of that subgraph. It is also
417 /// straightforward to then update these costs when we mark a PHI for
418 /// speculation so that subsequent PHIs don't re-pay the cost of already
419 /// speculated instructions.
422  const SmallDenseMap<PHINode *, int, 16> &CostSavingsMap,
423  const SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
424  int NumPreds, DominatorTree &DT, TargetTransformInfo &TTI) {
426 
427  // First, establish a reverse mapping from immediate users of the PHI nodes
428  // to the nodes themselves, and count how many users each PHI node has in
429  // a way we can update while processing them.
431  SmallDenseMap<PHINode *, int, 16> PNUserCountMap;
433  for (auto *PN : PNs) {
434  assert(UserSet.empty() && "Must start with an empty user set!");
435  for (Use &U : PN->uses())
436  UserSet.insert(cast<Instruction>(U.getUser()));
437  PNUserCountMap[PN] = UserSet.size();
438  for (auto *UI : UserSet)
439  UserToPNMap.insert({UI, {}}).first->second.push_back(PN);
440  UserSet.clear();
441  }
442 
443  // Now do a DFS across the operand graph of the users, computing cost as we
444  // go and when all costs for a given PHI are known, checking that PHI for
445  // profitability.
448  PNs,
449  /*IsVisited*/
450  [&](Instruction *I) {
451  // We consider anything that isn't potentially speculated to be
452  // "visited" as it is already handled. Similarly, anything that *is*
453  // potentially speculated but for which we have an entry in our cost
454  // map, we're done.
455  return !PotentialSpecSet.count(I) || SpecCostMap.count(I);
456  },
457  /*Visit*/
458  [&](Instruction *I) {
459  // We've fully visited the operands, so sum their cost with this node
460  // and update the cost map.
461  int Cost = TTI.TCC_Free;
462  for (Value *OpV : I->operand_values())
463  if (auto *OpI = dyn_cast<Instruction>(OpV)) {
464  auto CostMapIt = SpecCostMap.find(OpI);
465  if (CostMapIt != SpecCostMap.end())
466  Cost += CostMapIt->second;
467  }
468  Cost += TTI.getUserCost(I);
469  bool Inserted = SpecCostMap.insert({I, Cost}).second;
470  (void)Inserted;
471  assert(Inserted && "Must not re-insert a cost during the DFS!");
472 
473  // Now check if this node had a corresponding PHI node using it. If so,
474  // we need to decrement the outstanding user count for it.
475  auto UserPNsIt = UserToPNMap.find(I);
476  if (UserPNsIt == UserToPNMap.end())
477  return;
478  auto &UserPNs = UserPNsIt->second;
479  auto UserPNsSplitIt = std::stable_partition(
480  UserPNs.begin(), UserPNs.end(), [&](PHINode *UserPN) {
481  int &PNUserCount = PNUserCountMap.find(UserPN)->second;
482  assert(
483  PNUserCount > 0 &&
484  "Should never re-visit a PN after its user count hits zero!");
485  --PNUserCount;
486  return PNUserCount != 0;
487  });
488 
489  // FIXME: Rather than one at a time, we should sum the savings as the
490  // cost will be completely shared.
491  SmallVector<Instruction *, 16> SpecWorklist;
492  for (auto *PN : llvm::make_range(UserPNsSplitIt, UserPNs.end())) {
493  int SpecCost = TTI.TCC_Free;
494  for (Use &U : PN->uses())
495  SpecCost +=
496  SpecCostMap.find(cast<Instruction>(U.getUser()))->second;
497  SpecCost *= (NumPreds - 1);
498  // When the user count of a PHI node hits zero, we should check its
499  // profitability. If profitable, we should mark it for speculation
500  // and zero out the cost of everything it depends on.
501  int CostSavings = CostSavingsMap.find(PN)->second;
502  if (SpecCost > CostSavings) {
503  LLVM_DEBUG(dbgs() << " Not profitable, speculation cost: " << *PN
504  << "\n"
505  " Cost savings: "
506  << CostSavings
507  << "\n"
508  " Speculation cost: "
509  << SpecCost << "\n");
510  continue;
511  }
512 
513  // We're going to speculate this user-associated PHI. Copy it out and
514  // add its users to the worklist to update their cost.
515  SpecPNs.push_back(PN);
516  for (Use &U : PN->uses()) {
517  auto *UI = cast<Instruction>(U.getUser());
518  auto CostMapIt = SpecCostMap.find(UI);
519  if (CostMapIt->second == 0)
520  continue;
521  // Zero out this cost entry to avoid duplicates.
522  CostMapIt->second = 0;
523  SpecWorklist.push_back(UI);
524  }
525  }
526 
527  // Now walk all the operands of the users in the worklist transitively
528  // to zero out all the memoized costs.
529  while (!SpecWorklist.empty()) {
530  Instruction *SpecI = SpecWorklist.pop_back_val();
531  assert(SpecCostMap.find(SpecI)->second == 0 &&
532  "Didn't zero out a cost!");
533 
534  // Walk the operands recursively to zero out their cost as well.
535  for (auto *OpV : SpecI->operand_values()) {
536  auto *OpI = dyn_cast<Instruction>(OpV);
537  if (!OpI)
538  continue;
539  auto CostMapIt = SpecCostMap.find(OpI);
540  if (CostMapIt == SpecCostMap.end() || CostMapIt->second == 0)
541  continue;
542  CostMapIt->second = 0;
543  SpecWorklist.push_back(OpI);
544  }
545  }
546  });
547 
548  return SpecPNs;
549 }
550 
551 /// Speculate users around a set of PHI nodes.
552 ///
553 /// This routine does the actual speculation around a set of PHI nodes where we
554 /// have determined this to be both safe and profitable.
555 ///
556 /// This routine handles any spliting of critical edges necessary to create
557 /// a safe block to speculate into as well as cloning the instructions and
558 /// rewriting all uses.
559 static void speculatePHIs(ArrayRef<PHINode *> SpecPNs,
560  SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
562  DominatorTree &DT) {
563  LLVM_DEBUG(dbgs() << " Speculating around " << SpecPNs.size() << " PHIs!\n");
564  NumPHIsSpeculated += SpecPNs.size();
565 
566  // Split any critical edges so that we have a block to hoist into.
567  auto *ParentBB = SpecPNs[0]->getParent();
569  SpecPreds.reserve(PredSet.size());
570  for (auto *PredBB : PredSet) {
571  auto *NewPredBB = SplitCriticalEdge(
572  PredBB, ParentBB,
573  CriticalEdgeSplittingOptions(&DT).setMergeIdenticalEdges());
574  if (NewPredBB) {
575  ++NumEdgesSplit;
576  LLVM_DEBUG(dbgs() << " Split critical edge from: " << PredBB->getName()
577  << "\n");
578  SpecPreds.push_back(NewPredBB);
579  } else {
580  assert(PredBB->getSingleSuccessor() == ParentBB &&
581  "We need a non-critical predecessor to speculate into.");
582  assert(!isa<InvokeInst>(PredBB->getTerminator()) &&
583  "Cannot have a non-critical invoke!");
584 
585  // Already non-critical, use existing pred.
586  SpecPreds.push_back(PredBB);
587  }
588  }
589 
593  /*IsVisited*/
594  [&](Instruction *I) {
595  // This is visited if we don't need to
596  // speculate it or we already have
597  // speculated it.
598  return !PotentialSpecSet.count(I) ||
599  SpecSet.count(I);
600  },
601  /*Visit*/
602  [&](Instruction *I) {
603  // All operands scheduled, schedule this
604  // node.
605  SpecSet.insert(I);
606  SpecList.push_back(I);
607  });
608 
609  int NumSpecInsts = SpecList.size() * SpecPreds.size();
610  int NumRedundantInsts = NumSpecInsts - SpecList.size();
611  LLVM_DEBUG(dbgs() << " Inserting " << NumSpecInsts
612  << " speculated instructions, " << NumRedundantInsts
613  << " redundancies\n");
614  NumSpeculatedInstructions += NumSpecInsts;
615  NumNewRedundantInstructions += NumRedundantInsts;
616 
617  // Each predecessor is numbered by its index in `SpecPreds`, so for each
618  // instruction we speculate, the speculated instruction is stored in that
619  // index of the vector associated with the original instruction. We also
620  // store the incoming values for each predecessor from any PHIs used.
622 
623  // Inject the synthetic mappings to rewrite PHIs to the appropriate incoming
624  // value. This handles both the PHIs we are speculating around and any other
625  // PHIs that happen to be used.
626  for (auto *OrigI : SpecList)
627  for (auto *OpV : OrigI->operand_values()) {
628  auto *OpPN = dyn_cast<PHINode>(OpV);
629  if (!OpPN || OpPN->getParent() != ParentBB)
630  continue;
631 
632  auto InsertResult = SpeculatedValueMap.insert({OpPN, {}});
633  if (!InsertResult.second)
634  continue;
635 
636  auto &SpeculatedVals = InsertResult.first->second;
637 
638  // Populating our structure for mapping is particularly annoying because
639  // finding an incoming value for a particular predecessor block in a PHI
640  // node is a linear time operation! To avoid quadratic behavior, we build
641  // a map for this PHI node's incoming values and then translate it into
642  // the more compact representation used below.
644  for (int i : llvm::seq<int>(0, OpPN->getNumIncomingValues()))
645  IncomingValueMap[OpPN->getIncomingBlock(i)] = OpPN->getIncomingValue(i);
646 
647  for (auto *PredBB : SpecPreds)
648  SpeculatedVals.push_back(IncomingValueMap.find(PredBB)->second);
649  }
650 
651  // Speculate into each predecessor.
652  for (int PredIdx : llvm::seq<int>(0, SpecPreds.size())) {
653  auto *PredBB = SpecPreds[PredIdx];
654  assert(PredBB->getSingleSuccessor() == ParentBB &&
655  "We need a non-critical predecessor to speculate into.");
656 
657  for (auto *OrigI : SpecList) {
658  auto *NewI = OrigI->clone();
659  NewI->setName(Twine(OrigI->getName()) + "." + Twine(PredIdx));
660  NewI->insertBefore(PredBB->getTerminator());
661 
662  // Rewrite all the operands to the previously speculated instructions.
663  // Because we're walking in-order, the defs must precede the uses and we
664  // should already have these mappings.
665  for (Use &U : NewI->operands()) {
666  auto *OpI = dyn_cast<Instruction>(U.get());
667  if (!OpI)
668  continue;
669  auto MapIt = SpeculatedValueMap.find(OpI);
670  if (MapIt == SpeculatedValueMap.end())
671  continue;
672  const auto &SpeculatedVals = MapIt->second;
673  assert(SpeculatedVals[PredIdx] &&
674  "Must have a speculated value for this predecessor!");
675  assert(SpeculatedVals[PredIdx]->getType() == OpI->getType() &&
676  "Speculated value has the wrong type!");
677 
678  // Rewrite the use to this predecessor's speculated instruction.
679  U.set(SpeculatedVals[PredIdx]);
680  }
681 
682  // Commute instructions which now have a constant in the LHS but not the
683  // RHS.
684  if (NewI->isBinaryOp() && NewI->isCommutative() &&
685  isa<Constant>(NewI->getOperand(0)) &&
686  !isa<Constant>(NewI->getOperand(1)))
687  NewI->getOperandUse(0).swap(NewI->getOperandUse(1));
688 
689  SpeculatedValueMap[OrigI].push_back(NewI);
690  assert(SpeculatedValueMap[OrigI][PredIdx] == NewI &&
691  "Mismatched speculated instruction index!");
692  }
693  }
694 
695  // Walk the speculated instruction list and if they have uses, insert a PHI
696  // for them from the speculated versions, and replace the uses with the PHI.
697  // Then erase the instructions as they have been fully speculated. The walk
698  // needs to be in reverse so that we don't think there are users when we'll
699  // actually eventually remove them later.
700  IRBuilder<> IRB(SpecPNs[0]);
701  for (auto *OrigI : llvm::reverse(SpecList)) {
702  // Check if we need a PHI for any remaining users and if so, insert it.
703  if (!OrigI->use_empty()) {
704  auto *SpecIPN = IRB.CreatePHI(OrigI->getType(), SpecPreds.size(),
705  Twine(OrigI->getName()) + ".phi");
706  // Add the incoming values we speculated.
707  auto &SpeculatedVals = SpeculatedValueMap.find(OrigI)->second;
708  for (int PredIdx : llvm::seq<int>(0, SpecPreds.size()))
709  SpecIPN->addIncoming(SpeculatedVals[PredIdx], SpecPreds[PredIdx]);
710 
711  // And replace the uses with the PHI node.
712  OrigI->replaceAllUsesWith(SpecIPN);
713  }
714 
715  // It is important to immediately erase this so that it stops using other
716  // instructions. This avoids inserting needless PHIs of them.
717  OrigI->eraseFromParent();
718  }
719 
720  // All of the uses of the speculated phi nodes should be removed at this
721  // point, so erase them.
722  for (auto *SpecPN : SpecPNs) {
723  assert(SpecPN->use_empty() && "All users should have been speculated!");
724  SpecPN->eraseFromParent();
725  }
726 }
727 
728 /// Try to speculate around a series of PHIs from a single basic block.
729 ///
730 /// This routine checks whether any of these PHIs are profitable to speculate
731 /// users around. If safe and profitable, it does the speculation. It returns
732 /// true when at least some speculation occurs.
735  LLVM_DEBUG(dbgs() << "Evaluating phi nodes for speculation:\n");
736 
737  // Savings in cost from speculating around a PHI node.
738  SmallDenseMap<PHINode *, int, 16> CostSavingsMap;
739 
740  // Remember the set of instructions that are candidates for speculation so
741  // that we can quickly walk things within that space. This prunes out
742  // instructions already available along edges, etc.
743  SmallPtrSet<Instruction *, 16> PotentialSpecSet;
744 
745  // Remember the set of instructions that are (transitively) unsafe to
746  // speculate into the incoming edges of this basic block. This avoids
747  // recomputing them for each PHI node we check. This set is specific to this
748  // block though as things are pruned out of it based on what is available
749  // along incoming edges.
751 
752  // For each PHI node in this block, check whether there are immediate folding
753  // opportunities from speculation, and whether that speculation will be
754  // valid. This determise the set of safe PHIs to speculate.
755  PNs.erase(llvm::remove_if(PNs,
756  [&](PHINode *PN) {
758  *PN, CostSavingsMap, PotentialSpecSet,
759  UnsafeSet, DT, TTI);
760  }),
761  PNs.end());
762  // If no PHIs were profitable, skip.
763  if (PNs.empty()) {
764  LLVM_DEBUG(dbgs() << " No safe and profitable PHIs found!\n");
765  return false;
766  }
767 
768  // We need to know how much speculation will cost which is determined by how
769  // many incoming edges will need a copy of each speculated instruction.
771  for (auto *PredBB : PNs[0]->blocks()) {
772  if (!PredSet.insert(PredBB))
773  continue;
774 
775  // We cannot speculate when a predecessor is an indirect branch.
776  // FIXME: We also can't reliably create a non-critical edge block for
777  // speculation if the predecessor is an invoke. This doesn't seem
778  // fundamental and we should probably be splitting critical edges
779  // differently.
780  if (isa<IndirectBrInst>(PredBB->getTerminator()) ||
781  isa<InvokeInst>(PredBB->getTerminator())) {
782  LLVM_DEBUG(dbgs() << " Invalid: predecessor terminator: "
783  << PredBB->getName() << "\n");
784  return false;
785  }
786  }
787  if (PredSet.size() < 2) {
788  LLVM_DEBUG(dbgs() << " Unimportant: phi with only one predecessor\n");
789  return false;
790  }
791 
793  PNs, CostSavingsMap, PotentialSpecSet, PredSet.size(), DT, TTI);
794  if (SpecPNs.empty())
795  // Nothing to do.
796  return false;
797 
798  speculatePHIs(SpecPNs, PotentialSpecSet, PredSet, DT);
799  return true;
800 }
801 
804  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
805  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
806 
807  bool Changed = false;
808  for (auto *BB : ReversePostOrderTraversal<Function *>(&F)) {
810  auto BBI = BB->begin();
811  while (auto *PN = dyn_cast<PHINode>(&*BBI)) {
812  PNs.push_back(PN);
813  ++BBI;
814  }
815 
816  if (PNs.empty())
817  continue;
818 
819  Changed |= tryToSpeculatePHIs(PNs, DT, TTI);
820  }
821 
822  if (!Changed)
823  return PreservedAnalyses::all();
824 
826  return PA;
827 }
IntegerType * getType() const
getType - Specialize the getType() method to always return an IntegerType, which reduces the amount o...
Definition: Constants.h:171
This routine provides some synthesis utilities to produce sequences of values.
iterator_range< use_iterator > uses()
Definition: Value.h:354
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:776
This class represents lattice values for constants.
Definition: AllocatorList.h:23
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
Analysis pass providing the TargetTransformInfo.
unsigned second
STATISTIC(NumFunctions, "Total number of functions")
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:230
F(f)
void reserve(size_type N)
Definition: SmallVector.h:369
Option class for critical edge splitting.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
A Use represents the edge between a Value definition and its users.
Definition: Use.h:55
static bool isSafeToSpeculatePHIUsers(PHINode &PN, DominatorTree &DT, SmallPtrSetImpl< Instruction *> &PotentialSpecSet, SmallPtrSetImpl< Instruction *> &UnsafeSet)
Check whether speculating the users of a PHI node around the PHI will be safe.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:742
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:273
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
DenseMap< BasicBlock *, Value * > IncomingValueMap
Definition: Local.cpp:827
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:137
BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions())
If this edge is a critical edge, insert a new node to split the critical edge.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:148
static SmallVector< PHINode *, 16 > findProfitablePHIs(ArrayRef< PHINode *> PNs, const SmallDenseMap< PHINode *, int, 16 > &CostSavingsMap, const SmallPtrSetImpl< Instruction *> &PotentialSpecSet, int NumPreds, DominatorTree &DT, TargetTransformInfo &TTI)
Find profitable PHIs to speculate.
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:91
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
static bool tryToSpeculatePHIs(SmallVectorImpl< PHINode *> &PNs, DominatorTree &DT, TargetTransformInfo &TTI)
Try to speculate around a series of PHIs from a single basic block.
Expected to fold away in lowering.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
auto remove_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::remove_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1232
static void visitPHIUsersAndDepsInPostOrder(ArrayRef< PHINode *> PNs, IsVisitedT IsVisited, VisitT Visit)
Simple helper to walk all the users of a list of phis depth first, and call a visit function on each ...
iterator erase(const_iterator CI)
Definition: SmallVector.h:434
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
size_t size() const
Definition: SmallVector.h:52
static wasm::ValType getType(const TargetRegisterClass *RC)
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned first
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2046
Iterator for directly iterating over the operand Values.
Definition: User.h:245
size_type size() const
Definition: SmallPtrSet.h:92
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:297
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:248
int getIntImmCost(const APInt &Imm, Type *Ty) const
Return the expected cost of materializing for the given integer immediate of the specified type...
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:374
unsigned getNumIncomingValues() const
Return the number of incoming edges.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
bool mayBeMemoryDependent(const Instruction &I)
Returns true if the result or effects of the given instructions I depend on or influence global memor...
int getUserCost(const User *U, ArrayRef< const Value *> Operands) const
Estimate the cost of a given IR user when lowered.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
Establish a view to a call site for examination.
Definition: CallSite.h:897
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
#define I(x, y, z)
Definition: MD5.cpp:58
iterator_range< value_op_iterator > operand_values()
Definition: User.h:261
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
static bool isSafeAndProfitableToSpeculateAroundPHI(PHINode &PN, SmallDenseMap< PHINode *, int, 16 > &CostSavingsMap, SmallPtrSetImpl< Instruction *> &PotentialSpecSet, SmallPtrSetImpl< Instruction *> &UnsafeSet, DominatorTree &DT, TargetTransformInfo &TTI)
Check whether, in isolation, a given PHI node is both safe and profitable to speculate users around...
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:171
static void speculatePHIs(ArrayRef< PHINode *> SpecPNs, SmallPtrSetImpl< Instruction *> &PotentialSpecSet, SmallSetVector< BasicBlock *, 16 > &PredSet, DominatorTree &DT)
Speculate users around a set of PHI nodes.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:72
A container for analyses that lazily runs them and caches their results.
This pass exposes codegen information to IR-level passes.
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const BasicBlock * getParent() const
Definition: Instruction.h:66