LLVM 17.0.0git
GVNSink.cpp
Go to the documentation of this file.
1//===- GVNSink.cpp - sink expressions into successors ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file GVNSink.cpp
10/// This pass attempts to sink instructions into successors, reducing static
11/// instruction count and enabling if-conversion.
12///
13/// We use a variant of global value numbering to decide what can be sunk.
14/// Consider:
15///
16/// [ %a1 = add i32 %b, 1 ] [ %c1 = add i32 %d, 1 ]
17/// [ %a2 = xor i32 %a1, 1 ] [ %c2 = xor i32 %c1, 1 ]
18/// \ /
19/// [ %e = phi i32 %a2, %c2 ]
20/// [ add i32 %e, 4 ]
21///
22///
23/// GVN would number %a1 and %c1 differently because they compute different
24/// results - the VN of an instruction is a function of its opcode and the
25/// transitive closure of its operands. This is the key property for hoisting
26/// and CSE.
27///
28/// What we want when sinking however is for a numbering that is a function of
29/// the *uses* of an instruction, which allows us to answer the question "if I
30/// replace %a1 with %c1, will it contribute in an equivalent way to all
31/// successive instructions?". The PostValueTable class in GVN provides this
32/// mapping.
33//
34//===----------------------------------------------------------------------===//
35
36#include "llvm/ADT/ArrayRef.h"
37#include "llvm/ADT/DenseMap.h"
38#include "llvm/ADT/DenseSet.h"
39#include "llvm/ADT/Hashing.h"
41#include "llvm/ADT/STLExtras.h"
44#include "llvm/ADT/Statistic.h"
46#include "llvm/IR/BasicBlock.h"
47#include "llvm/IR/CFG.h"
48#include "llvm/IR/Constants.h"
49#include "llvm/IR/Function.h"
50#include "llvm/IR/InstrTypes.h"
51#include "llvm/IR/Instruction.h"
53#include "llvm/IR/PassManager.h"
54#include "llvm/IR/Type.h"
55#include "llvm/IR/Use.h"
56#include "llvm/IR/Value.h"
58#include "llvm/Pass.h"
64#include "llvm/Support/Debug.h"
71#include <algorithm>
72#include <cassert>
73#include <cstddef>
74#include <cstdint>
75#include <iterator>
76#include <utility>
77
78using namespace llvm;
79
80#define DEBUG_TYPE "gvn-sink"
81
82STATISTIC(NumRemoved, "Number of instructions removed");
83
84namespace llvm {
85namespace GVNExpression {
86
88 print(dbgs());
89 dbgs() << "\n";
90}
91
92} // end namespace GVNExpression
93} // end namespace llvm
94
95namespace {
96
97static bool isMemoryInst(const Instruction *I) {
98 return isa<LoadInst>(I) || isa<StoreInst>(I) ||
99 (isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) ||
100 (isa<CallInst>(I) && !cast<CallInst>(I)->doesNotAccessMemory());
101}
102
103/// Iterates through instructions in a set of blocks in reverse order from the
104/// first non-terminator. For example (assume all blocks have size n):
105/// LockstepReverseIterator I([B1, B2, B3]);
106/// *I-- = [B1[n], B2[n], B3[n]];
107/// *I-- = [B1[n-1], B2[n-1], B3[n-1]];
108/// *I-- = [B1[n-2], B2[n-2], B3[n-2]];
109/// ...
110///
111/// It continues until all blocks have been exhausted. Use \c getActiveBlocks()
112/// to
113/// determine which blocks are still going and the order they appear in the
114/// list returned by operator*.
115class LockstepReverseIterator {
119 bool Fail;
120
121public:
122 LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) {
123 reset();
124 }
125
126 void reset() {
127 Fail = false;
128 ActiveBlocks.clear();
129 for (BasicBlock *BB : Blocks)
130 ActiveBlocks.insert(BB);
131 Insts.clear();
132 for (BasicBlock *BB : Blocks) {
133 if (BB->size() <= 1) {
134 // Block wasn't big enough - only contained a terminator.
135 ActiveBlocks.remove(BB);
136 continue;
137 }
138 Insts.push_back(BB->getTerminator()->getPrevNode());
139 }
140 if (Insts.empty())
141 Fail = true;
142 }
143
144 bool isValid() const { return !Fail; }
145 ArrayRef<Instruction *> operator*() const { return Insts; }
146
147 // Note: This needs to return a SmallSetVector as the elements of
148 // ActiveBlocks will be later copied to Blocks using std::copy. The
149 // resultant order of elements in Blocks needs to be deterministic.
150 // Using SmallPtrSet instead causes non-deterministic order while
151 // copying. And we cannot simply sort Blocks as they need to match the
152 // corresponding Values.
153 SmallSetVector<BasicBlock *, 4> &getActiveBlocks() { return ActiveBlocks; }
154
155 void restrictToBlocks(SmallSetVector<BasicBlock *, 4> &Blocks) {
156 for (auto II = Insts.begin(); II != Insts.end();) {
157 if (!llvm::is_contained(Blocks, (*II)->getParent())) {
158 ActiveBlocks.remove((*II)->getParent());
159 II = Insts.erase(II);
160 } else {
161 ++II;
162 }
163 }
164 }
165
166 void operator--() {
167 if (Fail)
168 return;
170 for (auto *Inst : Insts) {
171 if (Inst == &Inst->getParent()->front())
172 ActiveBlocks.remove(Inst->getParent());
173 else
174 NewInsts.push_back(Inst->getPrevNode());
175 }
176 if (NewInsts.empty()) {
177 Fail = true;
178 return;
179 }
180 Insts = NewInsts;
181 }
182};
183
184//===----------------------------------------------------------------------===//
185
186/// Candidate solution for sinking. There may be different ways to
187/// sink instructions, differing in the number of instructions sunk,
188/// the number of predecessors sunk from and the number of PHIs
189/// required.
190struct SinkingInstructionCandidate {
191 unsigned NumBlocks;
192 unsigned NumInstructions;
193 unsigned NumPHIs;
194 unsigned NumMemoryInsts;
195 int Cost = -1;
197
198 void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) {
199 unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs;
200 unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0;
201 Cost = (NumInstructions * (NumBlocks - 1)) -
202 (NumExtraPHIs *
203 NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
204 - SplitEdgeCost;
205 }
206
207 bool operator>(const SinkingInstructionCandidate &Other) const {
208 return Cost > Other.Cost;
209 }
210};
211
212#ifndef NDEBUG
213raw_ostream &operator<<(raw_ostream &OS, const SinkingInstructionCandidate &C) {
214 OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
215 << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
216 return OS;
217}
218#endif
219
220//===----------------------------------------------------------------------===//
221
222/// Describes a PHI node that may or may not exist. These track the PHIs
223/// that must be created if we sunk a sequence of instructions. It provides
224/// a hash function for efficient equality comparisons.
225class ModelledPHI {
228
229public:
230 ModelledPHI() = default;
231
232 ModelledPHI(const PHINode *PN) {
233 // BasicBlock comes first so we sort by basic block pointer order, then by value pointer order.
235 for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
237 llvm::sort(Ops);
238 for (auto &P : Ops) {
239 Blocks.push_back(P.first);
240 Values.push_back(P.second);
241 }
242 }
243
244 /// Create a dummy ModelledPHI that will compare unequal to any other ModelledPHI
245 /// without the same ID.
246 /// \note This is specifically for DenseMapInfo - do not use this!
247 static ModelledPHI createDummy(size_t ID) {
248 ModelledPHI M;
249 M.Values.push_back(reinterpret_cast<Value*>(ID));
250 return M;
251 }
252
253 /// Create a PHI from an array of incoming values and incoming blocks.
254 template <typename VArray, typename BArray>
255 ModelledPHI(const VArray &V, const BArray &B) {
256 llvm::copy(V, std::back_inserter(Values));
257 llvm::copy(B, std::back_inserter(Blocks));
258 }
259
260 /// Create a PHI from [I[OpNum] for I in Insts].
261 template <typename BArray>
262 ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum, const BArray &B) {
263 llvm::copy(B, std::back_inserter(Blocks));
264 for (auto *I : Insts)
265 Values.push_back(I->getOperand(OpNum));
266 }
267
268 /// Restrict the PHI's contents down to only \c NewBlocks.
269 /// \c NewBlocks must be a subset of \c this->Blocks.
270 void restrictToBlocks(const SmallSetVector<BasicBlock *, 4> &NewBlocks) {
271 auto BI = Blocks.begin();
272 auto VI = Values.begin();
273 while (BI != Blocks.end()) {
274 assert(VI != Values.end());
275 if (!llvm::is_contained(NewBlocks, *BI)) {
276 BI = Blocks.erase(BI);
277 VI = Values.erase(VI);
278 } else {
279 ++BI;
280 ++VI;
281 }
282 }
283 assert(Blocks.size() == NewBlocks.size());
284 }
285
286 ArrayRef<Value *> getValues() const { return Values; }
287
288 bool areAllIncomingValuesSame() const {
289 return llvm::all_equal(Values);
290 }
291
292 bool areAllIncomingValuesSameType() const {
293 return llvm::all_of(
294 Values, [&](Value *V) { return V->getType() == Values[0]->getType(); });
295 }
296
297 bool areAnyIncomingValuesConstant() const {
298 return llvm::any_of(Values, [&](Value *V) { return isa<Constant>(V); });
299 }
300
301 // Hash functor
302 unsigned hash() const {
303 return (unsigned)hash_combine_range(Values.begin(), Values.end());
304 }
305
306 bool operator==(const ModelledPHI &Other) const {
307 return Values == Other.Values && Blocks == Other.Blocks;
308 }
309};
310
311template <typename ModelledPHI> struct DenseMapInfo {
312 static inline ModelledPHI &getEmptyKey() {
313 static ModelledPHI Dummy = ModelledPHI::createDummy(0);
314 return Dummy;
315 }
316
317 static inline ModelledPHI &getTombstoneKey() {
318 static ModelledPHI Dummy = ModelledPHI::createDummy(1);
319 return Dummy;
320 }
321
322 static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); }
323
324 static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) {
325 return LHS == RHS;
326 }
327};
328
330
331//===----------------------------------------------------------------------===//
332// ValueTable
333//===----------------------------------------------------------------------===//
334// This is a value number table where the value number is a function of the
335// *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know
336// that the program would be equivalent if we replaced A with PHI(A, B).
337//===----------------------------------------------------------------------===//
338
339/// A GVN expression describing how an instruction is used. The operands
340/// field of BasicExpression is used to store uses, not operands.
341///
342/// This class also contains fields for discriminators used when determining
343/// equivalence of instructions with sideeffects.
344class InstructionUseExpr : public GVNExpression::BasicExpression {
345 unsigned MemoryUseOrder = -1;
346 bool Volatile = false;
347 ArrayRef<int> ShuffleMask;
348
349public:
350 InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
352 : GVNExpression::BasicExpression(I->getNumUses()) {
354 setOpcode(I->getOpcode());
355 setType(I->getType());
356
357 if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
358 ShuffleMask = SVI->getShuffleMask().copy(A);
359
360 for (auto &U : I->uses())
361 op_push_back(U.getUser());
363 }
364
365 void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; }
366 void setVolatile(bool V) { Volatile = V; }
367
368 hash_code getHashValue() const override {
370 MemoryUseOrder, Volatile, ShuffleMask);
371 }
372
373 template <typename Function> hash_code getHashValue(Function MapFn) {
374 hash_code H = hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile,
375 ShuffleMask);
376 for (auto *V : operands())
377 H = hash_combine(H, MapFn(V));
378 return H;
379 }
380};
381
382using BasicBlocksSet = SmallPtrSet<const BasicBlock *, 32>;
383
384class ValueTable {
385 DenseMap<Value *, uint32_t> ValueNumbering;
387 DenseMap<size_t, uint32_t> HashNumbering;
390 uint32_t nextValueNumber = 1;
391 BasicBlocksSet ReachableBBs;
392
393 /// Create an expression for I based on its opcode and its uses. If I
394 /// touches or reads memory, the expression is also based upon its memory
395 /// order - see \c getMemoryUseOrder().
396 InstructionUseExpr *createExpr(Instruction *I) {
397 InstructionUseExpr *E =
398 new (Allocator) InstructionUseExpr(I, Recycler, Allocator);
399 if (isMemoryInst(I))
400 E->setMemoryUseOrder(getMemoryUseOrder(I));
401
402 if (CmpInst *C = dyn_cast<CmpInst>(I)) {
403 CmpInst::Predicate Predicate = C->getPredicate();
404 E->setOpcode((C->getOpcode() << 8) | Predicate);
405 }
406 return E;
407 }
408
409 /// Helper to compute the value number for a memory instruction
410 /// (LoadInst/StoreInst), including checking the memory ordering and
411 /// volatility.
412 template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) {
413 if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic())
414 return nullptr;
415 InstructionUseExpr *E = createExpr(I);
416 E->setVolatile(I->isVolatile());
417 return E;
418 }
419
420public:
421 ValueTable() = default;
422
423 /// Set basic blocks reachable from entry block.
424 void setReachableBBs(const BasicBlocksSet &ReachableBBs) {
425 this->ReachableBBs = ReachableBBs;
426 }
427
428 /// Returns the value number for the specified value, assigning
429 /// it a new number if it did not have one before.
430 uint32_t lookupOrAdd(Value *V) {
431 auto VI = ValueNumbering.find(V);
432 if (VI != ValueNumbering.end())
433 return VI->second;
434
435 if (!isa<Instruction>(V)) {
436 ValueNumbering[V] = nextValueNumber;
437 return nextValueNumber++;
438 }
439
440 Instruction *I = cast<Instruction>(V);
441 if (!ReachableBBs.contains(I->getParent()))
442 return ~0U;
443
444 InstructionUseExpr *exp = nullptr;
445 switch (I->getOpcode()) {
446 case Instruction::Load:
447 exp = createMemoryExpr(cast<LoadInst>(I));
448 break;
449 case Instruction::Store:
450 exp = createMemoryExpr(cast<StoreInst>(I));
451 break;
452 case Instruction::Call:
453 case Instruction::Invoke:
454 case Instruction::FNeg:
455 case Instruction::Add:
456 case Instruction::FAdd:
457 case Instruction::Sub:
458 case Instruction::FSub:
459 case Instruction::Mul:
460 case Instruction::FMul:
461 case Instruction::UDiv:
462 case Instruction::SDiv:
463 case Instruction::FDiv:
464 case Instruction::URem:
465 case Instruction::SRem:
466 case Instruction::FRem:
467 case Instruction::Shl:
468 case Instruction::LShr:
469 case Instruction::AShr:
470 case Instruction::And:
471 case Instruction::Or:
472 case Instruction::Xor:
473 case Instruction::ICmp:
474 case Instruction::FCmp:
475 case Instruction::Trunc:
476 case Instruction::ZExt:
477 case Instruction::SExt:
478 case Instruction::FPToUI:
479 case Instruction::FPToSI:
480 case Instruction::UIToFP:
481 case Instruction::SIToFP:
482 case Instruction::FPTrunc:
483 case Instruction::FPExt:
484 case Instruction::PtrToInt:
485 case Instruction::IntToPtr:
486 case Instruction::BitCast:
487 case Instruction::AddrSpaceCast:
488 case Instruction::Select:
489 case Instruction::ExtractElement:
490 case Instruction::InsertElement:
491 case Instruction::ShuffleVector:
492 case Instruction::InsertValue:
493 case Instruction::GetElementPtr:
494 exp = createExpr(I);
495 break;
496 default:
497 break;
498 }
499
500 if (!exp) {
501 ValueNumbering[V] = nextValueNumber;
502 return nextValueNumber++;
503 }
504
505 uint32_t e = ExpressionNumbering[exp];
506 if (!e) {
507 hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); });
508 auto I = HashNumbering.find(H);
509 if (I != HashNumbering.end()) {
510 e = I->second;
511 } else {
512 e = nextValueNumber++;
513 HashNumbering[H] = e;
514 ExpressionNumbering[exp] = e;
515 }
516 }
517 ValueNumbering[V] = e;
518 return e;
519 }
520
521 /// Returns the value number of the specified value. Fails if the value has
522 /// not yet been numbered.
523 uint32_t lookup(Value *V) const {
524 auto VI = ValueNumbering.find(V);
525 assert(VI != ValueNumbering.end() && "Value not numbered?");
526 return VI->second;
527 }
528
529 /// Removes all value numberings and resets the value table.
530 void clear() {
531 ValueNumbering.clear();
532 ExpressionNumbering.clear();
533 HashNumbering.clear();
534 Recycler.clear(Allocator);
535 nextValueNumber = 1;
536 }
537
538 /// \c Inst uses or touches memory. Return an ID describing the memory state
539 /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2),
540 /// the exact same memory operations happen after I1 and I2.
541 ///
542 /// This is a very hard problem in general, so we use domain-specific
543 /// knowledge that we only ever check for equivalence between blocks sharing a
544 /// single immediate successor that is common, and when determining if I1 ==
545 /// I2 we will have already determined that next(I1) == next(I2). This
546 /// inductive property allows us to simply return the value number of the next
547 /// instruction that defines memory.
548 uint32_t getMemoryUseOrder(Instruction *Inst) {
549 auto *BB = Inst->getParent();
550 for (auto I = std::next(Inst->getIterator()), E = BB->end();
551 I != E && !I->isTerminator(); ++I) {
552 if (!isMemoryInst(&*I))
553 continue;
554 if (isa<LoadInst>(&*I))
555 continue;
556 CallInst *CI = dyn_cast<CallInst>(&*I);
557 if (CI && CI->onlyReadsMemory())
558 continue;
559 InvokeInst *II = dyn_cast<InvokeInst>(&*I);
560 if (II && II->onlyReadsMemory())
561 continue;
562 return lookupOrAdd(&*I);
563 }
564 return 0;
565 }
566};
567
568//===----------------------------------------------------------------------===//
569
570class GVNSink {
571public:
572 GVNSink() = default;
573
574 bool run(Function &F) {
575 LLVM_DEBUG(dbgs() << "GVNSink: running on function @" << F.getName()
576 << "\n");
577
578 unsigned NumSunk = 0;
580 VN.setReachableBBs(BasicBlocksSet(RPOT.begin(), RPOT.end()));
581 for (auto *N : RPOT)
582 NumSunk += sinkBB(N);
583
584 return NumSunk > 0;
585 }
586
587private:
588 ValueTable VN;
589
590 bool shouldAvoidSinkingInstruction(Instruction *I) {
591 // These instructions may change or break semantics if moved.
592 if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
593 I->getType()->isTokenTy())
594 return true;
595 return false;
596 }
597
598 /// The main heuristic function. Analyze the set of instructions pointed to by
599 /// LRI and return a candidate solution if these instructions can be sunk, or
600 /// std::nullopt otherwise.
601 std::optional<SinkingInstructionCandidate> analyzeInstructionForSinking(
602 LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
603 ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents);
604
605 /// Create a ModelledPHI for each PHI in BB, adding to PHIs.
606 void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
607 SmallPtrSetImpl<Value *> &PHIContents) {
608 for (PHINode &PN : BB->phis()) {
609 auto MPHI = ModelledPHI(&PN);
610 PHIs.insert(MPHI);
611 for (auto *V : MPHI.getValues())
612 PHIContents.insert(V);
613 }
614 }
615
616 /// The main instruction sinking driver. Set up state and try and sink
617 /// instructions into BBEnd from its predecessors.
618 unsigned sinkBB(BasicBlock *BBEnd);
619
620 /// Perform the actual mechanics of sinking an instruction from Blocks into
621 /// BBEnd, which is their only successor.
623
624 /// Remove PHIs that all have the same incoming value.
625 void foldPointlessPHINodes(BasicBlock *BB) {
626 auto I = BB->begin();
627 while (PHINode *PN = dyn_cast<PHINode>(I++)) {
628 if (!llvm::all_of(PN->incoming_values(), [&](const Value *V) {
629 return V == PN->getIncomingValue(0);
630 }))
631 continue;
632 if (PN->getIncomingValue(0) != PN)
634 else
636 PN->eraseFromParent();
637 }
638 }
639};
640
641std::optional<SinkingInstructionCandidate>
642GVNSink::analyzeInstructionForSinking(LockstepReverseIterator &LRI,
643 unsigned &InstNum,
644 unsigned &MemoryInstNum,
645 ModelledPHISet &NeededPHIs,
646 SmallPtrSetImpl<Value *> &PHIContents) {
647 auto Insts = *LRI;
648 LLVM_DEBUG(dbgs() << " -- Analyzing instruction set: [\n"; for (auto *I
649 : Insts) {
650 I->dump();
651 } dbgs() << " ]\n";);
652
654 for (auto *I : Insts) {
655 uint32_t N = VN.lookupOrAdd(I);
656 LLVM_DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n");
657 if (N == ~0U)
658 return std::nullopt;
659 VNums[N]++;
660 }
661 unsigned VNumToSink =
662 std::max_element(VNums.begin(), VNums.end(), llvm::less_second())->first;
663
664 if (VNums[VNumToSink] == 1)
665 // Can't sink anything!
666 return std::nullopt;
667
668 // Now restrict the number of incoming blocks down to only those with
669 // VNumToSink.
670 auto &ActivePreds = LRI.getActiveBlocks();
671 unsigned InitialActivePredSize = ActivePreds.size();
673 for (auto *I : Insts) {
674 if (VN.lookup(I) != VNumToSink)
675 ActivePreds.remove(I->getParent());
676 else
677 NewInsts.push_back(I);
678 }
679 for (auto *I : NewInsts)
680 if (shouldAvoidSinkingInstruction(I))
681 return std::nullopt;
682
683 // If we've restricted the incoming blocks, restrict all needed PHIs also
684 // to that set.
685 bool RecomputePHIContents = false;
686 if (ActivePreds.size() != InitialActivePredSize) {
687 ModelledPHISet NewNeededPHIs;
688 for (auto P : NeededPHIs) {
689 P.restrictToBlocks(ActivePreds);
690 NewNeededPHIs.insert(P);
691 }
692 NeededPHIs = NewNeededPHIs;
693 LRI.restrictToBlocks(ActivePreds);
694 RecomputePHIContents = true;
695 }
696
697 // The sunk instruction's results.
698 ModelledPHI NewPHI(NewInsts, ActivePreds);
699
700 // Does sinking this instruction render previous PHIs redundant?
701 if (NeededPHIs.erase(NewPHI))
702 RecomputePHIContents = true;
703
704 if (RecomputePHIContents) {
705 // The needed PHIs have changed, so recompute the set of all needed
706 // values.
707 PHIContents.clear();
708 for (auto &PHI : NeededPHIs)
709 PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
710 }
711
712 // Is this instruction required by a later PHI that doesn't match this PHI?
713 // if so, we can't sink this instruction.
714 for (auto *V : NewPHI.getValues())
715 if (PHIContents.count(V))
716 // V exists in this PHI, but the whole PHI is different to NewPHI
717 // (else it would have been removed earlier). We cannot continue
718 // because this isn't representable.
719 return std::nullopt;
720
721 // Which operands need PHIs?
722 // FIXME: If any of these fail, we should partition up the candidates to
723 // try and continue making progress.
724 Instruction *I0 = NewInsts[0];
725
726 // If all instructions that are going to participate don't have the same
727 // number of operands, we can't do any useful PHI analysis for all operands.
728 auto hasDifferentNumOperands = [&I0](Instruction *I) {
729 return I->getNumOperands() != I0->getNumOperands();
730 };
731 if (any_of(NewInsts, hasDifferentNumOperands))
732 return std::nullopt;
733
734 for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
735 ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
736 if (PHI.areAllIncomingValuesSame())
737 continue;
738 if (!canReplaceOperandWithVariable(I0, OpNum))
739 // We can 't create a PHI from this instruction!
740 return std::nullopt;
741 if (NeededPHIs.count(PHI))
742 continue;
743 if (!PHI.areAllIncomingValuesSameType())
744 return std::nullopt;
745 // Don't create indirect calls! The called value is the final operand.
746 if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1 &&
747 PHI.areAnyIncomingValuesConstant())
748 return std::nullopt;
749
750 NeededPHIs.reserve(NeededPHIs.size());
751 NeededPHIs.insert(PHI);
752 PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
753 }
754
755 if (isMemoryInst(NewInsts[0]))
756 ++MemoryInstNum;
757
758 SinkingInstructionCandidate Cand;
759 Cand.NumInstructions = ++InstNum;
760 Cand.NumMemoryInsts = MemoryInstNum;
761 Cand.NumBlocks = ActivePreds.size();
762 Cand.NumPHIs = NeededPHIs.size();
763 append_range(Cand.Blocks, ActivePreds);
764
765 return Cand;
766}
767
768unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
769 LLVM_DEBUG(dbgs() << "GVNSink: running on basic block ";
770 BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
772 for (auto *B : predecessors(BBEnd)) {
773 auto *T = B->getTerminator();
774 if (isa<BranchInst>(T) || isa<SwitchInst>(T))
775 Preds.push_back(B);
776 else
777 return 0;
778 }
779 if (Preds.size() < 2)
780 return 0;
781 llvm::sort(Preds);
782
783 unsigned NumOrigPreds = Preds.size();
784 // We can only sink instructions through unconditional branches.
785 llvm::erase_if(Preds, [](BasicBlock *BB) {
786 return BB->getTerminator()->getNumSuccessors() != 1;
787 });
788
789 LockstepReverseIterator LRI(Preds);
791 unsigned InstNum = 0, MemoryInstNum = 0;
792 ModelledPHISet NeededPHIs;
793 SmallPtrSet<Value *, 4> PHIContents;
794 analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents);
795 unsigned NumOrigPHIs = NeededPHIs.size();
796
797 while (LRI.isValid()) {
798 auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum,
799 NeededPHIs, PHIContents);
800 if (!Cand)
801 break;
802 Cand->calculateCost(NumOrigPHIs, Preds.size());
803 Candidates.emplace_back(*Cand);
804 --LRI;
805 }
806
807 llvm::stable_sort(Candidates, std::greater<SinkingInstructionCandidate>());
808 LLVM_DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C
809 : Candidates) dbgs()
810 << " " << C << "\n";);
811
812 // Pick the top candidate, as long it is positive!
813 if (Candidates.empty() || Candidates.front().Cost <= 0)
814 return 0;
815 auto C = Candidates.front();
816
817 LLVM_DEBUG(dbgs() << " -- Sinking: " << C << "\n");
818 BasicBlock *InsertBB = BBEnd;
819 if (C.Blocks.size() < NumOrigPreds) {
820 LLVM_DEBUG(dbgs() << " -- Splitting edge to ";
821 BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
822 InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split");
823 if (!InsertBB) {
824 LLVM_DEBUG(dbgs() << " -- FAILED to split edge!\n");
825 // Edge couldn't be split.
826 return 0;
827 }
828 }
829
830 for (unsigned I = 0; I < C.NumInstructions; ++I)
831 sinkLastInstruction(C.Blocks, InsertBB);
832
833 return C.NumInstructions;
834}
835
836void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
837 BasicBlock *BBEnd) {
839 for (BasicBlock *BB : Blocks)
840 Insts.push_back(BB->getTerminator()->getPrevNode());
841 Instruction *I0 = Insts.front();
842
843 SmallVector<Value *, 4> NewOperands;
844 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
845 bool NeedPHI = llvm::any_of(Insts, [&I0, O](const Instruction *I) {
846 return I->getOperand(O) != I0->getOperand(O);
847 });
848 if (!NeedPHI) {
849 NewOperands.push_back(I0->getOperand(O));
850 continue;
851 }
852
853 // Create a new PHI in the successor block and populate it.
854 auto *Op = I0->getOperand(O);
855 assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
856 auto *PN = PHINode::Create(Op->getType(), Insts.size(),
857 Op->getName() + ".sink", &BBEnd->front());
858 for (auto *I : Insts)
859 PN->addIncoming(I->getOperand(O), I->getParent());
860 NewOperands.push_back(PN);
861 }
862
863 // Arbitrarily use I0 as the new "common" instruction; remap its operands
864 // and move it to the start of the successor block.
865 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
866 I0->getOperandUse(O).set(NewOperands[O]);
867 I0->moveBefore(&*BBEnd->getFirstInsertionPt());
868
869 // Update metadata and IR flags.
870 for (auto *I : Insts)
871 if (I != I0) {
872 combineMetadataForCSE(I0, I, true);
873 I0->andIRFlags(I);
874 }
875
876 for (auto *I : Insts)
877 if (I != I0)
878 I->replaceAllUsesWith(I0);
879 foldPointlessPHINodes(BBEnd);
880
881 // Finally nuke all instructions apart from the common instruction.
882 for (auto *I : Insts)
883 if (I != I0)
884 I->eraseFromParent();
885
886 NumRemoved += Insts.size() - 1;
887}
888
889////////////////////////////////////////////////////////////////////////////////
890// Pass machinery / boilerplate
891
892class GVNSinkLegacyPass : public FunctionPass {
893public:
894 static char ID;
895
896 GVNSinkLegacyPass() : FunctionPass(ID) {
898 }
899
900 bool runOnFunction(Function &F) override {
901 if (skipFunction(F))
902 return false;
903 GVNSink G;
904 return G.run(F);
905 }
906
907 void getAnalysisUsage(AnalysisUsage &AU) const override {
909 }
910};
911
912} // end anonymous namespace
913
915 GVNSink G;
916 if (!G.run(F))
917 return PreservedAnalyses::all();
919}
920
921char GVNSinkLegacyPass::ID = 0;
922
923INITIALIZE_PASS_BEGIN(GVNSinkLegacyPass, "gvn-sink",
924 "Early GVN sinking of Expressions", false, false)
927INITIALIZE_PASS_END(GVNSinkLegacyPass, "gvn-sink",
928 "Early GVN sinking of Expressions", false, false)
929
930FunctionPass *llvm::createGVNSinkPass() { return new GVNSinkLegacyPass(); }
#define Fail
Rewrite undef for PHI
This file defines the BumpPtrAllocator interface.
Atomic ordering constants.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:492
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static void clear(coro::Shape &Shape)
Definition: Coroutines.cpp:149
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1260
The header file for the GVN pass that contains expression handling classes.
gvn Early GVN Hoisting of Expressions
Definition: GVNHoist.cpp:1265
gvn sink
When an instruction is found to only be used outside of the loop, this function moves it to the exit ...
Definition: GVNSink.cpp:927
This file provides the interface for LLVM's Global Value Numbering pass which eliminates fully redund...
This is the interface for a simple mod/ref and alias analysis over globals.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
static bool lookup(const GsymReader &GR, DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr, uint64_t Addr, SourceLocations &SrcLocs, llvm::Error &Err)
A Lookup helper functions.
Definition: InlineInfo.cpp:109
static M68kRelType getType(unsigned Kind, MCSymbolRefExpr::VariantKind &Modifier, bool &IsPCRel)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
#define H(x, y, z)
Definition: MD5.cpp:57
Machine code sinking
#define P(N)
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
Basic Register Allocator
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
@ VI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
static bool sinkLastInstruction(ArrayRef< BasicBlock * > Blocks)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This defines the Use class.
Value * RHS
Value * LHS
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:620
Represent the analysis usage information of a pass.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Recycle small arrays allocated from a BumpPtrAllocator.
Definition: ArrayRecycler.h:28
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
MutableArrayRef< T > copy(Allocator &A)
Definition: ArrayRef.h:178
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:314
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:372
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:245
const Instruction & front() const
Definition: BasicBlock.h:326
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:127
Allocate memory in an ever growing pool, as if by bump-pointer.
Definition: Allocator.h:66
bool onlyReadsMemory(unsigned OpNo) const
Definition: InstrTypes.h:1730
This class represents a function call, abstracting a target machine's calling convention.
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:708
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:718
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
iterator end()
Definition: DenseMap.h:84
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:314
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
Definition: Pass.cpp:174
void allocateOperands(RecyclerType &Recycler, BumpPtrAllocator &Allocator)
hash_code getHashValue() const override
iterator_range< op_iterator > operands()
LLVM_DUMP_METHOD void dump() const
Definition: GVNSink.cpp:87
void setOpcode(unsigned opcode)
void print(raw_ostream &OS) const
Legacy wrapper pass to provide the GlobalsAAResult object.
void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
const BasicBlock * getParent() const
Definition: Instruction.h:90
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:82
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Invoke instruction.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:98
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1759
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:155
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
Recycler - This class manages a linked-list of deallocated nodes and facilitates reusing deallocated ...
Definition: Recycler.h:34
void clear(AllocatorType &Allocator)
clear - Release all the tracked allocations to the allocator.
Definition: Recycler.h:68
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:157
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
void clear()
Completely clear the SetVector.
Definition: SetVector.h:213
This instruction constructs a fixed permutation of two input vectors.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:301
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
iterator erase(const_iterator CI)
Definition: SmallVector.h:741
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
static Twine utohexstr(const uint64_t &Val)
Definition: Twine.h:404
void set(Value *Val)
Definition: Value.h:865
const Use & getOperandUse(unsigned i) const
Definition: User.h:182
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
Definition: AsmWriter.cpp:4778
An opaque object representing a hash code.
Definition: Hashing.h:74
self_iterator getIterator()
Definition: ilist_node.h:82
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
std::unique_ptr< ValueIDNum[]> ValueTable
Type for a table of values in a block.
ArchKind & operator--(ArchKind &Kind)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
constexpr double e
Definition: MathExtras.h:31
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void stable_sort(R &&Range)
Definition: STLExtras.h:1948
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1735
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
APInt operator*(APInt a, uint64_t RHS)
Definition: APInt.h:2162
void initializeGVNSinkLegacyPassPass(PassRegistry &)
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:2014
bool isStrongerThanUnordered(AtomicOrdering AO)
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
FunctionPass * createGVNSinkPass()
Definition: GVNSink.cpp:930
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
bool operator>(int64_t V1, const APSInt &V2)
Definition: APSInt.h:362
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1683
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition: Local.cpp:2734
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
Definition: Local.cpp:3404
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:292
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1837
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:1998
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1869
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
Definition: STLExtras.h:1986
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition: Hashing.h:608
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition: Hashing.h:486
#define N
An information struct used to provide DenseMap with the various necessary components for a given valu...
Definition: DenseMapInfo.h:51
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
Definition: GVNSink.cpp:914
Function object to check whether the second component of a std::pair compares less than the second co...
Definition: STLExtras.h:1485