LLVM 23.0.0git
GVN.cpp
Go to the documentation of this file.
1//===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs global value numbering to eliminate fully redundant
10// instructions. It also performs simple dead load elimination.
11//
12// Note that this pass does the value numbering itself; it does not use the
13// ValueNumbering analysis passes.
14//
15//===----------------------------------------------------------------------===//
16
18#include "llvm/ADT/DenseMap.h"
20#include "llvm/ADT/Hashing.h"
21#include "llvm/ADT/MapVector.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SetVector.h"
27#include "llvm/ADT/Statistic.h"
31#include "llvm/Analysis/CFG.h"
36#include "llvm/Analysis/Loads.h"
46#include "llvm/IR/Attributes.h"
47#include "llvm/IR/BasicBlock.h"
48#include "llvm/IR/Constant.h"
49#include "llvm/IR/Constants.h"
50#include "llvm/IR/DebugLoc.h"
51#include "llvm/IR/Dominators.h"
52#include "llvm/IR/Function.h"
53#include "llvm/IR/InstrTypes.h"
54#include "llvm/IR/Instruction.h"
57#include "llvm/IR/LLVMContext.h"
58#include "llvm/IR/Metadata.h"
59#include "llvm/IR/Module.h"
60#include "llvm/IR/PassManager.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/Value.h"
66#include "llvm/Pass.h"
70#include "llvm/Support/Debug.h"
77#include <algorithm>
78#include <cassert>
79#include <cstdint>
80#include <optional>
81#include <utility>
82
83using namespace llvm;
84using namespace llvm::gvn;
85using namespace llvm::VNCoercion;
86using namespace PatternMatch;
87
88#define DEBUG_TYPE "gvn"
89
90STATISTIC(NumGVNInstr, "Number of instructions deleted");
91STATISTIC(NumGVNLoad, "Number of loads deleted");
92STATISTIC(NumGVNPRE, "Number of instructions PRE'd");
93STATISTIC(NumGVNBlocks, "Number of blocks merged");
94STATISTIC(NumGVNSimpl, "Number of instructions simplified");
95STATISTIC(NumGVNEqProp, "Number of equalities propagated");
96STATISTIC(NumPRELoad, "Number of loads PRE'd");
97STATISTIC(NumPRELoopLoad, "Number of loop loads PRE'd");
98STATISTIC(NumPRELoadMoved2CEPred,
99 "Number of loads moved to predecessor of a critical edge in PRE");
100
101STATISTIC(IsValueFullyAvailableInBlockNumSpeculationsMax,
102 "Number of blocks speculated as available in "
103 "IsValueFullyAvailableInBlock(), max");
104STATISTIC(MaxBBSpeculationCutoffReachedTimes,
105 "Number of times we we reached gvn-max-block-speculations cut-off "
106 "preventing further exploration");
107
108static cl::opt<bool> GVNEnableScalarPRE("enable-scalar-pre", cl::init(true),
109 cl::Hidden);
110static cl::opt<bool> GVNEnableLoadPRE("enable-load-pre", cl::init(true));
111static cl::opt<bool> GVNEnableLoadInLoopPRE("enable-load-in-loop-pre",
112 cl::init(true));
113static cl::opt<bool>
114GVNEnableSplitBackedgeInLoadPRE("enable-split-backedge-in-load-pre",
115 cl::init(false));
116static cl::opt<bool> GVNEnableMemDep("enable-gvn-memdep", cl::init(true));
117static cl::opt<bool> GVNEnableMemorySSA("enable-gvn-memoryssa",
118 cl::init(false));
119
121 "gvn-scan-users-limit", cl::Hidden, cl::init(100),
122 cl::desc("The number of memory accesses to scan in a block in reaching "
123 "memory values analysis (default = 100)"));
124
126 "gvn-max-num-deps", cl::Hidden, cl::init(100),
127 cl::desc("Max number of dependences to attempt Load PRE (default = 100)"));
128
129// This is based on IsValueFullyAvailableInBlockNumSpeculationsMax stat.
131 "gvn-max-block-speculations", cl::Hidden, cl::init(600),
132 cl::desc("Max number of blocks we're willing to speculate on (and recurse "
133 "into) when deducing if a value is fully available or not in GVN "
134 "(default = 600)"));
135
137 "gvn-max-num-visited-insts", cl::Hidden, cl::init(100),
138 cl::desc("Max number of visited instructions when trying to find "
139 "dominating value of select dependency (default = 100)"));
140
142 "gvn-max-num-insns", cl::Hidden, cl::init(100),
143 cl::desc("Max number of instructions to scan in each basic block in GVN "
144 "(default = 100)"));
145
148 bool Commutative = false;
149 // The type is not necessarily the result type of the expression, it may be
150 // any additional type needed to disambiguate the expression.
151 Type *Ty = nullptr;
153
154 AttributeList Attrs;
155
157
158 bool operator==(const Expression &Other) const {
159 if (Opcode != Other.Opcode)
160 return false;
161 if (Opcode == ~0U || Opcode == ~1U)
162 return true;
163 if (Ty != Other.Ty)
164 return false;
165 if (VarArgs != Other.VarArgs)
166 return false;
167 if ((!Attrs.isEmpty() || !Other.Attrs.isEmpty()) &&
168 !Attrs.intersectWith(Ty->getContext(), Other.Attrs).has_value())
169 return false;
170 return true;
171 }
172
174 return hash_combine(Value.Opcode, Value.Ty,
175 hash_combine_range(Value.VarArgs));
176 }
177};
178
180 static unsigned getHashValue(const GVNPass::Expression &E) {
181 using llvm::hash_value;
182
183 return static_cast<unsigned>(hash_value(E));
184 }
185
186 static bool isEqual(const GVNPass::Expression &LHS,
187 const GVNPass::Expression &RHS) {
188 return LHS == RHS;
189 }
190};
191
192/// Represents a particular available value that we know how to materialize.
193/// Materialization of an AvailableValue never fails. An AvailableValue is
194/// implicitly associated with a rematerialization point which is the
195/// location of the instruction from which it was formed.
197 enum class ValType {
198 SimpleVal, // A simple offsetted value that is accessed.
199 LoadVal, // A value produced by a load.
200 MemIntrin, // A memory intrinsic which is loaded from.
201 UndefVal, // A UndefValue representing a value from dead block (which
202 // is not yet physically removed from the CFG).
203 SelectVal, // A pointer select which is loaded from and for which the load
204 // can be replace by a value select.
205 };
206
207 /// Val - The value that is live out of the block.
209 /// Kind of the live-out value.
211
212 /// Offset - The byte offset in Val that is interesting for the load query.
213 unsigned Offset = 0;
214 /// V1, V2 - The dominating non-clobbered values of SelectVal.
215 Value *V1 = nullptr, *V2 = nullptr;
216
217 static AvailableValue get(Value *V, unsigned Offset = 0) {
218 AvailableValue Res;
219 Res.Val = V;
221 Res.Offset = Offset;
222 return Res;
223 }
224
225 static AvailableValue getMI(MemIntrinsic *MI, unsigned Offset = 0) {
226 AvailableValue Res;
227 Res.Val = MI;
229 Res.Offset = Offset;
230 return Res;
231 }
232
233 static AvailableValue getLoad(LoadInst *Load, unsigned Offset = 0) {
234 AvailableValue Res;
235 Res.Val = Load;
237 Res.Offset = Offset;
238 return Res;
239 }
240
242 AvailableValue Res;
243 Res.Val = nullptr;
245 Res.Offset = 0;
246 return Res;
247 }
248
250 AvailableValue Res;
251 Res.Val = Cond;
253 Res.Offset = 0;
254 Res.V1 = V1;
255 Res.V2 = V2;
256 return Res;
257 }
258
259 bool isSimpleValue() const { return Kind == ValType::SimpleVal; }
260 bool isCoercedLoadValue() const { return Kind == ValType::LoadVal; }
261 bool isMemIntrinValue() const { return Kind == ValType::MemIntrin; }
262 bool isUndefValue() const { return Kind == ValType::UndefVal; }
263 bool isSelectValue() const { return Kind == ValType::SelectVal; }
264
266 assert(isSimpleValue() && "Wrong accessor");
267 return Val;
268 }
269
271 assert(isCoercedLoadValue() && "Wrong accessor");
272 return cast<LoadInst>(Val);
273 }
274
276 assert(isMemIntrinValue() && "Wrong accessor");
277 return cast<MemIntrinsic>(Val);
278 }
279
281 assert(isSelectValue() && "Wrong accessor");
282 return Val;
283 }
284
285 /// Emit code at the specified insertion point to adjust the value defined
286 /// here to the specified type. This handles various coercion cases.
287 Value *MaterializeAdjustedValue(LoadInst *Load, Instruction *InsertPt) const;
288};
289
290/// Represents an AvailableValue which can be rematerialized at the end of
291/// the associated BasicBlock.
293 /// BB - The basic block in question.
294 BasicBlock *BB = nullptr;
295
296 /// AV - The actual available value.
298
301 Res.BB = BB;
302 Res.AV = std::move(AV);
303 return Res;
304 }
305
307 unsigned Offset = 0) {
308 return get(BB, AvailableValue::get(V, Offset));
309 }
310
314
315 /// Emit code at the end of this block to adjust the value defined here to
316 /// the specified type. This handles various coercion cases.
318 return AV.MaterializeAdjustedValue(Load, BB->getTerminator());
319 }
320};
321
322//===----------------------------------------------------------------------===//
323// ValueTable Internal Functions
324//===----------------------------------------------------------------------===//
325
326GVNPass::Expression GVNPass::ValueTable::createExpr(Instruction *I) {
327 Expression E;
328 E.Ty = I->getType();
329 E.Opcode = I->getOpcode();
330 if (const GCRelocateInst *GCR = dyn_cast<GCRelocateInst>(I)) {
331 // gc.relocate is 'special' call: its second and third operands are
332 // not real values, but indices into statepoint's argument list.
333 // Use the refered to values for purposes of identity.
334 E.VarArgs.push_back(lookupOrAdd(GCR->getOperand(0)));
335 E.VarArgs.push_back(lookupOrAdd(GCR->getBasePtr()));
336 E.VarArgs.push_back(lookupOrAdd(GCR->getDerivedPtr()));
337 } else {
338 for (Use &Op : I->operands())
339 E.VarArgs.push_back(lookupOrAdd(Op));
340 }
341 if (I->isCommutative()) {
342 // Ensure that commutative instructions that only differ by a permutation
343 // of their operands get the same value number by sorting the operand value
344 // numbers. Since commutative operands are the 1st two operands it is more
345 // efficient to sort by hand rather than using, say, std::sort.
346 assert(I->getNumOperands() >= 2 && "Unsupported commutative instruction!");
347 if (E.VarArgs[0] > E.VarArgs[1])
348 std::swap(E.VarArgs[0], E.VarArgs[1]);
349 E.Commutative = true;
350 }
351
352 if (auto *C = dyn_cast<CmpInst>(I)) {
353 // Sort the operand value numbers so x<y and y>x get the same value number.
354 CmpInst::Predicate Predicate = C->getPredicate();
355 if (E.VarArgs[0] > E.VarArgs[1]) {
356 std::swap(E.VarArgs[0], E.VarArgs[1]);
358 }
359 E.Opcode = (C->getOpcode() << 8) | Predicate;
360 E.Commutative = true;
361 } else if (auto *IVI = dyn_cast<InsertValueInst>(I)) {
362 E.VarArgs.append(IVI->idx_begin(), IVI->idx_end());
363 } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
364 ArrayRef<int> ShuffleMask = SVI->getShuffleMask();
365 E.VarArgs.append(ShuffleMask.begin(), ShuffleMask.end());
366 } else if (auto *CB = dyn_cast<CallBase>(I)) {
367 E.Attrs = CB->getAttributes();
368 }
369
370 return E;
371}
372
373GVNPass::Expression GVNPass::ValueTable::createCmpExpr(
374 unsigned Opcode, CmpInst::Predicate Predicate, Value *LHS, Value *RHS) {
375 assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
376 "Not a comparison!");
379 E.VarArgs.push_back(lookupOrAdd(LHS));
380 E.VarArgs.push_back(lookupOrAdd(RHS));
381
382 // Sort the operand value numbers so x<y and y>x get the same value number.
383 if (E.VarArgs[0] > E.VarArgs[1]) {
384 std::swap(E.VarArgs[0], E.VarArgs[1]);
386 }
387 E.Opcode = (Opcode << 8) | Predicate;
388 E.Commutative = true;
389 return E;
390}
391
392GVNPass::Expression
393GVNPass::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
394 assert(EI && "Not an ExtractValueInst?");
396 E.Ty = EI->getType();
397 E.Opcode = 0;
398
399 WithOverflowInst *WO = dyn_cast<WithOverflowInst>(EI->getAggregateOperand());
400 if (WO != nullptr && EI->getNumIndices() == 1 && *EI->idx_begin() == 0) {
401 // EI is an extract from one of our with.overflow intrinsics. Synthesize
402 // a semantically equivalent expression instead of an extract value
403 // expression.
404 E.Opcode = WO->getBinaryOp();
405 E.VarArgs.push_back(lookupOrAdd(WO->getLHS()));
406 E.VarArgs.push_back(lookupOrAdd(WO->getRHS()));
407 return E;
408 }
409
410 // Not a recognised intrinsic. Fall back to producing an extract value
411 // expression.
412 E.Opcode = EI->getOpcode();
413 for (Use &Op : EI->operands())
414 E.VarArgs.push_back(lookupOrAdd(Op));
415
416 append_range(E.VarArgs, EI->indices());
417
418 return E;
419}
420
421GVNPass::Expression GVNPass::ValueTable::createGEPExpr(GetElementPtrInst *GEP) {
423 Type *PtrTy = GEP->getType()->getScalarType();
424 const DataLayout &DL = GEP->getDataLayout();
425 unsigned BitWidth = DL.getIndexTypeSizeInBits(PtrTy);
426 SmallMapVector<Value *, APInt, 4> VariableOffsets;
427 APInt ConstantOffset(BitWidth, 0);
428 if (GEP->collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset)) {
429 // Convert into offset representation, to recognize equivalent address
430 // calculations that use different type encoding.
431 LLVMContext &Context = GEP->getContext();
432 E.Opcode = GEP->getOpcode();
433 E.Ty = nullptr;
434 E.VarArgs.push_back(lookupOrAdd(GEP->getPointerOperand()));
435 for (const auto &[V, Scale] : VariableOffsets) {
436 E.VarArgs.push_back(lookupOrAdd(V));
437 E.VarArgs.push_back(lookupOrAdd(ConstantInt::get(Context, Scale)));
438 }
439 if (!ConstantOffset.isZero())
440 E.VarArgs.push_back(
441 lookupOrAdd(ConstantInt::get(Context, ConstantOffset)));
442 } else {
443 // If converting to offset representation fails (for scalable vectors),
444 // fall back to type-based implementation.
445 E.Opcode = GEP->getOpcode();
446 E.Ty = GEP->getSourceElementType();
447 for (Use &Op : GEP->operands())
448 E.VarArgs.push_back(lookupOrAdd(Op));
449 }
450 return E;
451}
452
453//===----------------------------------------------------------------------===//
454// ValueTable External Functions
455//===----------------------------------------------------------------------===//
456
457GVNPass::ValueTable::ValueTable() = default;
458GVNPass::ValueTable::ValueTable(const ValueTable &) = default;
459GVNPass::ValueTable::ValueTable(ValueTable &&) = default;
460GVNPass::ValueTable::~ValueTable() = default;
461GVNPass::ValueTable &
462GVNPass::ValueTable::operator=(const GVNPass::ValueTable &Arg) = default;
463
464/// add - Insert a value into the table with a specified value number.
465void GVNPass::ValueTable::add(Value *V, uint32_t Num) {
466 ValueNumbering.insert(std::make_pair(V, Num));
467 if (PHINode *PN = dyn_cast<PHINode>(V))
468 NumberingPhi[Num] = PN;
469}
470
471/// Include the incoming memory state into the hash of the expression for the
472/// given instruction. If the incoming memory state is:
473/// * LiveOnEntry, add the value number of the entry block,
474/// * a MemoryPhi, add the value number of the basic block corresponding to that
475/// MemoryPhi,
476/// * a MemoryDef, add the value number of the memory setting instruction.
477void GVNPass::ValueTable::addMemoryStateToExp(Instruction *I, Expression &Exp) {
478 assert(MSSA && "addMemoryStateToExp should not be called without MemorySSA");
479 assert(MSSA->getMemoryAccess(I) && "Instruction does not access memory");
480 MemoryAccess *MA = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(I);
481 Exp.VarArgs.push_back(lookupOrAdd(MA));
482}
483
484uint32_t GVNPass::ValueTable::lookupOrAddCall(CallInst *C) {
485 // FIXME: Currently the calls which may access the thread id may
486 // be considered as not accessing the memory. But this is
487 // problematic for coroutines, since coroutines may resume in a
488 // different thread. So we disable the optimization here for the
489 // correctness. However, it may block many other correct
490 // optimizations. Revert this one when we detect the memory
491 // accessing kind more precisely.
492 if (C->getFunction()->isPresplitCoroutine()) {
493 ValueNumbering[C] = NextValueNumber;
494 return NextValueNumber++;
495 }
496
497 // Do not combine convergent calls since they implicitly depend on the set of
498 // threads that is currently executing, and they might be in different basic
499 // blocks.
500 if (C->isConvergent()) {
501 ValueNumbering[C] = NextValueNumber;
502 return NextValueNumber++;
503 }
504
505 if (AA->doesNotAccessMemory(C)) {
506 Expression Exp = createExpr(C);
507 uint32_t E = assignExpNewValueNum(Exp).first;
508 ValueNumbering[C] = E;
509 return E;
510 }
511
512 if (MD && AA->onlyReadsMemory(C)) {
513 Expression Exp = createExpr(C);
514 auto [E, IsValNumNew] = assignExpNewValueNum(Exp);
515 if (IsValNumNew) {
516 ValueNumbering[C] = E;
517 return E;
518 }
519
520 MemDepResult LocalDep = MD->getDependency(C);
521
522 if (!LocalDep.isDef() && !LocalDep.isNonLocal()) {
523 ValueNumbering[C] = NextValueNumber;
524 return NextValueNumber++;
525 }
526
527 if (LocalDep.isDef()) {
528 // For masked load/store intrinsics, the local_dep may actually be
529 // a normal load or store instruction.
530 CallInst *LocalDepCall = dyn_cast<CallInst>(LocalDep.getInst());
531
532 if (!LocalDepCall || LocalDepCall->arg_size() != C->arg_size()) {
533 ValueNumbering[C] = NextValueNumber;
534 return NextValueNumber++;
535 }
536
537 for (unsigned I = 0, E = C->arg_size(); I < E; ++I) {
538 uint32_t CVN = lookupOrAdd(C->getArgOperand(I));
539 uint32_t LocalDepCallVN = lookupOrAdd(LocalDepCall->getArgOperand(I));
540 if (CVN != LocalDepCallVN) {
541 ValueNumbering[C] = NextValueNumber;
542 return NextValueNumber++;
543 }
544 }
545
546 uint32_t V = lookupOrAdd(LocalDepCall);
547 ValueNumbering[C] = V;
548 return V;
549 }
550
551 // Non-local case.
553 MD->getNonLocalCallDependency(C);
554 // FIXME: Move the checking logic to MemDep!
555 CallInst *CDep = nullptr;
556
557 // Check to see if we have a single dominating call instruction that is
558 // identical to C.
559 for (const NonLocalDepEntry &I : Deps) {
560 if (I.getResult().isNonLocal())
561 continue;
562
563 // We don't handle non-definitions. If we already have a call, reject
564 // instruction dependencies.
565 if (!I.getResult().isDef() || CDep != nullptr) {
566 CDep = nullptr;
567 break;
568 }
569
570 CallInst *NonLocalDepCall = dyn_cast<CallInst>(I.getResult().getInst());
571 // FIXME: All duplicated with non-local case.
572 if (NonLocalDepCall && DT->properlyDominates(I.getBB(), C->getParent())) {
573 CDep = NonLocalDepCall;
574 continue;
575 }
576
577 CDep = nullptr;
578 break;
579 }
580
581 if (!CDep) {
582 ValueNumbering[C] = NextValueNumber;
583 return NextValueNumber++;
584 }
585
586 if (CDep->arg_size() != C->arg_size()) {
587 ValueNumbering[C] = NextValueNumber;
588 return NextValueNumber++;
589 }
590 for (unsigned I = 0, E = C->arg_size(); I < E; ++I) {
591 uint32_t CVN = lookupOrAdd(C->getArgOperand(I));
592 uint32_t CDepVN = lookupOrAdd(CDep->getArgOperand(I));
593 if (CVN != CDepVN) {
594 ValueNumbering[C] = NextValueNumber;
595 return NextValueNumber++;
596 }
597 }
598
599 uint32_t V = lookupOrAdd(CDep);
600 ValueNumbering[C] = V;
601 return V;
602 }
603
604 if (MSSA && IsMSSAEnabled && AA->onlyReadsMemory(C)) {
605 Expression Exp = createExpr(C);
606 addMemoryStateToExp(C, Exp);
607 auto [V, _] = assignExpNewValueNum(Exp);
608 ValueNumbering[C] = V;
609 return V;
610 }
611
612 ValueNumbering[C] = NextValueNumber;
613 return NextValueNumber++;
614}
615
616/// Returns the value number for the specified load or store instruction.
617uint32_t GVNPass::ValueTable::computeLoadStoreVN(Instruction *I) {
618 if (!MSSA || !IsMSSAEnabled) {
619 ValueNumbering[I] = NextValueNumber;
620 return NextValueNumber++;
621 }
622
624 Exp.Ty = I->getType();
625 Exp.Opcode = I->getOpcode();
626 for (Use &Op : I->operands())
627 Exp.VarArgs.push_back(lookupOrAdd(Op));
628 addMemoryStateToExp(I, Exp);
629
630 auto [V, _] = assignExpNewValueNum(Exp);
631 ValueNumbering[I] = V;
632 return V;
633}
634
635/// Returns true if a value number exists for the specified value.
636bool GVNPass::ValueTable::exists(Value *V) const {
637 return ValueNumbering.contains(V);
638}
639
640uint32_t GVNPass::ValueTable::lookupOrAdd(MemoryAccess *MA) {
641 return MSSA->isLiveOnEntryDef(MA) || isa<MemoryPhi>(MA)
642 ? lookupOrAdd(MA->getBlock())
643 : lookupOrAdd(cast<MemoryUseOrDef>(MA)->getMemoryInst());
644}
645
646/// lookupOrAdd - Returns the value number for the specified value, assigning
647/// it a new number if it did not have one before.
648uint32_t GVNPass::ValueTable::lookupOrAdd(Value *V) {
649 auto VI = ValueNumbering.find(V);
650 if (VI != ValueNumbering.end())
651 return VI->second;
652
653 auto *I = dyn_cast<Instruction>(V);
654 if (!I) {
655 ValueNumbering[V] = NextValueNumber;
656 if (isa<BasicBlock>(V))
657 NumberingBB[NextValueNumber] = cast<BasicBlock>(V);
658 return NextValueNumber++;
659 }
660
661 Expression Exp;
662 switch (I->getOpcode()) {
663 case Instruction::Call:
664 return lookupOrAddCall(cast<CallInst>(I));
665 case Instruction::FNeg:
666 case Instruction::Add:
667 case Instruction::FAdd:
668 case Instruction::Sub:
669 case Instruction::FSub:
670 case Instruction::Mul:
671 case Instruction::FMul:
672 case Instruction::UDiv:
673 case Instruction::SDiv:
674 case Instruction::FDiv:
675 case Instruction::URem:
676 case Instruction::SRem:
677 case Instruction::FRem:
678 case Instruction::Shl:
679 case Instruction::LShr:
680 case Instruction::AShr:
681 case Instruction::And:
682 case Instruction::Or:
683 case Instruction::Xor:
684 case Instruction::ICmp:
685 case Instruction::FCmp:
686 case Instruction::Trunc:
687 case Instruction::ZExt:
688 case Instruction::SExt:
689 case Instruction::FPToUI:
690 case Instruction::FPToSI:
691 case Instruction::UIToFP:
692 case Instruction::SIToFP:
693 case Instruction::FPTrunc:
694 case Instruction::FPExt:
695 case Instruction::PtrToInt:
696 case Instruction::PtrToAddr:
697 case Instruction::IntToPtr:
698 case Instruction::AddrSpaceCast:
699 case Instruction::BitCast:
700 case Instruction::Select:
701 case Instruction::Freeze:
702 case Instruction::ExtractElement:
703 case Instruction::InsertElement:
704 case Instruction::ShuffleVector:
705 case Instruction::InsertValue:
706 Exp = createExpr(I);
707 break;
708 case Instruction::GetElementPtr:
709 Exp = createGEPExpr(cast<GetElementPtrInst>(I));
710 break;
711 case Instruction::ExtractValue:
712 Exp = createExtractvalueExpr(cast<ExtractValueInst>(I));
713 break;
714 case Instruction::PHI:
715 ValueNumbering[V] = NextValueNumber;
716 NumberingPhi[NextValueNumber] = cast<PHINode>(V);
717 return NextValueNumber++;
718 case Instruction::Load:
719 case Instruction::Store:
720 return computeLoadStoreVN(I);
721 default:
722 ValueNumbering[V] = NextValueNumber;
723 return NextValueNumber++;
724 }
725
726 uint32_t E = assignExpNewValueNum(Exp).first;
727 ValueNumbering[V] = E;
728 return E;
729}
730
731/// Returns the value number of the specified value. Fails if
732/// the value has not yet been numbered.
733uint32_t GVNPass::ValueTable::lookup(Value *V, bool Verify) const {
734 auto VI = ValueNumbering.find(V);
735 if (Verify) {
736 assert(VI != ValueNumbering.end() && "Value not numbered?");
737 return VI->second;
738 }
739 return (VI != ValueNumbering.end()) ? VI->second : 0;
740}
741
742/// Returns the value number of the given comparison,
743/// assigning it a new number if it did not have one before. Useful when
744/// we deduced the result of a comparison, but don't immediately have an
745/// instruction realizing that comparison to hand.
746uint32_t GVNPass::ValueTable::lookupOrAddCmp(unsigned Opcode,
747 CmpInst::Predicate Predicate,
748 Value *LHS, Value *RHS) {
749 Expression Exp = createCmpExpr(Opcode, Predicate, LHS, RHS);
750 return assignExpNewValueNum(Exp).first;
751}
752
753/// Remove all entries from the ValueTable.
755 ValueNumbering.clear();
756 ExpressionNumbering.clear();
757 NumberingPhi.clear();
758 NumberingBB.clear();
759 PhiTranslateTable.clear();
760 NextValueNumber = 1;
761 Expressions.clear();
762 ExprIdx.clear();
763 NextExprNumber = 0;
764}
765
766/// Remove a value from the value numbering.
768 uint32_t Num = ValueNumbering.lookup(V);
769 ValueNumbering.erase(V);
770 // If V is PHINode, V <--> value number is an one-to-one mapping.
771 if (isa<PHINode>(V))
772 NumberingPhi.erase(Num);
773 else if (isa<BasicBlock>(V))
774 NumberingBB.erase(Num);
775}
776
777/// verifyRemoved - Verify that the value is removed from all internal data
778/// structures.
779void GVNPass::ValueTable::verifyRemoved(const Value *V) const {
780 assert(!ValueNumbering.contains(V) &&
781 "Inst still occurs in value numbering map!");
782}
783
784//===----------------------------------------------------------------------===//
785// LeaderMap External Functions
786//===----------------------------------------------------------------------===//
787
788/// Push a new Value to the LeaderTable onto the list for its value number.
789void GVNPass::LeaderMap::insert(uint32_t N, Value *V, const BasicBlock *BB) {
790 const auto &[It, Inserted] = NumToLeaders.try_emplace(N, V, BB, nullptr);
791 if (!Inserted) {
792 // Key already exists: insert new node after the head.
793 auto *NewSlot = TableAllocator.Allocate<LeaderListNode>();
794 new (NewSlot) LeaderListNode(V, BB, It->second.Next);
795 It->second.Next = NewSlot;
796 }
797}
798
799/// Scan the list of values corresponding to a given
800/// value number, and remove the given instruction if encountered.
801void GVNPass::LeaderMap::erase(uint32_t N, Instruction *I,
802 const BasicBlock *BB) {
803 auto It = NumToLeaders.find(N);
804 if (It == NumToLeaders.end())
805 return;
806
807 LeaderListNode *Prev = nullptr;
808 LeaderListNode *Curr = &It->second;
809
810 while (Curr && (Curr->Entry.Val != I || Curr->Entry.BB != BB)) {
811 Prev = Curr;
812 Curr = Curr->Next;
813 }
814
815 if (!Curr)
816 return;
817
818 if (Prev) {
819 // Non-head node: unlink and destroy.
820 Prev->Next = Curr->Next;
821 Curr->~LeaderListNode();
822 TableAllocator.Deallocate<LeaderListNode>(Curr);
823 } else {
824 // Head node (stored by value in DenseMap).
825 if (!Curr->Next) {
826 // Only node; erase from map (DenseMap calls the destructor).
827 NumToLeaders.erase(It);
828 } else {
829 // Move second node's data into head, then destroy second node.
830 LeaderListNode *Next = Curr->Next;
831 Curr->Entry.Val = std::move(Next->Entry.Val);
832 Curr->Entry.BB = Next->Entry.BB;
833 Curr->Next = Next->Next;
834 Next->~LeaderListNode();
835 TableAllocator.Deallocate<LeaderListNode>(Next);
836 }
837 }
838}
839
840//===----------------------------------------------------------------------===//
841// GVN Pass
842//===----------------------------------------------------------------------===//
843
845 return Options.AllowScalarPRE.value_or(GVNEnableScalarPRE);
846}
847
849 return Options.AllowLoadPRE.value_or(GVNEnableLoadPRE);
850}
851
853 return Options.AllowLoadInLoopPRE.value_or(GVNEnableLoadInLoopPRE);
854}
855
857 return Options.AllowLoadPRESplitBackedge.value_or(
859}
860
862 return Options.AllowMemDep.value_or(GVNEnableMemDep);
863}
864
866 return Options.AllowMemorySSA.value_or(GVNEnableMemorySSA);
867}
868
870 // FIXME: The order of evaluation of these 'getResult' calls is very
871 // significant! Re-ordering these variables will cause GVN when run alone to
872 // be less effective! We should fix memdep and basic-aa to not exhibit this
873 // behavior, but until then don't change the order here.
874 auto &AC = AM.getResult<AssumptionAnalysis>(F);
875 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
876 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
877 auto &AA = AM.getResult<AAManager>(F);
878 auto *MemDep =
880 auto &LI = AM.getResult<LoopAnalysis>(F);
881 auto *MSSA = AM.getCachedResult<MemorySSAAnalysis>(F);
882 if (isMemorySSAEnabled() && !MSSA) {
883 assert(!MemDep &&
884 "On-demand computation of MemSSA implies that MemDep is disabled!");
885 MSSA = &AM.getResult<MemorySSAAnalysis>(F);
886 }
888 bool Changed = runImpl(F, AC, DT, TLI, AA, MemDep, LI, &ORE,
889 MSSA ? &MSSA->getMSSA() : nullptr);
890 if (!Changed)
891 return PreservedAnalyses::all();
895 if (MSSA)
898 return PA;
899}
900
902 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
903 static_cast<PassInfoMixin<GVNPass> *>(this)->printPipeline(
904 OS, MapClassName2PassName);
905
906 OS << '<';
907 if (Options.AllowScalarPRE != std::nullopt)
908 OS << (*Options.AllowScalarPRE ? "" : "no-") << "scalar-pre;";
909 if (Options.AllowLoadPRE != std::nullopt)
910 OS << (*Options.AllowLoadPRE ? "" : "no-") << "load-pre;";
911 if (Options.AllowLoadPRESplitBackedge != std::nullopt)
912 OS << (*Options.AllowLoadPRESplitBackedge ? "" : "no-")
913 << "split-backedge-load-pre;";
914 if (Options.AllowMemDep != std::nullopt)
915 OS << (*Options.AllowMemDep ? "" : "no-") << "memdep;";
916 if (Options.AllowMemorySSA != std::nullopt)
917 OS << (*Options.AllowMemorySSA ? "" : "no-") << "memoryssa";
918 OS << '>';
919}
920
922 salvageKnowledge(I, AC);
924 removeInstruction(I);
925}
926
927#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
928LLVM_DUMP_METHOD void GVNPass::dump(DenseMap<uint32_t, Value *> &Map) const {
929 errs() << "{\n";
930 for (const auto &[Num, Exp] : Map) {
931 errs() << Num << "\n";
932 Exp->dump();
933 }
934 errs() << "}\n";
935}
936#endif
937
938enum class AvailabilityState : char {
939 /// We know the block *is not* fully available. This is a fixpoint.
941 /// We know the block *is* fully available. This is a fixpoint.
943 /// We do not know whether the block is fully available or not,
944 /// but we are currently speculating that it will be.
945 /// If it would have turned out that the block was, in fact, not fully
946 /// available, this would have been cleaned up into an Unavailable.
948};
949
950/// Return true if we can prove that the value
951/// we're analyzing is fully available in the specified block. As we go, keep
952/// track of which blocks we know are fully alive in FullyAvailableBlocks. This
953/// map is actually a tri-state map with the following values:
954/// 0) we know the block *is not* fully available.
955/// 1) we know the block *is* fully available.
956/// 2) we do not know whether the block is fully available or not, but we are
957/// currently speculating that it will be.
959 BasicBlock *BB,
960 DenseMap<BasicBlock *, AvailabilityState> &FullyAvailableBlocks) {
962 std::optional<BasicBlock *> UnavailableBB;
963
964 // The number of times we didn't find an entry for a block in a map and
965 // optimistically inserted an entry marking block as speculatively available.
966 unsigned NumNewNewSpeculativelyAvailableBBs = 0;
967
968#ifndef NDEBUG
969 SmallPtrSet<BasicBlock *, 32> NewSpeculativelyAvailableBBs;
971#endif
972
973 Worklist.emplace_back(BB);
974 while (!Worklist.empty()) {
975 BasicBlock *CurrBB = Worklist.pop_back_val(); // LoadFO - depth-first!
976 // Optimistically assume that the block is Speculatively Available and check
977 // to see if we already know about this block in one lookup.
978 std::pair<DenseMap<BasicBlock *, AvailabilityState>::iterator, bool> IV =
979 FullyAvailableBlocks.try_emplace(
981 AvailabilityState &State = IV.first->second;
982
983 // Did the entry already exist for this block?
984 if (!IV.second) {
985 if (State == AvailabilityState::Unavailable) {
986 UnavailableBB = CurrBB;
987 break; // Backpropagate unavailability info.
988 }
989
990#ifndef NDEBUG
991 AvailableBBs.emplace_back(CurrBB);
992#endif
993 continue; // Don't recurse further, but continue processing worklist.
994 }
995
996 // No entry found for block.
997 ++NumNewNewSpeculativelyAvailableBBs;
998 bool OutOfBudget = NumNewNewSpeculativelyAvailableBBs > MaxBBSpeculations;
999
1000 // If we have exhausted our budget, mark this block as unavailable.
1001 // Also, if this block has no predecessors, the value isn't live-in here.
1002 if (OutOfBudget || pred_empty(CurrBB)) {
1003 MaxBBSpeculationCutoffReachedTimes += (int)OutOfBudget;
1005 UnavailableBB = CurrBB;
1006 break; // Backpropagate unavailability info.
1007 }
1008
1009 // Tentatively consider this block as speculatively available.
1010#ifndef NDEBUG
1011 NewSpeculativelyAvailableBBs.insert(CurrBB);
1012#endif
1013 // And further recurse into block's predecessors, in depth-first order!
1014 Worklist.append(pred_begin(CurrBB), pred_end(CurrBB));
1015 }
1016
1017#if LLVM_ENABLE_STATS
1018 IsValueFullyAvailableInBlockNumSpeculationsMax.updateMax(
1019 NumNewNewSpeculativelyAvailableBBs);
1020#endif
1021
1022 // If the block isn't marked as fixpoint yet
1023 // (the Unavailable and Available states are fixpoints).
1024 auto MarkAsFixpointAndEnqueueSuccessors =
1025 [&](BasicBlock *BB, AvailabilityState FixpointState) {
1026 auto It = FullyAvailableBlocks.find(BB);
1027 if (It == FullyAvailableBlocks.end())
1028 return; // Never queried this block, leave as-is.
1029 switch (AvailabilityState &State = It->second) {
1032 return; // Don't backpropagate further, continue processing worklist.
1034 State = FixpointState;
1035#ifndef NDEBUG
1036 assert(NewSpeculativelyAvailableBBs.erase(BB) &&
1037 "Found a speculatively available successor leftover?");
1038#endif
1039 // Queue successors for further processing.
1040 Worklist.append(succ_begin(BB), succ_end(BB));
1041 return;
1042 }
1043 };
1044
1045 if (UnavailableBB) {
1046 // Okay, we have encountered an unavailable block.
1047 // Mark speculatively available blocks reachable from UnavailableBB as
1048 // unavailable as well. Paths are terminated when they reach blocks not in
1049 // FullyAvailableBlocks or they are not marked as speculatively available.
1050 Worklist.clear();
1051 Worklist.append(succ_begin(*UnavailableBB), succ_end(*UnavailableBB));
1052 while (!Worklist.empty())
1053 MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(),
1055 }
1056
1057#ifndef NDEBUG
1058 Worklist.clear();
1059 for (BasicBlock *AvailableBB : AvailableBBs)
1060 Worklist.append(succ_begin(AvailableBB), succ_end(AvailableBB));
1061 while (!Worklist.empty())
1062 MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(),
1064
1065 assert(NewSpeculativelyAvailableBBs.empty() &&
1066 "Must have fixed all the new speculatively available blocks.");
1067#endif
1068
1069 return !UnavailableBB;
1070}
1071
1072/// If the specified OldValue exists in ValuesPerBlock, replace its value with
1073/// NewValue.
1075 SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock, Value *OldValue,
1076 Value *NewValue) {
1077 for (AvailableValueInBlock &V : ValuesPerBlock) {
1078 if (V.AV.Val == OldValue)
1079 V.AV.Val = NewValue;
1080 if (V.AV.isSelectValue()) {
1081 if (V.AV.V1 == OldValue)
1082 V.AV.V1 = NewValue;
1083 if (V.AV.V2 == OldValue)
1084 V.AV.V2 = NewValue;
1085 }
1086 }
1087}
1088
1089/// Given a set of loads specified by ValuesPerBlock,
1090/// construct SSA form, allowing us to eliminate Load. This returns the value
1091/// that should be used at Load's definition site.
1092static Value *
1095 GVNPass &GVN) {
1096 // Check for the fully redundant, dominating load case. In this case, we can
1097 // just use the dominating value directly.
1098 if (ValuesPerBlock.size() == 1 &&
1099 GVN.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB,
1100 Load->getParent())) {
1101 assert(!ValuesPerBlock[0].AV.isUndefValue() &&
1102 "Dead BB dominate this block");
1103 return ValuesPerBlock[0].MaterializeAdjustedValue(Load);
1104 }
1105
1106 // Otherwise, we have to construct SSA form.
1108 SSAUpdater SSAUpdate(&NewPHIs);
1109 SSAUpdate.Initialize(Load->getType(), Load->getName());
1110
1111 for (const AvailableValueInBlock &AV : ValuesPerBlock) {
1112 BasicBlock *BB = AV.BB;
1113
1114 if (AV.AV.isUndefValue())
1115 continue;
1116
1117 if (SSAUpdate.HasValueForBlock(BB))
1118 continue;
1119
1120 // If the value is the load that we will be eliminating, and the block it's
1121 // available in is the block that the load is in, then don't add it as
1122 // SSAUpdater will resolve the value to the relevant phi which may let it
1123 // avoid phi construction entirely if there's actually only one value.
1124 if (BB == Load->getParent() &&
1125 ((AV.AV.isSimpleValue() && AV.AV.getSimpleValue() == Load) ||
1126 (AV.AV.isCoercedLoadValue() && AV.AV.getCoercedLoadValue() == Load)))
1127 continue;
1128
1129 SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(Load));
1130 }
1131
1132 // Perform PHI construction.
1133 return SSAUpdate.GetValueInMiddleOfBlock(Load->getParent());
1134}
1135
1137 Instruction *InsertPt) const {
1138 Value *Res;
1139 Type *LoadTy = Load->getType();
1140 const DataLayout &DL = Load->getDataLayout();
1141 if (isSimpleValue()) {
1142 Res = getSimpleValue();
1143 if (Res->getType() != LoadTy) {
1144 Res = getValueForLoad(Res, Offset, LoadTy, InsertPt, Load->getFunction());
1145
1146 LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset
1147 << " " << *getSimpleValue() << '\n'
1148 << *Res << '\n'
1149 << "\n\n\n");
1150 }
1151 } else if (isCoercedLoadValue()) {
1152 LoadInst *CoercedLoad = getCoercedLoadValue();
1153 if (CoercedLoad->getType() == LoadTy && Offset == 0) {
1154 Res = CoercedLoad;
1155 combineMetadataForCSE(CoercedLoad, Load, false);
1156 } else {
1157 Res = getValueForLoad(CoercedLoad, Offset, LoadTy, InsertPt,
1158 Load->getFunction());
1159 // We are adding a new user for this load, for which the original
1160 // metadata may not hold. Additionally, the new load may have a different
1161 // size and type, so their metadata cannot be combined in any
1162 // straightforward way.
1163 // Drop all metadata that is not known to cause immediate UB on violation,
1164 // unless the load has !noundef, in which case all metadata violations
1165 // will be promoted to UB.
1166 // !noalias and !alias.scope are kept: the load is not moved and still
1167 // accesses the same memory, and these are independent of the load type
1168 // and offset, so they remain valid for the coerced result.
1169 if (!CoercedLoad->hasMetadata(LLVMContext::MD_noundef))
1170 CoercedLoad->dropUnknownNonDebugMetadata(
1171 {LLVMContext::MD_dereferenceable,
1172 LLVMContext::MD_dereferenceable_or_null,
1173 LLVMContext::MD_invariant_load, LLVMContext::MD_invariant_group,
1174 LLVMContext::MD_alias_scope, LLVMContext::MD_noalias});
1175 LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset
1176 << " " << *getCoercedLoadValue() << '\n'
1177 << *Res << '\n'
1178 << "\n\n\n");
1179 }
1180 } else if (isMemIntrinValue()) {
1182 InsertPt, DL);
1183 LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
1184 << " " << *getMemIntrinValue() << '\n'
1185 << *Res << '\n'
1186 << "\n\n\n");
1187 } else if (isSelectValue()) {
1188 // Introduce a new value select for a load from an eligible pointer select.
1190 assert(V1 && V2 && "both value operands of the select must be present");
1191 Res = SelectInst::Create(Cond, V1, V2, "", InsertPt->getIterator());
1192 // We use the DebugLoc from the original load here, as this instruction
1193 // materializes the value that would previously have been loaded.
1194 cast<SelectInst>(Res)->setDebugLoc(Load->getDebugLoc());
1195 } else {
1196 llvm_unreachable("Should not materialize value from dead block");
1197 }
1198 assert(Res && "failed to materialize?");
1199 return Res;
1200}
1201
1202static bool isLifetimeStart(const Instruction *Inst) {
1203 if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
1204 return II->getIntrinsicID() == Intrinsic::lifetime_start;
1205 return false;
1206}
1207
1208/// Assuming To can be reached from both From and Between, does Between lie on
1209/// every path from From to To?
1210static bool liesBetween(const Instruction *From, Instruction *Between,
1211 const Instruction *To, const DominatorTree *DT) {
1212 if (From->getParent() == Between->getParent())
1213 return DT->dominates(From, Between);
1215 Exclusion.insert(Between->getParent());
1216 return !isPotentiallyReachable(From, To, &Exclusion, DT);
1217}
1218
1220 const DominatorTree *DT) {
1221 Value *PtrOp = Load->getPointerOperand();
1222 if (!PtrOp->hasUseList())
1223 return nullptr;
1224
1225 Instruction *OtherAccess = nullptr;
1226
1227 for (auto *U : PtrOp->users()) {
1228 if (U != Load && (isa<LoadInst>(U) || isa<StoreInst>(U))) {
1229 auto *I = cast<Instruction>(U);
1230 if (I->getFunction() == Load->getFunction() && DT->dominates(I, Load)) {
1231 // Use the most immediately dominating value.
1232 if (OtherAccess) {
1233 if (DT->dominates(OtherAccess, I))
1234 OtherAccess = I;
1235 else
1236 assert(U == OtherAccess || DT->dominates(I, OtherAccess));
1237 } else
1238 OtherAccess = I;
1239 }
1240 }
1241 }
1242
1243 if (OtherAccess)
1244 return OtherAccess;
1245
1246 // There is no dominating use, check if we can find a closest non-dominating
1247 // use that lies between any other potentially available use and Load.
1248 for (auto *U : PtrOp->users()) {
1249 if (U != Load && (isa<LoadInst>(U) || isa<StoreInst>(U))) {
1250 auto *I = cast<Instruction>(U);
1251 if (I->getFunction() == Load->getFunction() &&
1252 isPotentiallyReachable(I, Load, nullptr, DT)) {
1253 if (OtherAccess) {
1254 if (liesBetween(OtherAccess, I, Load, DT)) {
1255 OtherAccess = I;
1256 } else if (!liesBetween(I, OtherAccess, Load, DT)) {
1257 // These uses are both partially available at Load were it not for
1258 // the clobber, but neither lies strictly after the other.
1259 OtherAccess = nullptr;
1260 break;
1261 } // else: keep current OtherAccess since it lies between U and
1262 // Load.
1263 } else {
1264 OtherAccess = I;
1265 }
1266 }
1267 }
1268 }
1269
1270 return OtherAccess;
1271}
1272
1273/// Try to locate the three instruction involved in a missed
1274/// load-elimination case that is due to an intervening store.
1275static void reportMayClobberedLoad(LoadInst *Load, Instruction *DepInst,
1276 const DominatorTree *DT,
1278 using namespace ore;
1279
1280 OptimizationRemarkMissed R(DEBUG_TYPE, "LoadClobbered", Load);
1281 R << "load of type " << NV("Type", Load->getType()) << " not eliminated"
1282 << setExtraArgs();
1283
1284 const Instruction *OtherAccess = findMayClobberedPtrAccess(Load, DT);
1285 if (OtherAccess)
1286 R << " in favor of " << NV("OtherAccess", OtherAccess);
1287
1288 R << " because it is clobbered by " << NV("ClobberedBy", DepInst);
1289
1290 ORE->emit(R);
1291}
1292
1293// Find a dominating value for Loc memory location in the extended basic block
1294// (chain of basic blocks with single predecessors) starting From instruction.
1295// Returns the value from a matching load or a simple store to the same pointer.
1297 Instruction *From, AAResults *AA) {
1298 uint32_t NumVisitedInsts = 0;
1299 BasicBlock *FromBB = From->getParent();
1300 BatchAAResults BatchAA(*AA);
1301 for (BasicBlock *BB = FromBB; BB; BB = BB->getSinglePredecessor())
1302 for (auto *Inst = BB == FromBB ? From : BB->getTerminator();
1303 Inst != nullptr; Inst = Inst->getPrevNode()) {
1304 // Stop the search if limit is reached.
1305 if (++NumVisitedInsts > MaxNumVisitedInsts)
1306 return nullptr;
1307 if (isModSet(BatchAA.getModRefInfo(Inst, Loc))) {
1308 // A simple store to the exact location can forward its value.
1309 if (auto *SI = dyn_cast<StoreInst>(Inst))
1310 if (SI->isSimple() && SI->getPointerOperand() == Loc.Ptr &&
1311 SI->getValueOperand()->getType() == LoadTy)
1312 return SI->getValueOperand();
1313 return nullptr;
1314 }
1315 if (auto *LI = dyn_cast<LoadInst>(Inst))
1316 if (LI->getPointerOperand() == Loc.Ptr && LI->getType() == LoadTy)
1317 return LI;
1318 }
1319 return nullptr;
1320}
1321
1322std::optional<AvailableValue>
1323GVNPass::AnalyzeSelectAvailability(LoadInst *Load, Value *Cond, Value *TrueAddr,
1324 Value *FalseAddr, Instruction *From) {
1325 assert(TrueAddr->getType() == Load->getPointerOperandType() &&
1326 "Invalid address type of true side of select dependency");
1327 assert(FalseAddr->getType() == Load->getPointerOperandType() &&
1328 "Invalid address type of false side of select dependency");
1329 // We can convert a load through a select address into a select of the two
1330 // loaded values only if both sides have a dominating, non-clobbered value of
1331 // the right type in the extended basic block ending at From.
1332 auto Loc = MemoryLocation::get(Load);
1333 Value *V1 = findDominatingValue(Loc.getWithNewPtr(TrueAddr), Load->getType(),
1334 From, getAliasAnalysis());
1335 if (!V1)
1336 return std::nullopt;
1337 Value *V2 = findDominatingValue(Loc.getWithNewPtr(FalseAddr), Load->getType(),
1338 From, getAliasAnalysis());
1339 if (!V2)
1340 return std::nullopt;
1341 return AvailableValue::getSelect(Cond, V1, V2);
1342}
1343
1344std::optional<AvailableValue>
1345GVNPass::AnalyzeLoadAvailability(LoadInst *Load, const ReachingMemVal &Dep,
1346 Value *Address) {
1347 assert(Load->isUnordered() && "rules below are incorrect for ordered access");
1348 assert((Dep.Kind == DepKind::Def || Dep.Kind == DepKind::Clobber) &&
1349 "expected a local dependence");
1350
1351 Instruction *DepInst = Dep.Inst;
1352
1353 const DataLayout &DL = Load->getDataLayout();
1354 if (Dep.Kind == DepKind::Clobber) {
1355 // If the dependence is to a store that writes to a superset of the bits
1356 // read by the load, we can extract the bits we need for the load from the
1357 // stored value.
1358 if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
1359 // Can't forward from non-atomic to atomic without violating memory model.
1360 if (Address && Load->isAtomic() <= DepSI->isAtomic()) {
1361 int Offset =
1362 analyzeLoadFromClobberingStore(Load->getType(), Address, DepSI, DL);
1363 if (Offset != -1)
1364 return AvailableValue::get(DepSI->getValueOperand(), Offset);
1365 }
1366 }
1367
1368 // Check to see if we have something like this:
1369 // load i32* P
1370 // load i8* (P+1)
1371 // if we have this, replace the later with an extraction from the former.
1372 if (LoadInst *DepLoad = dyn_cast<LoadInst>(DepInst)) {
1373 // If this is a clobber and L is the first instruction in its block, then
1374 // we have the first instruction in the entry block.
1375 // Can't forward from non-atomic to atomic without violating memory model.
1376 if (DepLoad != Load && Address &&
1377 Load->isAtomic() <= DepLoad->isAtomic()) {
1378 Type *LoadType = Load->getType();
1379 int Offset = Dep.Offset;
1380
1381 if (!isMemorySSAEnabled()) {
1382 // If MD reported clobber, check it was nested.
1383 if (canCoerceMustAliasedValueToLoad(DepLoad, LoadType,
1384 DepLoad->getFunction())) {
1385 const auto ClobberOff = MD->getClobberOffset(DepLoad);
1386 // GVN has no deal with a negative offset.
1387 Offset = (ClobberOff == std::nullopt || *ClobberOff < 0)
1388 ? -1
1389 : *ClobberOff;
1390 }
1391 } else {
1392 if (!canCoerceMustAliasedValueToLoad(DepLoad, LoadType,
1393 DepLoad->getFunction()) ||
1394 Offset < 0)
1395 Offset = -1;
1396 }
1397 if (Offset == -1)
1398 Offset =
1399 analyzeLoadFromClobberingLoad(LoadType, Address, DepLoad, DL);
1400 if (Offset != -1)
1401 return AvailableValue::getLoad(DepLoad, Offset);
1402 }
1403 }
1404
1405 // If the clobbering value is a memset/memcpy/memmove, see if we can
1406 // forward a value on from it.
1407 if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInst)) {
1408 if (Address && !Load->isAtomic()) {
1410 DepMI, DL);
1411 if (Offset != -1)
1412 return AvailableValue::getMI(DepMI, Offset);
1413 }
1414 }
1415
1416 // Nothing known about this clobber, have to be conservative.
1417 LLVM_DEBUG(
1418 // fast print dep, using operator<< on instruction is too slow.
1419 dbgs() << "GVN: load "; Load->printAsOperand(dbgs());
1420 dbgs() << " is clobbered by " << *DepInst << '\n';);
1421 if (ORE->allowExtraAnalysis(DEBUG_TYPE))
1422 reportMayClobberedLoad(Load, DepInst, DT, ORE);
1423
1424 return std::nullopt;
1425 }
1426 assert(Dep.Kind == DepKind::Def && "follows from above");
1427
1428 // Loading the alloca -> undef.
1429 // Loading immediately after lifetime begin -> undef.
1430 if (isa<AllocaInst>(DepInst) || isLifetimeStart(DepInst))
1431 return AvailableValue::get(UndefValue::get(Load->getType()));
1432
1433 if (Constant *InitVal =
1434 getInitialValueOfAllocation(DepInst, TLI, Load->getType()))
1435 return AvailableValue::get(InitVal);
1436
1437 if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
1438 // Reject loads and stores that are to the same address but are of
1439 // different types if we have to. If the stored value is convertable to
1440 // the loaded value, we can reuse it.
1441 if (!canCoerceMustAliasedValueToLoad(S->getValueOperand(), Load->getType(),
1442 S->getFunction()))
1443 return std::nullopt;
1444
1445 // Can't forward from non-atomic to atomic without violating memory model.
1446 if (S->isAtomic() < Load->isAtomic())
1447 return std::nullopt;
1448
1449 return AvailableValue::get(S->getValueOperand());
1450 }
1451
1452 if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
1453 // If the types mismatch and we can't handle it, reject reuse of the load.
1454 // If the stored value is larger or equal to the loaded value, we can reuse
1455 // it.
1456 if (!canCoerceMustAliasedValueToLoad(LD, Load->getType(),
1457 LD->getFunction()))
1458 return std::nullopt;
1459
1460 // Can't forward from non-atomic to atomic without violating memory model.
1461 if (LD->isAtomic() < Load->isAtomic())
1462 return std::nullopt;
1463
1464 return AvailableValue::getLoad(LD);
1465 }
1466
1467 // Check if load with Addr dependent from select can be converted to select
1468 // between load values. There must be no instructions between the found
1469 // loads and DepInst that may clobber the loads.
1470 if (auto *Sel = dyn_cast<SelectInst>(DepInst)) {
1471 assert(Sel->getType() == Load->getPointerOperandType());
1472 if (auto AV = AnalyzeSelectAvailability(Load, Sel->getCondition(),
1473 Sel->getTrueValue(),
1474 Sel->getFalseValue(), DepInst))
1475 return AV;
1476 return std::nullopt;
1477 }
1478
1479 // Unknown def - must be conservative.
1480 LLVM_DEBUG(
1481 // fast print dep, using operator<< on instruction is too slow.
1482 dbgs() << "GVN: load "; Load->printAsOperand(dbgs());
1483 dbgs() << " has unknown def " << *DepInst << '\n';);
1484 return std::nullopt;
1485}
1486
1487void GVNPass::AnalyzeLoadAvailability(LoadInst *Load,
1488 SmallVectorImpl<ReachingMemVal> &Deps,
1489 AvailValInBlkVect &ValuesPerBlock,
1490 UnavailBlkVect &UnavailableBlocks) {
1491 // Filter out useless results (non-locals, etc). Keep track of the blocks
1492 // where we have a value available in repl, also keep track of whether we see
1493 // dependencies that produce an unknown value for the load (such as a call
1494 // that could potentially clobber the load).
1495 for (const auto &Dep : Deps) {
1496 BasicBlock *DepBB = Dep.Block;
1497
1498 if (DeadBlocks.count(DepBB)) {
1499 // Dead dependent mem-op disguise as a load evaluating the same value
1500 // as the load in question.
1501 ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(DepBB));
1502 continue;
1503 }
1504
1505 if (Dep.Kind == DepKind::Other) {
1506 UnavailableBlocks.push_back(DepBB);
1507 continue;
1508 }
1509
1510 // The load address is a select in this block: try to rematerialize the
1511 // load as a select of the two reaching values (one per side). The values
1512 // are searched for at the end of DepBB.
1513 if (Dep.Kind == DepKind::Select) {
1514 if (auto AV = AnalyzeSelectAvailability(
1515 Load, const_cast<Value *>(Dep.SelCond),
1516 const_cast<Value *>(Dep.SelTrueAddr),
1517 const_cast<Value *>(Dep.SelFalseAddr), DepBB->getTerminator())) {
1518 ValuesPerBlock.push_back(
1519 AvailableValueInBlock::get(DepBB, std::move(*AV)));
1520 } else {
1521 UnavailableBlocks.push_back(DepBB);
1522 }
1523 continue;
1524 }
1525
1526 // The address being loaded in this non-local block may not be the same as
1527 // the pointer operand of the load if PHI translation occurs. Make sure
1528 // to consider the right address.
1529 if (auto AV =
1530 AnalyzeLoadAvailability(Load, Dep, const_cast<Value *>(Dep.Addr))) {
1531 // subtlety: because we know this was a non-local dependency, we know
1532 // it's safe to materialize anywhere between the instruction within
1533 // DepInfo and the end of it's block.
1534 ValuesPerBlock.push_back(
1535 AvailableValueInBlock::get(DepBB, std::move(*AV)));
1536 } else {
1537 UnavailableBlocks.push_back(DepBB);
1538 }
1539 }
1540
1541 assert(Deps.size() == ValuesPerBlock.size() + UnavailableBlocks.size() &&
1542 "post condition violation");
1543}
1544
1545/// Given the following code, v1 is partially available on some edges, but not
1546/// available on the edge from PredBB. This function tries to find if there is
1547/// another identical load in the other successor of PredBB.
1548///
1549/// v0 = load %addr
1550/// br %LoadBB
1551///
1552/// LoadBB:
1553/// v1 = load %addr
1554/// ...
1555///
1556/// PredBB:
1557/// ...
1558/// br %cond, label %LoadBB, label %SuccBB
1559///
1560/// SuccBB:
1561/// v2 = load %addr
1562/// ...
1563///
1564LoadInst *GVNPass::findLoadToHoistIntoPred(BasicBlock *Pred, BasicBlock *LoadBB,
1565 LoadInst *Load) {
1566 // For simplicity we handle a Pred has 2 successors only.
1567 auto *Term = Pred->getTerminator();
1568 if (Term->getNumSuccessors() != 2 || Term->isSpecialTerminator())
1569 return nullptr;
1570 auto *SuccBB = Term->getSuccessor(0);
1571 if (SuccBB == LoadBB)
1572 SuccBB = Term->getSuccessor(1);
1573 if (!SuccBB->getSinglePredecessor())
1574 return nullptr;
1575
1576 unsigned int NumInsts = MaxNumInsnsPerBlock;
1577 for (Instruction &Inst : *SuccBB) {
1578 if (Inst.isDebugOrPseudoInst())
1579 continue;
1580 if (--NumInsts == 0)
1581 return nullptr;
1582
1583 if (!Inst.isIdenticalTo(Load))
1584 continue;
1585
1586 bool HasLocalDep = true;
1587 if (!isMemorySSAEnabled()) {
1588 MemDepResult Dep = MD->getDependency(&Inst);
1589 HasLocalDep = !Dep.isNonLocal();
1590 } else {
1591 auto *MSSA = MSSAU->getMemorySSA();
1592 // Do not hoist if the identical load has ordering constraint.
1593 if (auto *MA = MSSA->getMemoryAccess(&Inst); MA && isa<MemoryUse>(MA)) {
1594 auto *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(MA);
1595 HasLocalDep = Clobber->getBlock() == SuccBB;
1596 }
1597 }
1598
1599 // If an identical load doesn't depends on any local instructions, it can
1600 // be safely moved to PredBB.
1601 // Also check for the implicit control flow instructions. See the comments
1602 // in PerformLoadPRE for details.
1603 if (!HasLocalDep && !ICF->isDominatedByICFIFromSameBlock(&Inst))
1604 return cast<LoadInst>(&Inst);
1605
1606 // Otherwise there is something in the same BB clobbers the memory, we can't
1607 // move this and later load to PredBB.
1608 return nullptr;
1609 }
1610
1611 return nullptr;
1612}
1613
1614void GVNPass::eliminatePartiallyRedundantLoad(
1615 LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
1616 MapVector<BasicBlock *, Value *> &AvailableLoads,
1617 MapVector<BasicBlock *, LoadInst *> *CriticalEdgePredAndLoad) {
1618 for (const auto &AvailableLoad : AvailableLoads) {
1619 BasicBlock *UnavailableBlock = AvailableLoad.first;
1620 Value *LoadPtr = AvailableLoad.second;
1621
1622 auto *NewLoad = new LoadInst(
1623 Load->getType(), LoadPtr, Load->getName() + ".pre", Load->isVolatile(),
1624 Load->getAlign(), Load->getOrdering(), Load->getSyncScopeID(),
1625 UnavailableBlock->getTerminator()->getIterator());
1626 NewLoad->setDebugLoc(Load->getDebugLoc());
1627 if (MSSAU) {
1628 auto *NewAccess = MSSAU->createMemoryAccessInBB(
1629 NewLoad, nullptr, NewLoad->getParent(), MemorySSA::BeforeTerminator);
1630 if (auto *NewDef = dyn_cast<MemoryDef>(NewAccess))
1631 MSSAU->insertDef(NewDef, /*RenameUses=*/true);
1632 else
1633 MSSAU->insertUse(cast<MemoryUse>(NewAccess), /*RenameUses=*/true);
1634 }
1635
1636 // Transfer the old load's AA tags to the new load.
1637 AAMDNodes Tags = Load->getAAMetadata();
1638 if (Tags)
1639 NewLoad->setAAMetadata(Tags);
1640
1641 if (auto *MD = Load->getMetadata(LLVMContext::MD_invariant_load))
1642 NewLoad->setMetadata(LLVMContext::MD_invariant_load, MD);
1643 if (auto *InvGroupMD = Load->getMetadata(LLVMContext::MD_invariant_group))
1644 NewLoad->setMetadata(LLVMContext::MD_invariant_group, InvGroupMD);
1645 if (auto *RangeMD = Load->getMetadata(LLVMContext::MD_range))
1646 NewLoad->setMetadata(LLVMContext::MD_range, RangeMD);
1647 if (auto *NoFPClassMD = Load->getMetadata(LLVMContext::MD_nofpclass))
1648 NewLoad->setMetadata(LLVMContext::MD_nofpclass, NoFPClassMD);
1649
1650 if (auto *AccessMD = Load->getMetadata(LLVMContext::MD_access_group))
1651 if (LI->getLoopFor(Load->getParent()) == LI->getLoopFor(UnavailableBlock))
1652 NewLoad->setMetadata(LLVMContext::MD_access_group, AccessMD);
1653
1654 // We do not propagate the old load's debug location, because the new
1655 // load now lives in a different BB, and we want to avoid a jumpy line
1656 // table.
1657 // FIXME: How do we retain source locations without causing poor debugging
1658 // behavior?
1659
1660 // Add the newly created load.
1661 ValuesPerBlock.push_back(
1662 AvailableValueInBlock::get(UnavailableBlock, NewLoad));
1663 if (MD)
1664 MD->invalidateCachedPointerInfo(LoadPtr);
1665 LLVM_DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
1666
1667 // For PredBB in CriticalEdgePredAndLoad we need to replace the uses of old
1668 // load instruction with the new created load instruction.
1669 if (CriticalEdgePredAndLoad) {
1670 auto It = CriticalEdgePredAndLoad->find(UnavailableBlock);
1671 if (It != CriticalEdgePredAndLoad->end()) {
1672 ++NumPRELoadMoved2CEPred;
1673 ICF->insertInstructionTo(NewLoad, UnavailableBlock);
1674 LoadInst *OldLoad = It->second;
1675 combineMetadataForCSE(NewLoad, OldLoad, /*DoesKMove=*/true);
1676 OldLoad->replaceAllUsesWith(NewLoad);
1677 replaceValuesPerBlockEntry(ValuesPerBlock, OldLoad, NewLoad);
1678 if (uint32_t ValNo = VN.lookup(OldLoad, false))
1679 LeaderTable.erase(ValNo, OldLoad, OldLoad->getParent());
1680 removeInstruction(OldLoad);
1681 }
1682 }
1683 }
1684
1685 // Perform PHI construction.
1686 Value *V = ConstructSSAForLoadSet(Load, ValuesPerBlock, *this);
1687 // ConstructSSAForLoadSet is responsible for combining metadata.
1688 ICF->removeUsersOf(Load);
1689 Load->replaceAllUsesWith(V);
1690 if (isa<PHINode>(V))
1691 V->takeName(Load);
1692 if (Instruction *I = dyn_cast<Instruction>(V))
1693 I->setDebugLoc(Load->getDebugLoc());
1694 if (MD && V->getType()->isPtrOrPtrVectorTy())
1695 MD->invalidateCachedPointerInfo(V);
1696 ORE->emit([&]() {
1697 return OptimizationRemark(DEBUG_TYPE, "LoadPRE", Load)
1698 << "load eliminated by PRE";
1699 });
1701}
1702
1703bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
1704 UnavailBlkVect &UnavailableBlocks) {
1705 // Okay, we have *some* definitions of the value. This means that the value
1706 // is available in some of our (transitive) predecessors. Lets think about
1707 // doing PRE of this load. This will involve inserting a new load into the
1708 // predecessor when it's not available. We could do this in general, but
1709 // prefer to not increase code size. As such, we only do this when we know
1710 // that we only have to insert *one* load (which means we're basically moving
1711 // the load, not inserting a new one).
1712
1713 SmallPtrSet<BasicBlock *, 4> Blockers(llvm::from_range, UnavailableBlocks);
1714
1715 // Let's find the first basic block with more than one predecessor. Walk
1716 // backwards through predecessors if needed.
1717 BasicBlock *LoadBB = Load->getParent();
1718 BasicBlock *TmpBB = LoadBB;
1719
1720 // Check that there is no implicit control flow instructions above our load in
1721 // its block. If there is an instruction that doesn't always pass the
1722 // execution to the following instruction, then moving through it may become
1723 // invalid. For example:
1724 //
1725 // int arr[LEN];
1726 // int index = ???;
1727 // ...
1728 // guard(0 <= index && index < LEN);
1729 // use(arr[index]);
1730 //
1731 // It is illegal to move the array access to any point above the guard,
1732 // because if the index is out of bounds we should deoptimize rather than
1733 // access the array.
1734 // Check that there is no guard in this block above our instruction.
1735 bool MustEnsureSafetyOfSpeculativeExecution =
1736 ICF->isDominatedByICFIFromSameBlock(Load);
1737
1738 while (TmpBB->getSinglePredecessor()) {
1739 TmpBB = TmpBB->getSinglePredecessor();
1740 if (TmpBB == LoadBB) // Infinite (unreachable) loop.
1741 return false;
1742 if (Blockers.count(TmpBB))
1743 return false;
1744
1745 // If any of these blocks has more than one successor (i.e. if the edge we
1746 // just traversed was critical), then there are other paths through this
1747 // block along which the load may not be anticipated. Hoisting the load
1748 // above this block would be adding the load to execution paths along
1749 // which it was not previously executed.
1750 if (TmpBB->getTerminator()->getNumSuccessors() != 1)
1751 return false;
1752
1753 // Check that there is no implicit control flow in a block above.
1754 MustEnsureSafetyOfSpeculativeExecution =
1755 MustEnsureSafetyOfSpeculativeExecution || ICF->hasICF(TmpBB);
1756 }
1757
1758 assert(TmpBB);
1759 LoadBB = TmpBB;
1760
1761 // Check to see how many predecessors have the loaded value fully
1762 // available.
1763 MapVector<BasicBlock *, Value *> PredLoads;
1764 DenseMap<BasicBlock *, AvailabilityState> FullyAvailableBlocks;
1765 for (const AvailableValueInBlock &AV : ValuesPerBlock)
1766 FullyAvailableBlocks[AV.BB] = AvailabilityState::Available;
1767 for (BasicBlock *UnavailableBB : UnavailableBlocks)
1768 FullyAvailableBlocks[UnavailableBB] = AvailabilityState::Unavailable;
1769
1770 // The edge from Pred to LoadBB is a critical edge will be splitted.
1771 SmallVector<BasicBlock *, 4> CriticalEdgePredSplit;
1772 // The edge from Pred to LoadBB is a critical edge, another successor of Pred
1773 // contains a load can be moved to Pred. This data structure maps the Pred to
1774 // the movable load.
1775 MapVector<BasicBlock *, LoadInst *> CriticalEdgePredAndLoad;
1776 for (BasicBlock *Pred : predecessors(LoadBB)) {
1777 // If any predecessor block is an EH pad that does not allow non-PHI
1778 // instructions before the terminator, we can't PRE the load.
1779 if (Pred->getTerminator()->isEHPad()) {
1780 LLVM_DEBUG(
1781 dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD PREDECESSOR '"
1782 << Pred->getName() << "': " << *Load << '\n');
1783 return false;
1784 }
1785
1786 if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) {
1787 continue;
1788 }
1789
1790 if (Pred->getTerminator()->getNumSuccessors() != 1) {
1791 if (isa<IndirectBrInst>(Pred->getTerminator())) {
1792 LLVM_DEBUG(
1793 dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
1794 << Pred->getName() << "': " << *Load << '\n');
1795 return false;
1796 }
1797
1798 if (LoadBB->isEHPad()) {
1799 LLVM_DEBUG(
1800 dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD CRITICAL EDGE '"
1801 << Pred->getName() << "': " << *Load << '\n');
1802 return false;
1803 }
1804
1805 // Do not split backedge as it will break the canonical loop form.
1807 if (DT->dominates(LoadBB, Pred)) {
1808 LLVM_DEBUG(
1809 dbgs()
1810 << "COULD NOT PRE LOAD BECAUSE OF A BACKEDGE CRITICAL EDGE '"
1811 << Pred->getName() << "': " << *Load << '\n');
1812 return false;
1813 }
1814
1815 if (LoadInst *LI = findLoadToHoistIntoPred(Pred, LoadBB, Load))
1816 CriticalEdgePredAndLoad[Pred] = LI;
1817 else
1818 CriticalEdgePredSplit.push_back(Pred);
1819 } else {
1820 // Only add the predecessors that will not be split for now.
1821 PredLoads[Pred] = nullptr;
1822 }
1823 }
1824
1825 // Decide whether PRE is profitable for this load.
1826 unsigned NumInsertPreds = PredLoads.size() + CriticalEdgePredSplit.size();
1827 unsigned NumUnavailablePreds = NumInsertPreds +
1828 CriticalEdgePredAndLoad.size();
1829 assert(NumUnavailablePreds != 0 &&
1830 "Fully available value should already be eliminated!");
1831 (void)NumUnavailablePreds;
1832
1833 // If we need to insert new load in multiple predecessors, reject it.
1834 // FIXME: If we could restructure the CFG, we could make a common pred with
1835 // all the preds that don't have an available Load and insert a new load into
1836 // that one block.
1837 if (NumInsertPreds > 1)
1838 return false;
1839
1840 // Now we know where we will insert load. We must ensure that it is safe
1841 // to speculatively execute the load at that points.
1842 if (MustEnsureSafetyOfSpeculativeExecution) {
1843 if (CriticalEdgePredSplit.size())
1844 if (!isSafeToSpeculativelyExecute(Load, &*LoadBB->getFirstNonPHIIt(), AC,
1845 DT))
1846 return false;
1847 for (auto &PL : PredLoads)
1848 if (!isSafeToSpeculativelyExecute(Load, PL.first->getTerminator(), AC,
1849 DT))
1850 return false;
1851 for (auto &CEP : CriticalEdgePredAndLoad)
1852 if (!isSafeToSpeculativelyExecute(Load, CEP.first->getTerminator(), AC,
1853 DT))
1854 return false;
1855 }
1856
1857 // Split critical edges, and update the unavailable predecessors accordingly.
1858 for (BasicBlock *OrigPred : CriticalEdgePredSplit) {
1859 BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB);
1860 assert(!PredLoads.count(OrigPred) && "Split edges shouldn't be in map!");
1861 PredLoads[NewPred] = nullptr;
1862 LLVM_DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->"
1863 << LoadBB->getName() << '\n');
1864 }
1865
1866 for (auto &CEP : CriticalEdgePredAndLoad)
1867 PredLoads[CEP.first] = nullptr;
1868
1869 // Check if the load can safely be moved to all the unavailable predecessors.
1870 bool CanDoPRE = true;
1871 const DataLayout &DL = Load->getDataLayout();
1872 SmallVector<Instruction*, 8> NewInsts;
1873 for (auto &PredLoad : PredLoads) {
1874 BasicBlock *UnavailablePred = PredLoad.first;
1875
1876 // Do PHI translation to get its value in the predecessor if necessary. The
1877 // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
1878 // We do the translation for each edge we skipped by going from Load's block
1879 // to LoadBB, otherwise we might miss pieces needing translation.
1880
1881 // If all preds have a single successor, then we know it is safe to insert
1882 // the load on the pred (?!?), so we can insert code to materialize the
1883 // pointer if it is not available.
1884 Value *LoadPtr = Load->getPointerOperand();
1885 BasicBlock *Cur = Load->getParent();
1886 while (Cur != LoadBB) {
1887 PHITransAddr Address(LoadPtr, DL, AC);
1888 LoadPtr = Address.translateWithInsertion(Cur, Cur->getSinglePredecessor(),
1889 *DT, NewInsts);
1890 if (!LoadPtr) {
1891 CanDoPRE = false;
1892 break;
1893 }
1894 Cur = Cur->getSinglePredecessor();
1895 }
1896
1897 if (LoadPtr) {
1898 PHITransAddr Address(LoadPtr, DL, AC);
1899 LoadPtr = Address.translateWithInsertion(LoadBB, UnavailablePred, *DT,
1900 NewInsts);
1901 }
1902 // If we couldn't find or insert a computation of this phi translated value,
1903 // we fail PRE.
1904 if (!LoadPtr) {
1905 LLVM_DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
1906 << *Load->getPointerOperand() << "\n");
1907 CanDoPRE = false;
1908 break;
1909 }
1910
1911 PredLoad.second = LoadPtr;
1912 }
1913
1914 if (!CanDoPRE) {
1915 while (!NewInsts.empty()) {
1916 // Erase instructions generated by the failed PHI translation before
1917 // trying to number them. PHI translation might insert instructions
1918 // in basic blocks other than the current one, and we delete them
1919 // directly, as salvageAndRemoveInstruction only allows removing from the
1920 // current basic block.
1921 NewInsts.pop_back_val()->eraseFromParent();
1922 }
1923 // HINT: Don't revert the edge-splitting as following transformation may
1924 // also need to split these critical edges.
1925 return !CriticalEdgePredSplit.empty();
1926 }
1927
1928 // Okay, we can eliminate this load by inserting a reload in the predecessor
1929 // and using PHI construction to get the value in the other predecessors, do
1930 // it.
1931 LLVM_DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *Load << '\n');
1932 LLVM_DEBUG(if (!NewInsts.empty()) dbgs() << "INSERTED " << NewInsts.size()
1933 << " INSTS: " << *NewInsts.back()
1934 << '\n');
1935
1936 // Assign value numbers to the new instructions.
1937 for (Instruction *I : NewInsts) {
1938 // Instructions that have been inserted in predecessor(s) to materialize
1939 // the load address do not retain their original debug locations. Doing
1940 // so could lead to confusing (but correct) source attributions.
1941 I->updateLocationAfterHoist();
1942
1943 // FIXME: We really _ought_ to insert these value numbers into their
1944 // parent's availability map. However, in doing so, we risk getting into
1945 // ordering issues. If a block hasn't been processed yet, we would be
1946 // marking a value as AVAIL-IN, which isn't what we intend.
1947 VN.lookupOrAdd(I);
1948 }
1949
1950 eliminatePartiallyRedundantLoad(Load, ValuesPerBlock, PredLoads,
1951 &CriticalEdgePredAndLoad);
1952 ++NumPRELoad;
1953 return true;
1954}
1955
1956bool GVNPass::performLoopLoadPRE(LoadInst *Load,
1957 AvailValInBlkVect &ValuesPerBlock,
1958 UnavailBlkVect &UnavailableBlocks) {
1959 const Loop *L = LI->getLoopFor(Load->getParent());
1960 // TODO: Generalize to other loop blocks that dominate the latch.
1961 if (!L || L->getHeader() != Load->getParent())
1962 return false;
1963
1964 BasicBlock *Preheader = L->getLoopPreheader();
1965 BasicBlock *Latch = L->getLoopLatch();
1966 if (!Preheader || !Latch)
1967 return false;
1968
1969 Value *LoadPtr = Load->getPointerOperand();
1970 // Must be available in preheader.
1971 if (!L->isLoopInvariant(LoadPtr))
1972 return false;
1973
1974 // We plan to hoist the load to preheader without introducing a new fault.
1975 // In order to do it, we need to prove that we cannot side-exit the loop
1976 // once loop header is first entered before execution of the load.
1977 if (ICF->isDominatedByICFIFromSameBlock(Load))
1978 return false;
1979
1980 BasicBlock *LoopBlock = nullptr;
1981 for (auto *Blocker : UnavailableBlocks) {
1982 // Blockers from outside the loop are handled in preheader.
1983 if (!L->contains(Blocker))
1984 continue;
1985
1986 // Only allow one loop block. Loop header is not less frequently executed
1987 // than each loop block, and likely it is much more frequently executed. But
1988 // in case of multiple loop blocks, we need extra information (such as block
1989 // frequency info) to understand whether it is profitable to PRE into
1990 // multiple loop blocks.
1991 if (LoopBlock)
1992 return false;
1993
1994 // Do not sink into inner loops. This may be non-profitable.
1995 if (L != LI->getLoopFor(Blocker))
1996 return false;
1997
1998 // Blocks that dominate the latch execute on every single iteration, maybe
1999 // except the last one. So PREing into these blocks doesn't make much sense
2000 // in most cases. But the blocks that do not necessarily execute on each
2001 // iteration are sometimes much colder than the header, and this is when
2002 // PRE is potentially profitable.
2003 if (DT->dominates(Blocker, Latch))
2004 return false;
2005
2006 // Make sure that the terminator itself doesn't clobber.
2007 if (Blocker->getTerminator()->mayWriteToMemory())
2008 return false;
2009
2010 LoopBlock = Blocker;
2011 }
2012
2013 if (!LoopBlock)
2014 return false;
2015
2016 // Make sure the memory at this pointer cannot be freed, therefore we can
2017 // safely reload from it after clobber.
2018 if (LoadPtr->canBeFreed())
2019 return false;
2020
2021 // TODO: Support critical edge splitting if blocker has more than 1 successor.
2022 MapVector<BasicBlock *, Value *> AvailableLoads;
2023 AvailableLoads[LoopBlock] = LoadPtr;
2024 AvailableLoads[Preheader] = LoadPtr;
2025
2026 LLVM_DEBUG(dbgs() << "GVN REMOVING PRE LOOP LOAD: " << *Load << '\n');
2027 eliminatePartiallyRedundantLoad(Load, ValuesPerBlock, AvailableLoads,
2028 /*CriticalEdgePredAndLoad*/ nullptr);
2029 ++NumPRELoopLoad;
2030 return true;
2031}
2032
2035 using namespace ore;
2036
2037 ORE->emit([&]() {
2038 return OptimizationRemark(DEBUG_TYPE, "LoadElim", Load)
2039 << "load of type " << NV("Type", Load->getType()) << " eliminated"
2040 << setExtraArgs() << " in favor of "
2041 << NV("InfavorOfValue", AvailableValue);
2042 });
2043}
2044
2045/// Attempt to eliminate a load whose dependencies are
2046/// non-local by performing PHI construction.
2047bool GVNPass::processNonLocalLoad(LoadInst *Load) {
2048 // Non-local speculations are not allowed under asan.
2049 if (Load->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
2050 Load->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress))
2051 return false;
2052
2053 // Find the non-local dependencies of the load.
2054 LoadDepVect Deps;
2055 MD->getNonLocalPointerDependency(Load, Deps);
2056
2057 // If we had to process more than one hundred blocks to find the
2058 // dependencies, this load isn't worth worrying about. Optimizing
2059 // it will be too expensive.
2060 unsigned NumDeps = Deps.size();
2061 if (NumDeps > MaxNumDeps)
2062 return false;
2063
2065 MemVals.reserve(Deps.size());
2066
2067 for (const NonLocalDepResult &Dep : Deps) {
2068 const auto &R = Dep.getResult();
2069 SelectAddr SelAddr = Dep.getAddress();
2070 BasicBlock *BB = Dep.getBB();
2071 Instruction *Inst = R.getInst();
2072 if (R.isSelect()) {
2073 auto [Cond, Addrs] = SelAddr.getSelectCondAndAddrs();
2074 MemVals.emplace_back(
2075 ReachingMemVal::getSelect(BB, Cond, Addrs.first, Addrs.second));
2076 continue;
2077 }
2078 Value *Address = SelAddr.getAddr();
2079 if (R.isClobber())
2080 MemVals.emplace_back(ReachingMemVal::getClobber(Address, Inst));
2081 else if (R.isDef())
2082 MemVals.emplace_back(ReachingMemVal::getDef(Address, Inst));
2083 else
2084 MemVals.emplace_back(ReachingMemVal::getUnknown(BB, Address, Inst));
2085 }
2086
2087 return processNonLocalLoad(Load, MemVals);
2088}
2089
2090bool GVNPass::processNonLocalLoad(LoadInst *Load,
2091 SmallVectorImpl<ReachingMemVal> &Deps) {
2092 // If we had a phi translation failure, we'll have a single entry which is a
2093 // clobber in the current block. Reject this early.
2094 if (Deps.size() == 1 && Deps[0].Kind == DepKind::Other) {
2095 LLVM_DEBUG(dbgs() << "GVN: non-local load "; Load->printAsOperand(dbgs());
2096 dbgs() << " has unknown dependencies\n";);
2097 return false;
2098 }
2099
2100 bool Changed = false;
2101 // This is a limited form of scalar PRE for load indices. If this load follows
2102 // a GEP, see if we can PRE the indices before analyzing.
2103 if (isScalarPREEnabled()) {
2104 if (GetElementPtrInst *GEP =
2105 dyn_cast<GetElementPtrInst>(Load->getOperand(0))) {
2106 for (Use &U : GEP->indices())
2107 if (Instruction *I = dyn_cast<Instruction>(U.get()))
2108 Changed |= performScalarPRE(I);
2109 }
2110 }
2111
2112 // Step 1: Analyze the availability of the load.
2113 AvailValInBlkVect ValuesPerBlock;
2114 UnavailBlkVect UnavailableBlocks;
2115 AnalyzeLoadAvailability(Load, Deps, ValuesPerBlock, UnavailableBlocks);
2116
2117 // If we have no predecessors that produce a known value for this load, exit
2118 // early.
2119 if (ValuesPerBlock.empty())
2120 return Changed;
2121
2122 // Step 2: Eliminate fully redundancy.
2123 //
2124 // If all of the instructions we depend on produce a known value for this
2125 // load, then it is fully redundant and we can use PHI insertion to compute
2126 // its value. Insert PHIs and remove the fully redundant value now.
2127 if (UnavailableBlocks.empty()) {
2128 LLVM_DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *Load << '\n');
2129
2130 // Perform PHI construction.
2131 Value *V = ConstructSSAForLoadSet(Load, ValuesPerBlock, *this);
2132 // ConstructSSAForLoadSet is responsible for combining metadata.
2133 ICF->removeUsersOf(Load);
2134 Load->replaceAllUsesWith(V);
2135
2136 if (isa<PHINode>(V))
2137 V->takeName(Load);
2138 if (Instruction *I = dyn_cast<Instruction>(V))
2139 // If instruction I has debug info, then we should not update it.
2140 // Also, if I has a null DebugLoc, then it is still potentially incorrect
2141 // to propagate Load's DebugLoc because Load may not post-dominate I.
2142 if (Load->getDebugLoc() && Load->getParent() == I->getParent())
2143 I->setDebugLoc(Load->getDebugLoc());
2144 if (MD && V->getType()->isPtrOrPtrVectorTy())
2145 MD->invalidateCachedPointerInfo(V);
2146 ++NumGVNLoad;
2147 reportLoadElim(Load, V, ORE);
2149 return true;
2150 }
2151
2152 // Step 3: Eliminate partial redundancy.
2153 if (!isLoadPREEnabled())
2154 return Changed;
2155 if (!isLoadInLoopPREEnabled() && LI->getLoopFor(Load->getParent()))
2156 return Changed;
2157
2158 if (performLoopLoadPRE(Load, ValuesPerBlock, UnavailableBlocks) ||
2159 PerformLoadPRE(Load, ValuesPerBlock, UnavailableBlocks))
2160 return true;
2161
2162 return Changed;
2163}
2164
2165bool GVNPass::processAssumeIntrinsic(AssumeInst *IntrinsicI) {
2166 Value *V = IntrinsicI->getArgOperand(0);
2167
2168 if (ConstantInt *Cond = dyn_cast<ConstantInt>(V)) {
2169 if (Cond->isZero()) {
2170 Type *Int8Ty = Type::getInt8Ty(V->getContext());
2171 Type *PtrTy = PointerType::get(V->getContext(), 0);
2172 // Insert a new store to null instruction before the load to indicate that
2173 // this code is not reachable. FIXME: We could insert unreachable
2174 // instruction directly because we can modify the CFG.
2175 auto *NewS =
2176 new StoreInst(PoisonValue::get(Int8Ty), Constant::getNullValue(PtrTy),
2177 IntrinsicI->getIterator());
2178 if (MSSAU) {
2179 const MemoryUseOrDef *FirstNonDom = nullptr;
2180 const auto *AL =
2181 MSSAU->getMemorySSA()->getBlockAccesses(IntrinsicI->getParent());
2182
2183 // If there are accesses in the current basic block, find the first one
2184 // that does not come before NewS. The new memory access is inserted
2185 // after the found access or before the terminator if no such access is
2186 // found.
2187 if (AL) {
2188 for (const auto &Acc : *AL) {
2189 if (auto *Current = dyn_cast<MemoryUseOrDef>(&Acc))
2190 if (!Current->getMemoryInst()->comesBefore(NewS)) {
2191 FirstNonDom = Current;
2192 break;
2193 }
2194 }
2195 }
2196
2197 auto *NewDef =
2198 FirstNonDom ? MSSAU->createMemoryAccessBefore(
2199 NewS, nullptr,
2200 const_cast<MemoryUseOrDef *>(FirstNonDom))
2201 : MSSAU->createMemoryAccessInBB(
2202 NewS, nullptr,
2203 NewS->getParent(), MemorySSA::BeforeTerminator);
2204
2205 MSSAU->insertDef(cast<MemoryDef>(NewDef), /*RenameUses=*/false);
2206 }
2207 }
2208 if (isAssumeWithEmptyBundle(*IntrinsicI)) {
2209 salvageAndRemoveInstruction(IntrinsicI);
2210 return true;
2211 }
2212 return false;
2213 }
2214
2215 if (isa<Constant>(V)) {
2216 // If it's not false, and constant, it must evaluate to true. This means our
2217 // assume is assume(true), and thus, pointless, and we don't want to do
2218 // anything more here.
2219 return false;
2220 }
2221
2222 Constant *True = ConstantInt::getTrue(V->getContext());
2223 return propagateEquality(V, True, IntrinsicI);
2224}
2225
2228 I->replaceAllUsesWith(Repl);
2229}
2230
2231/// If a load has !invariant.group, try to find the most-dominating instruction
2232/// with the same metadata and equivalent pointer (modulo bitcasts and zero
2233/// GEPs). If one is found that dominates the load, its value can be reused.
2235 Value *PointerOperand = L->getPointerOperand()->stripPointerCasts();
2236
2237 // It's not safe to walk the use list of a global value because function
2238 // passes aren't allowed to look outside their functions.
2239 // FIXME: this could be fixed by filtering instructions from outside of
2240 // current function.
2241 if (isa<Constant>(PointerOperand))
2242 return nullptr;
2243
2244 // Queue to process all pointers that are equivalent to load operand.
2245 SmallVector<Value *, 8> PointerUsesQueue;
2246 PointerUsesQueue.push_back(PointerOperand);
2247
2248 Instruction *MostDominatingInstruction = L;
2249
2250 // FIXME: This loop is potentially O(n^2) due to repeated dominates checks.
2251 while (!PointerUsesQueue.empty()) {
2252 Value *Ptr = PointerUsesQueue.pop_back_val();
2253 assert(Ptr && !isa<GlobalValue>(Ptr) &&
2254 "Null or GlobalValue should not be inserted");
2255
2256 for (User *U : Ptr->users()) {
2257 auto *I = dyn_cast<Instruction>(U);
2258 if (!I || I == L || !DT.dominates(I, MostDominatingInstruction))
2259 continue;
2260
2261 // Add bitcasts and zero GEPs to queue.
2262 // TODO: Should drop bitcast?
2263 if (isa<BitCastInst>(I) ||
2265 cast<GetElementPtrInst>(I)->hasAllZeroIndices())) {
2266 PointerUsesQueue.push_back(I);
2267 continue;
2268 }
2269
2270 // If we hit a load/store with an invariant.group metadata and the same
2271 // pointer operand, we can assume that value pointed to by the pointer
2272 // operand didn't change.
2273 if (I->hasMetadata(LLVMContext::MD_invariant_group) &&
2274 Ptr == getLoadStorePointerOperand(I) && !I->isVolatile())
2275 MostDominatingInstruction = I;
2276 }
2277 }
2278
2279 return MostDominatingInstruction != L ? MostDominatingInstruction : nullptr;
2280}
2281
2282/// Return the memory location accessed by the (masked) load/store instruction
2283/// `I`, if the instruction could potentially provide a useful value for
2284/// eliminating the load.
2285static std::optional<MemoryLocation>
2287 const TargetLibraryInfo *TLI) {
2288 if (auto *LI = dyn_cast<LoadInst>(I))
2289 return MemoryLocation::get(LI);
2290
2291 if (auto *II = dyn_cast<IntrinsicInst>(I)) {
2292 switch (II->getIntrinsicID()) {
2293 case Intrinsic::masked_load:
2294 return MemoryLocation::getForArgument(II, 0, TLI);
2295 case Intrinsic::masked_store:
2296 if (AllowStores)
2297 return MemoryLocation::getForArgument(II, 1, TLI);
2298 return std::nullopt;
2299 default:
2300 break;
2301 }
2302 }
2303
2304 if (!AllowStores)
2305 return std::nullopt;
2306
2307 if (auto *SI = dyn_cast<StoreInst>(I))
2308 return MemoryLocation::get(SI);
2309 return std::nullopt;
2310}
2311
2312/// Scan the users of each MemoryAccess in `ClobbersList` that belong to `BB`,
2313/// looking for memory reads whose location aliases `Loc` and dominates our
2314/// load.
2315std::optional<GVNPass::ReachingMemVal> GVNPass::scanMemoryAccessesUsers(
2316 const MemoryLocation &Loc, bool IsInvariantLoad, BasicBlock *BB,
2317 const SmallVectorImpl<MemoryAccess *> &ClobbersList, MemorySSA &MSSA,
2318 BatchAAResults &AA, LoadInst *L) {
2319
2320 // Prefer a candidate that is closer to the load within the same block.
2321 auto UpdateChoice = [&](std::optional<ReachingMemVal> &Choice,
2322 AliasResult &AR, Instruction *Candidate) {
2323 if (!Choice) {
2324 if (AR == AliasResult::PartialAlias)
2325 Choice = ReachingMemVal::getClobber(Loc.Ptr, Candidate, AR.getOffset());
2326 else
2327 Choice = ReachingMemVal::getDef(Loc.Ptr, Candidate);
2328 return;
2329 }
2330 if (!MSSA.locallyDominates(MSSA.getMemoryAccess(Choice->Inst),
2331 MSSA.getMemoryAccess(Candidate)))
2332 return;
2333
2334 if (AR == AliasResult::PartialAlias) {
2335 Choice->Kind = DepKind::Clobber;
2336 Choice->Offset = AR.getOffset();
2337 } else {
2338 Choice->Kind = DepKind::Def;
2339 Choice->Offset = -1;
2340 }
2341
2342 Choice->Inst = Candidate;
2343 Choice->Block = Candidate->getParent();
2344 };
2345
2346 std::optional<ReachingMemVal> ReachingVal;
2347 for (MemoryAccess *MA : ClobbersList) {
2348 unsigned Scanned = 0;
2349 for (User *U : MA->users()) {
2350 if (++Scanned >= ScanUsersLimit)
2351 return ReachingMemVal::getUnknown(BB, Loc.Ptr);
2352
2353 auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U);
2354 if (!UseOrDef || UseOrDef->getBlock() != BB)
2355 continue;
2356
2357 Instruction *MemI = UseOrDef->getMemoryInst();
2358 if (MemI == L ||
2359 (L && !MSSA.locallyDominates(UseOrDef, MSSA.getMemoryAccess(L))))
2360 continue;
2361
2362 if (auto MaybeLoc = maybeLoadStoreLocation(MemI, IsInvariantLoad, TLI)) {
2363 AliasResult AR = AA.alias(*MaybeLoc, Loc);
2364 // If the locations do not certainly alias, we cannot possibly infer the
2365 // following load loads the same value.
2367 continue;
2368
2369 // Locations partially overlap, but neither is a subset of the other, or
2370 // the second location is before the first.
2371 if (AR == AliasResult::PartialAlias &&
2372 (!AR.hasOffset() || AR.getOffset() < 0))
2373 continue;
2374
2375 // Found candidate, the new load memory location and the given location
2376 // must alias: precise overlap, or subset with non-negative offset.
2377 UpdateChoice(ReachingVal, AR, MemI);
2378 }
2379 }
2380 if (ReachingVal)
2381 break;
2382 }
2383
2384 return ReachingVal;
2385}
2386
2387/// Check if a given MemoryAccess (usually a MemoryDef) actually modifies a
2388/// given location. Returns a ReachingMemVal describing the dependency.
2389std::optional<GVNPass::ReachingMemVal> GVNPass::accessMayModifyLocation(
2390 MemoryAccess *ClobberMA, const MemoryLocation &Loc, bool IsInvariantLoad,
2391 BasicBlock *BB, MemorySSA &MSSA, BatchAAResults &AA) {
2392 assert(ClobberMA->getBlock() == BB);
2393
2394 // If the clobbering access is the entry memory state, we cannot say anything
2395 // about the content of the memory, except when we are accessing a local
2396 // object, which can be turned later into producing `undef`.
2397 if (MSSA.isLiveOnEntryDef(ClobberMA)) {
2399 if (Alloc->getParent() == BB)
2400 return ReachingMemVal::getDef(Loc.Ptr, const_cast<AllocaInst *>(Alloc));
2401 return ReachingMemVal::getUnknown(BB, Loc.Ptr);
2402 }
2403
2404 // Loads from "constant" memory can't be clobbered.
2405 if (IsInvariantLoad || AA.pointsToConstantMemory(Loc))
2406 return std::nullopt;
2407
2408 auto GetOrdering = [](const Instruction *I) {
2409 if (auto *L = dyn_cast<LoadInst>(I))
2410 return L->getOrdering();
2411 return cast<StoreInst>(I)->getOrdering();
2412 };
2413 Instruction *ClobberI = cast<MemoryDef>(ClobberMA)->getMemoryInst();
2414
2415 // Check if the clobbering access is a load or a store that we can reuse.
2416 if (auto MaybeLoc = maybeLoadStoreLocation(ClobberI, true, TLI)) {
2417 AliasResult AR = AA.alias(*MaybeLoc, Loc);
2418 if (AR == AliasResult::MustAlias)
2419 return ReachingMemVal::getDef(Loc.Ptr, ClobberI);
2420
2421 if (AR == AliasResult::NoAlias) {
2422 // If the locations do not alias we may still be able to skip over the
2423 // clobbering instruction, even if it is atomic.
2424 // The original load is either non-atomic or unordered. We can reorder
2425 // these across non-atomic, unordered or monotonic loads or across any
2426 // store.
2427 if (!ClobberI->isAtomic() ||
2428 !isStrongerThan(GetOrdering(ClobberI), AtomicOrdering::Monotonic) ||
2429 isa<StoreInst>(ClobberI))
2430 return std::nullopt;
2431 return ReachingMemVal::getClobber(Loc.Ptr, ClobberI);
2432 }
2433
2434 // Skip over volatile loads (the original load is non-volatile, non-atomic).
2435 if (!ClobberI->isAtomic() && isa<LoadInst>(ClobberI))
2436 return std::nullopt;
2437
2438 if (AR == AliasResult::MayAlias ||
2440 (!AR.hasOffset() || AR.getOffset() < 0)))
2441 return ReachingMemVal::getClobber(Loc.Ptr, ClobberI);
2442
2443 // The only option left is a store of the superset of the required bits.
2445 AR.getOffset() > 0 &&
2446 "Must be the superset/partial overlap case with positive offset");
2447 return ReachingMemVal::getClobber(Loc.Ptr, ClobberI, AR.getOffset());
2448 }
2449
2450 if (auto *II = dyn_cast<IntrinsicInst>(ClobberI)) {
2452 return std::nullopt;
2453 if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
2454 MemoryLocation IIObjLoc = MemoryLocation::getForArgument(II, 0, TLI);
2455 if (AA.isMustAlias(IIObjLoc, Loc))
2456 return ReachingMemVal::getDef(Loc.Ptr, ClobberI);
2457 return std::nullopt;
2458 }
2459 }
2460
2461 // If we are at a malloc-like function call, we can turn the load into `undef`
2462 // or zero.
2463 if (isNoAliasCall(ClobberI)) {
2464 const Value *Obj = getUnderlyingObject(Loc.Ptr);
2465 if (Obj == ClobberI || AA.isMustAlias(ClobberI, Loc.Ptr))
2466 return ReachingMemVal::getDef(Loc.Ptr, ClobberI);
2467 }
2468
2469 // Can reorder loads across a release fence.
2470 if (auto *FI = dyn_cast<FenceInst>(ClobberI))
2471 if (FI->getOrdering() == AtomicOrdering::Release)
2472 return std::nullopt;
2473
2474 // See if the clobber instruction (e.g., a generic call) may modify the
2475 // location.
2476 ModRefInfo MR = AA.getModRefInfo(ClobberI, Loc);
2477 // If may modify the location, analyze deeper, to exclude accesses to
2478 // non-escaping local allocations.
2479 if (MR == ModRefInfo::NoModRef || MR == ModRefInfo::Ref)
2480 return std::nullopt;
2481
2482 // Conservatively assume the clobbering memory access may overwrite the
2483 // location.
2484 return ReachingMemVal::getClobber(Loc.Ptr, ClobberI);
2485}
2486
2487/// Collect the predecessors of block, while doing phi-translation of the memory
2488/// address and the memory clobber. Return false if the block should be marked
2489/// as clobbering the memory location in an unknown way.
2490bool GVNPass::collectPredecessors(BasicBlock *BB, const PHITransAddr &Addr,
2491 MemoryAccess *ClobberMA,
2492 DependencyBlockSet &Blocks,
2493 SmallVectorImpl<BasicBlock *> &Worklist) {
2494 if (Addr.needsPHITranslationFromBlock(BB) &&
2496 return false;
2497
2498 auto *MPhi =
2499 ClobberMA->getBlock() == BB ? dyn_cast<MemoryPhi>(ClobberMA) : nullptr;
2501 for (BasicBlock *Pred : predecessors(BB)) {
2502 // Skip unreachable predecessors.
2503 if (!DT->isReachableFromEntry(Pred))
2504 continue;
2505
2506 // Skip already visited predecessors.
2507 if (llvm::any_of(Preds, [Pred](const auto &P) { return P.first == Pred; }))
2508 continue;
2509
2510 PHITransAddr TransAddr = Addr;
2511 if (TransAddr.needsPHITranslationFromBlock(BB))
2512 TransAddr.translateValue(BB, Pred, DT, false);
2513
2514 auto It = Blocks.find(Pred);
2515 if (It != Blocks.end()) {
2516 // If we reach a visited block with a different address, set the
2517 // current block as clobbering the memory location in an unknown way
2518 // (by returning false).
2519 if (It->second.Addr.getAddr() != TransAddr.getAddr())
2520 return false;
2521 // Otherwise, just stop the traversal.
2522 continue;
2523 }
2524
2525 Preds.emplace_back(
2526 Pred, DependencyBlockInfo(TransAddr,
2527 MPhi ? MPhi->getIncomingValueForBlock(Pred)
2528 : ClobberMA));
2529 }
2530
2531 // We collected the predecessors and stored them in Preds. Now, populate the
2532 // worklist with the predecessors found, and cache the eventual translated
2533 // address for each block.
2534 for (auto &P : Preds) {
2535 [[maybe_unused]] auto It =
2536 Blocks.try_emplace(P.first, std::move(P.second)).first;
2537 Worklist.push_back(P.first);
2538 }
2539
2540 return true;
2541}
2542
2543/// Build a list of MemoryAccesses whose users could potentially alias the
2544/// memory location being queried. Starts from StartInfo's initial clobber,
2545/// walk the use-def chain to the final clobber. If the chain extends beyond
2546/// `BB`, continue into that block but only if it is in the previously collected
2547/// set.
2548void GVNPass::collectClobberList(SmallVectorImpl<MemoryAccess *> &Clobbers,
2549 BasicBlock *BB,
2550 const DependencyBlockInfo &StartInfo,
2551 const DependencyBlockSet &Blocks,
2552 MemorySSA &MSSA) {
2553 MemoryAccess *MA = StartInfo.InitialClobberMA;
2554 MemoryAccess *LastMA = StartInfo.ClobberMA;
2555
2556 for (;;) {
2557 while (MA != LastMA) {
2558 Clobbers.push_back(MA);
2559 MA = cast<MemoryUseOrDef>(MA)->getDefiningAccess();
2560 }
2561 Clobbers.push_back(MA);
2562
2563 if (MSSA.isLiveOnEntryDef(MA) ||
2564 (MA->getBlock() == BB && !isa<MemoryPhi>(MA)))
2565 break;
2566
2567 // If the final clobber in the current block is a MemoryPhi, go to the
2568 // immediate dominator; otherwise, just get to the block containing the
2569 // final clobber.
2570 if (MA->getBlock() == BB)
2571 BB = DT->getNode(BB)->getIDom()->getBlock();
2572 else
2573 BB = MA->getBlock();
2574
2575 auto It = Blocks.find(BB);
2576 if (It == Blocks.end())
2577 break;
2578
2579 MA = It->second.InitialClobberMA;
2580 LastMA = It->second.ClobberMA;
2581 if (MA == Clobbers.back())
2582 Clobbers.pop_back();
2583 }
2584}
2585
2586/// Entrypoint for the MemorySSA-based redundant load elimination algorithm.
2587/// Given as input a load instruction, the function computes the set of reaching
2588/// memory values, one per predecessor path, that AnalyzeLoadAvailability can
2589/// later use to establish whether the load may be eliminated. A reaching value
2590/// may be of the following descriptor kind:
2591/// * Def: a precise instruction that produces the exact bits the load would
2592/// read (e.g., an equivalent load or a MustAlias store);
2593/// * Clobber: a write that clobbers a superset of the bits the load would read
2594/// (e.g., a memset over a larger region);
2595/// * Other: we know which block defines the memory location in some way, but
2596/// could not identify a precise instruction (e.g., memory already live at
2597/// function entry).
2598bool GVNPass::findReachingValuesForLoad(LoadInst *L,
2599 SmallVectorImpl<ReachingMemVal> &Values,
2600 MemorySSA &MSSA, AAResults &AAR) {
2601 EarliestEscapeAnalysis EA(*DT, LI);
2602 BatchAAResults AA(AAR, &EA);
2603 BasicBlock *StartBlock = L->getParent();
2604 bool IsInvariantLoad = L->hasMetadata(LLVMContext::MD_invariant_load);
2605 // TODO: Simplify later work by just getClobberingMemoryAccess().
2606 MemoryAccess *ClobberMA = MSSA.getMemoryAccess(L)->getDefiningAccess();
2607 const MemoryLocation Loc = MemoryLocation::get(L);
2608
2609 // Fast path for load tagged with !invariant.group.
2610 if (L->hasMetadata(LLVMContext::MD_invariant_group)) {
2611 if (Instruction *G = findInvariantGroupValue(L, *DT)) {
2612 Values.emplace_back(
2613 ReachingMemVal::getDef(getLoadStorePointerOperand(G), G));
2614 return true;
2615 }
2616 }
2617
2618 // Phase 1. First off, look for a local dependency to avoid having to
2619 // disambiguate between before the load and after the load of the starting
2620 // block (as the load may be visited from a backedge).
2621 do {
2622 // Scan users of the clobbering memory access.
2623 if (auto RMV = scanMemoryAccessesUsers(
2624 Loc, IsInvariantLoad, StartBlock,
2625 SmallVector<MemoryAccess *, 1>{ClobberMA}, MSSA, AA, L)) {
2626 Values.emplace_back(*RMV);
2627 return true;
2628 }
2629
2630 // Exit from here, and proceed visiting predecessors if the clobbering
2631 // access is non-local or is a MemoryPhi.
2632 if (ClobberMA->getBlock() != StartBlock || isa<MemoryPhi>(ClobberMA))
2633 break;
2634
2635 // Check if the clobber actually aliases the load location.
2636 if (auto RMV = accessMayModifyLocation(ClobberMA, Loc, IsInvariantLoad,
2637 StartBlock, MSSA, AA)) {
2638 Values.emplace_back(*RMV);
2639 return true;
2640 }
2641
2642 // It may happen that the clobbering memory access does not actually
2643 // clobber our load location, transition to its defining memory access.
2644 ClobberMA = cast<MemoryUseOrDef>(ClobberMA)->getDefiningAccess();
2645 } while (ClobberMA->getBlock() == StartBlock);
2646
2647 // Non-local speculations are not allowed under ASan.
2648 if (L->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
2649 L->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress))
2650 return false;
2651
2652 // Phase 2. Walk backwards through the CFG, collecting all the blocks that
2653 // contain an instruction that modifies the load memory location, or that lie
2654 // on a path between a clobbering block and our load. Start off by collecting
2655 // the predecessors of `StartBlock`. All the visited blocks are stored in a
2656 // the set `Blocks`. If possible, the memory address maintained for the block
2657 // visited does get phi-translated.
2658 DependencyBlockSet Blocks;
2659 SmallVector<BasicBlock *, 16> InitialWorklist;
2660 const DataLayout &DL = L->getModule()->getDataLayout();
2661 if (!collectPredecessors(StartBlock,
2662 PHITransAddr(L->getPointerOperand(), DL, AC),
2663 ClobberMA, Blocks, InitialWorklist))
2664 return false;
2665
2666 // Do a bottom-up DFS.
2667 auto Worklist = InitialWorklist;
2668 while (!Worklist.empty()) {
2669 auto *BB = Worklist.pop_back_val();
2670 DependencyBlockInfo &Info = Blocks.find(BB)->second;
2671
2672 // Phi-translation may have failed.
2673 if (!Info.Addr.getAddr())
2674 continue;
2675
2676 // If the clobbering memory access is in the current block and it indeed
2677 // clobbers our load location, record the dependency and do not visit the
2678 // predecessors of this block further, continue with the blocks in the
2679 // worklist.
2680 if (Info.ClobberMA->getBlock() == BB && !isa<MemoryPhi>(Info.ClobberMA)) {
2681 if (auto RMV = accessMayModifyLocation(
2682 Info.ClobberMA, Loc.getWithNewPtr(Info.Addr.getAddr()),
2683 IsInvariantLoad, BB, MSSA, AA)) {
2684 Info.MemVal = RMV;
2685 continue;
2686 }
2687 assert(!MSSA.isLiveOnEntryDef(Info.ClobberMA) &&
2688 "LiveOnEntry aliases everything");
2689
2690 // If, however, the clobbering memory access does not actually clobber
2691 // our load location, transition to its defining memory access, but
2692 // keep examining the same basic block.
2693 Info.ClobberMA =
2694 cast<MemoryUseOrDef>(Info.ClobberMA)->getDefiningAccess();
2695 Worklist.emplace_back(BB);
2696 continue;
2697 }
2698
2699 // At this point we know the current block is "transparent", i.e. the memory
2700 // location is not modified when execution goes through this block.
2701 // Continue to its predecessors, unless a predecessor has already been
2702 // visited with a different address. We currently cannot represent such a
2703 // dependency.
2704 if (BB == StartBlock && Info.Addr.getAddr() != L->getPointerOperand()) {
2705 Info.ForceUnknown = true;
2706 continue;
2707 }
2708 if (BB != StartBlock &&
2709 !collectPredecessors(BB, Info.Addr, Info.ClobberMA, Blocks, Worklist))
2710 Info.ForceUnknown = true;
2711 }
2712
2713 // Phase 3. We have collected all the blocks that either write a value to the
2714 // memory location of the load, or there exists a path to the load, along
2715 // which the memory location is not modified. Perform a second DFS to find
2716 // load-to-load dependencies; namely, look at the dominating memory reads,
2717 // that alias our load. These are the MemoryUses that are users of the
2718 // MemoryDefs we previously identified. If no memory read is encountered,
2719 // either confirm the clobbering write found before or set to unknown.
2720 Worklist = InitialWorklist;
2721 for (BasicBlock *BB : Worklist) {
2722 DependencyBlockInfo &Info = Blocks.find(BB)->second;
2723 Info.Visited = true;
2724 }
2725
2727 while (!Worklist.empty()) {
2728 auto *BB = Worklist.pop_back_val();
2729 DependencyBlockInfo &Info = Blocks.find(BB)->second;
2730
2731 // If phi-translation failed, assume the memory location is modified in
2732 // unknown way.
2733 if (!Info.Addr.getAddr()) {
2734 Values.push_back(ReachingMemVal::getUnknown(BB, nullptr));
2735 continue;
2736 }
2737
2738 Clobbers.clear();
2739 collectClobberList(Clobbers, BB, Info, Blocks, MSSA);
2740 if (auto RMV =
2741 scanMemoryAccessesUsers(Loc.getWithNewPtr(Info.Addr.getAddr()),
2742 IsInvariantLoad, BB, Clobbers, MSSA, AA)) {
2743 Values.push_back(*RMV);
2744 continue;
2745 }
2746
2747 // If no reusable memory use was found, and the current block is not
2748 // transparent, use the already established memory def.
2749 if (Info.MemVal) {
2750 Values.push_back(*Info.MemVal);
2751 continue;
2752 }
2753
2754 if (Info.ForceUnknown) {
2755 Values.push_back(ReachingMemVal::getUnknown(BB, Info.Addr.getAddr()));
2756 continue;
2757 }
2758
2759 // If the current block is transparent, continue to its predecessors.
2760 for (BasicBlock *Pred : predecessors(BB)) {
2761 auto It = Blocks.find(Pred);
2762 if (It == Blocks.end())
2763 continue;
2764 DependencyBlockInfo &PredInfo = It->second;
2765 if (PredInfo.Visited)
2766 continue;
2767 PredInfo.Visited = true;
2768 Worklist.push_back(Pred);
2769 }
2770 }
2771
2772 return true;
2773}
2774
2775/// Attempt to eliminate a load, first by eliminating it
2776/// locally, and then attempting non-local elimination if that fails.
2777bool GVNPass::processLoad(LoadInst *L) {
2778 if (!MD && !isMemorySSAEnabled())
2779 return false;
2780
2781 // This code hasn't been audited for ordered or volatile memory access.
2782 if (!L->isUnordered())
2783 return false;
2784
2785 if (L->getType()->isTokenLikeTy())
2786 return false;
2787
2788 if (L->use_empty()) {
2790 return true;
2791 }
2792
2793 ReachingMemVal MemVal = ReachingMemVal::getUnknown(nullptr, nullptr);
2794 if (!isMemorySSAEnabled()) {
2795 // ... to a pointer that has been loaded from before...
2796 MemDepResult Dep = MD->getDependency(L);
2797
2798 // If it is defined in another block, try harder.
2799 if (Dep.isNonLocal())
2800 return processNonLocalLoad(L);
2801
2802 // Only handle the local case below.
2803 if (Dep.isDef())
2804 MemVal = ReachingMemVal::getDef(L->getPointerOperand(), Dep.getInst());
2805 else if (Dep.isClobber())
2806 MemVal =
2807 ReachingMemVal::getClobber(L->getPointerOperand(), Dep.getInst());
2808 } else {
2810 if (!findReachingValuesForLoad(L, MemVals, *MSSAU->getMemorySSA(), *AA))
2811 return false; // Too many dependencies.
2812 assert(MemVals.size() && "Expected at least an unknown value");
2813 if (MemVals.size() > 1 || MemVals[0].Block != L->getParent())
2814 return processNonLocalLoad(L, MemVals);
2815
2816 MemVal = MemVals[0];
2817 }
2818
2819 if (MemVal.Kind == DepKind::Other) {
2820 // This might be a NonFuncLocal or an Unknown.
2821 LLVM_DEBUG(
2822 // fast print dep, using operator<< on instruction is too slow.
2823 dbgs() << "GVN: load "; L->printAsOperand(dbgs());
2824 dbgs() << " has unknown dependence\n";);
2825 return false;
2826 }
2827
2828 auto AV = AnalyzeLoadAvailability(L, MemVal, L->getPointerOperand());
2829 if (!AV)
2830 return false;
2831
2832 Value *AvailableValue = AV->MaterializeAdjustedValue(L, L);
2833
2834 // MaterializeAdjustedValue is responsible for combining metadata.
2835 ICF->removeUsersOf(L);
2836 L->replaceAllUsesWith(AvailableValue);
2837 if (MSSAU)
2838 MSSAU->removeMemoryAccess(L);
2839 ++NumGVNLoad;
2840 reportLoadElim(L, AvailableValue, ORE);
2842 // Tell MDA to reexamine the reused pointer since we might have more
2843 // information after forwarding it.
2844 if (MD && AvailableValue->getType()->isPtrOrPtrVectorTy())
2845 MD->invalidateCachedPointerInfo(AvailableValue);
2846 return true;
2847}
2848
2849// Attempt to process masked loads which have loaded from
2850// masked stores with the same mask
2851bool GVNPass::processMaskedLoad(IntrinsicInst *I) {
2852 if (!MD)
2853 return false;
2854 MemDepResult Dep = MD->getDependency(I);
2855 Instruction *DepInst = Dep.getInst();
2856 if (!DepInst || !Dep.isLocal() || !Dep.isDef())
2857 return false;
2858
2859 Value *Mask = I->getOperand(1);
2860 Value *Passthrough = I->getOperand(2);
2861 Value *StoreVal;
2862 if (!match(DepInst,
2863 m_MaskedStore(m_Value(StoreVal), m_Value(), m_Specific(Mask))) ||
2864 StoreVal->getType() != I->getType())
2865 return false;
2866
2867 // Remove the load but generate a select for the passthrough
2868 Value *OpToForward = llvm::SelectInst::Create(Mask, StoreVal, Passthrough, "",
2869 I->getIterator());
2870
2871 ICF->removeUsersOf(I);
2872 I->replaceAllUsesWith(OpToForward);
2874 ++NumGVNLoad;
2875 return true;
2876}
2877
2878/// Return a pair the first field showing the value number of \p Exp and the
2879/// second field showing whether it is a value number newly created.
2880std::pair<uint32_t, bool>
2881GVNPass::ValueTable::assignExpNewValueNum(Expression &Exp) {
2882 uint32_t &E = ExpressionNumbering[Exp];
2883 bool CreateNewValNum = !E;
2884 if (CreateNewValNum) {
2885 Expressions.push_back(Exp);
2886 if (ExprIdx.size() < NextValueNumber + 1)
2887 ExprIdx.resize(NextValueNumber * 2);
2888 E = NextValueNumber;
2889 ExprIdx[NextValueNumber++] = NextExprNumber++;
2890 }
2891 return {E, CreateNewValNum};
2892}
2893
2894/// Return whether all the values related with the same \p num are
2895/// defined in \p BB.
2896bool GVNPass::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB,
2897 GVNPass &GVN) {
2898 return all_of(
2899 GVN.LeaderTable.getLeaders(Num),
2900 [=](const LeaderMap::LeaderTableEntry &L) { return L.BB == BB; });
2901}
2902
2903/// Wrap phiTranslateImpl to provide caching functionality.
2904uint32_t GVNPass::ValueTable::phiTranslate(const BasicBlock *Pred,
2905 const BasicBlock *PhiBlock,
2906 uint32_t Num, GVNPass &GVN) {
2907 auto FindRes = PhiTranslateTable.find({Num, Pred});
2908 if (FindRes != PhiTranslateTable.end())
2909 return FindRes->second;
2910 uint32_t NewNum = phiTranslateImpl(Pred, PhiBlock, Num, GVN);
2911 PhiTranslateTable.insert({{Num, Pred}, NewNum});
2912 return NewNum;
2913}
2914
2915// Return true if the value number \p Num and NewNum have equal value.
2916// Return false if the result is unknown.
2917bool GVNPass::ValueTable::areCallValsEqual(uint32_t Num, uint32_t NewNum,
2918 const BasicBlock *Pred,
2919 const BasicBlock *PhiBlock,
2920 GVNPass &GVN) {
2921 CallInst *Call = nullptr;
2922 auto Leaders = GVN.LeaderTable.getLeaders(Num);
2923 for (const auto &Entry : Leaders) {
2924 Call = dyn_cast<CallInst>(&*Entry.Val);
2925 if (Call && Call->getParent() == PhiBlock)
2926 break;
2927 }
2928
2929 if (AA->doesNotAccessMemory(Call))
2930 return true;
2931
2932 if (!MD || !AA->onlyReadsMemory(Call))
2933 return false;
2934
2935 MemDepResult LocalDep = MD->getDependency(Call);
2936 if (!LocalDep.isNonLocal())
2937 return false;
2938
2941
2942 // Check to see if the Call has no function local clobber.
2943 for (const NonLocalDepEntry &D : Deps) {
2944 if (D.getResult().isNonFuncLocal())
2945 return true;
2946 }
2947 return false;
2948}
2949
2950/// Translate value number \p Num using phis, so that it has the values of
2951/// the phis in BB.
2952uint32_t GVNPass::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
2953 const BasicBlock *PhiBlock,
2954 uint32_t Num, GVNPass &GVN) {
2955 // See if we can refine the value number by looking at the PN incoming value
2956 // for the given predecessor.
2957 if (PHINode *PN = NumberingPhi[Num]) {
2958 if (PN->getParent() != PhiBlock)
2959 return Num;
2960 for (unsigned I = 0; I != PN->getNumIncomingValues(); ++I) {
2961 if (PN->getIncomingBlock(I) != Pred)
2962 continue;
2963 if (uint32_t TransVal = lookup(PN->getIncomingValue(I), false))
2964 return TransVal;
2965 }
2966 return Num;
2967 }
2968
2969 if (BasicBlock *BB = NumberingBB[Num]) {
2970 assert(MSSA && "NumberingBB is non-empty only when using MemorySSA");
2971 // Value numbers of basic blocks are used to represent memory state in
2972 // load/store instructions and read-only function calls when said state is
2973 // set by a MemoryPhi.
2974 if (BB != PhiBlock)
2975 return Num;
2976 MemoryPhi *MPhi = MSSA->getMemoryAccess(BB);
2977 for (unsigned i = 0, N = MPhi->getNumIncomingValues(); i != N; ++i) {
2978 if (MPhi->getIncomingBlock(i) != Pred)
2979 continue;
2980 MemoryAccess *MA = MPhi->getIncomingValue(i);
2981 if (auto *PredPhi = dyn_cast<MemoryPhi>(MA))
2982 return lookupOrAdd(PredPhi->getBlock());
2983 if (MSSA->isLiveOnEntryDef(MA))
2984 return lookupOrAdd(&BB->getParent()->getEntryBlock());
2985 return lookupOrAdd(cast<MemoryUseOrDef>(MA)->getMemoryInst());
2986 }
2988 "CFG/MemorySSA mismatch: predecessor not found among incoming blocks");
2989 }
2990
2991 // If there is any value related with Num is defined in a BB other than
2992 // PhiBlock, it cannot depend on a phi in PhiBlock without going through
2993 // a backedge. We can do an early exit in that case to save compile time.
2994 if (!areAllValsInBB(Num, PhiBlock, GVN))
2995 return Num;
2996
2997 if (Num >= ExprIdx.size() || ExprIdx[Num] == 0)
2998 return Num;
2999 Expression Exp = Expressions[ExprIdx[Num]];
3000
3001 for (unsigned I = 0; I < Exp.VarArgs.size(); I++) {
3002 // For InsertValue and ExtractValue, some varargs are index numbers
3003 // instead of value numbers. Those index numbers should not be
3004 // translated.
3005 if ((I > 1 && Exp.Opcode == Instruction::InsertValue) ||
3006 (I > 0 && Exp.Opcode == Instruction::ExtractValue) ||
3007 (I > 1 && Exp.Opcode == Instruction::ShuffleVector))
3008 continue;
3009 Exp.VarArgs[I] = phiTranslate(Pred, PhiBlock, Exp.VarArgs[I], GVN);
3010 }
3011
3012 if (Exp.Commutative) {
3013 assert(Exp.VarArgs.size() >= 2 && "Unsupported commutative instruction!");
3014 if (Exp.VarArgs[0] > Exp.VarArgs[1]) {
3015 std::swap(Exp.VarArgs[0], Exp.VarArgs[1]);
3016 uint32_t Opcode = Exp.Opcode >> 8;
3017 if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp)
3018 Exp.Opcode = (Opcode << 8) |
3020 static_cast<CmpInst::Predicate>(Exp.Opcode & 255));
3021 }
3022 }
3023
3024 if (uint32_t NewNum = ExpressionNumbering[Exp]) {
3025 if (Exp.Opcode == Instruction::Call && NewNum != Num)
3026 return areCallValsEqual(Num, NewNum, Pred, PhiBlock, GVN) ? NewNum : Num;
3027 return NewNum;
3028 }
3029 return Num;
3030}
3031
3032/// Erase stale entry from phiTranslate cache so phiTranslate can be computed
3033/// again.
3034void GVNPass::ValueTable::eraseTranslateCacheEntry(
3035 uint32_t Num, const BasicBlock &CurrBlock) {
3036 for (const BasicBlock *Pred : predecessors(&CurrBlock))
3037 PhiTranslateTable.erase({Num, Pred});
3038}
3039
3040// In order to find a leader for a given value number at a
3041// specific basic block, we first obtain the list of all Values for that number,
3042// and then scan the list to find one whose block dominates the block in
3043// question. This is fast because dominator tree queries consist of only
3044// a few comparisons of DFS numbers.
3045Value *GVNPass::findLeader(const BasicBlock *BB, uint32_t Num) {
3046 auto Leaders = LeaderTable.getLeaders(Num);
3047 if (Leaders.empty())
3048 return nullptr;
3049
3050 Value *Val = nullptr;
3051 for (const auto &Entry : Leaders) {
3052 if (DT->dominates(Entry.BB, BB)) {
3053 Val = Entry.Val;
3054 if (isa<Constant>(Val))
3055 return Val;
3056 }
3057 }
3058
3059 return Val;
3060}
3061
3062/// There is an edge from 'Src' to 'Dst'. Return
3063/// true if every path from the entry block to 'Dst' passes via this edge. In
3064/// particular 'Dst' must not be reachable via another edge from 'Src'.
3066 DominatorTree *DT) {
3067 // While in theory it is interesting to consider the case in which Dst has
3068 // more than one predecessor, because Dst might be part of a loop which is
3069 // only reachable from Src, in practice it is pointless since at the time
3070 // GVN runs all such loops have preheaders, which means that Dst will have
3071 // been changed to have only one predecessor, namely Src.
3072 const BasicBlock *Pred = E.getEnd()->getSinglePredecessor();
3073 assert((!Pred || Pred == E.getStart()) &&
3074 "No edge between these basic blocks!");
3075 return Pred != nullptr;
3076}
3077
3078void GVNPass::assignBlockRPONumber(Function &F) {
3079 BlockRPONumber.clear();
3080 uint32_t NextBlockNumber = 1;
3081 ReversePostOrderTraversal<Function *> RPOT(&F);
3082 for (BasicBlock *BB : RPOT)
3083 BlockRPONumber[BB] = NextBlockNumber++;
3084 InvalidBlockRPONumbers = false;
3085}
3086
3087/// The given values are known to be equal in every use
3088/// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with
3089/// 'RHS' everywhere in the scope. Returns whether a change was made.
3090/// The Root may either be a basic block edge (for conditions) or an
3091/// instruction (for assumes).
3092bool GVNPass::propagateEquality(
3093 Value *LHS, Value *RHS,
3094 const std::variant<BasicBlockEdge, Instruction *> &Root) {
3096 Worklist.push_back(std::make_pair(LHS, RHS));
3097 bool Changed = false;
3098 SmallVector<const BasicBlock *> DominatedBlocks;
3099 if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(&Root)) {
3100 // For speed, compute a conservative fast approximation to
3101 // DT->dominates(Root, Root.getEnd());
3103 DominatedBlocks.push_back(Edge->getEnd());
3104 } else {
3105 Instruction *I = std::get<Instruction *>(Root);
3106 for (const auto *Node : DT->getNode(I->getParent())->children())
3107 DominatedBlocks.push_back(Node->getBlock());
3108 }
3109
3110 while (!Worklist.empty()) {
3111 std::pair<Value*, Value*> Item = Worklist.pop_back_val();
3112 LHS = Item.first; RHS = Item.second;
3113
3114 if (LHS == RHS)
3115 continue;
3116 assert(LHS->getType() == RHS->getType() && "Equality but unequal types!");
3117
3118 // Don't try to propagate equalities between constants.
3120 continue;
3121
3122 // Prefer a constant on the right-hand side, or an Argument if no constants.
3124 std::swap(LHS, RHS);
3125 assert((isa<Argument>(LHS) || isa<Instruction>(LHS)) && "Unexpected value!");
3126 const DataLayout &DL =
3128 ? cast<Argument>(LHS)->getParent()->getDataLayout()
3129 : cast<Instruction>(LHS)->getDataLayout();
3130
3131 // If there is no obvious reason to prefer the left-hand side over the
3132 // right-hand side, ensure the longest lived term is on the right-hand side,
3133 // so the shortest lived term will be replaced by the longest lived.
3134 // This tends to expose more simplifications.
3135 uint32_t LVN = VN.lookupOrAdd(LHS);
3136 if ((isa<Argument>(LHS) && isa<Argument>(RHS)) ||
3138 // Move the 'oldest' value to the right-hand side, using the value number
3139 // as a proxy for age.
3140 uint32_t RVN = VN.lookupOrAdd(RHS);
3141 if (LVN < RVN) {
3142 std::swap(LHS, RHS);
3143 LVN = RVN;
3144 }
3145 }
3146
3147 // If value numbering later sees that an instruction in the scope is equal
3148 // to 'LHS' then ensure it will be turned into 'RHS'. In order to preserve
3149 // the invariant that instructions only occur in the leader table for their
3150 // own value number (this is used by removeFromLeaderTable), do not do this
3151 // if RHS is an instruction (if an instruction in the scope is morphed into
3152 // LHS then it will be turned into RHS by the next GVN iteration anyway, so
3153 // using the leader table is about compiling faster, not optimizing better).
3154 // The leader table only tracks basic blocks, not edges. Only add to if we
3155 // have the simple case where the edge dominates the end.
3157 for (const BasicBlock *BB : DominatedBlocks)
3158 LeaderTable.insert(LVN, RHS, BB);
3159
3160 // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As
3161 // LHS always has at least one use that is not dominated by Root, this will
3162 // never do anything if LHS has only one use.
3163 if (!LHS->hasOneUse()) {
3164 // Create a callback that captures the DL.
3165 auto CanReplacePointersCallBack = [&DL](const Use &U, const Value *To) {
3166 return canReplacePointersInUseIfEqual(U, To, DL);
3167 };
3168 unsigned NumReplacements;
3169 if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(&Root))
3170 NumReplacements = replaceDominatedUsesWithIf(
3171 LHS, RHS, *DT, *Edge, CanReplacePointersCallBack);
3172 else
3173 NumReplacements = replaceDominatedUsesWithIf(
3174 LHS, RHS, *DT, std::get<Instruction *>(Root),
3175 CanReplacePointersCallBack);
3176
3177 if (NumReplacements > 0) {
3178 Changed = true;
3179 NumGVNEqProp += NumReplacements;
3180 // Cached information for anything that uses LHS will be invalid.
3181 if (MD)
3182 MD->invalidateCachedPointerInfo(LHS);
3183 }
3184 }
3185
3186 // Now try to deduce additional equalities from this one. For example, if
3187 // the known equality was "(A != B)" == "false" then it follows that A and B
3188 // are equal in the scope. Only boolean equalities with an explicit true or
3189 // false RHS are currently supported.
3190 if (!RHS->getType()->isIntegerTy(1))
3191 // Not a boolean equality - bail out.
3192 continue;
3193 ConstantInt *CI = dyn_cast<ConstantInt>(RHS);
3194 if (!CI)
3195 // RHS neither 'true' nor 'false' - bail out.
3196 continue;
3197 // Whether RHS equals 'true'. Otherwise it equals 'false'.
3198 bool IsKnownTrue = CI->isMinusOne();
3199 bool IsKnownFalse = !IsKnownTrue;
3200
3201 // If "A && B" is known true then both A and B are known true. If "A || B"
3202 // is known false then both A and B are known false.
3203 Value *A, *B;
3204 if ((IsKnownTrue && match(LHS, m_LogicalAnd(m_Value(A), m_Value(B)))) ||
3205 (IsKnownFalse && match(LHS, m_LogicalOr(m_Value(A), m_Value(B))))) {
3206 Worklist.push_back(std::make_pair(A, RHS));
3207 Worklist.push_back(std::make_pair(B, RHS));
3208 continue;
3209 }
3210
3211 // If we are propagating an equality like "(A == B)" == "true" then also
3212 // propagate the equality A == B. When propagating a comparison such as
3213 // "(A >= B)" == "true", replace all instances of "A < B" with "false".
3214 if (CmpInst *Cmp = dyn_cast<CmpInst>(LHS)) {
3215 Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
3216
3217 // If "A == B" is known true, or "A != B" is known false, then replace
3218 // A with B everywhere in the scope. For floating point operations, we
3219 // have to be careful since equality does not always imply equivalance.
3220 if (Cmp->isEquivalence(IsKnownFalse))
3221 Worklist.push_back(std::make_pair(Op0, Op1));
3222
3223 // If "A >= B" is known true, replace "A < B" with false everywhere.
3224 CmpInst::Predicate NotPred = Cmp->getInversePredicate();
3225 Constant *NotVal = ConstantInt::get(Cmp->getType(), IsKnownFalse);
3226 // Since we don't have the instruction "A < B" immediately to hand, work
3227 // out the value number that it would have and use that to find an
3228 // appropriate instruction (if any).
3229 uint32_t NextNum = VN.getNextUnusedValueNumber();
3230 uint32_t Num = VN.lookupOrAddCmp(Cmp->getOpcode(), NotPred, Op0, Op1);
3231 // If the number we were assigned was brand new then there is no point in
3232 // looking for an instruction realizing it: there cannot be one!
3233 if (Num < NextNum) {
3234 for (const auto &Entry : LeaderTable.getLeaders(Num)) {
3235 // Only look at leaders that either dominate the start of the edge,
3236 // or are dominated by the end. This check is not necessary for
3237 // correctness, it only discards cases for which the following
3238 // use replacement will not work anyway.
3239 if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(&Root)) {
3240 if (!DT->dominates(Entry.BB, Edge->getStart()) &&
3241 !DT->dominates(Edge->getEnd(), Entry.BB))
3242 continue;
3243 } else {
3244 auto *InstBB = std::get<Instruction *>(Root)->getParent();
3245 if (!DT->dominates(Entry.BB, InstBB) &&
3246 !DT->dominates(InstBB, Entry.BB))
3247 continue;
3248 }
3249
3250 Value *NotCmp = Entry.Val;
3251 if (NotCmp && isa<Instruction>(NotCmp)) {
3252 unsigned NumReplacements;
3253 if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(&Root))
3254 NumReplacements =
3255 replaceDominatedUsesWith(NotCmp, NotVal, *DT, *Edge);
3256 else
3257 NumReplacements = replaceDominatedUsesWith(
3258 NotCmp, NotVal, *DT, std::get<Instruction *>(Root));
3259 Changed |= NumReplacements > 0;
3260 NumGVNEqProp += NumReplacements;
3261 // Cached information for anything that uses NotCmp will be invalid.
3262 if (MD)
3263 MD->invalidateCachedPointerInfo(NotCmp);
3264 }
3265 }
3266 }
3267 // Ensure that any instruction in scope that gets the "A < B" value number
3268 // is replaced with false.
3269 // The leader table only tracks basic blocks, not edges. Only add to if we
3270 // have the simple case where the edge dominates the end.
3271 for (const BasicBlock *BB : DominatedBlocks)
3272 LeaderTable.insert(Num, NotVal, BB);
3273
3274 continue;
3275 }
3276
3277 // Propagate equalities that results from truncation with no unsigned wrap
3278 // like (trunc nuw i64 %v to i1) == "true" or (trunc nuw i64 %v to i1) ==
3279 // "false"
3280 if (match(LHS, m_NUWTrunc(m_Value(A)))) {
3281 Worklist.emplace_back(A, ConstantInt::get(A->getType(), IsKnownTrue));
3282 continue;
3283 }
3284
3285 if (match(LHS, m_Not(m_Value(A)))) {
3286 Worklist.emplace_back(A, ConstantInt::get(A->getType(), !IsKnownTrue));
3287 continue;
3288 }
3289 }
3290
3291 return Changed;
3292}
3293
3294/// When calculating availability, handle an instruction
3295/// by inserting it into the appropriate sets.
3296bool GVNPass::processInstruction(Instruction *I) {
3297 // If the instruction can be easily simplified then do so now in preference
3298 // to value numbering it. Value numbering often exposes redundancies, for
3299 // example if it determines that %y is equal to %x then the instruction
3300 // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
3301 const DataLayout &DL = I->getDataLayout();
3302 if (Value *V = simplifyInstruction(I, {DL, TLI, DT, AC})) {
3303 bool Changed = false;
3304 if (!I->use_empty()) {
3305 // Simplification can cause a special instruction to become not special.
3306 // For example, devirtualization to a willreturn function.
3307 ICF->removeUsersOf(I);
3308 I->replaceAllUsesWith(V);
3309 Changed = true;
3310 }
3311 if (isInstructionTriviallyDead(I, TLI)) {
3313 Changed = true;
3314 }
3315 if (Changed) {
3316 if (MD && V->getType()->isPtrOrPtrVectorTy())
3317 MD->invalidateCachedPointerInfo(V);
3318 ++NumGVNSimpl;
3319 return true;
3320 }
3321 }
3322
3323 if (auto *Assume = dyn_cast<AssumeInst>(I))
3324 return processAssumeIntrinsic(Assume);
3325
3326 if (LoadInst *Load = dyn_cast<LoadInst>(I)) {
3327 if (processLoad(Load))
3328 return true;
3329
3330 unsigned Num = VN.lookupOrAdd(Load);
3331 LeaderTable.insert(Num, Load, Load->getParent());
3332 return false;
3333 }
3334
3336 processMaskedLoad(cast<IntrinsicInst>(I)))
3337 return true;
3338
3339 // For conditional branches, we can perform simple conditional propagation on
3340 // the condition value itself.
3341 if (CondBrInst *BI = dyn_cast<CondBrInst>(I)) {
3342 if (isa<Constant>(BI->getCondition()))
3343 return processFoldableCondBr(BI);
3344
3345 Value *BranchCond = BI->getCondition();
3346 BasicBlock *TrueSucc = BI->getSuccessor(0);
3347 BasicBlock *FalseSucc = BI->getSuccessor(1);
3348 // Avoid multiple edges early.
3349 if (TrueSucc == FalseSucc)
3350 return false;
3351
3352 BasicBlock *Parent = BI->getParent();
3353 bool Changed = false;
3354
3356 BasicBlockEdge TrueE(Parent, TrueSucc);
3357 Changed |= propagateEquality(BranchCond, TrueVal, TrueE);
3358
3360 BasicBlockEdge FalseE(Parent, FalseSucc);
3361 Changed |= propagateEquality(BranchCond, FalseVal, FalseE);
3362
3363 return Changed;
3364 }
3365
3366 // For switches, propagate the case values into the case destinations.
3367 if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
3368 Value *SwitchCond = SI->getCondition();
3369 BasicBlock *Parent = SI->getParent();
3370 bool Changed = false;
3371
3372 // Remember how many outgoing edges there are to every successor.
3373 SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges;
3374 for (BasicBlock *Succ : successors(Parent))
3375 ++SwitchEdges[Succ];
3376
3377 for (const auto &Case : SI->cases()) {
3378 BasicBlock *Dst = Case.getCaseSuccessor();
3379 // If there is only a single edge, propagate the case value into it.
3380 if (SwitchEdges.lookup(Dst) == 1) {
3381 BasicBlockEdge E(Parent, Dst);
3382 Changed |= propagateEquality(SwitchCond, Case.getCaseValue(), E);
3383 }
3384 }
3385 return Changed;
3386 }
3387
3388 // Instructions with void type don't return a value, so there's
3389 // no point in trying to find redundancies in them.
3390 if (I->getType()->isVoidTy())
3391 return false;
3392
3393 uint32_t NextNum = VN.getNextUnusedValueNumber();
3394 unsigned Num = VN.lookupOrAdd(I);
3395
3396 // Allocations are always uniquely numbered, so we can save time and memory
3397 // by fast failing them.
3398 if (isa<AllocaInst>(I) || I->isTerminator() || isa<PHINode>(I)) {
3399 LeaderTable.insert(Num, I, I->getParent());
3400 return false;
3401 }
3402
3403 // If the number we were assigned was a brand new VN, then we don't
3404 // need to do a lookup to see if the number already exists
3405 // somewhere in the domtree: it can't!
3406 if (Num >= NextNum) {
3407 LeaderTable.insert(Num, I, I->getParent());
3408 return false;
3409 }
3410
3411 // Perform fast-path value-number based elimination of values inherited from
3412 // dominators.
3413 Value *Repl = findLeader(I->getParent(), Num);
3414 if (!Repl) {
3415 // Failure, just remember this instance for future use.
3416 LeaderTable.insert(Num, I, I->getParent());
3417 return false;
3418 }
3419
3420 if (Repl == I) {
3421 // If I was the result of a shortcut PRE, it might already be in the table
3422 // and the best replacement for itself. Nothing to do.
3423 return false;
3424 }
3425
3426 // Remove it!
3428 if (MD && Repl->getType()->isPtrOrPtrVectorTy())
3429 MD->invalidateCachedPointerInfo(Repl);
3431 return true;
3432}
3433
3434/// runOnFunction - This is the main transformation entry point for a function.
3435bool GVNPass::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
3436 const TargetLibraryInfo &RunTLI, AAResults &RunAA,
3437 MemoryDependenceResults *RunMD, LoopInfo &LI,
3438 OptimizationRemarkEmitter *RunORE, MemorySSA *MSSA) {
3439 AC = &RunAC;
3440 DT = &RunDT;
3441 VN.setDomTree(DT);
3442 TLI = &RunTLI;
3443 AA = &RunAA;
3444 VN.setAliasAnalysis(&RunAA);
3445 MD = RunMD;
3446 ImplicitControlFlowTracking ImplicitCFT;
3447 ICF = &ImplicitCFT;
3448 this->LI = &LI;
3449 VN.setMemDep(MD);
3450 // Propagate the MSSA-enabled flag so the value-numbering paths in
3451 // lookupOrAddCall() and computeLoadStoreVN(), which depends on whether
3452 // IsMSSAEnabled is turned on.
3453 VN.setMemorySSA(MSSA, isMemorySSAEnabled());
3454 ORE = RunORE;
3455 InvalidBlockRPONumbers = true;
3456 MemorySSAUpdater Updater(MSSA);
3457 MSSAU = MSSA ? &Updater : nullptr;
3458
3459 bool Changed = false;
3460 bool ShouldContinue = true;
3461
3462 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
3463 // Merge unconditional branches, allowing PRE to catch more
3464 // optimization opportunities.
3465 for (BasicBlock &BB : make_early_inc_range(F)) {
3466 bool RemovedBlock = MergeBlockIntoPredecessor(&BB, &DTU, &LI, MSSAU, MD);
3467 if (RemovedBlock)
3468 ++NumGVNBlocks;
3469
3470 Changed |= RemovedBlock;
3471 }
3472 DTU.flush();
3473
3474 unsigned Iteration = 0;
3475 while (ShouldContinue) {
3476 LLVM_DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
3477 (void) Iteration;
3478 ShouldContinue = iterateOnFunction(F);
3479 Changed |= ShouldContinue;
3480 ++Iteration;
3481 }
3482
3483 if (isScalarPREEnabled()) {
3484 // Fabricate val-num for dead-code in order to suppress assertion in
3485 // performPRE().
3486 assignValNumForDeadCode();
3487 bool PREChanged = true;
3488 while (PREChanged) {
3489 PREChanged = performPRE(F);
3490 Changed |= PREChanged;
3491 }
3492 }
3493
3494 // FIXME: Should perform GVN again after PRE does something. PRE can move
3495 // computations into blocks where they become fully redundant. Note that
3496 // we can't do this until PRE's critical edge splitting updates memdep.
3497 // Actually, when this happens, we should just fully integrate PRE into GVN.
3498
3499 cleanupGlobalSets();
3500 // Do not cleanup DeadBlocks in cleanupGlobalSets() as it's called for each
3501 // iteration.
3502 DeadBlocks.clear();
3503
3504 if (MSSA && VerifyMemorySSA)
3505 MSSA->verifyMemorySSA();
3506
3507 return Changed;
3508}
3509
3510bool GVNPass::processBlock(BasicBlock *BB) {
3511 if (DeadBlocks.count(BB))
3512 return false;
3513
3514 bool ChangedFunction = false;
3515
3516 // Since we may not have visited the input blocks of the phis, we can't
3517 // use our normal hash approach for phis. Instead, simply look for
3518 // obvious duplicates. The first pass of GVN will tend to create
3519 // identical phis, and the second or later passes can eliminate them.
3520 SmallPtrSet<PHINode *, 8> PHINodesToRemove;
3521 ChangedFunction |= EliminateDuplicatePHINodes(BB, PHINodesToRemove);
3522 for (PHINode *PN : PHINodesToRemove) {
3523 removeInstruction(PN);
3524 }
3525 for (Instruction &Inst : make_early_inc_range(*BB))
3526 ChangedFunction |= processInstruction(&Inst);
3527 return ChangedFunction;
3528}
3529
3530// Instantiate an expression in a predecessor that lacked it.
3531bool GVNPass::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
3532 BasicBlock *Curr, unsigned int ValNo) {
3533 // Because we are going top-down through the block, all value numbers
3534 // will be available in the predecessor by the time we need them. Any
3535 // that weren't originally present will have been instantiated earlier
3536 // in this loop.
3537 bool Success = true;
3538 for (unsigned I = 0, E = Instr->getNumOperands(); I != E; ++I) {
3539 Value *Op = Instr->getOperand(I);
3541 continue;
3542 // This could be a newly inserted instruction, in which case, we won't
3543 // find a value number, and should give up before we hurt ourselves.
3544 // FIXME: Rewrite the infrastructure to let it easier to value number
3545 // and process newly inserted instructions.
3546 if (!VN.exists(Op)) {
3547 Success = false;
3548 break;
3549 }
3550 uint32_t TValNo =
3551 VN.phiTranslate(Pred, Curr, VN.lookup(Op), *this);
3552 if (Value *V = findLeader(Pred, TValNo)) {
3553 Instr->setOperand(I, V);
3554 } else {
3555 Success = false;
3556 break;
3557 }
3558 }
3559
3560 // Fail out if we encounter an operand that is not available in
3561 // the PRE predecessor. This is typically because of loads which
3562 // are not value numbered precisely.
3563 if (!Success)
3564 return false;
3565
3566 Instr->insertBefore(Pred->getTerminator()->getIterator());
3567 Instr->setName(Instr->getName() + ".pre");
3568 Instr->setDebugLoc(Instr->getDebugLoc());
3569
3570 ICF->insertInstructionTo(Instr, Pred);
3571
3572 unsigned Num = VN.lookupOrAdd(Instr);
3573 VN.add(Instr, Num);
3574
3575 // Update the availability map to include the new instruction.
3576 LeaderTable.insert(Num, Instr, Pred);
3577 return true;
3578}
3579
3580bool GVNPass::performScalarPRE(Instruction *CurInst) {
3581 if (isa<AllocaInst>(CurInst) || CurInst->isTerminator() ||
3582 isa<PHINode>(CurInst) || CurInst->getType()->isVoidTy() ||
3583 CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
3584 CurInst->getType()->isTokenLikeTy())
3585 return false;
3586
3587 // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from
3588 // sinking the compare again, and it would force the code generator to
3589 // move the i1 from processor flags or predicate registers into a general
3590 // purpose register.
3591 if (isa<CmpInst>(CurInst))
3592 return false;
3593
3594 // Don't do PRE on GEPs. The inserted PHI would prevent CodeGenPrepare from
3595 // sinking the addressing mode computation back to its uses. Extending the
3596 // GEP's live range increases the register pressure, and therefore it can
3597 // introduce unnecessary spills.
3598 //
3599 // This doesn't prevent Load PRE. PHI translation will make the GEP available
3600 // to the load by moving it to the predecessor block if necessary.
3601 if (isa<GetElementPtrInst>(CurInst))
3602 return false;
3603
3604 if (auto *CallB = dyn_cast<CallBase>(CurInst)) {
3605 // We don't currently value number ANY inline asm calls.
3606 if (CallB->isInlineAsm())
3607 return false;
3608 }
3609
3610 uint32_t ValNo = VN.lookup(CurInst);
3611
3612 // Look for the predecessors for PRE opportunities. We're
3613 // only trying to solve the basic diamond case, where
3614 // a value is computed in the successor and one predecessor,
3615 // but not the other. We also explicitly disallow cases
3616 // where the successor is its own predecessor, because they're
3617 // more complicated to get right.
3618 unsigned NumWith = 0;
3619 unsigned NumWithout = 0;
3620 BasicBlock *PREPred = nullptr;
3621 BasicBlock *CurrentBlock = CurInst->getParent();
3622
3623 // Update the RPO numbers for this function.
3624 if (InvalidBlockRPONumbers)
3625 assignBlockRPONumber(*CurrentBlock->getParent());
3626
3628 for (BasicBlock *P : predecessors(CurrentBlock)) {
3629 // We're not interested in PRE where blocks with predecessors that are
3630 // not reachable.
3631 if (!DT->isReachableFromEntry(P)) {
3632 NumWithout = 2;
3633 break;
3634 }
3635 // It is not safe to do PRE when P->CurrentBlock is a loop backedge.
3636 assert(BlockRPONumber.count(P) && BlockRPONumber.count(CurrentBlock) &&
3637 "Invalid BlockRPONumber map.");
3638 if (BlockRPONumber[P] >= BlockRPONumber[CurrentBlock]) {
3639 NumWithout = 2;
3640 break;
3641 }
3642
3643 uint32_t TValNo = VN.phiTranslate(P, CurrentBlock, ValNo, *this);
3644 Value *PredV = findLeader(P, TValNo);
3645 if (!PredV) {
3646 PredMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P));
3647 PREPred = P;
3648 ++NumWithout;
3649 } else if (PredV == CurInst) {
3650 // CurInst dominates this predecessor.
3651 NumWithout = 2;
3652 break;
3653 } else {
3654 PredMap.push_back(std::make_pair(PredV, P));
3655 ++NumWith;
3656 }
3657 }
3658
3659 // Don't do PRE when it might increase code size, i.e. when
3660 // we would need to insert instructions in more than one pred.
3661 if (NumWithout > 1 || NumWith == 0)
3662 return false;
3663
3664 // We may have a case where all predecessors have the instruction,
3665 // and we just need to insert a phi node. Otherwise, perform
3666 // insertion.
3667 Instruction *PREInstr = nullptr;
3668
3669 if (NumWithout != 0) {
3670 if (!isSafeToSpeculativelyExecute(CurInst)) {
3671 // It is only valid to insert a new instruction if the current instruction
3672 // is always executed. An instruction with implicit control flow could
3673 // prevent us from doing it. If we cannot speculate the execution, then
3674 // PRE should be prohibited.
3675 if (ICF->isDominatedByICFIFromSameBlock(CurInst))
3676 return false;
3677 }
3678
3679 // Don't do PRE across indirect branch.
3680 if (isa<IndirectBrInst>(PREPred->getTerminator()))
3681 return false;
3682
3683 // We can't do PRE safely on a critical edge, so instead we schedule
3684 // the edge to be split and perform the PRE the next time we iterate
3685 // on the function.
3686 unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
3687 if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
3688 ToSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
3689 return false;
3690 }
3691 // We need to insert somewhere, so let's give it a shot.
3692 PREInstr = CurInst->clone();
3693 if (!performScalarPREInsertion(PREInstr, PREPred, CurrentBlock, ValNo)) {
3694 // If we failed insertion, make sure we remove the instruction.
3695#ifndef NDEBUG
3696 verifyRemoved(PREInstr);
3697#endif
3698 PREInstr->deleteValue();
3699 return false;
3700 }
3701 }
3702
3703 // Either we should have filled in the PRE instruction, or we should
3704 // not have needed insertions.
3705 assert(PREInstr != nullptr || NumWithout == 0);
3706
3707 ++NumGVNPRE;
3708
3709 // Create a PHI to make the value available in this block.
3710 PHINode *Phi = PHINode::Create(CurInst->getType(), PredMap.size(),
3711 CurInst->getName() + ".pre-phi");
3712 Phi->insertBefore(CurrentBlock->begin());
3713 for (auto &[V, BB] : PredMap) {
3714 if (V) {
3715 // If we use an existing value in this phi, we have to patch the original
3716 // value because the phi will be used to replace a later value.
3717 patchReplacementInstruction(CurInst, V);
3718 Phi->addIncoming(V, BB);
3719 } else
3720 Phi->addIncoming(PREInstr, PREPred);
3721 }
3722
3723 VN.add(Phi, ValNo);
3724 // After creating a new PHI for ValNo, the phi translate result for ValNo will
3725 // be changed, so erase the related stale entries in phi translate cache.
3726 VN.eraseTranslateCacheEntry(ValNo, *CurrentBlock);
3727 LeaderTable.insert(ValNo, Phi, CurrentBlock);
3728 Phi->setDebugLoc(CurInst->getDebugLoc());
3729 CurInst->replaceAllUsesWith(Phi);
3730 if (MD && Phi->getType()->isPtrOrPtrVectorTy())
3731 MD->invalidateCachedPointerInfo(Phi);
3732 LeaderTable.erase(ValNo, CurInst, CurrentBlock);
3733
3734 LLVM_DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
3735 removeInstruction(CurInst);
3736
3737 return true;
3738}
3739
3740/// Perform a purely local form of PRE that looks for diamond
3741/// control flow patterns and attempts to perform simple PRE at the join point.
3742bool GVNPass::performPRE(Function &F) {
3743 bool Changed = false;
3744 for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) {
3745 // Nothing to PRE in the entry block.
3746 if (CurrentBlock == &F.getEntryBlock())
3747 continue;
3748
3749 // Don't perform PRE on an EH pad.
3750 if (CurrentBlock->isEHPad())
3751 continue;
3752
3753 for (BasicBlock::iterator BI = CurrentBlock->begin(),
3754 BE = CurrentBlock->end();
3755 BI != BE;) {
3756 Instruction *CurInst = &*BI++;
3757 Changed |= performScalarPRE(CurInst);
3758 }
3759 }
3760
3761 if (splitCriticalEdges())
3762 Changed = true;
3763
3764 return Changed;
3765}
3766
3767/// Split the critical edge connecting the given two blocks, and return
3768/// the block inserted to the critical edge.
3769BasicBlock *GVNPass::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
3770 // GVN does not require loop-simplify, do not try to preserve it if it is not
3771 // possible.
3773 Pred, Succ,
3774 CriticalEdgeSplittingOptions(DT, LI, MSSAU).unsetPreserveLoopSimplify());
3775 if (BB) {
3776 if (MD)
3777 MD->invalidateCachedPredecessors();
3778 InvalidBlockRPONumbers = true;
3779 }
3780 return BB;
3781}
3782
3783/// Split critical edges found during the previous
3784/// iteration that may enable further optimization.
3785bool GVNPass::splitCriticalEdges() {
3786 if (ToSplit.empty())
3787 return false;
3788
3789 bool Changed = false;
3790 do {
3791 std::pair<Instruction *, unsigned> Edge = ToSplit.pop_back_val();
3792 Changed |= SplitCriticalEdge(Edge.first, Edge.second,
3793 CriticalEdgeSplittingOptions(DT, LI, MSSAU)) !=
3794 nullptr;
3795 } while (!ToSplit.empty());
3796 if (Changed) {
3797 if (MD)
3798 MD->invalidateCachedPredecessors();
3799 InvalidBlockRPONumbers = true;
3800 }
3801 return Changed;
3802}
3803
3804/// Executes one iteration of GVN.
3805bool GVNPass::iterateOnFunction(Function &F) {
3806 cleanupGlobalSets();
3807
3808 // Top-down walk of the dominator tree.
3809 bool Changed = false;
3810 // Needed for value numbering with phi construction to work.
3811 // RPOT walks the graph in its constructor and will not be invalidated during
3812 // processBlock.
3813 ReversePostOrderTraversal<Function *> RPOT(&F);
3814
3815 for (BasicBlock *BB : RPOT)
3816 Changed |= processBlock(BB);
3817
3818 return Changed;
3819}
3820
3821void GVNPass::cleanupGlobalSets() {
3822 VN.clear();
3823 LeaderTable.clear();
3824 BlockRPONumber.clear();
3825 ICF->clear();
3826 InvalidBlockRPONumbers = true;
3827}
3828
3829void GVNPass::removeInstruction(Instruction *I) {
3830 VN.erase(I);
3831 if (MD) MD->removeInstruction(I);
3832 if (MSSAU)
3833 MSSAU->removeMemoryAccess(I);
3834#ifndef NDEBUG
3835 verifyRemoved(I);
3836#endif
3837 ICF->removeInstruction(I);
3838 I->eraseFromParent();
3839 ++NumGVNInstr;
3840}
3841
3842/// Verify that the specified instruction does not occur in our
3843/// internal data structures.
3844void GVNPass::verifyRemoved(const Instruction *Inst) const {
3845 VN.verifyRemoved(Inst);
3846}
3847
3848/// BB is declared dead, which implied other blocks become dead as well. This
3849/// function is to add all these blocks to "DeadBlocks". For the dead blocks'
3850/// live successors, update their phi nodes by replacing the operands
3851/// corresponding to dead blocks with UndefVal.
3852void GVNPass::addDeadBlock(BasicBlock *BB) {
3854 SmallSetVector<BasicBlock *, 4> DF;
3855
3856 NewDead.push_back(BB);
3857 while (!NewDead.empty()) {
3858 BasicBlock *D = NewDead.pop_back_val();
3859 if (DeadBlocks.count(D))
3860 continue;
3861
3862 // All blocks dominated by D are dead.
3863 SmallVector<BasicBlock *, 8> Dom;
3864 DT->getDescendants(D, Dom);
3865 DeadBlocks.insert_range(Dom);
3866
3867 // Figure out the dominance-frontier(D).
3868 for (BasicBlock *B : Dom) {
3869 for (BasicBlock *S : successors(B)) {
3870 if (DeadBlocks.count(S))
3871 continue;
3872
3873 bool AllPredDead = true;
3874 for (BasicBlock *P : predecessors(S))
3875 if (!DeadBlocks.count(P)) {
3876 AllPredDead = false;
3877 break;
3878 }
3879
3880 if (!AllPredDead) {
3881 // S could be proved dead later on. That is why we don't update phi
3882 // operands at this moment.
3883 DF.insert(S);
3884 } else {
3885 // While S is not dominated by D, it is dead by now. This could take
3886 // place if S already have a dead predecessor before D is declared
3887 // dead.
3888 NewDead.push_back(S);
3889 }
3890 }
3891 }
3892 }
3893
3894 // For the dead blocks' live successors, update their phi nodes by replacing
3895 // the operands corresponding to dead blocks with UndefVal.
3896 for (BasicBlock *B : DF) {
3897 if (DeadBlocks.count(B))
3898 continue;
3899
3900 // First, split the critical edges. This might also create additional blocks
3901 // to preserve LoopSimplify form and adjust edges accordingly.
3903 for (BasicBlock *P : Preds) {
3904 if (!DeadBlocks.count(P))
3905 continue;
3906
3907 if (is_contained(successors(P), B) &&
3908 isCriticalEdge(P->getTerminator(), B)) {
3909 if (BasicBlock *S = splitCriticalEdges(P, B))
3910 DeadBlocks.insert(P = S);
3911 }
3912 }
3913
3914 // Now poison the incoming values from the dead predecessors.
3915 for (BasicBlock *P : predecessors(B)) {
3916 if (!DeadBlocks.count(P))
3917 continue;
3918 for (PHINode &Phi : B->phis()) {
3919 Phi.setIncomingValueForBlock(P, PoisonValue::get(Phi.getType()));
3920 if (MD)
3921 MD->invalidateCachedPointerInfo(&Phi);
3922 }
3923 }
3924 }
3925}
3926
3927// If the given branch is recognized as a foldable branch (i.e. conditional
3928// branch with constant condition), it will perform following analyses and
3929// transformation.
3930// 1) If the dead out-coming edge is a critical-edge, split it. Let
3931// R be the target of the dead out-coming edge.
3932// 1) Identify the set of dead blocks implied by the branch's dead outcoming
3933// edge. The result of this step will be {X| X is dominated by R}
3934// 2) Identify those blocks which haves at least one dead predecessor. The
3935// result of this step will be dominance-frontier(R).
3936// 3) Update the PHIs in DF(R) by replacing the operands corresponding to
3937// dead blocks with "UndefVal" in an hope these PHIs will optimized away.
3938//
3939// Return true iff *NEW* dead code are found.
3940bool GVNPass::processFoldableCondBr(CondBrInst *BI) {
3941 // If a branch has two identical successors, we cannot declare either dead.
3942 if (BI->getSuccessor(0) == BI->getSuccessor(1))
3943 return false;
3944
3945 ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
3946 if (!Cond)
3947 return false;
3948
3949 BasicBlock *DeadRoot =
3950 Cond->getZExtValue() ? BI->getSuccessor(1) : BI->getSuccessor(0);
3951 if (DeadBlocks.count(DeadRoot))
3952 return false;
3953
3954 if (!DeadRoot->getSinglePredecessor())
3955 DeadRoot = splitCriticalEdges(BI->getParent(), DeadRoot);
3956
3957 addDeadBlock(DeadRoot);
3958 return true;
3959}
3960
3961// performPRE() will trigger assert if it comes across an instruction without
3962// associated val-num. As it normally has far more live instructions than dead
3963// instructions, it makes more sense just to "fabricate" a val-number for the
3964// dead code than checking if instruction involved is dead or not.
3965void GVNPass::assignValNumForDeadCode() {
3966 for (BasicBlock *BB : DeadBlocks) {
3967 for (Instruction &Inst : *BB) {
3968 unsigned ValNum = VN.lookupOrAdd(&Inst);
3969 LeaderTable.insert(ValNum, &Inst, BB);
3970 }
3971 }
3972}
3973
3975public:
3976 static char ID; // Pass identification, replacement for typeid.
3977
3978 explicit GVNLegacyPass(bool MemDepAnalysis = GVNEnableMemDep,
3979 bool MemSSAAnalysis = GVNEnableMemorySSA,
3980 bool ScalarPRE = true)
3981 : FunctionPass(ID), Impl(GVNOptions()
3982 .setMemDep(MemDepAnalysis)
3983 .setMemorySSA(MemSSAAnalysis)
3984 .setScalarPRE(ScalarPRE)) {
3986 }
3987
3988 bool runOnFunction(Function &F) override {
3989 if (skipFunction(F))
3990 return false;
3991
3993 if (Impl.isMemorySSAEnabled() && !MSSAWP)
3995
3996 return Impl.runImpl(
3997 F, getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
4000 getAnalysis<AAResultsWrapperPass>().getAAResults(),
4001 Impl.isMemDepEnabled()
4003 : nullptr,
4004 getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
4006 MSSAWP ? &MSSAWP->getMSSA() : nullptr);
4007 }
4008
4026
4027private:
4028 GVNPass Impl;
4029};
4030
4031char GVNLegacyPass::ID = 0;
4032
4033INITIALIZE_PASS_BEGIN(GVNLegacyPass, "gvn", "Global Value Numbering", false, false)
4042INITIALIZE_PASS_END(GVNLegacyPass, "gvn", "Global Value Numbering", false, false)
4043
4044// The public interface to this file...
4047 return new GVNLegacyPass(GVNEnableMemDep, GVNEnableMemorySSA, ScalarPRE);
4048}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:663
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static RegisterPass< DebugifyFunctionPass > DF("debugify-function", "Attach debug info to a function")
This file defines the DenseMap class.
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
early cse Early CSE w MemorySSA
static void reportMayClobberedLoad(LoadInst *Load, Instruction *DepInst, const DominatorTree *DT, OptimizationRemarkEmitter *ORE)
Try to locate the three instruction involved in a missed load-elimination case that is due to an inte...
Definition GVN.cpp:1275
static Instruction * findInvariantGroupValue(LoadInst *L, DominatorTree &DT)
If a load has !invariant.group, try to find the most-dominating instruction with the same metadata an...
Definition GVN.cpp:2234
static void reportLoadElim(LoadInst *Load, Value *AvailableValue, OptimizationRemarkEmitter *ORE)
Definition GVN.cpp:2033
static cl::opt< uint32_t > MaxNumInsnsPerBlock("gvn-max-num-insns", cl::Hidden, cl::init(100), cl::desc("Max number of instructions to scan in each basic block in GVN " "(default = 100)"))
static cl::opt< bool > GVNEnableMemDep("enable-gvn-memdep", cl::init(true))
static cl::opt< bool > GVNEnableLoadInLoopPRE("enable-load-in-loop-pre", cl::init(true))
static const Instruction * findMayClobberedPtrAccess(LoadInst *Load, const DominatorTree *DT)
Definition GVN.cpp:1219
static cl::opt< uint32_t > MaxNumDeps("gvn-max-num-deps", cl::Hidden, cl::init(100), cl::desc("Max number of dependences to attempt Load PRE (default = 100)"))
static std::optional< MemoryLocation > maybeLoadStoreLocation(Instruction *I, bool AllowStores, const TargetLibraryInfo *TLI)
Return the memory location accessed by the (masked) load/store instruction I, if the instruction coul...
Definition GVN.cpp:2286
static cl::opt< bool > GVNEnableMemorySSA("enable-gvn-memoryssa", cl::init(false))
static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E, DominatorTree *DT)
There is an edge from 'Src' to 'Dst'.
Definition GVN.cpp:3065
static bool IsValueFullyAvailableInBlock(BasicBlock *BB, DenseMap< BasicBlock *, AvailabilityState > &FullyAvailableBlocks)
Return true if we can prove that the value we're analyzing is fully available in the specified block.
Definition GVN.cpp:958
static cl::opt< bool > GVNEnableScalarPRE("enable-scalar-pre", cl::init(true), cl::Hidden)
static Value * findDominatingValue(const MemoryLocation &Loc, Type *LoadTy, Instruction *From, AAResults *AA)
Definition GVN.cpp:1296
static bool liesBetween(const Instruction *From, Instruction *Between, const Instruction *To, const DominatorTree *DT)
Assuming To can be reached from both From and Between, does Between lie on every path from From to To...
Definition GVN.cpp:1210
static bool isLifetimeStart(const Instruction *Inst)
Definition GVN.cpp:1202
static cl::opt< bool > GVNEnableSplitBackedgeInLoadPRE("enable-split-backedge-in-load-pre", cl::init(false))
static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl)
Definition GVN.cpp:2226
static void replaceValuesPerBlockEntry(SmallVectorImpl< AvailableValueInBlock > &ValuesPerBlock, Value *OldValue, Value *NewValue)
If the specified OldValue exists in ValuesPerBlock, replace its value with NewValue.
Definition GVN.cpp:1074
static cl::opt< unsigned > ScanUsersLimit("gvn-scan-users-limit", cl::Hidden, cl::init(100), cl::desc("The number of memory accesses to scan in a block in reaching " "memory values analysis (default = 100)"))
static Value * ConstructSSAForLoadSet(LoadInst *Load, SmallVectorImpl< AvailableValueInBlock > &ValuesPerBlock, GVNPass &GVN)
Given a set of loads specified by ValuesPerBlock, construct SSA form, allowing us to eliminate Load.
Definition GVN.cpp:1093
AvailabilityState
Definition GVN.cpp:938
@ Unavailable
We know the block is not fully available. This is a fixpoint.
Definition GVN.cpp:940
@ Available
We know the block is fully available. This is a fixpoint.
Definition GVN.cpp:942
@ SpeculativelyAvailable
We do not know whether the block is fully available or not, but we are currently speculating that it ...
Definition GVN.cpp:947
static cl::opt< uint32_t > MaxNumVisitedInsts("gvn-max-num-visited-insts", cl::Hidden, cl::init(100), cl::desc("Max number of visited instructions when trying to find " "dominating value of select dependency (default = 100)"))
static cl::opt< uint32_t > MaxBBSpeculations("gvn-max-block-speculations", cl::Hidden, cl::init(600), cl::desc("Max number of blocks we're willing to speculate on (and recurse " "into) when deducing if a value is fully available or not in GVN " "(default = 600)"))
static cl::opt< bool > GVNEnableLoadPRE("enable-load-pre", cl::init(true))
This file provides the interface for LLVM's Global Value Numbering pass which eliminates fully redund...
#define DEBUG_TYPE
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
#define _
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
This defines the Use class.
static bool lookup(const GsymReader &GR, GsymDataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr, uint64_t Addr, SourceLocations &SrcLocs, llvm::Error &Err)
A Lookup helper functions.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
This file implements a map that provides insertion order iteration.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
uint64_t IntrinsicInst * II
#define P(N)
ppc ctr loops PowerPC CTR Loops Verify
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
static DominatorTree getDomTree(Function &F)
std::pair< BasicBlock *, BasicBlock * > Edge
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
@ MayAlias
The two locations may or may not alias.
@ NoAlias
The two locations do not alias at all.
@ PartialAlias
The two locations alias, but only due to a partial overlap.
@ MustAlias
The two locations precisely alias each other.
constexpr int32_t getOffset() const
constexpr bool hasOffset() const
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
iterator end() const
Definition ArrayRef.h:130
iterator begin() const
Definition ArrayRef.h:129
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
bool isEHPad() const
Return true if this basic block is an exception handling block.
Definition BasicBlock.h:704
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
LLVM_ABI Instruction::BinaryOps getBinaryOp() const
Returns the binary operation underlying the intrinsic.
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:890
Value * getCondition() const
BasicBlock * getSuccessor(unsigned i) const
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:231
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition DenseMap.h:252
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:225
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:301
iterator end()
Definition DenseMap.h:143
Analysis pass which computes a DominatorTree.
Definition Dominators.h:270
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:306
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:151
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Class representing an expression and its matching format.
unsigned getNumIndices() const
iterator_range< idx_iterator > indices() const
idx_iterator idx_begin() const
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
FunctionPass(char &pid)
Definition Pass.h:316
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
Definition Pass.cpp:193
const BasicBlock & getEntryBlock() const
Definition Function.h:783
Represents calls to the gc.relocate intrinsic.
This class holds the mapping between values and value numbers.
Definition GVN.h:164
LLVM_ABI uint32_t lookupOrAdd(MemoryAccess *MA)
Definition GVN.cpp:640
The core GVN pass object.
Definition GVN.h:131
friend class gvn::GVNLegacyPass
Definition GVN.h:250
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
Definition GVN.cpp:869
LLVM_ABI void salvageAndRemoveInstruction(Instruction *I)
This removes the specified instruction from our various maps and marks it for deletion.
Definition GVN.cpp:921
AAResults * getAliasAnalysis() const
Definition GVN.h:151
LLVM_ABI bool isLoadPREEnabled() const
Definition GVN.cpp:848
GVNPass(GVNOptions Options={})
Definition GVN.h:137
LLVM_ABI void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Definition GVN.cpp:901
LLVM_ABI bool isMemorySSAEnabled() const
Definition GVN.cpp:865
DominatorTree & getDominatorTree() const
Definition GVN.h:150
LLVM_ABI bool isLoadInLoopPREEnabled() const
Definition GVN.cpp:852
LLVM_ABI bool isScalarPREEnabled() const
Definition GVN.cpp:844
LLVM_ABI bool isLoadPRESplitBackedgeEnabled() const
Definition GVN.cpp:856
LLVM_ABI bool isMemDepEnabled() const
Definition GVN.cpp:861
Legacy wrapper pass to provide the GlobalsAAResult object.
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
LLVM_ABI bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
LLVM_ABI bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
LLVM_ABI void dropUnknownNonDebugMetadata(ArrayRef< unsigned > KnownIDs={})
Drop all unknown metadata except for debug locations.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
A wrapper class for inspecting calls to intrinsic functions.
An instruction for reading from memory.
Analysis pass that exposes the LoopInfo for a function.
Definition LoopInfo.h:587
The legacy pass manager's analysis pass to compute loop information.
Definition LoopInfo.h:612
iterator find(const KeyT &Key)
Definition MapVector.h:156
iterator end()
Definition MapVector.h:69
size_type size() const
Definition MapVector.h:58
A memory dependence query can return one of three different answers.
bool isClobber() const
Tests if this MemDepResult represents a query that is an instruction clobber dependency.
bool isNonLocal() const
Tests if this MemDepResult represents a query that is transparent to the start of the block,...
bool isDef() const
Tests if this MemDepResult represents a query that is an instruction definition dependency.
bool isLocal() const
Tests if this MemDepResult represents a valid local query (Clobber/Def).
Instruction * getInst() const
If this is a normal dependency, returns the instruction that is depended on.
This is the common base class for memset/memcpy/memmove.
BasicBlock * getBlock() const
Definition MemorySSA.h:162
An analysis that produces MemoryDependenceResults for a function.
std::vector< NonLocalDepEntry > NonLocalDepInfo
LLVM_ABI MemDepResult getDependency(Instruction *QueryInst)
Returns the instruction on which a memory operation depends.
LLVM_ABI const NonLocalDepInfo & getNonLocalCallDependency(CallBase *QueryCall)
Perform a full dependency query for the specified call, returning the set of blocks that the value is...
A wrapper analysis pass for the legacy pass manager that exposes a MemoryDepnedenceResults instance.
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
MemoryLocation getWithNewPtr(const Value *NewPtr) const
const Value * Ptr
The address of the start of the location.
static LLVM_ABI MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx, const TargetLibraryInfo *TLI)
Return a location representing a particular argument of a call.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition MemorySSA.h:529
BasicBlock * getIncomingBlock(unsigned I) const
Return incoming basic block number i.
Definition MemorySSA.h:542
MemoryAccess * getIncomingValue(unsigned I) const
Return incoming value number x.
Definition MemorySSA.h:532
An analysis that produces MemorySSA for a function.
Definition MemorySSA.h:922
Legacy analysis pass which computes MemorySSA.
Definition MemorySSA.h:975
LLVM_ABI void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition MemorySSA.h:720
LLVM_ABI bool locallyDominates(const MemoryAccess *A, const MemoryAccess *B) const
Given two memory accesses in the same basic block, determine whether MemoryAccess A dominates MemoryA...
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Definition MemorySSA.h:740
MemoryAccess * getDefiningAccess() const
Get the access that produces the memory state used by this Use.
Definition MemorySSA.h:260
This is an entry in the NonLocalDepInfo cache.
OptimizationRemarkEmitter legacy analysis pass.
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
LLVM_ABI Value * translateValue(BasicBlock *CurBB, BasicBlock *PredBB, const DominatorTree *DT, bool MustDominate)
translateValue - PHI translate the current address up the CFG from CurBB to Pred, updating our state ...
LLVM_ABI bool isPotentiallyPHITranslatable() const
isPotentiallyPHITranslatable - If this needs PHI translation, return true if we have some hope of doi...
bool needsPHITranslationFromBlock(BasicBlock *BB) const
needsPHITranslationFromBlock - Return true if moving from the specified BasicBlock to its predecessor...
Value * getAddr() const
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
AnalysisType * getAnalysisIfAvailable() const
getAnalysisIfAvailable<AnalysisType>() - Subclasses use this function to get analysis information tha...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
Helper class for SSA formation on a set of values defined in multiple blocks.
Definition SSAUpdater.h:39
LLVM_ABI void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
LLVM_ABI Value * GetValueInMiddleOfBlock(BasicBlock *BB)
Construct SSA form, materializing a value that is live in the middle of the specified block.
LLVM_ABI bool HasValueForBlock(BasicBlock *BB) const
Return true if the SSAUpdater already has a value for the specified block.
LLVM_ABI void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
std::pair< Value *, SelectAddrs > getSelectCondAndAddrs() const
Value * getAddr() const
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, const Instruction *MDFrom=nullptr)
bool erase(PtrType Ptr)
Remove pointer from the set.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
SmallVector & operator=(const SmallVector &RHS)
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI bool isTokenLikeTy() const
Returns true if this is 'token' or a token-like target type.s.
Definition Type.cpp:1144
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:307
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition Type.h:285
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:141
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
op_range operands()
Definition User.h:267
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
iterator_range< user_iterator > users()
Definition Value.h:426
bool hasUseList() const
Check if this Value has a use-list.
Definition Value.h:344
LLVM_ABI bool canBeFreed() const
Return true if the memory object referred to by V can by freed in the scope for which the SSA value d...
Definition Value.cpp:832
LLVM_ABI void deleteValue()
Delete a pointer to a generic Value.
Definition Value.cpp:108
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:319
An efficient, type-erasing, non-owning reference to a callable.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition GVN.cpp:4009
GVNLegacyPass(bool MemDepAnalysis=GVNEnableMemDep, bool MemSSAAnalysis=GVNEnableMemorySSA, bool ScalarPRE=true)
Definition GVN.cpp:3978
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
Definition GVN.cpp:3988
An opaque object representing a hash code.
Definition Hashing.h:77
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Entry
Definition COFF.h:862
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedStore(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedStore Intrinsic.
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
auto m_Value()
Match an arbitrary value and ignore it.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
NoWrapTrunc_match< OpTy, TruncInst::NoUnsignedWrap > m_NUWTrunc(const OpTy &Op)
Matches trunc nuw.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
LLVM_ABI int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr, StoreInst *DepSI, const DataLayout &DL)
This function determines whether a value for the pointer LoadPtr can be extracted from the store at D...
LLVM_ABI Value * getMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, Type *LoadTy, Instruction *InsertPt, const DataLayout &DL)
If analyzeLoadFromClobberingMemInst returned an offset, this function can be used to actually perform...
LLVM_ABI int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI, const DataLayout &DL)
This function determines whether a value for the pointer LoadPtr can be extracted from the load at De...
LLVM_ABI Value * getValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy, Instruction *InsertPt, Function *F)
If analyzeLoadFromClobberingStore/Load returned an offset, this function can be used to actually perf...
LLVM_ABI int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, MemIntrinsic *DepMI, const DataLayout &DL)
This function determines whether a value for the pointer LoadPtr can be extracted from the memory int...
LLVM_ABI bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy, Function *F)
Return true if CoerceAvailableValueToLoadType would succeed if it was called.
initializer< Ty > init(const Ty &Val)
A private "module" namespace for types and utilities used by GVN.
Definition GVN.h:66
Add a small namespace to avoid name clashes with the classes used in the streaming interface.
NodeAddr< InstrNode * > Instr
Definition RDFGraph.h:389
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:573
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
hash_code hash_value(const FixedPointSemantics &Val)
LLVM_ABI Constant * getInitialValueOfAllocation(const Value *V, const TargetLibraryInfo *TLI, Type *Ty)
If this is a call to an allocation function that initializes memory to a fixed value,...
LLVM_ABI unsigned replaceDominatedUsesWithIf(Value *From, Value *To, DominatorTree &DT, const BasicBlockEdge &Edge, function_ref< bool(const Use &U, const Value *To)> ShouldReplace)
Replace each use of 'From' with 'To' if that use is dominated by the given edge and the callback Shou...
Definition Local.cpp:3277
RelativeUniformCounterPtr Values
Definition InstrProf.h:91
LLVM_ABI unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ)
Search for the specified successor of basic block BB and return its position in the terminator instru...
Definition CFG.cpp:90
auto pred_end(const MachineBasicBlock *BB)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI FunctionPass * createGVNPass(bool ScalarPRE)
Create a legacy GVN pass.
Definition GVN.cpp:4046
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition Utils.cpp:1690
auto successors(const MachineBasicBlock *BB)
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2208
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
LLVM_ABI bool isNoAliasCall(const Value *V)
Return true if this pointer is returned by a noalias function.
LLVM_ABI bool isAssumeWithEmptyBundle(const AssumeInst &Assume)
Return true iff the operand bundles of the provided llvm.assume doesn't contain any valuable informat...
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Value
Definition InstrProf.h:143
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition Local.cpp:403
LLVM_ABI bool canReplacePointersInUseIfEqual(const Use &U, const Value *To, const DataLayout &DL)
Definition Loads.cpp:862
LLVM_ABI bool canReplacePointersIfEqual(const Value *From, const Value *To, const DataLayout &DL)
Returns true if a pointer value From can be replaced with another pointer value \To if they are deeme...
Definition Loads.cpp:882
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI void patchReplacementInstruction(Instruction *I, Value *Repl)
Patch the replacement so that it is not more restrictive than the value being replaced.
Definition Local.cpp:3177
LLVM_ABI void initializeGVNLegacyPassPass(PassRegistry &)
LLVM_ABI unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT, const BasicBlockEdge &Edge)
Replace each use of 'From' with 'To' if that use is dominated by the given edge.
Definition Local.cpp:3256
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
@ Success
The lock was released successfully.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
LLVM_ABI void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition Local.cpp:3105
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
Definition ModRef.h:28
@ Ref
The access may reference the value stored in memory.
Definition ModRef.h:32
@ NoModRef
The access neither references nor modifies the value stored in memory.
Definition ModRef.h:30
LLVM_ABI bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition MemorySSA.cpp:84
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
LLVM_ABI bool salvageKnowledge(Instruction *I, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Calls BuildAssumeFromInst and if the resulting llvm.assume is valid insert if before I.
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
LLVM_ABI FunctionPass * createGVNPass()
Definition GVN.cpp:4045
LLVM_ABI bool isPotentiallyReachable(const Instruction *From, const Instruction *To, const SmallPtrSetImpl< BasicBlock * > *ExclusionSet=nullptr, const DominatorTree *DT=nullptr, const LoopInfo *LI=nullptr, const CycleInfo *CI=nullptr)
Determine whether instruction 'To' is reachable from 'From', without passing through any blocks in Ex...
Definition CFG.cpp:335
DWARFExpression::Operation Op
LLVM_ABI BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
LLVM_ABI bool isCriticalEdge(const Instruction *TI, unsigned SuccNum, bool AllowIdenticalEdges=false)
Return true if the specified edge is a critical edge.
Definition CFG.cpp:106
constexpr unsigned BitWidth
auto pred_begin(const MachineBasicBlock *BB)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Next
Definition InstrProf.h:147
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:107
iterator_range< df_iterator< T > > depth_first(const T &G)
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:305
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI bool EliminateDuplicatePHINodes(BasicBlock *BB)
Check for and eliminate duplicate PHI nodes in this block.
Definition Local.cpp:1509
bool isStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
Returns true if ao is stronger than other as defined by the AtomicOrdering lattice,...
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition Hashing.h:285
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define N
static bool isEqual(const GVNPass::Expression &LHS, const GVNPass::Expression &RHS)
Definition GVN.cpp:186
static unsigned getHashValue(const GVNPass::Expression &E)
Definition GVN.cpp:180
An information struct used to provide DenseMap with the various necessary components for a given valu...
A set of parameters to control various transforms performed by GVN pass.
Definition GVN.h:81
bool operator==(const Expression &Other) const
Definition GVN.cpp:158
friend hash_code hash_value(const Expression &Value)
Definition GVN.cpp:173
SmallVector< uint32_t, 4 > VarArgs
Definition GVN.cpp:152
AttributeList Attrs
Definition GVN.cpp:154
Expression(uint32_t Op=~2U)
Definition GVN.cpp:156
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:89
Represents an AvailableValue which can be rematerialized at the end of the associated BasicBlock.
Definition GVN.cpp:292
static AvailableValueInBlock get(BasicBlock *BB, Value *V, unsigned Offset=0)
Definition GVN.cpp:306
static AvailableValueInBlock getUndef(BasicBlock *BB)
Definition GVN.cpp:311
static AvailableValueInBlock get(BasicBlock *BB, AvailableValue &&AV)
Definition GVN.cpp:299
AvailableValue AV
AV - The actual available value.
Definition GVN.cpp:297
BasicBlock * BB
BB - The basic block in question.
Definition GVN.cpp:294
Value * MaterializeAdjustedValue(LoadInst *Load) const
Emit code at the end of this block to adjust the value defined here to the specified type.
Definition GVN.cpp:317
Represents a particular available value that we know how to materialize.
Definition GVN.cpp:196
unsigned Offset
Offset - The byte offset in Val that is interesting for the load query.
Definition GVN.cpp:213
bool isSimpleValue() const
Definition GVN.cpp:259
bool isCoercedLoadValue() const
Definition GVN.cpp:260
static AvailableValue get(Value *V, unsigned Offset=0)
Definition GVN.cpp:217
ValType Kind
Kind of the live-out value.
Definition GVN.cpp:210
static AvailableValue getSelect(Value *Cond, Value *V1, Value *V2)
Definition GVN.cpp:249
LoadInst * getCoercedLoadValue() const
Definition GVN.cpp:270
static AvailableValue getLoad(LoadInst *Load, unsigned Offset=0)
Definition GVN.cpp:233
static AvailableValue getMI(MemIntrinsic *MI, unsigned Offset=0)
Definition GVN.cpp:225
bool isUndefValue() const
Definition GVN.cpp:262
bool isSelectValue() const
Definition GVN.cpp:263
Value * Val
Val - The value that is live out of the block.
Definition GVN.cpp:208
Value * V1
V1, V2 - The dominating non-clobbered values of SelectVal.
Definition GVN.cpp:215
static AvailableValue getUndef()
Definition GVN.cpp:241
Value * getSelectCondition() const
Definition GVN.cpp:280
Value * getSimpleValue() const
Definition GVN.cpp:265
bool isMemIntrinValue() const
Definition GVN.cpp:261
MemIntrinsic * getMemIntrinValue() const
Definition GVN.cpp:275
Value * MaterializeAdjustedValue(LoadInst *Load, Instruction *InsertPt) const
Emit code at the specified insertion point to adjust the value defined here to the specified type.
Definition GVN.cpp:1136