LLVM API Documentation

GVN.cpp
Go to the documentation of this file.
00001 //===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This pass performs global value numbering to eliminate fully redundant
00011 // instructions.  It also performs simple dead load elimination.
00012 //
00013 // Note that this pass does the value numbering itself; it does not use the
00014 // ValueNumbering analysis passes.
00015 //
00016 //===----------------------------------------------------------------------===//
00017 
00018 #define DEBUG_TYPE "gvn"
00019 #include "llvm/Transforms/Scalar.h"
00020 #include "llvm/ADT/DenseMap.h"
00021 #include "llvm/ADT/DepthFirstIterator.h"
00022 #include "llvm/ADT/Hashing.h"
00023 #include "llvm/ADT/SmallPtrSet.h"
00024 #include "llvm/ADT/Statistic.h"
00025 #include "llvm/Analysis/AliasAnalysis.h"
00026 #include "llvm/Analysis/ConstantFolding.h"
00027 #include "llvm/Analysis/Dominators.h"
00028 #include "llvm/Analysis/InstructionSimplify.h"
00029 #include "llvm/Analysis/Loads.h"
00030 #include "llvm/Analysis/MemoryBuiltins.h"
00031 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
00032 #include "llvm/Analysis/PHITransAddr.h"
00033 #include "llvm/Analysis/ValueTracking.h"
00034 #include "llvm/Assembly/Writer.h"
00035 #include "llvm/IR/DataLayout.h"
00036 #include "llvm/IR/GlobalVariable.h"
00037 #include "llvm/IR/IRBuilder.h"
00038 #include "llvm/IR/IntrinsicInst.h"
00039 #include "llvm/IR/LLVMContext.h"
00040 #include "llvm/IR/Metadata.h"
00041 #include "llvm/Support/Allocator.h"
00042 #include "llvm/Support/CommandLine.h"
00043 #include "llvm/Support/Debug.h"
00044 #include "llvm/Support/PatternMatch.h"
00045 #include "llvm/Target/TargetLibraryInfo.h"
00046 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
00047 #include "llvm/Transforms/Utils/SSAUpdater.h"
00048 #include <vector>
00049 using namespace llvm;
00050 using namespace PatternMatch;
00051 
00052 STATISTIC(NumGVNInstr,  "Number of instructions deleted");
00053 STATISTIC(NumGVNLoad,   "Number of loads deleted");
00054 STATISTIC(NumGVNPRE,    "Number of instructions PRE'd");
00055 STATISTIC(NumGVNBlocks, "Number of blocks merged");
00056 STATISTIC(NumGVNSimpl,  "Number of instructions simplified");
00057 STATISTIC(NumGVNEqProp, "Number of equalities propagated");
00058 STATISTIC(NumPRELoad,   "Number of loads PRE'd");
00059 
00060 static cl::opt<bool> EnablePRE("enable-pre",
00061                                cl::init(true), cl::Hidden);
00062 static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
00063 
00064 // Maximum allowed recursion depth.
00065 static cl::opt<uint32_t>
00066 MaxRecurseDepth("max-recurse-depth", cl::Hidden, cl::init(1000), cl::ZeroOrMore,
00067                 cl::desc("Max recurse depth (default = 1000)"));
00068 
00069 //===----------------------------------------------------------------------===//
00070 //                         ValueTable Class
00071 //===----------------------------------------------------------------------===//
00072 
00073 /// This class holds the mapping between values and value numbers.  It is used
00074 /// as an efficient mechanism to determine the expression-wise equivalence of
00075 /// two values.
00076 namespace {
00077   struct Expression {
00078     uint32_t opcode;
00079     Type *type;
00080     SmallVector<uint32_t, 4> varargs;
00081 
00082     Expression(uint32_t o = ~2U) : opcode(o) { }
00083 
00084     bool operator==(const Expression &other) const {
00085       if (opcode != other.opcode)
00086         return false;
00087       if (opcode == ~0U || opcode == ~1U)
00088         return true;
00089       if (type != other.type)
00090         return false;
00091       if (varargs != other.varargs)
00092         return false;
00093       return true;
00094     }
00095 
00096     friend hash_code hash_value(const Expression &Value) {
00097       return hash_combine(Value.opcode, Value.type,
00098                           hash_combine_range(Value.varargs.begin(),
00099                                              Value.varargs.end()));
00100     }
00101   };
00102 
00103   class ValueTable {
00104     DenseMap<Value*, uint32_t> valueNumbering;
00105     DenseMap<Expression, uint32_t> expressionNumbering;
00106     AliasAnalysis *AA;
00107     MemoryDependenceAnalysis *MD;
00108     DominatorTree *DT;
00109 
00110     uint32_t nextValueNumber;
00111 
00112     Expression create_expression(Instruction* I);
00113     Expression create_cmp_expression(unsigned Opcode,
00114                                      CmpInst::Predicate Predicate,
00115                                      Value *LHS, Value *RHS);
00116     Expression create_extractvalue_expression(ExtractValueInst* EI);
00117     uint32_t lookup_or_add_call(CallInst* C);
00118   public:
00119     ValueTable() : nextValueNumber(1) { }
00120     uint32_t lookup_or_add(Value *V);
00121     uint32_t lookup(Value *V) const;
00122     uint32_t lookup_or_add_cmp(unsigned Opcode, CmpInst::Predicate Pred,
00123                                Value *LHS, Value *RHS);
00124     void add(Value *V, uint32_t num);
00125     void clear();
00126     void erase(Value *v);
00127     void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
00128     AliasAnalysis *getAliasAnalysis() const { return AA; }
00129     void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
00130     void setDomTree(DominatorTree* D) { DT = D; }
00131     uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
00132     void verifyRemoved(const Value *) const;
00133   };
00134 }
00135 
00136 namespace llvm {
00137 template <> struct DenseMapInfo<Expression> {
00138   static inline Expression getEmptyKey() {
00139     return ~0U;
00140   }
00141 
00142   static inline Expression getTombstoneKey() {
00143     return ~1U;
00144   }
00145 
00146   static unsigned getHashValue(const Expression e) {
00147     using llvm::hash_value;
00148     return static_cast<unsigned>(hash_value(e));
00149   }
00150   static bool isEqual(const Expression &LHS, const Expression &RHS) {
00151     return LHS == RHS;
00152   }
00153 };
00154 
00155 }
00156 
00157 //===----------------------------------------------------------------------===//
00158 //                     ValueTable Internal Functions
00159 //===----------------------------------------------------------------------===//
00160 
00161 Expression ValueTable::create_expression(Instruction *I) {
00162   Expression e;
00163   e.type = I->getType();
00164   e.opcode = I->getOpcode();
00165   for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
00166        OI != OE; ++OI)
00167     e.varargs.push_back(lookup_or_add(*OI));
00168   if (I->isCommutative()) {
00169     // Ensure that commutative instructions that only differ by a permutation
00170     // of their operands get the same value number by sorting the operand value
00171     // numbers.  Since all commutative instructions have two operands it is more
00172     // efficient to sort by hand rather than using, say, std::sort.
00173     assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!");
00174     if (e.varargs[0] > e.varargs[1])
00175       std::swap(e.varargs[0], e.varargs[1]);
00176   }
00177 
00178   if (CmpInst *C = dyn_cast<CmpInst>(I)) {
00179     // Sort the operand value numbers so x<y and y>x get the same value number.
00180     CmpInst::Predicate Predicate = C->getPredicate();
00181     if (e.varargs[0] > e.varargs[1]) {
00182       std::swap(e.varargs[0], e.varargs[1]);
00183       Predicate = CmpInst::getSwappedPredicate(Predicate);
00184     }
00185     e.opcode = (C->getOpcode() << 8) | Predicate;
00186   } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) {
00187     for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
00188          II != IE; ++II)
00189       e.varargs.push_back(*II);
00190   }
00191 
00192   return e;
00193 }
00194 
00195 Expression ValueTable::create_cmp_expression(unsigned Opcode,
00196                                              CmpInst::Predicate Predicate,
00197                                              Value *LHS, Value *RHS) {
00198   assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
00199          "Not a comparison!");
00200   Expression e;
00201   e.type = CmpInst::makeCmpResultType(LHS->getType());
00202   e.varargs.push_back(lookup_or_add(LHS));
00203   e.varargs.push_back(lookup_or_add(RHS));
00204 
00205   // Sort the operand value numbers so x<y and y>x get the same value number.
00206   if (e.varargs[0] > e.varargs[1]) {
00207     std::swap(e.varargs[0], e.varargs[1]);
00208     Predicate = CmpInst::getSwappedPredicate(Predicate);
00209   }
00210   e.opcode = (Opcode << 8) | Predicate;
00211   return e;
00212 }
00213 
00214 Expression ValueTable::create_extractvalue_expression(ExtractValueInst *EI) {
00215   assert(EI != 0 && "Not an ExtractValueInst?");
00216   Expression e;
00217   e.type = EI->getType();
00218   e.opcode = 0;
00219 
00220   IntrinsicInst *I = dyn_cast<IntrinsicInst>(EI->getAggregateOperand());
00221   if (I != 0 && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) {
00222     // EI might be an extract from one of our recognised intrinsics. If it
00223     // is we'll synthesize a semantically equivalent expression instead on
00224     // an extract value expression.
00225     switch (I->getIntrinsicID()) {
00226       case Intrinsic::sadd_with_overflow:
00227       case Intrinsic::uadd_with_overflow:
00228         e.opcode = Instruction::Add;
00229         break;
00230       case Intrinsic::ssub_with_overflow:
00231       case Intrinsic::usub_with_overflow:
00232         e.opcode = Instruction::Sub;
00233         break;
00234       case Intrinsic::smul_with_overflow:
00235       case Intrinsic::umul_with_overflow:
00236         e.opcode = Instruction::Mul;
00237         break;
00238       default:
00239         break;
00240     }
00241 
00242     if (e.opcode != 0) {
00243       // Intrinsic recognized. Grab its args to finish building the expression.
00244       assert(I->getNumArgOperands() == 2 &&
00245              "Expect two args for recognised intrinsics.");
00246       e.varargs.push_back(lookup_or_add(I->getArgOperand(0)));
00247       e.varargs.push_back(lookup_or_add(I->getArgOperand(1)));
00248       return e;
00249     }
00250   }
00251 
00252   // Not a recognised intrinsic. Fall back to producing an extract value
00253   // expression.
00254   e.opcode = EI->getOpcode();
00255   for (Instruction::op_iterator OI = EI->op_begin(), OE = EI->op_end();
00256        OI != OE; ++OI)
00257     e.varargs.push_back(lookup_or_add(*OI));
00258 
00259   for (ExtractValueInst::idx_iterator II = EI->idx_begin(), IE = EI->idx_end();
00260          II != IE; ++II)
00261     e.varargs.push_back(*II);
00262 
00263   return e;
00264 }
00265 
00266 //===----------------------------------------------------------------------===//
00267 //                     ValueTable External Functions
00268 //===----------------------------------------------------------------------===//
00269 
00270 /// add - Insert a value into the table with a specified value number.
00271 void ValueTable::add(Value *V, uint32_t num) {
00272   valueNumbering.insert(std::make_pair(V, num));
00273 }
00274 
00275 uint32_t ValueTable::lookup_or_add_call(CallInst *C) {
00276   if (AA->doesNotAccessMemory(C)) {
00277     Expression exp = create_expression(C);
00278     uint32_t &e = expressionNumbering[exp];
00279     if (!e) e = nextValueNumber++;
00280     valueNumbering[C] = e;
00281     return e;
00282   } else if (AA->onlyReadsMemory(C)) {
00283     Expression exp = create_expression(C);
00284     uint32_t &e = expressionNumbering[exp];
00285     if (!e) {
00286       e = nextValueNumber++;
00287       valueNumbering[C] = e;
00288       return e;
00289     }
00290     if (!MD) {
00291       e = nextValueNumber++;
00292       valueNumbering[C] = e;
00293       return e;
00294     }
00295 
00296     MemDepResult local_dep = MD->getDependency(C);
00297 
00298     if (!local_dep.isDef() && !local_dep.isNonLocal()) {
00299       valueNumbering[C] =  nextValueNumber;
00300       return nextValueNumber++;
00301     }
00302 
00303     if (local_dep.isDef()) {
00304       CallInst* local_cdep = cast<CallInst>(local_dep.getInst());
00305 
00306       if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
00307         valueNumbering[C] = nextValueNumber;
00308         return nextValueNumber++;
00309       }
00310 
00311       for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
00312         uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
00313         uint32_t cd_vn = lookup_or_add(local_cdep->getArgOperand(i));
00314         if (c_vn != cd_vn) {
00315           valueNumbering[C] = nextValueNumber;
00316           return nextValueNumber++;
00317         }
00318       }
00319 
00320       uint32_t v = lookup_or_add(local_cdep);
00321       valueNumbering[C] = v;
00322       return v;
00323     }
00324 
00325     // Non-local case.
00326     const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
00327       MD->getNonLocalCallDependency(CallSite(C));
00328     // FIXME: Move the checking logic to MemDep!
00329     CallInst* cdep = 0;
00330 
00331     // Check to see if we have a single dominating call instruction that is
00332     // identical to C.
00333     for (unsigned i = 0, e = deps.size(); i != e; ++i) {
00334       const NonLocalDepEntry *I = &deps[i];
00335       if (I->getResult().isNonLocal())
00336         continue;
00337 
00338       // We don't handle non-definitions.  If we already have a call, reject
00339       // instruction dependencies.
00340       if (!I->getResult().isDef() || cdep != 0) {
00341         cdep = 0;
00342         break;
00343       }
00344 
00345       CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst());
00346       // FIXME: All duplicated with non-local case.
00347       if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){
00348         cdep = NonLocalDepCall;
00349         continue;
00350       }
00351 
00352       cdep = 0;
00353       break;
00354     }
00355 
00356     if (!cdep) {
00357       valueNumbering[C] = nextValueNumber;
00358       return nextValueNumber++;
00359     }
00360 
00361     if (cdep->getNumArgOperands() != C->getNumArgOperands()) {
00362       valueNumbering[C] = nextValueNumber;
00363       return nextValueNumber++;
00364     }
00365     for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
00366       uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
00367       uint32_t cd_vn = lookup_or_add(cdep->getArgOperand(i));
00368       if (c_vn != cd_vn) {
00369         valueNumbering[C] = nextValueNumber;
00370         return nextValueNumber++;
00371       }
00372     }
00373 
00374     uint32_t v = lookup_or_add(cdep);
00375     valueNumbering[C] = v;
00376     return v;
00377 
00378   } else {
00379     valueNumbering[C] = nextValueNumber;
00380     return nextValueNumber++;
00381   }
00382 }
00383 
00384 /// lookup_or_add - Returns the value number for the specified value, assigning
00385 /// it a new number if it did not have one before.
00386 uint32_t ValueTable::lookup_or_add(Value *V) {
00387   DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
00388   if (VI != valueNumbering.end())
00389     return VI->second;
00390 
00391   if (!isa<Instruction>(V)) {
00392     valueNumbering[V] = nextValueNumber;
00393     return nextValueNumber++;
00394   }
00395 
00396   Instruction* I = cast<Instruction>(V);
00397   Expression exp;
00398   switch (I->getOpcode()) {
00399     case Instruction::Call:
00400       return lookup_or_add_call(cast<CallInst>(I));
00401     case Instruction::Add:
00402     case Instruction::FAdd:
00403     case Instruction::Sub:
00404     case Instruction::FSub:
00405     case Instruction::Mul:
00406     case Instruction::FMul:
00407     case Instruction::UDiv:
00408     case Instruction::SDiv:
00409     case Instruction::FDiv:
00410     case Instruction::URem:
00411     case Instruction::SRem:
00412     case Instruction::FRem:
00413     case Instruction::Shl:
00414     case Instruction::LShr:
00415     case Instruction::AShr:
00416     case Instruction::And:
00417     case Instruction::Or:
00418     case Instruction::Xor:
00419     case Instruction::ICmp:
00420     case Instruction::FCmp:
00421     case Instruction::Trunc:
00422     case Instruction::ZExt:
00423     case Instruction::SExt:
00424     case Instruction::FPToUI:
00425     case Instruction::FPToSI:
00426     case Instruction::UIToFP:
00427     case Instruction::SIToFP:
00428     case Instruction::FPTrunc:
00429     case Instruction::FPExt:
00430     case Instruction::PtrToInt:
00431     case Instruction::IntToPtr:
00432     case Instruction::BitCast:
00433     case Instruction::Select:
00434     case Instruction::ExtractElement:
00435     case Instruction::InsertElement:
00436     case Instruction::ShuffleVector:
00437     case Instruction::InsertValue:
00438     case Instruction::GetElementPtr:
00439       exp = create_expression(I);
00440       break;
00441     case Instruction::ExtractValue:
00442       exp = create_extractvalue_expression(cast<ExtractValueInst>(I));
00443       break;
00444     default:
00445       valueNumbering[V] = nextValueNumber;
00446       return nextValueNumber++;
00447   }
00448 
00449   uint32_t& e = expressionNumbering[exp];
00450   if (!e) e = nextValueNumber++;
00451   valueNumbering[V] = e;
00452   return e;
00453 }
00454 
00455 /// lookup - Returns the value number of the specified value. Fails if
00456 /// the value has not yet been numbered.
00457 uint32_t ValueTable::lookup(Value *V) const {
00458   DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
00459   assert(VI != valueNumbering.end() && "Value not numbered?");
00460   return VI->second;
00461 }
00462 
00463 /// lookup_or_add_cmp - Returns the value number of the given comparison,
00464 /// assigning it a new number if it did not have one before.  Useful when
00465 /// we deduced the result of a comparison, but don't immediately have an
00466 /// instruction realizing that comparison to hand.
00467 uint32_t ValueTable::lookup_or_add_cmp(unsigned Opcode,
00468                                        CmpInst::Predicate Predicate,
00469                                        Value *LHS, Value *RHS) {
00470   Expression exp = create_cmp_expression(Opcode, Predicate, LHS, RHS);
00471   uint32_t& e = expressionNumbering[exp];
00472   if (!e) e = nextValueNumber++;
00473   return e;
00474 }
00475 
00476 /// clear - Remove all entries from the ValueTable.
00477 void ValueTable::clear() {
00478   valueNumbering.clear();
00479   expressionNumbering.clear();
00480   nextValueNumber = 1;
00481 }
00482 
00483 /// erase - Remove a value from the value numbering.
00484 void ValueTable::erase(Value *V) {
00485   valueNumbering.erase(V);
00486 }
00487 
00488 /// verifyRemoved - Verify that the value is removed from all internal data
00489 /// structures.
00490 void ValueTable::verifyRemoved(const Value *V) const {
00491   for (DenseMap<Value*, uint32_t>::const_iterator
00492          I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
00493     assert(I->first != V && "Inst still occurs in value numbering map!");
00494   }
00495 }
00496 
00497 //===----------------------------------------------------------------------===//
00498 //                                GVN Pass
00499 //===----------------------------------------------------------------------===//
00500 
00501 namespace {
00502   class GVN;
00503   struct AvailableValueInBlock {
00504     /// BB - The basic block in question.
00505     BasicBlock *BB;
00506     enum ValType {
00507       SimpleVal,  // A simple offsetted value that is accessed.
00508       LoadVal,    // A value produced by a load.
00509       MemIntrin   // A memory intrinsic which is loaded from.
00510     };
00511   
00512     /// V - The value that is live out of the block.
00513     PointerIntPair<Value *, 2, ValType> Val;
00514   
00515     /// Offset - The byte offset in Val that is interesting for the load query.
00516     unsigned Offset;
00517   
00518     static AvailableValueInBlock get(BasicBlock *BB, Value *V,
00519                                      unsigned Offset = 0) {
00520       AvailableValueInBlock Res;
00521       Res.BB = BB;
00522       Res.Val.setPointer(V);
00523       Res.Val.setInt(SimpleVal);
00524       Res.Offset = Offset;
00525       return Res;
00526     }
00527   
00528     static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
00529                                        unsigned Offset = 0) {
00530       AvailableValueInBlock Res;
00531       Res.BB = BB;
00532       Res.Val.setPointer(MI);
00533       Res.Val.setInt(MemIntrin);
00534       Res.Offset = Offset;
00535       return Res;
00536     }
00537   
00538     static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI,
00539                                          unsigned Offset = 0) {
00540       AvailableValueInBlock Res;
00541       Res.BB = BB;
00542       Res.Val.setPointer(LI);
00543       Res.Val.setInt(LoadVal);
00544       Res.Offset = Offset;
00545       return Res;
00546     }
00547   
00548     bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
00549     bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
00550     bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
00551   
00552     Value *getSimpleValue() const {
00553       assert(isSimpleValue() && "Wrong accessor");
00554       return Val.getPointer();
00555     }
00556   
00557     LoadInst *getCoercedLoadValue() const {
00558       assert(isCoercedLoadValue() && "Wrong accessor");
00559       return cast<LoadInst>(Val.getPointer());
00560     }
00561   
00562     MemIntrinsic *getMemIntrinValue() const {
00563       assert(isMemIntrinValue() && "Wrong accessor");
00564       return cast<MemIntrinsic>(Val.getPointer());
00565     }
00566   
00567     /// MaterializeAdjustedValue - Emit code into this block to adjust the value
00568     /// defined here to the specified type.  This handles various coercion cases.
00569     Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const;
00570   };
00571 
00572   class GVN : public FunctionPass {
00573     bool NoLoads;
00574     MemoryDependenceAnalysis *MD;
00575     DominatorTree *DT;
00576     const DataLayout *TD;
00577     const TargetLibraryInfo *TLI;
00578 
00579     ValueTable VN;
00580 
00581     /// LeaderTable - A mapping from value numbers to lists of Value*'s that
00582     /// have that value number.  Use findLeader to query it.
00583     struct LeaderTableEntry {
00584       Value *Val;
00585       const BasicBlock *BB;
00586       LeaderTableEntry *Next;
00587     };
00588     DenseMap<uint32_t, LeaderTableEntry> LeaderTable;
00589     BumpPtrAllocator TableAllocator;
00590 
00591     SmallVector<Instruction*, 8> InstrsToErase;
00592 
00593     typedef SmallVector<NonLocalDepResult, 64> LoadDepVect;
00594     typedef SmallVector<AvailableValueInBlock, 64> AvailValInBlkVect;
00595     typedef SmallVector<BasicBlock*, 64> UnavailBlkVect;
00596 
00597   public:
00598     static char ID; // Pass identification, replacement for typeid
00599     explicit GVN(bool noloads = false)
00600         : FunctionPass(ID), NoLoads(noloads), MD(0) {
00601       initializeGVNPass(*PassRegistry::getPassRegistry());
00602     }
00603 
00604     bool runOnFunction(Function &F);
00605 
00606     /// markInstructionForDeletion - This removes the specified instruction from
00607     /// our various maps and marks it for deletion.
00608     void markInstructionForDeletion(Instruction *I) {
00609       VN.erase(I);
00610       InstrsToErase.push_back(I);
00611     }
00612 
00613     const DataLayout *getDataLayout() const { return TD; }
00614     DominatorTree &getDominatorTree() const { return *DT; }
00615     AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); }
00616     MemoryDependenceAnalysis &getMemDep() const { return *MD; }
00617   private:
00618     /// addToLeaderTable - Push a new Value to the LeaderTable onto the list for
00619     /// its value number.
00620     void addToLeaderTable(uint32_t N, Value *V, const BasicBlock *BB) {
00621       LeaderTableEntry &Curr = LeaderTable[N];
00622       if (!Curr.Val) {
00623         Curr.Val = V;
00624         Curr.BB = BB;
00625         return;
00626       }
00627 
00628       LeaderTableEntry *Node = TableAllocator.Allocate<LeaderTableEntry>();
00629       Node->Val = V;
00630       Node->BB = BB;
00631       Node->Next = Curr.Next;
00632       Curr.Next = Node;
00633     }
00634 
00635     /// removeFromLeaderTable - Scan the list of values corresponding to a given
00636     /// value number, and remove the given instruction if encountered.
00637     void removeFromLeaderTable(uint32_t N, Instruction *I, BasicBlock *BB) {
00638       LeaderTableEntry* Prev = 0;
00639       LeaderTableEntry* Curr = &LeaderTable[N];
00640 
00641       while (Curr->Val != I || Curr->BB != BB) {
00642         Prev = Curr;
00643         Curr = Curr->Next;
00644       }
00645 
00646       if (Prev) {
00647         Prev->Next = Curr->Next;
00648       } else {
00649         if (!Curr->Next) {
00650           Curr->Val = 0;
00651           Curr->BB = 0;
00652         } else {
00653           LeaderTableEntry* Next = Curr->Next;
00654           Curr->Val = Next->Val;
00655           Curr->BB = Next->BB;
00656           Curr->Next = Next->Next;
00657         }
00658       }
00659     }
00660 
00661     // List of critical edges to be split between iterations.
00662     SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;
00663 
00664     // This transformation requires dominator postdominator info
00665     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
00666       AU.addRequired<DominatorTree>();
00667       AU.addRequired<TargetLibraryInfo>();
00668       if (!NoLoads)
00669         AU.addRequired<MemoryDependenceAnalysis>();
00670       AU.addRequired<AliasAnalysis>();
00671 
00672       AU.addPreserved<DominatorTree>();
00673       AU.addPreserved<AliasAnalysis>();
00674     }
00675 
00676 
00677     // Helper fuctions of redundant load elimination 
00678     bool processLoad(LoadInst *L);
00679     bool processNonLocalLoad(LoadInst *L);
00680     void AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, 
00681                                  AvailValInBlkVect &ValuesPerBlock,
00682                                  UnavailBlkVect &UnavailableBlocks);
00683     bool PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, 
00684                         UnavailBlkVect &UnavailableBlocks);
00685 
00686     // Other helper routines
00687     bool processInstruction(Instruction *I);
00688     bool processBlock(BasicBlock *BB);
00689     void dump(DenseMap<uint32_t, Value*> &d);
00690     bool iterateOnFunction(Function &F);
00691     bool performPRE(Function &F);
00692     Value *findLeader(const BasicBlock *BB, uint32_t num);
00693     void cleanupGlobalSets();
00694     void verifyRemoved(const Instruction *I) const;
00695     bool splitCriticalEdges();
00696     BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ);
00697     unsigned replaceAllDominatedUsesWith(Value *From, Value *To,
00698                                          const BasicBlockEdge &Root);
00699     bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root);
00700   };
00701 
00702   char GVN::ID = 0;
00703 }
00704 
00705 // createGVNPass - The public interface to this file...
00706 FunctionPass *llvm::createGVNPass(bool NoLoads) {
00707   return new GVN(NoLoads);
00708 }
00709 
00710 INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
00711 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
00712 INITIALIZE_PASS_DEPENDENCY(DominatorTree)
00713 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
00714 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
00715 INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)
00716 
00717 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
00718 void GVN::dump(DenseMap<uint32_t, Value*>& d) {
00719   errs() << "{\n";
00720   for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
00721        E = d.end(); I != E; ++I) {
00722       errs() << I->first << "\n";
00723       I->second->dump();
00724   }
00725   errs() << "}\n";
00726 }
00727 #endif
00728 
00729 /// IsValueFullyAvailableInBlock - Return true if we can prove that the value
00730 /// we're analyzing is fully available in the specified block.  As we go, keep
00731 /// track of which blocks we know are fully alive in FullyAvailableBlocks.  This
00732 /// map is actually a tri-state map with the following values:
00733 ///   0) we know the block *is not* fully available.
00734 ///   1) we know the block *is* fully available.
00735 ///   2) we do not know whether the block is fully available or not, but we are
00736 ///      currently speculating that it will be.
00737 ///   3) we are speculating for this block and have used that to speculate for
00738 ///      other blocks.
00739 static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
00740                             DenseMap<BasicBlock*, char> &FullyAvailableBlocks,
00741                             uint32_t RecurseDepth) {
00742   if (RecurseDepth > MaxRecurseDepth)
00743     return false;
00744 
00745   // Optimistically assume that the block is fully available and check to see
00746   // if we already know about this block in one lookup.
00747   std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =
00748     FullyAvailableBlocks.insert(std::make_pair(BB, 2));
00749 
00750   // If the entry already existed for this block, return the precomputed value.
00751   if (!IV.second) {
00752     // If this is a speculative "available" value, mark it as being used for
00753     // speculation of other blocks.
00754     if (IV.first->second == 2)
00755       IV.first->second = 3;
00756     return IV.first->second != 0;
00757   }
00758 
00759   // Otherwise, see if it is fully available in all predecessors.
00760   pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
00761 
00762   // If this block has no predecessors, it isn't live-in here.
00763   if (PI == PE)
00764     goto SpeculationFailure;
00765 
00766   for (; PI != PE; ++PI)
00767     // If the value isn't fully available in one of our predecessors, then it
00768     // isn't fully available in this block either.  Undo our previous
00769     // optimistic assumption and bail out.
00770     if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks,RecurseDepth+1))
00771       goto SpeculationFailure;
00772 
00773   return true;
00774 
00775 // SpeculationFailure - If we get here, we found out that this is not, after
00776 // all, a fully-available block.  We have a problem if we speculated on this and
00777 // used the speculation to mark other blocks as available.
00778 SpeculationFailure:
00779   char &BBVal = FullyAvailableBlocks[BB];
00780 
00781   // If we didn't speculate on this, just return with it set to false.
00782   if (BBVal == 2) {
00783     BBVal = 0;
00784     return false;
00785   }
00786 
00787   // If we did speculate on this value, we could have blocks set to 1 that are
00788   // incorrect.  Walk the (transitive) successors of this block and mark them as
00789   // 0 if set to one.
00790   SmallVector<BasicBlock*, 32> BBWorklist;
00791   BBWorklist.push_back(BB);
00792 
00793   do {
00794     BasicBlock *Entry = BBWorklist.pop_back_val();
00795     // Note that this sets blocks to 0 (unavailable) if they happen to not
00796     // already be in FullyAvailableBlocks.  This is safe.
00797     char &EntryVal = FullyAvailableBlocks[Entry];
00798     if (EntryVal == 0) continue;  // Already unavailable.
00799 
00800     // Mark as unavailable.
00801     EntryVal = 0;
00802 
00803     for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I)
00804       BBWorklist.push_back(*I);
00805   } while (!BBWorklist.empty());
00806 
00807   return false;
00808 }
00809 
00810 
00811 /// CanCoerceMustAliasedValueToLoad - Return true if
00812 /// CoerceAvailableValueToLoadType will succeed.
00813 static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
00814                                             Type *LoadTy,
00815                                             const DataLayout &TD) {
00816   // If the loaded or stored value is an first class array or struct, don't try
00817   // to transform them.  We need to be able to bitcast to integer.
00818   if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
00819       StoredVal->getType()->isStructTy() ||
00820       StoredVal->getType()->isArrayTy())
00821     return false;
00822 
00823   // The store has to be at least as big as the load.
00824   if (TD.getTypeSizeInBits(StoredVal->getType()) <
00825         TD.getTypeSizeInBits(LoadTy))
00826     return false;
00827 
00828   return true;
00829 }
00830 
00831 /// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and
00832 /// then a load from a must-aliased pointer of a different type, try to coerce
00833 /// the stored value.  LoadedTy is the type of the load we want to replace and
00834 /// InsertPt is the place to insert new instructions.
00835 ///
00836 /// If we can't do it, return null.
00837 static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
00838                                              Type *LoadedTy,
00839                                              Instruction *InsertPt,
00840                                              const DataLayout &TD) {
00841   if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
00842     return 0;
00843 
00844   // If this is already the right type, just return it.
00845   Type *StoredValTy = StoredVal->getType();
00846 
00847   uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy);
00848   uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);
00849 
00850   // If the store and reload are the same size, we can always reuse it.
00851   if (StoreSize == LoadSize) {
00852     // Pointer to Pointer -> use bitcast.
00853     if (StoredValTy->getScalarType()->isPointerTy() &&
00854         LoadedTy->getScalarType()->isPointerTy())
00855       return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
00856 
00857     // Convert source pointers to integers, which can be bitcast.
00858     if (StoredValTy->getScalarType()->isPointerTy()) {
00859       StoredValTy = TD.getIntPtrType(StoredValTy);
00860       StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
00861     }
00862 
00863     Type *TypeToCastTo = LoadedTy;
00864     if (TypeToCastTo->getScalarType()->isPointerTy())
00865       TypeToCastTo = TD.getIntPtrType(TypeToCastTo);
00866 
00867     if (StoredValTy != TypeToCastTo)
00868       StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);
00869 
00870     // Cast to pointer if the load needs a pointer type.
00871     if (LoadedTy->getScalarType()->isPointerTy())
00872       StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);
00873 
00874     return StoredVal;
00875   }
00876 
00877   // If the loaded value is smaller than the available value, then we can
00878   // extract out a piece from it.  If the available value is too small, then we
00879   // can't do anything.
00880   assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");
00881 
00882   // Convert source pointers to integers, which can be manipulated.
00883   if (StoredValTy->getScalarType()->isPointerTy()) {
00884     StoredValTy = TD.getIntPtrType(StoredValTy);
00885     StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
00886   }
00887 
00888   // Convert vectors and fp to integer, which can be manipulated.
00889   if (!StoredValTy->isIntegerTy()) {
00890     StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
00891     StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
00892   }
00893 
00894   // If this is a big-endian system, we need to shift the value down to the low
00895   // bits so that a truncate will work.
00896   if (TD.isBigEndian()) {
00897     Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize);
00898     StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
00899   }
00900 
00901   // Truncate the integer to the right size now.
00902   Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
00903   StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);
00904 
00905   if (LoadedTy == NewIntTy)
00906     return StoredVal;
00907 
00908   // If the result is a pointer, inttoptr.
00909   if (LoadedTy->getScalarType()->isPointerTy())
00910     return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);
00911 
00912   // Otherwise, bitcast.
00913   return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
00914 }
00915 
00916 /// AnalyzeLoadFromClobberingWrite - This function is called when we have a
00917 /// memdep query of a load that ends up being a clobbering memory write (store,
00918 /// memset, memcpy, memmove).  This means that the write *may* provide bits used
00919 /// by the load but we can't be sure because the pointers don't mustalias.
00920 ///
00921 /// Check this case to see if there is anything more we can do before we give
00922 /// up.  This returns -1 if we have to give up, or a byte number in the stored
00923 /// value of the piece that feeds the load.
00924 static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
00925                                           Value *WritePtr,
00926                                           uint64_t WriteSizeInBits,
00927                                           const DataLayout &TD) {
00928   // If the loaded or stored value is a first class array or struct, don't try
00929   // to transform them.  We need to be able to bitcast to integer.
00930   if (LoadTy->isStructTy() || LoadTy->isArrayTy())
00931     return -1;
00932 
00933   int64_t StoreOffset = 0, LoadOffset = 0;
00934   Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr,StoreOffset,&TD);
00935   Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, &TD);
00936   if (StoreBase != LoadBase)
00937     return -1;
00938 
00939   // If the load and store are to the exact same address, they should have been
00940   // a must alias.  AA must have gotten confused.
00941   // FIXME: Study to see if/when this happens.  One case is forwarding a memset
00942   // to a load from the base of the memset.
00943 #if 0
00944   if (LoadOffset == StoreOffset) {
00945     dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
00946     << "Base       = " << *StoreBase << "\n"
00947     << "Store Ptr  = " << *WritePtr << "\n"
00948     << "Store Offs = " << StoreOffset << "\n"
00949     << "Load Ptr   = " << *LoadPtr << "\n";
00950     abort();
00951   }
00952 #endif
00953 
00954   // If the load and store don't overlap at all, the store doesn't provide
00955   // anything to the load.  In this case, they really don't alias at all, AA
00956   // must have gotten confused.
00957   uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy);
00958 
00959   if ((WriteSizeInBits & 7) | (LoadSize & 7))
00960     return -1;
00961   uint64_t StoreSize = WriteSizeInBits >> 3;  // Convert to bytes.
00962   LoadSize >>= 3;
00963 
00964 
00965   bool isAAFailure = false;
00966   if (StoreOffset < LoadOffset)
00967     isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
00968   else
00969     isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;
00970 
00971   if (isAAFailure) {
00972 #if 0
00973     dbgs() << "STORE LOAD DEP WITH COMMON BASE:\n"
00974     << "Base       = " << *StoreBase << "\n"
00975     << "Store Ptr  = " << *WritePtr << "\n"
00976     << "Store Offs = " << StoreOffset << "\n"
00977     << "Load Ptr   = " << *LoadPtr << "\n";
00978     abort();
00979 #endif
00980     return -1;
00981   }
00982 
00983   // If the Load isn't completely contained within the stored bits, we don't
00984   // have all the bits to feed it.  We could do something crazy in the future
00985   // (issue a smaller load then merge the bits in) but this seems unlikely to be
00986   // valuable.
00987   if (StoreOffset > LoadOffset ||
00988       StoreOffset+StoreSize < LoadOffset+LoadSize)
00989     return -1;
00990 
00991   // Okay, we can do this transformation.  Return the number of bytes into the
00992   // store that the load is.
00993   return LoadOffset-StoreOffset;
00994 }
00995 
00996 /// AnalyzeLoadFromClobberingStore - This function is called when we have a
00997 /// memdep query of a load that ends up being a clobbering store.
00998 static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
00999                                           StoreInst *DepSI,
01000                                           const DataLayout &TD) {
01001   // Cannot handle reading from store of first-class aggregate yet.
01002   if (DepSI->getValueOperand()->getType()->isStructTy() ||
01003       DepSI->getValueOperand()->getType()->isArrayTy())
01004     return -1;
01005 
01006   Value *StorePtr = DepSI->getPointerOperand();
01007   uint64_t StoreSize =TD.getTypeSizeInBits(DepSI->getValueOperand()->getType());
01008   return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
01009                                         StorePtr, StoreSize, TD);
01010 }
01011 
01012 /// AnalyzeLoadFromClobberingLoad - This function is called when we have a
01013 /// memdep query of a load that ends up being clobbered by another load.  See if
01014 /// the other load can feed into the second load.
01015 static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr,
01016                                          LoadInst *DepLI, const DataLayout &TD){
01017   // Cannot handle reading from store of first-class aggregate yet.
01018   if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
01019     return -1;
01020 
01021   Value *DepPtr = DepLI->getPointerOperand();
01022   uint64_t DepSize = TD.getTypeSizeInBits(DepLI->getType());
01023   int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, TD);
01024   if (R != -1) return R;
01025 
01026   // If we have a load/load clobber an DepLI can be widened to cover this load,
01027   // then we should widen it!
01028   int64_t LoadOffs = 0;
01029   const Value *LoadBase =
01030     GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, &TD);
01031   unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
01032 
01033   unsigned Size = MemoryDependenceAnalysis::
01034     getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI, TD);
01035   if (Size == 0) return -1;
01036 
01037   return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, TD);
01038 }
01039 
01040 
01041 
01042 static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
01043                                             MemIntrinsic *MI,
01044                                             const DataLayout &TD) {
01045   // If the mem operation is a non-constant size, we can't handle it.
01046   ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
01047   if (SizeCst == 0) return -1;
01048   uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;
01049 
01050   // If this is memset, we just need to see if the offset is valid in the size
01051   // of the memset..
01052   if (MI->getIntrinsicID() == Intrinsic::memset)
01053     return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
01054                                           MemSizeInBits, TD);
01055 
01056   // If we have a memcpy/memmove, the only case we can handle is if this is a
01057   // copy from constant memory.  In that case, we can read directly from the
01058   // constant memory.
01059   MemTransferInst *MTI = cast<MemTransferInst>(MI);
01060 
01061   Constant *Src = dyn_cast<Constant>(MTI->getSource());
01062   if (Src == 0) return -1;
01063 
01064   GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, &TD));
01065   if (GV == 0 || !GV->isConstant()) return -1;
01066 
01067   // See if the access is within the bounds of the transfer.
01068   int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
01069                                               MI->getDest(), MemSizeInBits, TD);
01070   if (Offset == -1)
01071     return Offset;
01072 
01073   // Otherwise, see if we can constant fold a load from the constant with the
01074   // offset applied as appropriate.
01075   Src = ConstantExpr::getBitCast(Src,
01076                                  llvm::Type::getInt8PtrTy(Src->getContext()));
01077   Constant *OffsetCst =
01078     ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
01079   Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
01080   Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
01081   if (ConstantFoldLoadFromConstPtr(Src, &TD))
01082     return Offset;
01083   return -1;
01084 }
01085 
01086 
01087 /// GetStoreValueForLoad - This function is called when we have a
01088 /// memdep query of a load that ends up being a clobbering store.  This means
01089 /// that the store provides bits used by the load but we the pointers don't
01090 /// mustalias.  Check this case to see if there is anything more we can do
01091 /// before we give up.
01092 static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
01093                                    Type *LoadTy,
01094                                    Instruction *InsertPt, const DataLayout &TD){
01095   LLVMContext &Ctx = SrcVal->getType()->getContext();
01096 
01097   uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
01098   uint64_t LoadSize = (TD.getTypeSizeInBits(LoadTy) + 7) / 8;
01099 
01100   IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
01101 
01102   // Compute which bits of the stored value are being used by the load.  Convert
01103   // to an integer type to start with.
01104   if (SrcVal->getType()->getScalarType()->isPointerTy())
01105     SrcVal = Builder.CreatePtrToInt(SrcVal,
01106         TD.getIntPtrType(SrcVal->getType()));
01107   if (!SrcVal->getType()->isIntegerTy())
01108     SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8));
01109 
01110   // Shift the bits to the least significant depending on endianness.
01111   unsigned ShiftAmt;
01112   if (TD.isLittleEndian())
01113     ShiftAmt = Offset*8;
01114   else
01115     ShiftAmt = (StoreSize-LoadSize-Offset)*8;
01116 
01117   if (ShiftAmt)
01118     SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt);
01119 
01120   if (LoadSize != StoreSize)
01121     SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8));
01122 
01123   return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
01124 }
01125 
01126 /// GetLoadValueForLoad - This function is called when we have a
01127 /// memdep query of a load that ends up being a clobbering load.  This means
01128 /// that the load *may* provide bits used by the load but we can't be sure
01129 /// because the pointers don't mustalias.  Check this case to see if there is
01130 /// anything more we can do before we give up.
01131 static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
01132                                   Type *LoadTy, Instruction *InsertPt,
01133                                   GVN &gvn) {
01134   const DataLayout &TD = *gvn.getDataLayout();
01135   // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
01136   // widen SrcVal out to a larger load.
01137   unsigned SrcValSize = TD.getTypeStoreSize(SrcVal->getType());
01138   unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
01139   if (Offset+LoadSize > SrcValSize) {
01140     assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!");
01141     assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load");
01142     // If we have a load/load clobber an DepLI can be widened to cover this
01143     // load, then we should widen it to the next power of 2 size big enough!
01144     unsigned NewLoadSize = Offset+LoadSize;
01145     if (!isPowerOf2_32(NewLoadSize))
01146       NewLoadSize = NextPowerOf2(NewLoadSize);
01147 
01148     Value *PtrVal = SrcVal->getPointerOperand();
01149 
01150     // Insert the new load after the old load.  This ensures that subsequent
01151     // memdep queries will find the new load.  We can't easily remove the old
01152     // load completely because it is already in the value numbering table.
01153     IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal));
01154     Type *DestPTy =
01155       IntegerType::get(LoadTy->getContext(), NewLoadSize*8);
01156     DestPTy = PointerType::get(DestPTy,
01157                        cast<PointerType>(PtrVal->getType())->getAddressSpace());
01158     Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc());
01159     PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
01160     LoadInst *NewLoad = Builder.CreateLoad(PtrVal);
01161     NewLoad->takeName(SrcVal);
01162     NewLoad->setAlignment(SrcVal->getAlignment());
01163 
01164     DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
01165     DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
01166 
01167     // Replace uses of the original load with the wider load.  On a big endian
01168     // system, we need to shift down to get the relevant bits.
01169     Value *RV = NewLoad;
01170     if (TD.isBigEndian())
01171       RV = Builder.CreateLShr(RV,
01172                     NewLoadSize*8-SrcVal->getType()->getPrimitiveSizeInBits());
01173     RV = Builder.CreateTrunc(RV, SrcVal->getType());
01174     SrcVal->replaceAllUsesWith(RV);
01175 
01176     // We would like to use gvn.markInstructionForDeletion here, but we can't
01177     // because the load is already memoized into the leader map table that GVN
01178     // tracks.  It is potentially possible to remove the load from the table,
01179     // but then there all of the operations based on it would need to be
01180     // rehashed.  Just leave the dead load around.
01181     gvn.getMemDep().removeInstruction(SrcVal);
01182     SrcVal = NewLoad;
01183   }
01184 
01185   return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, TD);
01186 }
01187 
01188 
01189 /// GetMemInstValueForLoad - This function is called when we have a
01190 /// memdep query of a load that ends up being a clobbering mem intrinsic.
01191 static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
01192                                      Type *LoadTy, Instruction *InsertPt,
01193                                      const DataLayout &TD){
01194   LLVMContext &Ctx = LoadTy->getContext();
01195   uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;
01196 
01197   IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
01198 
01199   // We know that this method is only called when the mem transfer fully
01200   // provides the bits for the load.
01201   if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
01202     // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
01203     // independently of what the offset is.
01204     Value *Val = MSI->getValue();
01205     if (LoadSize != 1)
01206       Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8));
01207 
01208     Value *OneElt = Val;
01209 
01210     // Splat the value out to the right number of bits.
01211     for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
01212       // If we can double the number of bytes set, do it.
01213       if (NumBytesSet*2 <= LoadSize) {
01214         Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8);
01215         Val = Builder.CreateOr(Val, ShVal);
01216         NumBytesSet <<= 1;
01217         continue;
01218       }
01219 
01220       // Otherwise insert one byte at a time.
01221       Value *ShVal = Builder.CreateShl(Val, 1*8);
01222       Val = Builder.CreateOr(OneElt, ShVal);
01223       ++NumBytesSet;
01224     }
01225 
01226     return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD);
01227   }
01228 
01229   // Otherwise, this is a memcpy/memmove from a constant global.
01230   MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
01231   Constant *Src = cast<Constant>(MTI->getSource());
01232 
01233   // Otherwise, see if we can constant fold a load from the constant with the
01234   // offset applied as appropriate.
01235   Src = ConstantExpr::getBitCast(Src,
01236                                  llvm::Type::getInt8PtrTy(Src->getContext()));
01237   Constant *OffsetCst =
01238   ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
01239   Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
01240   Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
01241   return ConstantFoldLoadFromConstPtr(Src, &TD);
01242 }
01243 
01244 
01245 /// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
01246 /// construct SSA form, allowing us to eliminate LI.  This returns the value
01247 /// that should be used at LI's definition site.
01248 static Value *ConstructSSAForLoadSet(LoadInst *LI,
01249                          SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
01250                                      GVN &gvn) {
01251   // Check for the fully redundant, dominating load case.  In this case, we can
01252   // just use the dominating value directly.
01253   if (ValuesPerBlock.size() == 1 &&
01254       gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB,
01255                                                LI->getParent()))
01256     return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn);
01257 
01258   // Otherwise, we have to construct SSA form.
01259   SmallVector<PHINode*, 8> NewPHIs;
01260   SSAUpdater SSAUpdate(&NewPHIs);
01261   SSAUpdate.Initialize(LI->getType(), LI->getName());
01262 
01263   Type *LoadTy = LI->getType();
01264 
01265   for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
01266     const AvailableValueInBlock &AV = ValuesPerBlock[i];
01267     BasicBlock *BB = AV.BB;
01268 
01269     if (SSAUpdate.HasValueForBlock(BB))
01270       continue;
01271 
01272     SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, gvn));
01273   }
01274 
01275   // Perform PHI construction.
01276   Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
01277 
01278   // If new PHI nodes were created, notify alias analysis.
01279   if (V->getType()->getScalarType()->isPointerTy()) {
01280     AliasAnalysis *AA = gvn.getAliasAnalysis();
01281 
01282     for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
01283       AA->copyValue(LI, NewPHIs[i]);
01284 
01285     // Now that we've copied information to the new PHIs, scan through
01286     // them again and inform alias analysis that we've added potentially
01287     // escaping uses to any values that are operands to these PHIs.
01288     for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) {
01289       PHINode *P = NewPHIs[i];
01290       for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii) {
01291         unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
01292         AA->addEscapingUse(P->getOperandUse(jj));
01293       }
01294     }
01295   }
01296 
01297   return V;
01298 }
01299 
01300 Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const {
01301   Value *Res;
01302   if (isSimpleValue()) {
01303     Res = getSimpleValue();
01304     if (Res->getType() != LoadTy) {
01305       const DataLayout *TD = gvn.getDataLayout();
01306       assert(TD && "Need target data to handle type mismatch case");
01307       Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
01308                                  *TD);
01309   
01310       DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << "  "
01311                    << *getSimpleValue() << '\n'
01312                    << *Res << '\n' << "\n\n\n");
01313     }
01314   } else if (isCoercedLoadValue()) {
01315     LoadInst *Load = getCoercedLoadValue();
01316     if (Load->getType() == LoadTy && Offset == 0) {
01317       Res = Load;
01318     } else {
01319       Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(),
01320                                 gvn);
01321   
01322       DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << "  "
01323                    << *getCoercedLoadValue() << '\n'
01324                    << *Res << '\n' << "\n\n\n");
01325     }
01326   } else {
01327     const DataLayout *TD = gvn.getDataLayout();
01328     assert(TD && "Need target data to handle type mismatch case");
01329     Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
01330                                  LoadTy, BB->getTerminator(), *TD);
01331     DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
01332                  << "  " << *getMemIntrinValue() << '\n'
01333                  << *Res << '\n' << "\n\n\n");
01334   }
01335   return Res;
01336 }
01337 
01338 static bool isLifetimeStart(const Instruction *Inst) {
01339   if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
01340     return II->getIntrinsicID() == Intrinsic::lifetime_start;
01341   return false;
01342 }
01343 
01344 void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, 
01345                                   AvailValInBlkVect &ValuesPerBlock,
01346                                   UnavailBlkVect &UnavailableBlocks) {
01347 
01348   // Filter out useless results (non-locals, etc).  Keep track of the blocks
01349   // where we have a value available in repl, also keep track of whether we see
01350   // dependencies that produce an unknown value for the load (such as a call
01351   // that could potentially clobber the load).
01352   unsigned NumDeps = Deps.size();
01353   for (unsigned i = 0, e = NumDeps; i != e; ++i) {
01354     BasicBlock *DepBB = Deps[i].getBB();
01355     MemDepResult DepInfo = Deps[i].getResult();
01356 
01357     if (!DepInfo.isDef() && !DepInfo.isClobber()) {
01358       UnavailableBlocks.push_back(DepBB);
01359       continue;
01360     }
01361 
01362     if (DepInfo.isClobber()) {
01363       // The address being loaded in this non-local block may not be the same as
01364       // the pointer operand of the load if PHI translation occurs.  Make sure
01365       // to consider the right address.
01366       Value *Address = Deps[i].getAddress();
01367 
01368       // If the dependence is to a store that writes to a superset of the bits
01369       // read by the load, we can extract the bits we need for the load from the
01370       // stored value.
01371       if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
01372         if (TD && Address) {
01373           int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address,
01374                                                       DepSI, *TD);
01375           if (Offset != -1) {
01376             ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
01377                                                        DepSI->getValueOperand(),
01378                                                                 Offset));
01379             continue;
01380           }
01381         }
01382       }
01383 
01384       // Check to see if we have something like this:
01385       //    load i32* P
01386       //    load i8* (P+1)
01387       // if we have this, replace the later with an extraction from the former.
01388       if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) {
01389         // If this is a clobber and L is the first instruction in its block, then
01390         // we have the first instruction in the entry block.
01391         if (DepLI != LI && Address && TD) {
01392           int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(),
01393                                                      LI->getPointerOperand(),
01394                                                      DepLI, *TD);
01395 
01396           if (Offset != -1) {
01397             ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB,DepLI,
01398                                                                     Offset));
01399             continue;
01400           }
01401         }
01402       }
01403 
01404       // If the clobbering value is a memset/memcpy/memmove, see if we can
01405       // forward a value on from it.
01406       if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
01407         if (TD && Address) {
01408           int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
01409                                                         DepMI, *TD);
01410           if (Offset != -1) {
01411             ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
01412                                                                   Offset));
01413             continue;
01414           }
01415         }
01416       }
01417 
01418       UnavailableBlocks.push_back(DepBB);
01419       continue;
01420     }
01421 
01422     // DepInfo.isDef() here
01423 
01424     Instruction *DepInst = DepInfo.getInst();
01425 
01426     // Loading the allocation -> undef.
01427     if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI) ||
01428         // Loading immediately after lifetime begin -> undef.
01429         isLifetimeStart(DepInst)) {
01430       ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
01431                                              UndefValue::get(LI->getType())));
01432       continue;
01433     }
01434 
01435     if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
01436       // Reject loads and stores that are to the same address but are of
01437       // different types if we have to.
01438       if (S->getValueOperand()->getType() != LI->getType()) {
01439         // If the stored value is larger or equal to the loaded value, we can
01440         // reuse it.
01441         if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
01442                                                         LI->getType(), *TD)) {
01443           UnavailableBlocks.push_back(DepBB);
01444           continue;
01445         }
01446       }
01447 
01448       ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
01449                                                          S->getValueOperand()));
01450       continue;
01451     }
01452 
01453     if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
01454       // If the types mismatch and we can't handle it, reject reuse of the load.
01455       if (LD->getType() != LI->getType()) {
01456         // If the stored value is larger or equal to the loaded value, we can
01457         // reuse it.
01458         if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){
01459           UnavailableBlocks.push_back(DepBB);
01460           continue;
01461         }
01462       }
01463       ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB, LD));
01464       continue;
01465     }
01466 
01467     UnavailableBlocks.push_back(DepBB);
01468   }
01469 }
01470 
01471 bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, 
01472                          UnavailBlkVect &UnavailableBlocks) {
01473   // Okay, we have *some* definitions of the value.  This means that the value
01474   // is available in some of our (transitive) predecessors.  Lets think about
01475   // doing PRE of this load.  This will involve inserting a new load into the
01476   // predecessor when it's not available.  We could do this in general, but
01477   // prefer to not increase code size.  As such, we only do this when we know
01478   // that we only have to insert *one* load (which means we're basically moving
01479   // the load, not inserting a new one).
01480 
01481   SmallPtrSet<BasicBlock *, 4> Blockers;
01482   for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
01483     Blockers.insert(UnavailableBlocks[i]);
01484 
01485   // Let's find the first basic block with more than one predecessor.  Walk
01486   // backwards through predecessors if needed.
01487   BasicBlock *LoadBB = LI->getParent();
01488   BasicBlock *TmpBB = LoadBB;
01489 
01490   while (TmpBB->getSinglePredecessor()) {
01491     TmpBB = TmpBB->getSinglePredecessor();
01492     if (TmpBB == LoadBB) // Infinite (unreachable) loop.
01493       return false;
01494     if (Blockers.count(TmpBB))
01495       return false;
01496 
01497     // If any of these blocks has more than one successor (i.e. if the edge we
01498     // just traversed was critical), then there are other paths through this
01499     // block along which the load may not be anticipated.  Hoisting the load
01500     // above this block would be adding the load to execution paths along
01501     // which it was not previously executed.
01502     if (TmpBB->getTerminator()->getNumSuccessors() != 1)
01503       return false;
01504   }
01505 
01506   assert(TmpBB);
01507   LoadBB = TmpBB;
01508 
01509   // Check to see how many predecessors have the loaded value fully
01510   // available.
01511   DenseMap<BasicBlock*, Value*> PredLoads;
01512   DenseMap<BasicBlock*, char> FullyAvailableBlocks;
01513   for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
01514     FullyAvailableBlocks[ValuesPerBlock[i].BB] = true;
01515   for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
01516     FullyAvailableBlocks[UnavailableBlocks[i]] = false;
01517 
01518   SmallVector<BasicBlock *, 4> CriticalEdgePred;
01519   for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
01520        PI != E; ++PI) {
01521     BasicBlock *Pred = *PI;
01522     if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks, 0)) {
01523       continue;
01524     }
01525     PredLoads[Pred] = 0;
01526 
01527     if (Pred->getTerminator()->getNumSuccessors() != 1) {
01528       if (isa<IndirectBrInst>(Pred->getTerminator())) {
01529         DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
01530               << Pred->getName() << "': " << *LI << '\n');
01531         return false;
01532       }
01533 
01534       if (LoadBB->isLandingPad()) {
01535         DEBUG(dbgs()
01536               << "COULD NOT PRE LOAD BECAUSE OF LANDING PAD CRITICAL EDGE '"
01537               << Pred->getName() << "': " << *LI << '\n');
01538         return false;
01539       }
01540 
01541       CriticalEdgePred.push_back(Pred);
01542     }
01543   }
01544 
01545   // Decide whether PRE is profitable for this load.
01546   unsigned NumUnavailablePreds = PredLoads.size();
01547   assert(NumUnavailablePreds != 0 &&
01548          "Fully available value should already be eliminated!");
01549 
01550   // If this load is unavailable in multiple predecessors, reject it.
01551   // FIXME: If we could restructure the CFG, we could make a common pred with
01552   // all the preds that don't have an available LI and insert a new load into
01553   // that one block.
01554   if (NumUnavailablePreds != 1)
01555       return false;
01556 
01557   // Split critical edges, and update the unavailable predecessors accordingly.
01558   for (SmallVector<BasicBlock *, 4>::iterator I = CriticalEdgePred.begin(), 
01559          E = CriticalEdgePred.end(); I != E; I++) {
01560     BasicBlock *OrigPred = *I;
01561     BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB);
01562     PredLoads.erase(OrigPred);
01563     PredLoads[NewPred] = 0;
01564     DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->"
01565                  << LoadBB->getName() << '\n');
01566   }
01567 
01568   // Check if the load can safely be moved to all the unavailable predecessors.
01569   bool CanDoPRE = true;
01570   SmallVector<Instruction*, 8> NewInsts;
01571   for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
01572          E = PredLoads.end(); I != E; ++I) {
01573     BasicBlock *UnavailablePred = I->first;
01574 
01575     // Do PHI translation to get its value in the predecessor if necessary.  The
01576     // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
01577 
01578     // If all preds have a single successor, then we know it is safe to insert
01579     // the load on the pred (?!?), so we can insert code to materialize the
01580     // pointer if it is not available.
01581     PHITransAddr Address(LI->getPointerOperand(), TD);
01582     Value *LoadPtr = 0;
01583     LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
01584                                                 *DT, NewInsts);
01585 
01586     // If we couldn't find or insert a computation of this phi translated value,
01587     // we fail PRE.
01588     if (LoadPtr == 0) {
01589       DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
01590             << *LI->getPointerOperand() << "\n");
01591       CanDoPRE = false;
01592       break;
01593     }
01594 
01595     I->second = LoadPtr;
01596   }
01597 
01598   if (!CanDoPRE) {
01599     while (!NewInsts.empty()) {
01600       Instruction *I = NewInsts.pop_back_val();
01601       if (MD) MD->removeInstruction(I);
01602       I->eraseFromParent();
01603     }
01604     // HINT:Don't revert the edge-splitting as following transformation may 
01605     // also need to split these critial edges.
01606     return !CriticalEdgePred.empty();
01607   }
01608 
01609   // Okay, we can eliminate this load by inserting a reload in the predecessor
01610   // and using PHI construction to get the value in the other predecessors, do
01611   // it.
01612   DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *LI << '\n');
01613   DEBUG(if (!NewInsts.empty())
01614           dbgs() << "INSERTED " << NewInsts.size() << " INSTS: "
01615                  << *NewInsts.back() << '\n');
01616 
01617   // Assign value numbers to the new instructions.
01618   for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) {
01619     // FIXME: We really _ought_ to insert these value numbers into their
01620     // parent's availability map.  However, in doing so, we risk getting into
01621     // ordering issues.  If a block hasn't been processed yet, we would be
01622     // marking a value as AVAIL-IN, which isn't what we intend.
01623     VN.lookup_or_add(NewInsts[i]);
01624   }
01625 
01626   for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
01627          E = PredLoads.end(); I != E; ++I) {
01628     BasicBlock *UnavailablePred = I->first;
01629     Value *LoadPtr = I->second;
01630 
01631     Instruction *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
01632                                         LI->getAlignment(),
01633                                         UnavailablePred->getTerminator());
01634 
01635     // Transfer the old load's TBAA tag to the new load.
01636     if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa))
01637       NewLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
01638 
01639     // Transfer DebugLoc.
01640     NewLoad->setDebugLoc(LI->getDebugLoc());
01641 
01642     // Add the newly created load.
01643     ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
01644                                                         NewLoad));
01645     MD->invalidateCachedPointerInfo(LoadPtr);
01646     DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
01647   }
01648 
01649   // Perform PHI construction.
01650   Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
01651   LI->replaceAllUsesWith(V);
01652   if (isa<PHINode>(V))
01653     V->takeName(LI);
01654   if (V->getType()->getScalarType()->isPointerTy())
01655     MD->invalidateCachedPointerInfo(V);
01656   markInstructionForDeletion(LI);
01657   ++NumPRELoad;
01658   return true;
01659 }
01660 
01661 /// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
01662 /// non-local by performing PHI construction.
01663 bool GVN::processNonLocalLoad(LoadInst *LI) {
01664   // Step 1: Find the non-local dependencies of the load.
01665   LoadDepVect Deps;
01666   AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
01667   MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
01668 
01669   // If we had to process more than one hundred blocks to find the
01670   // dependencies, this load isn't worth worrying about.  Optimizing
01671   // it will be too expensive.
01672   unsigned NumDeps = Deps.size();
01673   if (NumDeps > 100)
01674     return false;
01675 
01676   // If we had a phi translation failure, we'll have a single entry which is a
01677   // clobber in the current block.  Reject this early.
01678   if (NumDeps == 1 &&
01679       !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
01680     DEBUG(
01681       dbgs() << "GVN: non-local load ";
01682       WriteAsOperand(dbgs(), LI);
01683       dbgs() << " has unknown dependencies\n";
01684     );
01685     return false;
01686   }
01687 
01688   // Step 2: Analyze the availability of the load
01689   AvailValInBlkVect ValuesPerBlock;
01690   UnavailBlkVect UnavailableBlocks;
01691   AnalyzeLoadAvailability(LI, Deps, ValuesPerBlock, UnavailableBlocks);
01692 
01693   // If we have no predecessors that produce a known value for this load, exit
01694   // early.
01695   if (ValuesPerBlock.empty())
01696     return false;
01697 
01698   // Step 3: Eliminate fully redundancy.
01699   //
01700   // If all of the instructions we depend on produce a known value for this
01701   // load, then it is fully redundant and we can use PHI insertion to compute
01702   // its value.  Insert PHIs and remove the fully redundant value now.
01703   if (UnavailableBlocks.empty()) {
01704     DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
01705 
01706     // Perform PHI construction.
01707     Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
01708     LI->replaceAllUsesWith(V);
01709 
01710     if (isa<PHINode>(V))
01711       V->takeName(LI);
01712     if (V->getType()->getScalarType()->isPointerTy())
01713       MD->invalidateCachedPointerInfo(V);
01714     markInstructionForDeletion(LI);
01715     ++NumGVNLoad;
01716     return true;
01717   }
01718 
01719   // Step 4: Eliminate partial redundancy.
01720   if (!EnablePRE || !EnableLoadPRE)
01721     return false;
01722 
01723   return PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks);
01724 }
01725 
01726 
01727 static void patchReplacementInstruction(Instruction *I, Value *Repl) {
01728   // Patch the replacement so that it is not more restrictive than the value
01729   // being replaced.
01730   BinaryOperator *Op = dyn_cast<BinaryOperator>(I);
01731   BinaryOperator *ReplOp = dyn_cast<BinaryOperator>(Repl);
01732   if (Op && ReplOp && isa<OverflowingBinaryOperator>(Op) &&
01733       isa<OverflowingBinaryOperator>(ReplOp)) {
01734     if (ReplOp->hasNoSignedWrap() && !Op->hasNoSignedWrap())
01735       ReplOp->setHasNoSignedWrap(false);
01736     if (ReplOp->hasNoUnsignedWrap() && !Op->hasNoUnsignedWrap())
01737       ReplOp->setHasNoUnsignedWrap(false);
01738   }
01739   if (Instruction *ReplInst = dyn_cast<Instruction>(Repl)) {
01740     SmallVector<std::pair<unsigned, MDNode*>, 4> Metadata;
01741     ReplInst->getAllMetadataOtherThanDebugLoc(Metadata);
01742     for (int i = 0, n = Metadata.size(); i < n; ++i) {
01743       unsigned Kind = Metadata[i].first;
01744       MDNode *IMD = I->getMetadata(Kind);
01745       MDNode *ReplMD = Metadata[i].second;
01746       switch(Kind) {
01747       default:
01748         ReplInst->setMetadata(Kind, NULL); // Remove unknown metadata
01749         break;
01750       case LLVMContext::MD_dbg:
01751         llvm_unreachable("getAllMetadataOtherThanDebugLoc returned a MD_dbg");
01752       case LLVMContext::MD_tbaa:
01753         ReplInst->setMetadata(Kind, MDNode::getMostGenericTBAA(IMD, ReplMD));
01754         break;
01755       case LLVMContext::MD_range:
01756         ReplInst->setMetadata(Kind, MDNode::getMostGenericRange(IMD, ReplMD));
01757         break;
01758       case LLVMContext::MD_prof:
01759         llvm_unreachable("MD_prof in a non terminator instruction");
01760         break;
01761       case LLVMContext::MD_fpmath:
01762         ReplInst->setMetadata(Kind, MDNode::getMostGenericFPMath(IMD, ReplMD));
01763         break;
01764       }
01765     }
01766   }
01767 }
01768 
01769 static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
01770   patchReplacementInstruction(I, Repl);
01771   I->replaceAllUsesWith(Repl);
01772 }
01773 
01774 /// processLoad - Attempt to eliminate a load, first by eliminating it
01775 /// locally, and then attempting non-local elimination if that fails.
01776 bool GVN::processLoad(LoadInst *L) {
01777   if (!MD)
01778     return false;
01779 
01780   if (!L->isSimple())
01781     return false;
01782 
01783   if (L->use_empty()) {
01784     markInstructionForDeletion(L);
01785     return true;
01786   }
01787 
01788   // ... to a pointer that has been loaded from before...
01789   MemDepResult Dep = MD->getDependency(L);
01790 
01791   // If we have a clobber and target data is around, see if this is a clobber
01792   // that we can fix up through code synthesis.
01793   if (Dep.isClobber() && TD) {
01794     // Check to see if we have something like this:
01795     //   store i32 123, i32* %P
01796     //   %A = bitcast i32* %P to i8*
01797     //   %B = gep i8* %A, i32 1
01798     //   %C = load i8* %B
01799     //
01800     // We could do that by recognizing if the clobber instructions are obviously
01801     // a common base + constant offset, and if the previous store (or memset)
01802     // completely covers this load.  This sort of thing can happen in bitfield
01803     // access code.
01804     Value *AvailVal = 0;
01805     if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) {
01806       int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
01807                                                   L->getPointerOperand(),
01808                                                   DepSI, *TD);
01809       if (Offset != -1)
01810         AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset,
01811                                         L->getType(), L, *TD);
01812     }
01813 
01814     // Check to see if we have something like this:
01815     //    load i32* P
01816     //    load i8* (P+1)
01817     // if we have this, replace the later with an extraction from the former.
01818     if (LoadInst *DepLI = dyn_cast<LoadInst>(Dep.getInst())) {
01819       // If this is a clobber and L is the first instruction in its block, then
01820       // we have the first instruction in the entry block.
01821       if (DepLI == L)
01822         return false;
01823 
01824       int Offset = AnalyzeLoadFromClobberingLoad(L->getType(),
01825                                                  L->getPointerOperand(),
01826                                                  DepLI, *TD);
01827       if (Offset != -1)
01828         AvailVal = GetLoadValueForLoad(DepLI, Offset, L->getType(), L, *this);
01829     }
01830 
01831     // If the clobbering value is a memset/memcpy/memmove, see if we can forward
01832     // a value on from it.
01833     if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
01834       int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
01835                                                     L->getPointerOperand(),
01836                                                     DepMI, *TD);
01837       if (Offset != -1)
01838         AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, *TD);
01839     }
01840 
01841     if (AvailVal) {
01842       DEBUG(dbgs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n'
01843             << *AvailVal << '\n' << *L << "\n\n\n");
01844 
01845       // Replace the load!
01846       L->replaceAllUsesWith(AvailVal);
01847       if (AvailVal->getType()->getScalarType()->isPointerTy())
01848         MD->invalidateCachedPointerInfo(AvailVal);
01849       markInstructionForDeletion(L);
01850       ++NumGVNLoad;
01851       return true;
01852     }
01853   }
01854 
01855   // If the value isn't available, don't do anything!
01856   if (Dep.isClobber()) {
01857     DEBUG(
01858       // fast print dep, using operator<< on instruction is too slow.
01859       dbgs() << "GVN: load ";
01860       WriteAsOperand(dbgs(), L);
01861       Instruction *I = Dep.getInst();
01862       dbgs() << " is clobbered by " << *I << '\n';
01863     );
01864     return false;
01865   }
01866 
01867   // If it is defined in another block, try harder.
01868   if (Dep.isNonLocal())
01869     return processNonLocalLoad(L);
01870 
01871   if (!Dep.isDef()) {
01872     DEBUG(
01873       // fast print dep, using operator<< on instruction is too slow.
01874       dbgs() << "GVN: load ";
01875       WriteAsOperand(dbgs(), L);
01876       dbgs() << " has unknown dependence\n";
01877     );
01878     return false;
01879   }
01880 
01881   Instruction *DepInst = Dep.getInst();
01882   if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
01883     Value *StoredVal = DepSI->getValueOperand();
01884 
01885     // The store and load are to a must-aliased pointer, but they may not
01886     // actually have the same type.  See if we know how to reuse the stored
01887     // value (depending on its type).
01888     if (StoredVal->getType() != L->getType()) {
01889       if (TD) {
01890         StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
01891                                                    L, *TD);
01892         if (StoredVal == 0)
01893           return false;
01894 
01895         DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
01896                      << '\n' << *L << "\n\n\n");
01897       }
01898       else
01899         return false;
01900     }
01901 
01902     // Remove it!
01903     L->replaceAllUsesWith(StoredVal);
01904     if (StoredVal->getType()->getScalarType()->isPointerTy())
01905       MD->invalidateCachedPointerInfo(StoredVal);
01906     markInstructionForDeletion(L);
01907     ++NumGVNLoad;
01908     return true;
01909   }
01910 
01911   if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
01912     Value *AvailableVal = DepLI;
01913 
01914     // The loads are of a must-aliased pointer, but they may not actually have
01915     // the same type.  See if we know how to reuse the previously loaded value
01916     // (depending on its type).
01917     if (DepLI->getType() != L->getType()) {
01918       if (TD) {
01919         AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(),
01920                                                       L, *TD);
01921         if (AvailableVal == 0)
01922           return false;
01923 
01924         DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
01925                      << "\n" << *L << "\n\n\n");
01926       }
01927       else
01928         return false;
01929     }
01930 
01931     // Remove it!
01932     patchAndReplaceAllUsesWith(L, AvailableVal);
01933     if (DepLI->getType()->getScalarType()->isPointerTy())
01934       MD->invalidateCachedPointerInfo(DepLI);
01935     markInstructionForDeletion(L);
01936     ++NumGVNLoad;
01937     return true;
01938   }
01939 
01940   // If this load really doesn't depend on anything, then we must be loading an
01941   // undef value.  This can happen when loading for a fresh allocation with no
01942   // intervening stores, for example.
01943   if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI)) {
01944     L->replaceAllUsesWith(UndefValue::get(L->getType()));
01945     markInstructionForDeletion(L);
01946     ++NumGVNLoad;
01947     return true;
01948   }
01949 
01950   // If this load occurs either right after a lifetime begin,
01951   // then the loaded value is undefined.
01952   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DepInst)) {
01953     if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
01954       L->replaceAllUsesWith(UndefValue::get(L->getType()));
01955       markInstructionForDeletion(L);
01956       ++NumGVNLoad;
01957       return true;
01958     }
01959   }
01960 
01961   return false;
01962 }
01963 
01964 // findLeader - In order to find a leader for a given value number at a
01965 // specific basic block, we first obtain the list of all Values for that number,
01966 // and then scan the list to find one whose block dominates the block in
01967 // question.  This is fast because dominator tree queries consist of only
01968 // a few comparisons of DFS numbers.
01969 Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) {
01970   LeaderTableEntry Vals = LeaderTable[num];
01971   if (!Vals.Val) return 0;
01972 
01973   Value *Val = 0;
01974   if (DT->dominates(Vals.BB, BB)) {
01975     Val = Vals.Val;
01976     if (isa<Constant>(Val)) return Val;
01977   }
01978 
01979   LeaderTableEntry* Next = Vals.Next;
01980   while (Next) {
01981     if (DT->dominates(Next->BB, BB)) {
01982       if (isa<Constant>(Next->Val)) return Next->Val;
01983       if (!Val) Val = Next->Val;
01984     }
01985 
01986     Next = Next->Next;
01987   }
01988 
01989   return Val;
01990 }
01991 
01992 /// replaceAllDominatedUsesWith - Replace all uses of 'From' with 'To' if the
01993 /// use is dominated by the given basic block.  Returns the number of uses that
01994 /// were replaced.
01995 unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To,
01996                                           const BasicBlockEdge &Root) {
01997   unsigned Count = 0;
01998   for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
01999        UI != UE; ) {
02000     Use &U = (UI++).getUse();
02001 
02002     if (DT->dominates(Root, U)) {
02003       U.set(To);
02004       ++Count;
02005     }
02006   }
02007   return Count;
02008 }
02009 
02010 /// isOnlyReachableViaThisEdge - There is an edge from 'Src' to 'Dst'.  Return
02011 /// true if every path from the entry block to 'Dst' passes via this edge.  In
02012 /// particular 'Dst' must not be reachable via another edge from 'Src'.
02013 static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
02014                                        DominatorTree *DT) {
02015   // While in theory it is interesting to consider the case in which Dst has
02016   // more than one predecessor, because Dst might be part of a loop which is
02017   // only reachable from Src, in practice it is pointless since at the time
02018   // GVN runs all such loops have preheaders, which means that Dst will have
02019   // been changed to have only one predecessor, namely Src.
02020   const BasicBlock *Pred = E.getEnd()->getSinglePredecessor();
02021   const BasicBlock *Src = E.getStart();
02022   assert((!Pred || Pred == Src) && "No edge between these basic blocks!");
02023   (void)Src;
02024   return Pred != 0;
02025 }
02026 
02027 /// propagateEquality - The given values are known to be equal in every block
02028 /// dominated by 'Root'.  Exploit this, for example by replacing 'LHS' with
02029 /// 'RHS' everywhere in the scope.  Returns whether a change was made.
02030 bool GVN::propagateEquality(Value *LHS, Value *RHS,
02031                             const BasicBlockEdge &Root) {
02032   SmallVector<std::pair<Value*, Value*>, 4> Worklist;
02033   Worklist.push_back(std::make_pair(LHS, RHS));
02034   bool Changed = false;
02035   // For speed, compute a conservative fast approximation to
02036   // DT->dominates(Root, Root.getEnd());
02037   bool RootDominatesEnd = isOnlyReachableViaThisEdge(Root, DT);
02038 
02039   while (!Worklist.empty()) {
02040     std::pair<Value*, Value*> Item = Worklist.pop_back_val();
02041     LHS = Item.first; RHS = Item.second;
02042 
02043     if (LHS == RHS) continue;
02044     assert(LHS->getType() == RHS->getType() && "Equality but unequal types!");
02045 
02046     // Don't try to propagate equalities between constants.
02047     if (isa<Constant>(LHS) && isa<Constant>(RHS)) continue;
02048 
02049     // Prefer a constant on the right-hand side, or an Argument if no constants.
02050     if (isa<Constant>(LHS) || (isa<Argument>(LHS) && !isa<Constant>(RHS)))
02051       std::swap(LHS, RHS);
02052     assert((isa<Argument>(LHS) || isa<Instruction>(LHS)) && "Unexpected value!");
02053 
02054     // If there is no obvious reason to prefer the left-hand side over the right-
02055     // hand side, ensure the longest lived term is on the right-hand side, so the
02056     // shortest lived term will be replaced by the longest lived.  This tends to
02057     // expose more simplifications.
02058     uint32_t LVN = VN.lookup_or_add(LHS);
02059     if ((isa<Argument>(LHS) && isa<Argument>(RHS)) ||
02060         (isa<Instruction>(LHS) && isa<Instruction>(RHS))) {
02061       // Move the 'oldest' value to the right-hand side, using the value number as
02062       // a proxy for age.
02063       uint32_t RVN = VN.lookup_or_add(RHS);
02064       if (LVN < RVN) {
02065         std::swap(LHS, RHS);
02066         LVN = RVN;
02067       }
02068     }
02069 
02070     // If value numbering later sees that an instruction in the scope is equal
02071     // to 'LHS' then ensure it will be turned into 'RHS'.  In order to preserve
02072     // the invariant that instructions only occur in the leader table for their
02073     // own value number (this is used by removeFromLeaderTable), do not do this
02074     // if RHS is an instruction (if an instruction in the scope is morphed into
02075     // LHS then it will be turned into RHS by the next GVN iteration anyway, so
02076     // using the leader table is about compiling faster, not optimizing better).
02077     // The leader table only tracks basic blocks, not edges. Only add to if we
02078     // have the simple case where the edge dominates the end.
02079     if (RootDominatesEnd && !isa<Instruction>(RHS))
02080       addToLeaderTable(LVN, RHS, Root.getEnd());
02081 
02082     // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope.  As
02083     // LHS always has at least one use that is not dominated by Root, this will
02084     // never do anything if LHS has only one use.
02085     if (!LHS->hasOneUse()) {
02086       unsigned NumReplacements = replaceAllDominatedUsesWith(LHS, RHS, Root);
02087       Changed |= NumReplacements > 0;
02088       NumGVNEqProp += NumReplacements;
02089     }
02090 
02091     // Now try to deduce additional equalities from this one.  For example, if the
02092     // known equality was "(A != B)" == "false" then it follows that A and B are
02093     // equal in the scope.  Only boolean equalities with an explicit true or false
02094     // RHS are currently supported.
02095     if (!RHS->getType()->isIntegerTy(1))
02096       // Not a boolean equality - bail out.
02097       continue;
02098     ConstantInt *CI = dyn_cast<ConstantInt>(RHS);
02099     if (!CI)
02100       // RHS neither 'true' nor 'false' - bail out.
02101       continue;
02102     // Whether RHS equals 'true'.  Otherwise it equals 'false'.
02103     bool isKnownTrue = CI->isAllOnesValue();
02104     bool isKnownFalse = !isKnownTrue;
02105 
02106     // If "A && B" is known true then both A and B are known true.  If "A || B"
02107     // is known false then both A and B are known false.
02108     Value *A, *B;
02109     if ((isKnownTrue && match(LHS, m_And(m_Value(A), m_Value(B)))) ||
02110         (isKnownFalse && match(LHS, m_Or(m_Value(A), m_Value(B))))) {
02111       Worklist.push_back(std::make_pair(A, RHS));
02112       Worklist.push_back(std::make_pair(B, RHS));
02113       continue;
02114     }
02115 
02116     // If we are propagating an equality like "(A == B)" == "true" then also
02117     // propagate the equality A == B.  When propagating a comparison such as
02118     // "(A >= B)" == "true", replace all instances of "A < B" with "false".
02119     if (ICmpInst *Cmp = dyn_cast<ICmpInst>(LHS)) {
02120       Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
02121 
02122       // If "A == B" is known true, or "A != B" is known false, then replace
02123       // A with B everywhere in the scope.
02124       if ((isKnownTrue && Cmp->getPredicate() == CmpInst::ICMP_EQ) ||
02125           (isKnownFalse && Cmp->getPredicate() == CmpInst::ICMP_NE))
02126         Worklist.push_back(std::make_pair(Op0, Op1));
02127 
02128       // If "A >= B" is known true, replace "A < B" with false everywhere.
02129       CmpInst::Predicate NotPred = Cmp->getInversePredicate();
02130       Constant *NotVal = ConstantInt::get(Cmp->getType(), isKnownFalse);
02131       // Since we don't have the instruction "A < B" immediately to hand, work out
02132       // the value number that it would have and use that to find an appropriate
02133       // instruction (if any).
02134       uint32_t NextNum = VN.getNextUnusedValueNumber();
02135       uint32_t Num = VN.lookup_or_add_cmp(Cmp->getOpcode(), NotPred, Op0, Op1);
02136       // If the number we were assigned was brand new then there is no point in
02137       // looking for an instruction realizing it: there cannot be one!
02138       if (Num < NextNum) {
02139         Value *NotCmp = findLeader(Root.getEnd(), Num);
02140         if (NotCmp && isa<Instruction>(NotCmp)) {
02141           unsigned NumReplacements =
02142             replaceAllDominatedUsesWith(NotCmp, NotVal, Root);
02143           Changed |= NumReplacements > 0;
02144           NumGVNEqProp += NumReplacements;
02145         }
02146       }
02147       // Ensure that any instruction in scope that gets the "A < B" value number
02148       // is replaced with false.
02149       // The leader table only tracks basic blocks, not edges. Only add to if we
02150       // have the simple case where the edge dominates the end.
02151       if (RootDominatesEnd)
02152         addToLeaderTable(Num, NotVal, Root.getEnd());
02153 
02154       continue;
02155     }
02156   }
02157 
02158   return Changed;
02159 }
02160 
02161 /// processInstruction - When calculating availability, handle an instruction
02162 /// by inserting it into the appropriate sets
02163 bool GVN::processInstruction(Instruction *I) {
02164   // Ignore dbg info intrinsics.
02165   if (isa<DbgInfoIntrinsic>(I))
02166     return false;
02167 
02168   // If the instruction can be easily simplified then do so now in preference
02169   // to value numbering it.  Value numbering often exposes redundancies, for
02170   // example if it determines that %y is equal to %x then the instruction
02171   // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
02172   if (Value *V = SimplifyInstruction(I, TD, TLI, DT)) {
02173     I->replaceAllUsesWith(V);
02174     if (MD && V->getType()->getScalarType()->isPointerTy())
02175       MD->invalidateCachedPointerInfo(V);
02176     markInstructionForDeletion(I);
02177     ++NumGVNSimpl;
02178     return true;
02179   }
02180 
02181   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
02182     if (processLoad(LI))
02183       return true;
02184 
02185     unsigned Num = VN.lookup_or_add(LI);
02186     addToLeaderTable(Num, LI, LI->getParent());
02187     return false;
02188   }
02189 
02190   // For conditional branches, we can perform simple conditional propagation on
02191   // the condition value itself.
02192   if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
02193     if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
02194       return false;
02195 
02196     Value *BranchCond = BI->getCondition();
02197 
02198     BasicBlock *TrueSucc = BI->getSuccessor(0);
02199     BasicBlock *FalseSucc = BI->getSuccessor(1);
02200     // Avoid multiple edges early.
02201     if (TrueSucc == FalseSucc)
02202       return false;
02203 
02204     BasicBlock *Parent = BI->getParent();
02205     bool Changed = false;
02206 
02207     Value *TrueVal = ConstantInt::getTrue(TrueSucc->getContext());
02208     BasicBlockEdge TrueE(Parent, TrueSucc);
02209     Changed |= propagateEquality(BranchCond, TrueVal, TrueE);
02210 
02211     Value *FalseVal = ConstantInt::getFalse(FalseSucc->getContext());
02212     BasicBlockEdge FalseE(Parent, FalseSucc);
02213     Changed |= propagateEquality(BranchCond, FalseVal, FalseE);
02214 
02215     return Changed;
02216   }
02217 
02218   // For switches, propagate the case values into the case destinations.
02219   if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
02220     Value *SwitchCond = SI->getCondition();
02221     BasicBlock *Parent = SI->getParent();
02222     bool Changed = false;
02223 
02224     // Remember how many outgoing edges there are to every successor.
02225     SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges;
02226     for (unsigned i = 0, n = SI->getNumSuccessors(); i != n; ++i)
02227       ++SwitchEdges[SI->getSuccessor(i)];
02228 
02229     for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
02230          i != e; ++i) {
02231       BasicBlock *Dst = i.getCaseSuccessor();
02232       // If there is only a single edge, propagate the case value into it.
02233       if (SwitchEdges.lookup(Dst) == 1) {
02234         BasicBlockEdge E(Parent, Dst);
02235         Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E);
02236       }
02237     }
02238     return Changed;
02239   }
02240 
02241   // Instructions with void type don't return a value, so there's
02242   // no point in trying to find redundancies in them.
02243   if (I->getType()->isVoidTy()) return false;
02244 
02245   uint32_t NextNum = VN.getNextUnusedValueNumber();
02246   unsigned Num = VN.lookup_or_add(I);
02247 
02248   // Allocations are always uniquely numbered, so we can save time and memory
02249   // by fast failing them.
02250   if (isa<AllocaInst>(I) || isa<TerminatorInst>(I) || isa<PHINode>(I)) {
02251     addToLeaderTable(Num, I, I->getParent());
02252     return false;
02253   }
02254 
02255   // If the number we were assigned was a brand new VN, then we don't
02256   // need to do a lookup to see if the number already exists
02257   // somewhere in the domtree: it can't!
02258   if (Num >= NextNum) {
02259     addToLeaderTable(Num, I, I->getParent());
02260     return false;
02261   }
02262 
02263   // Perform fast-path value-number based elimination of values inherited from
02264   // dominators.
02265   Value *repl = findLeader(I->getParent(), Num);
02266   if (repl == 0) {
02267     // Failure, just remember this instance for future use.
02268     addToLeaderTable(Num, I, I->getParent());
02269     return false;
02270   }
02271 
02272   // Remove it!
02273   patchAndReplaceAllUsesWith(I, repl);
02274   if (MD && repl->getType()->getScalarType()->isPointerTy())
02275     MD->invalidateCachedPointerInfo(repl);
02276   markInstructionForDeletion(I);
02277   return true;
02278 }
02279 
02280 /// runOnFunction - This is the main transformation entry point for a function.
02281 bool GVN::runOnFunction(Function& F) {
02282   if (!NoLoads)
02283     MD = &getAnalysis<MemoryDependenceAnalysis>();
02284   DT = &getAnalysis<DominatorTree>();
02285   TD = getAnalysisIfAvailable<DataLayout>();
02286   TLI = &getAnalysis<TargetLibraryInfo>();
02287   VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
02288   VN.setMemDep(MD);
02289   VN.setDomTree(DT);
02290 
02291   bool Changed = false;
02292   bool ShouldContinue = true;
02293 
02294   // Merge unconditional branches, allowing PRE to catch more
02295   // optimization opportunities.
02296   for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
02297     BasicBlock *BB = FI++;
02298 
02299     bool removedBlock = MergeBlockIntoPredecessor(BB, this);
02300     if (removedBlock) ++NumGVNBlocks;
02301 
02302     Changed |= removedBlock;
02303   }
02304 
02305   unsigned Iteration = 0;
02306   while (ShouldContinue) {
02307     DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
02308     ShouldContinue = iterateOnFunction(F);
02309     Changed |= ShouldContinue;
02310     ++Iteration;
02311   }
02312 
02313   if (EnablePRE) {
02314     bool PREChanged = true;
02315     while (PREChanged) {
02316       PREChanged = performPRE(F);
02317       Changed |= PREChanged;
02318     }
02319   }
02320 
02321   // FIXME: Should perform GVN again after PRE does something.  PRE can move
02322   // computations into blocks where they become fully redundant.  Note that
02323   // we can't do this until PRE's critical edge splitting updates memdep.
02324   // Actually, when this happens, we should just fully integrate PRE into GVN.
02325 
02326   cleanupGlobalSets();
02327 
02328   return Changed;
02329 }
02330 
02331 
02332 bool GVN::processBlock(BasicBlock *BB) {
02333   // FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function
02334   // (and incrementing BI before processing an instruction).
02335   assert(InstrsToErase.empty() &&
02336          "We expect InstrsToErase to be empty across iterations");
02337   bool ChangedFunction = false;
02338 
02339   for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
02340        BI != BE;) {
02341     ChangedFunction |= processInstruction(BI);
02342     if (InstrsToErase.empty()) {
02343       ++BI;
02344       continue;
02345     }
02346 
02347     // If we need some instructions deleted, do it now.
02348     NumGVNInstr += InstrsToErase.size();
02349 
02350     // Avoid iterator invalidation.
02351     bool AtStart = BI == BB->begin();
02352     if (!AtStart)
02353       --BI;
02354 
02355     for (SmallVector<Instruction*, 4>::iterator I = InstrsToErase.begin(),
02356          E = InstrsToErase.end(); I != E; ++I) {
02357       DEBUG(dbgs() << "GVN removed: " << **I << '\n');
02358       if (MD) MD->removeInstruction(*I);
02359       DEBUG(verifyRemoved(*I));
02360       (*I)->eraseFromParent();
02361     }
02362     InstrsToErase.clear();
02363 
02364     if (AtStart)
02365       BI = BB->begin();
02366     else
02367       ++BI;
02368   }
02369 
02370   return ChangedFunction;
02371 }
02372 
02373 /// performPRE - Perform a purely local form of PRE that looks for diamond
02374 /// control flow patterns and attempts to perform simple PRE at the join point.
02375 bool GVN::performPRE(Function &F) {
02376   bool Changed = false;
02377   SmallVector<std::pair<Value*, BasicBlock*>, 8> predMap;
02378   for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
02379        DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
02380     BasicBlock *CurrentBlock = *DI;
02381 
02382     // Nothing to PRE in the entry block.
02383     if (CurrentBlock == &F.getEntryBlock()) continue;
02384 
02385     // Don't perform PRE on a landing pad.
02386     if (CurrentBlock->isLandingPad()) continue;
02387 
02388     for (BasicBlock::iterator BI = CurrentBlock->begin(),
02389          BE = CurrentBlock->end(); BI != BE; ) {
02390       Instruction *CurInst = BI++;
02391 
02392       if (isa<AllocaInst>(CurInst) ||
02393           isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) ||
02394           CurInst->getType()->isVoidTy() ||
02395           CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
02396           isa<DbgInfoIntrinsic>(CurInst))
02397         continue;
02398 
02399       // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from
02400       // sinking the compare again, and it would force the code generator to
02401       // move the i1 from processor flags or predicate registers into a general
02402       // purpose register.
02403       if (isa<CmpInst>(CurInst))
02404         continue;
02405 
02406       // We don't currently value number ANY inline asm calls.
02407       if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
02408         if (CallI->isInlineAsm())
02409           continue;
02410 
02411       uint32_t ValNo = VN.lookup(CurInst);
02412 
02413       // Look for the predecessors for PRE opportunities.  We're
02414       // only trying to solve the basic diamond case, where
02415       // a value is computed in the successor and one predecessor,
02416       // but not the other.  We also explicitly disallow cases
02417       // where the successor is its own predecessor, because they're
02418       // more complicated to get right.
02419       unsigned NumWith = 0;
02420       unsigned NumWithout = 0;
02421       BasicBlock *PREPred = 0;
02422       predMap.clear();
02423 
02424       for (pred_iterator PI = pred_begin(CurrentBlock),
02425            PE = pred_end(CurrentBlock); PI != PE; ++PI) {
02426         BasicBlock *P = *PI;
02427         // We're not interested in PRE where the block is its
02428         // own predecessor, or in blocks with predecessors
02429         // that are not reachable.
02430         if (P == CurrentBlock) {
02431           NumWithout = 2;
02432           break;
02433         } else if (!DT->isReachableFromEntry(P))  {
02434           NumWithout = 2;
02435           break;
02436         }
02437 
02438         Value* predV = findLeader(P, ValNo);
02439         if (predV == 0) {
02440           predMap.push_back(std::make_pair(static_cast<Value *>(0), P));
02441           PREPred = P;
02442           ++NumWithout;
02443         } else if (predV == CurInst) {
02444           /* CurInst dominates this predecessor. */
02445           NumWithout = 2;
02446           break;
02447         } else {
02448           predMap.push_back(std::make_pair(predV, P));
02449           ++NumWith;
02450         }
02451       }
02452 
02453       // Don't do PRE when it might increase code size, i.e. when
02454       // we would need to insert instructions in more than one pred.
02455       if (NumWithout != 1 || NumWith == 0)
02456         continue;
02457 
02458       // Don't do PRE across indirect branch.
02459       if (isa<IndirectBrInst>(PREPred->getTerminator()))
02460         continue;
02461 
02462       // We can't do PRE safely on a critical edge, so instead we schedule
02463       // the edge to be split and perform the PRE the next time we iterate
02464       // on the function.
02465       unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
02466       if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
02467         toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
02468         continue;
02469       }
02470 
02471       // Instantiate the expression in the predecessor that lacked it.
02472       // Because we are going top-down through the block, all value numbers
02473       // will be available in the predecessor by the time we need them.  Any
02474       // that weren't originally present will have been instantiated earlier
02475       // in this loop.
02476       Instruction *PREInstr = CurInst->clone();
02477       bool success = true;
02478       for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
02479         Value *Op = PREInstr->getOperand(i);
02480         if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
02481           continue;
02482 
02483         if (Value *V = findLeader(PREPred, VN.lookup(Op))) {
02484           PREInstr->setOperand(i, V);
02485         } else {
02486           success = false;
02487           break;
02488         }
02489       }
02490 
02491       // Fail out if we encounter an operand that is not available in
02492       // the PRE predecessor.  This is typically because of loads which
02493       // are not value numbered precisely.
02494       if (!success) {
02495         DEBUG(verifyRemoved(PREInstr));
02496         delete PREInstr;
02497         continue;
02498       }
02499 
02500       PREInstr->insertBefore(PREPred->getTerminator());
02501       PREInstr->setName(CurInst->getName() + ".pre");
02502       PREInstr->setDebugLoc(CurInst->getDebugLoc());
02503       VN.add(PREInstr, ValNo);
02504       ++NumGVNPRE;
02505 
02506       // Update the availability map to include the new instruction.
02507       addToLeaderTable(ValNo, PREInstr, PREPred);
02508 
02509       // Create a PHI to make the value available in this block.
02510       PHINode* Phi = PHINode::Create(CurInst->getType(), predMap.size(),
02511                                      CurInst->getName() + ".pre-phi",
02512                                      CurrentBlock->begin());
02513       for (unsigned i = 0, e = predMap.size(); i != e; ++i) {
02514         if (Value *V = predMap[i].first)
02515           Phi->addIncoming(V, predMap[i].second);
02516         else
02517           Phi->addIncoming(PREInstr, PREPred);
02518       }
02519 
02520       VN.add(Phi, ValNo);
02521       addToLeaderTable(ValNo, Phi, CurrentBlock);
02522       Phi->setDebugLoc(CurInst->getDebugLoc());
02523       CurInst->replaceAllUsesWith(Phi);
02524       if (Phi->getType()->getScalarType()->isPointerTy()) {
02525         // Because we have added a PHI-use of the pointer value, it has now
02526         // "escaped" from alias analysis' perspective.  We need to inform
02527         // AA of this.
02528         for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee;
02529              ++ii) {
02530           unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
02531           VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj));
02532         }
02533 
02534         if (MD)
02535           MD->invalidateCachedPointerInfo(Phi);
02536       }
02537       VN.erase(CurInst);
02538       removeFromLeaderTable(ValNo, CurInst, CurrentBlock);
02539 
02540       DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
02541       if (MD) MD->removeInstruction(CurInst);
02542       DEBUG(verifyRemoved(CurInst));
02543       CurInst->eraseFromParent();
02544       Changed = true;
02545     }
02546   }
02547 
02548   if (splitCriticalEdges())
02549     Changed = true;
02550 
02551   return Changed;
02552 }
02553 
02554 /// Split the critical edge connecting the given two blocks, and return
02555 /// the block inserted to the critical edge.
02556 BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
02557   BasicBlock *BB = SplitCriticalEdge(Pred, Succ, this);
02558   if (MD)
02559     MD->invalidateCachedPredecessors();
02560   return BB;
02561 }
02562 
02563 /// splitCriticalEdges - Split critical edges found during the previous
02564 /// iteration that may enable further optimization.
02565 bool GVN::splitCriticalEdges() {
02566   if (toSplit.empty())
02567     return false;
02568   do {
02569     std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val();
02570     SplitCriticalEdge(Edge.first, Edge.second, this);
02571   } while (!toSplit.empty());
02572   if (MD) MD->invalidateCachedPredecessors();
02573   return true;
02574 }
02575 
02576 /// iterateOnFunction - Executes one iteration of GVN
02577 bool GVN::iterateOnFunction(Function &F) {
02578   cleanupGlobalSets();
02579 
02580   // Top-down walk of the dominator tree
02581   bool Changed = false;
02582 #if 0
02583   // Needed for value numbering with phi construction to work.
02584   ReversePostOrderTraversal<Function*> RPOT(&F);
02585   for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(),
02586        RE = RPOT.end(); RI != RE; ++RI)
02587     Changed |= processBlock(*RI);
02588 #else
02589   // Save the blocks this function have before transformation begins. GVN may
02590   // split critical edge, and hence may invalidate the RPO/DT iterator.
02591   //
02592   std::vector<BasicBlock *> BBVect;
02593   BBVect.reserve(256);
02594   for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
02595        DE = df_end(DT->getRootNode()); DI != DE; ++DI)
02596     BBVect.push_back(DI->getBlock());
02597 
02598   for (std::vector<BasicBlock *>::iterator I = BBVect.begin(), E = BBVect.end();
02599        I != E; I++)
02600     Changed |= processBlock(*I);
02601 #endif
02602 
02603   return Changed;
02604 }
02605 
02606 void GVN::cleanupGlobalSets() {
02607   VN.clear();
02608   LeaderTable.clear();
02609   TableAllocator.Reset();
02610 }
02611 
02612 /// verifyRemoved - Verify that the specified instruction does not occur in our
02613 /// internal data structures.
02614 void GVN::verifyRemoved(const Instruction *Inst) const {
02615   VN.verifyRemoved(Inst);
02616 
02617   // Walk through the value number scope to make sure the instruction isn't
02618   // ferreted away in it.
02619   for (DenseMap<uint32_t, LeaderTableEntry>::const_iterator
02620        I = LeaderTable.begin(), E = LeaderTable.end(); I != E; ++I) {
02621     const LeaderTableEntry *Node = &I->second;
02622     assert(Node->Val != Inst && "Inst still in value numbering scope!");
02623 
02624     while (Node->Next) {
02625       Node = Node->Next;
02626       assert(Node->Val != Inst && "Inst still in value numbering scope!");
02627     }
02628   }
02629 }