//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the visit functions for load, store and alloca.
//
//===----------------------------------------------------------------------===//

#include "InstCombine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

STATISTIC(NumDeadStore,    "Number of dead stores eliminated");
STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");

/// pointsToConstantGlobal - Return true if V (possibly indirectly) points to
/// some part of a constant global variable.  This intentionally only accepts
/// constant expressions because we can't rewrite arbitrary instructions.
static bool pointsToConstantGlobal(Value *V) {
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
    return GV->isConstant();
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
    if (CE->getOpcode() == Instruction::BitCast ||
        CE->getOpcode() == Instruction::GetElementPtr)
      return pointsToConstantGlobal(CE->getOperand(0));
  return false;
}
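// For illustration (hypothetical IR), the kind of value this accepts:
//   @G = constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
//   getelementptr ([4 x i32]* @G, i64 0, i64 2)
// The constant-expression GEP (or a bitcast of @G) still points into the
// constant global @G, so pointsToConstantGlobal returns true for it.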

/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
/// pointer to an alloca.  Ignore any reads of the pointer, return false if we
/// see any stores or other unknown uses.  If we see pointer arithmetic, keep
/// track of whether it moves the pointer (with IsOffset) but otherwise traverse
/// the uses.  If we see a memcpy/memmove that targets an unoffseted pointer to
/// the alloca, and if the source pointer is a pointer to a constant global, we
/// can optimize this.
static bool
isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
                               SmallVectorImpl<Instruction *> &ToDelete,
                               bool IsOffset = false) {
  // We track lifetime intrinsics as we encounter them.  If we decide to go
  // ahead and replace the value with the global, this lets the caller quickly
  // eliminate the markers.

  for (Use &U : V->uses()) {
    Instruction *I = cast<Instruction>(U.getUser());

    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      // Ignore simple (non-volatile, non-atomic) loads; they are always ok.
      if (!LI->isSimple()) return false;
      continue;
    }

    if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
      // If uses of the bitcast are ok, we are ok.
      if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, ToDelete, IsOffset))
        return false;
      continue;
    }
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
      // A GEP with all zero indices does not offset the pointer; any other
      // GEP does.
      if (!isOnlyCopiedFromConstantGlobal(
              GEP, TheCopy, ToDelete, IsOffset || !GEP->hasAllZeroIndices()))
        return false;
      continue;
    }

    if (CallSite CS = I) {
      // If this is the function being called then we treat it like a load and
      // ignore it.
      if (CS.isCallee(&U))
        continue;

      // Inalloca arguments are clobbered by the call.
      unsigned ArgNo = CS.getArgumentNo(&U);
      if (CS.isInAllocaArgument(ArgNo))
        return false;

      // If this is a readonly/readnone call site, then we know it is just a
      // load (but one that potentially returns the value itself), so we can
      // ignore it if we know that the value isn't captured.
      if (CS.onlyReadsMemory() &&
          (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
        continue;

      // If this is being passed as a byval argument, the caller is making a
      // copy, so it is only a read of the alloca.
      if (CS.isByValArgument(ArgNo))
        continue;
    }

    // Lifetime intrinsics can be handled by the caller.
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
          II->getIntrinsicID() == Intrinsic::lifetime_end) {
        assert(II->use_empty() && "Lifetime markers have no result to use!");
        ToDelete.push_back(II);
        continue;
      }
    }

    // If this isn't our memcpy/memmove, reject it as something we can't
    // handle.
    MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
    if (MI == 0)
      return false;

    // If the transfer is using the alloca as a source of the transfer, then
    // ignore it since it is a load (unless the transfer is volatile).
    if (U.getOperandNo() == 1) {
      if (MI->isVolatile()) return false;
      continue;
    }

    // If we already have seen a copy, reject the second one.
    if (TheCopy) return false;

    // If the pointer has been offset from the start of the alloca, we can't
    // safely handle this.
    if (IsOffset) return false;

    // If the memintrinsic isn't using the alloca as the dest, reject it.
    if (U.getOperandNo() != 0) return false;

    // If the source of the memcpy/move is not a constant global, reject it.
    if (!pointsToConstantGlobal(MI->getSource()))
      return false;

    // Otherwise, the transform is safe.  Remember the copy instruction.
    TheCopy = MI;
  }
  return true;
}
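// For illustration (hypothetical IR), the pattern this walker accepts:
//   %A = alloca [16 x i8]
//   %p = getelementptr inbounds [16 x i8]* %A, i64 0, i64 0
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p,
//            i8* getelementptr inbounds ([16 x i8]* @G, i64 0, i64 0),
//            i64 16, i32 1, i1 false)
// followed only by loads of %A.  With @G a constant global, the alloca is
// initialized once by the memcpy and never written again, so its uses can be
// rewritten to use @G directly.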

/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
/// modified by a copy from a constant global.  If we can prove this, we can
/// replace any uses of the alloca with uses of the global directly.
static MemTransferInst *
isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
                               SmallVectorImpl<Instruction *> &ToDelete) {
  MemTransferInst *TheCopy = 0;
  if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
    return TheCopy;
  return 0;
}

Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
  // Ensure that the alloca array size argument has type intptr_t, so that
  // any casting is exposed early.
  if (DL) {
    Type *IntPtrTy = DL->getIntPtrType(AI.getType());
    if (AI.getArraySize()->getType() != IntPtrTy) {
      Value *V = Builder->CreateIntCast(AI.getArraySize(),
                                        IntPtrTy, false);
      AI.setOperand(0, V);
      return &AI;
    }
  }

  // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
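  // For example (illustrative, with C == 4 and Ty == i32):
  //   %A = alloca i32, i32 4
  // becomes:
  //   %A = alloca [4 x i32]
  //   %A.sub = getelementptr inbounds [4 x i32]* %A, i64 0, i64 0
  // and all users of the original alloca are pointed at %A.sub instead.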
  if (AI.isArrayAllocation()) {  // Check C != 1
    if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
      Type *NewTy =
        ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
      AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName());
      New->setAlignment(AI.getAlignment());

      // Scan to the end of the allocation instructions, to skip over a block of
      // allocas if possible... also skip interleaved debug info
      //
      BasicBlock::iterator It = New;
      while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;
      // Now that It is pointing to the first non-allocation-inst in the block,
      // insert our getelementptr instruction...
      //
      Type *IdxTy = DL
                  ? DL->getIntPtrType(AI.getType())
                  : Type::getInt64Ty(AI.getContext());
      Value *NullIdx = Constant::getNullValue(IdxTy);
      Value *Idx[2] = { NullIdx, NullIdx };
      Instruction *GEP =
        GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
      InsertNewInstBefore(GEP, *It);

      // Now make everything use the getelementptr instead of the original
      // allocation.
      return ReplaceInstUsesWith(AI, GEP);
    } else if (isa<UndefValue>(AI.getArraySize())) {
      return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
    }
  }

  if (DL && AI.getAllocatedType()->isSized()) {
    // If the alignment is 0 (unspecified), assign it the preferred alignment.
    if (AI.getAlignment() == 0)
      AI.setAlignment(DL->getPrefTypeAlignment(AI.getAllocatedType()));

    // Move all allocas of zero-byte objects to the entry block and merge them
    // together.  Note that we only do this for allocas, because malloc should
    // allocate and return a unique pointer, even for a zero byte allocation.
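    // For example (illustrative): two zero-byte allocations such as
    //   %a = alloca {}
    //   %b = alloca [0 x i8]
    // can be folded into a single alloca at the start of the entry block,
    // since zero-byte allocas need not have distinct addresses.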
    if (DL->getTypeAllocSize(AI.getAllocatedType()) == 0) {
      // For a zero sized alloca there is no point in doing an array allocation.
      // This is helpful if the array size is a complicated expression not used
      // elsewhere.
      if (AI.isArrayAllocation()) {
        AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1));
        return &AI;
      }

      // Get the first instruction in the entry block.
      BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
      Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
      if (FirstInst != &AI) {
        // If the entry block doesn't start with a zero-size alloca then move
        // this one to the start of the entry block.  There is no problem with
        // dominance as the array size was forced to a constant earlier already.
        AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
        if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
            DL->getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
          AI.moveBefore(FirstInst);
          return &AI;
        }

        // If the alignment of the entry block alloca is 0 (unspecified),
        // assign it the preferred alignment.
        if (EntryAI->getAlignment() == 0)
          EntryAI->setAlignment(
            DL->getPrefTypeAlignment(EntryAI->getAllocatedType()));
        // Replace this zero-sized alloca with the one at the start of the entry
        // block after ensuring that the address will be aligned enough for both
        // types.
        unsigned MaxAlign = std::max(EntryAI->getAlignment(),
                                     AI.getAlignment());
        EntryAI->setAlignment(MaxAlign);
        if (AI.getType() != EntryAI->getType())
          return new BitCastInst(EntryAI, AI.getType());
        return ReplaceInstUsesWith(AI, EntryAI);
      }
    }
  }

  if (AI.getAlignment()) {
    // Check to see if this allocation is only modified by a memcpy/memmove from
    // a constant global whose alignment is equal to or exceeds that of the
    // allocation.  If this is the case, we can change all users to use
    // the constant global instead.  This is commonly produced by the CFE by
    // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
    // is only subsequently read.
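    // For example (illustrative IR): with @G a suitably aligned constant
    // global holding the initializer,
    //   %A = alloca [9 x i32], align 4   ; initialized by a memcpy from @G
    // every user of %A is rewritten to use (a cast of) @G, and the memcpy and
    // any lifetime markers are erased.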
    SmallVector<Instruction *, 4> ToDelete;
    if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
      unsigned SourceAlign = getOrEnforceKnownAlignment(Copy->getSource(),
                                                        AI.getAlignment(), DL);
      if (AI.getAlignment() <= SourceAlign) {
        DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
        DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');
        for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
          EraseInstFromFunction(*ToDelete[i]);
        Constant *TheSrc = cast<Constant>(Copy->getSource());
        Constant *Cast
          = ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType());
        Instruction *NewI = ReplaceInstUsesWith(AI, Cast);
        EraseInstFromFunction(*Copy);
        ++NumGlobalCopies;
        return NewI;
      }
    }
  }

  // At last, use the generic allocation site handler to aggressively remove
  // unused allocas.
  return visitAllocSite(AI);
}


/// InstCombineLoadCast - Fold 'load (cast P)' -> 'cast (load P)' when possible.
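/// For example (illustrative, when DataLayout says both types are 32 bits):
///   %c = bitcast <4 x i8>* %P to i32*
///   %v = load i32* %c
/// becomes:
///   %v1 = load <4 x i8>* %P
///   %v = bitcast <4 x i8> %v1 to i32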
static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
                                        const DataLayout *DL) {
  User *CI = cast<User>(LI.getOperand(0));
  Value *CastOp = CI->getOperand(0);

  PointerType *DestTy = cast<PointerType>(CI->getType());
  Type *DestPTy = DestTy->getElementType();
  if (PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {

    // If the address spaces don't match, don't eliminate the cast.
    if (DestTy->getAddressSpace() != SrcTy->getAddressSpace())
      return 0;

    Type *SrcPTy = SrcTy->getElementType();

    if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
         DestPTy->isVectorTy()) {
      // If the source is an array, the code below will not succeed.  Check to
      // see if a trivial 'gep P, 0, 0' will help matters.  Only do this for
      // constants.
      if (ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
        if (Constant *CSrc = dyn_cast<Constant>(CastOp))
          if (ASrcTy->getNumElements() != 0) {
            Type *IdxTy = DL
                        ? DL->getIntPtrType(SrcTy)
                        : Type::getInt64Ty(SrcTy->getContext());
            Value *Idx = Constant::getNullValue(IdxTy);
            Value *Idxs[2] = { Idx, Idx };
            CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs);
            SrcTy = cast<PointerType>(CastOp->getType());
            SrcPTy = SrcTy->getElementType();
          }

      if (IC.getDataLayout() &&
          (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
            SrcPTy->isVectorTy()) &&
          // Do not allow turning this into a load of an integer, which is then
          // cast to a pointer; this pessimizes pointer analysis a lot.
          (SrcPTy->isPtrOrPtrVectorTy() ==
           LI.getType()->isPtrOrPtrVectorTy()) &&
          IC.getDataLayout()->getTypeSizeInBits(SrcPTy) ==
               IC.getDataLayout()->getTypeSizeInBits(DestPTy)) {

        // Okay, we are casting from one integer or pointer type to another of
        // the same size.  Instead of casting the pointer before the load, cast
        // the result of the loaded value.
        LoadInst *NewLoad =
          IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
        NewLoad->setAlignment(LI.getAlignment());
        NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope());
        // Now cast the result of the load.
        PointerType *OldTy = dyn_cast<PointerType>(NewLoad->getType());
        PointerType *NewTy = dyn_cast<PointerType>(LI.getType());
        if (OldTy && NewTy &&
            OldTy->getAddressSpace() != NewTy->getAddressSpace()) {
          return new AddrSpaceCastInst(NewLoad, LI.getType());
        }

        return new BitCastInst(NewLoad, LI.getType());
      }
    }
  }
  return 0;
}

Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
  Value *Op = LI.getOperand(0);

  // Attempt to improve the alignment.
  if (DL) {
    unsigned KnownAlign =
      getOrEnforceKnownAlignment(Op, DL->getPrefTypeAlignment(LI.getType()), DL);
    unsigned LoadAlign = LI.getAlignment();
    unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
      DL->getABITypeAlignment(LI.getType());

    if (KnownAlign > EffectiveLoadAlign)
      LI.setAlignment(KnownAlign);
    else if (LoadAlign == 0)
      LI.setAlignment(EffectiveLoadAlign);
  }

  // load (cast X) --> cast (load X) iff safe.
  if (isa<CastInst>(Op))
    if (Instruction *Res = InstCombineLoadCast(*this, LI, DL))
      return Res;

  // None of the following transforms are legal for volatile/atomic loads.
  // FIXME: Some of it is okay for atomic loads; needs refactoring.
  if (!LI.isSimple()) return 0;

  // Do really simple store-to-load forwarding and load CSE, to catch cases
  // where there are several consecutive memory accesses to the same location,
  // separated by a few arithmetic operations.
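  // For example (illustrative):
  //   store i32 %x, i32* %p
  //   %a = add i32 %x, 1        ; a few intervening non-memory instructions
  //   %y = load i32* %p         ; forwarded: %y is replaced by %x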
  BasicBlock::iterator BBI = &LI;
  if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI, 6))
    return ReplaceInstUsesWith(LI, AvailableVal);

  // load(gep null, ...) -> unreachable
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
    const Value *GEPI0 = GEPI->getOperand(0);
    // TODO: Consider a target hook for valid address spaces for this xform.
    if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0) {
      // Insert a new store to null instruction before the load to indicate
      // that this code is not reachable.  We do this instead of inserting
      // an unreachable instruction directly because we cannot modify the
      // CFG.
      new StoreInst(UndefValue::get(LI.getType()),
                    Constant::getNullValue(Op->getType()), &LI);
      return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
    }
  }

  // load null/undef -> unreachable
  // TODO: Consider a target hook for valid address spaces for this xform.
  if (isa<UndefValue>(Op) ||
      (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
    // Insert a new store to null instruction before the load to indicate that
    // this code is not reachable.  We do this instead of inserting an
    // unreachable instruction directly because we cannot modify the CFG.
    new StoreInst(UndefValue::get(LI.getType()),
                  Constant::getNullValue(Op->getType()), &LI);
    return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
  }

  // Instcombine load (constantexpr_cast global) -> cast (load global)
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op))
    if (CE->isCast())
      if (Instruction *Res = InstCombineLoadCast(*this, LI, DL))
        return Res;

  if (Op->hasOneUse()) {
    // Change select and PHI nodes to select values instead of addresses: this
    // helps alias analysis out a lot, allows many other simplifications, and
    // exposes redundancy in the code.
    //
    // Note that we cannot do the transformation unless we know that the
    // introduced loads cannot trap!  Something like this is valid as long as
    // the condition is always false: load (select bool %C, int* null, int* %G),
    // but it would not be valid if we transformed it to load from null
    // unconditionally.
    //
    if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
      // load (select (Cond, &V1, &V2))  --> select(Cond, load &V1, load &V2).
      unsigned Align = LI.getAlignment();
      if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, DL) &&
          isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, DL)) {
        LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
                                           SI->getOperand(1)->getName()+".val");
        LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
                                           SI->getOperand(2)->getName()+".val");
        V1->setAlignment(Align);
        V2->setAlignment(Align);
        return SelectInst::Create(SI->getCondition(), V1, V2);
      }

      // load (select (cond, null, P)) -> load P
      if (Constant *C = dyn_cast<Constant>(SI->getOperand(1)))
        if (C->isNullValue()) {
          LI.setOperand(0, SI->getOperand(2));
          return &LI;
        }

      // load (select (cond, P, null)) -> load P
      if (Constant *C = dyn_cast<Constant>(SI->getOperand(2)))
        if (C->isNullValue()) {
          LI.setOperand(0, SI->getOperand(1));
          return &LI;
        }
    }
  }
  return 0;
}

/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P
/// when possible.  This makes it generally easy to do alias analysis and/or
/// SROA/mem2reg of the memory object.
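/// For example (illustrative, on a target where pointers are 64 bits):
///   %c = bitcast i8** %P to i64*
///   store i64 %v, i64* %c
/// becomes:
///   %v.c = inttoptr i64 %v to i8*
///   store i8* %v.c, i8** %P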
static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
  User *CI = cast<User>(SI.getOperand(1));
  Value *CastOp = CI->getOperand(0);

  Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
  PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
  if (SrcTy == 0) return 0;

  Type *SrcPTy = SrcTy->getElementType();

  if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy())
    return 0;

  /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
  /// to its first element.  This allows us to handle things like:
  ///   store i32 xxx, (bitcast {foo*, float}* %P to i32*)
  /// on 32-bit hosts.
  SmallVector<Value*, 4> NewGEPIndices;

  // If the source is an array, the code below will not succeed.  Check to
  // see if a trivial 'gep P, 0, 0' will help matters.  Only do this for
  // constants.
  if (SrcPTy->isArrayTy() || SrcPTy->isStructTy()) {
    // Index through pointer.
    Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext()));
    NewGEPIndices.push_back(Zero);

    while (1) {
      if (StructType *STy = dyn_cast<StructType>(SrcPTy)) {
        if (!STy->getNumElements()) /* Struct can be empty {} */
          break;
        NewGEPIndices.push_back(Zero);
        SrcPTy = STy->getElementType(0);
      } else if (ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) {
        NewGEPIndices.push_back(Zero);
        SrcPTy = ATy->getElementType();
      } else {
        break;
      }
    }

    SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
  }

  if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy())
    return 0;

  // If the pointers point into different address spaces don't do the
  // transformation.
  if (SrcTy->getAddressSpace() !=
      cast<PointerType>(CI->getType())->getAddressSpace())
    return 0;

  // If the pointers point to values of different sizes don't do the
  // transformation.
  if (!IC.getDataLayout() ||
      IC.getDataLayout()->getTypeSizeInBits(SrcPTy) !=
      IC.getDataLayout()->getTypeSizeInBits(DestPTy))
    return 0;

  // If the pointers point to pointers to different address spaces don't do the
  // transformation. It is not safe to introduce an addrspacecast instruction in
  // this case since, depending on the target, addrspacecast may not be a no-op
  // cast.
  if (SrcPTy->isPointerTy() && DestPTy->isPointerTy() &&
      SrcPTy->getPointerAddressSpace() != DestPTy->getPointerAddressSpace())
    return 0;

  // Okay, we are casting from one integer or pointer type to another of
  // the same size.  Instead of casting the pointer before
  // the store, cast the value to be stored.
  Value *NewCast;
  Instruction::CastOps opcode = Instruction::BitCast;
  Type* CastSrcTy = DestPTy;
  Type* CastDstTy = SrcPTy;
  if (CastDstTy->isPointerTy()) {
    if (CastSrcTy->isIntegerTy())
      opcode = Instruction::IntToPtr;
  } else if (CastDstTy->isIntegerTy()) {
    if (CastSrcTy->isPointerTy())
      opcode = Instruction::PtrToInt;
  }

  // If the original pointer is to an aggregate and this is a store to the
  // first field, emit a GEP to index into that first field.
  if (!NewGEPIndices.empty())
    CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices);

  Value *SIOp0 = SI.getOperand(0);
  NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
                                   SIOp0->getName()+".c");
  SI.setOperand(0, NewCast);
  SI.setOperand(1, CastOp);
  return &SI;
}

/// equivalentAddressValues - Test if A and B will obviously have the same
/// value. This includes recognizing that %t0 and %t1 will have the same
/// value in code like this:
///   %t0 = getelementptr @a, 0, 3
///   store i32 0, i32* %t0
///   %t1 = getelementptr @a, 0, 3
///   %t2 = load i32* %t1
///
static bool equivalentAddressValues(Value *A, Value *B) {
  // Test if the values are trivially equivalent.
  if (A == B) return true;

  // Test if the values come from identical arithmetic instructions.
  // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
  // it's only used to compare two uses within the same basic block, which
  // means that they'll always either have the same value or one of them
  // will have an undefined value.
  if (isa<BinaryOperator>(A) ||
      isa<CastInst>(A) ||
      isa<PHINode>(A) ||
      isa<GetElementPtrInst>(A))
    if (Instruction *BI = dyn_cast<Instruction>(B))
      if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
        return true;

  // Otherwise they may not be equivalent.
  return false;
}

Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
  Value *Val = SI.getOperand(0);
  Value *Ptr = SI.getOperand(1);

  // Attempt to improve the alignment.
  if (DL) {
    unsigned KnownAlign =
      getOrEnforceKnownAlignment(Ptr, DL->getPrefTypeAlignment(Val->getType()),
                                 DL);
    unsigned StoreAlign = SI.getAlignment();
    unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
      DL->getABITypeAlignment(Val->getType());

    if (KnownAlign > EffectiveStoreAlign)
      SI.setAlignment(KnownAlign);
    else if (StoreAlign == 0)
      SI.setAlignment(EffectiveStoreAlign);
  }

  // Don't hack volatile/atomic stores.
  // FIXME: Some bits are legal for atomic stores; needs refactoring.
  if (!SI.isSimple()) return 0;

  // If the RHS is an alloca with a single use, zapify the store, making the
  // alloca dead.
  if (Ptr->hasOneUse()) {
    if (isa<AllocaInst>(Ptr))
      return EraseInstFromFunction(SI);
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
      if (isa<AllocaInst>(GEP->getOperand(0))) {
        if (GEP->getOperand(0)->hasOneUse())
          return EraseInstFromFunction(SI);
      }
    }
  }

  // Do really simple DSE, to catch cases where there are several consecutive
  // stores to the same location, separated by a few arithmetic operations. This
  // situation often occurs with bitfield accesses.
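  // For example (illustrative):
  //   store i32 %x, i32* %p     ; dead: overwritten two instructions later
  //   %v = add i32 %x, 1        ; intervening non-memory instruction
  //   store i32 %v, i32* %p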
  BasicBlock::iterator BBI = &SI;
  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
       --ScanInsts) {
    --BBI;
    // Don't count debug info directives, lest they affect codegen, and skip
    // pointer-to-pointer bitcasts, which are NOPs.
    if (isa<DbgInfoIntrinsic>(BBI) ||
        (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      ScanInsts++;
      continue;
    }

    if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
      // Prev store isn't volatile, and stores to the same location?
      if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
                                                        SI.getOperand(1))) {
        ++NumDeadStore;
        ++BBI;
        EraseInstFromFunction(*PrevSI);
        continue;
      }
      break;
    }

    // If this is a load, we have to stop.  However, if the value being stored
    // was just loaded from the same pointer, then *this* store is dead
    // (X = load P; store X -> P).
    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
          LI->isSimple())
        return EraseInstFromFunction(SI);

      // Otherwise, this is a load from some other location.  Stores before it
      // may not be dead.
      break;
    }

    // Don't skip over loads or things that can modify memory.
    if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
      break;
  }

  // store X, null    -> turns into 'unreachable' in SimplifyCFG
  if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
    if (!isa<UndefValue>(Val)) {
      SI.setOperand(0, UndefValue::get(Val->getType()));
      if (Instruction *U = dyn_cast<Instruction>(Val))
        Worklist.Add(U);  // Dropped a use.
    }
    return 0;  // Do not modify these!
  }

  // store undef, Ptr -> noop
  if (isa<UndefValue>(Val))
    return EraseInstFromFunction(SI);

  // If the pointer destination is a cast, see if we can fold the cast into the
  // source instead.
  if (isa<CastInst>(Ptr))
    if (Instruction *Res = InstCombineStoreToCast(*this, SI))
      return Res;
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
    if (CE->isCast())
      if (Instruction *Res = InstCombineStoreToCast(*this, SI))
        return Res;


  // If this store is the last instruction in the basic block (possibly
  // excepting debug info instructions), and if the block ends with an
  // unconditional branch, try to move it to the successor block.
  BBI = &SI;
  do {
    ++BBI;
  } while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
  if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
    if (BI->isUnconditional())
      if (SimplifyStoreAtEndOfBlock(SI))
        return 0;  // xform done!

  return 0;
}

/// SimplifyStoreAtEndOfBlock - Turn things like:
///   if () { *P = v1; } else { *P = v2; }
/// into a phi node with a store in the successor.
///
/// Simplify things like:
///   *P = v1; if () { *P = v2; }
/// into a phi node with a store in the successor.
///
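/// For example (illustrative IR), the if/then/else form:
///   then:
///     store i32 %v1, i32* %P
///     br label %merge
///   else:
///     store i32 %v2, i32* %P
///     br label %merge
/// becomes a single store of a phi in the successor:
///   merge:
///     %storemerge = phi i32 [ %v1, %then ], [ %v2, %else ]
///     store i32 %storemerge, i32* %P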
bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
  BasicBlock *StoreBB = SI.getParent();

  // Check to see if the successor block has exactly two incoming edges.  If
  // so, see if the other predecessor contains a store to the same location.
  // If so, insert a PHI node (if needed) and move the stores down.
  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);

  // Determine whether Dest has exactly two predecessors and, if so, compute
  // the other predecessor.
  pred_iterator PI = pred_begin(DestBB);
  BasicBlock *P = *PI;
  BasicBlock *OtherBB = 0;

  if (P != StoreBB)
    OtherBB = P;

  if (++PI == pred_end(DestBB))
    return false;

  P = *PI;
  if (P != StoreBB) {
    if (OtherBB)
      return false;
    OtherBB = P;
  }
  if (++PI != pred_end(DestBB))
    return false;

  // Bail out if the relevant blocks aren't all distinct (this can happen,
  // for example, if SI is in an infinite loop).
  if (StoreBB == DestBB || OtherBB == DestBB)
    return false;

  // Verify that the other block ends in a branch and is not otherwise empty.
  BasicBlock::iterator BBI = OtherBB->getTerminator();
  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
  if (!OtherBr || BBI == OtherBB->begin())
    return false;

  // If the other block ends in an unconditional branch, check for the 'if then
  // else' case.  There is an instruction before the branch.
  StoreInst *OtherStore = 0;
  if (OtherBr->isUnconditional()) {
    --BBI;
    // Skip over debugging info.
    while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      if (BBI == OtherBB->begin())
        return false;
      --BBI;
    }
    // If this isn't a store, isn't a store to the same location, or is not the
    // right kind of store, bail out.
    OtherStore = dyn_cast<StoreInst>(BBI);
    if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
        !SI.isSameOperationAs(OtherStore))
      return false;
  } else {
    // Otherwise, the other block ended with a conditional branch. If one of the
    // destinations is StoreBB, then we have the if/then case.
    if (OtherBr->getSuccessor(0) != StoreBB &&
        OtherBr->getSuccessor(1) != StoreBB)
      return false;

    // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
    // if/then triangle.  See if there is a store to the same ptr as SI that
    // lives in OtherBB.
    for (;; --BBI) {
      // Check to see if we find the matching store.
      if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
        if (OtherStore->getOperand(1) != SI.getOperand(1) ||
            !SI.isSameOperationAs(OtherStore))
          return false;
        break;
      }
      // If we find something that may be using or overwriting the stored
      // value, or if we run out of instructions, we can't do the xform.
      if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
          BBI == OtherBB->begin())
        return false;
    }

    // In order to eliminate the store in OtherBB, we have to make sure nothing
    // reads or overwrites the stored value in StoreBB.
    for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
      // FIXME: This should really be AA driven.
      if (I->mayReadFromMemory() || I->mayWriteToMemory())
        return false;
    }
  }

  // Insert a PHI node now if we need it.
  Value *MergedVal = OtherStore->getOperand(0);
  if (MergedVal != SI.getOperand(0)) {
    PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
    PN->addIncoming(SI.getOperand(0), SI.getParent());
    PN->addIncoming(OtherStore->getOperand(0), OtherBB);
    MergedVal = InsertNewInstBefore(PN, DestBB->front());
  }

  // Advance to a place where it is safe to insert the new store and
  // insert it.
  BBI = DestBB->getFirstInsertionPt();
  StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
                                   SI.isVolatile(),
                                   SI.getAlignment(),
                                   SI.getOrdering(),
                                   SI.getSynchScope());
  InsertNewInstBefore(NewSI, *BBI);
  NewSI->setDebugLoc(OtherStore->getDebugLoc());

  // If the two stores had the same TBAA tag, preserve it.
  if (MDNode *TBAATag = SI.getMetadata(LLVMContext::MD_tbaa))
    if ((TBAATag = MDNode::getMostGenericTBAA(TBAATag,
                               OtherStore->getMetadata(LLVMContext::MD_tbaa))))
      NewSI->setMetadata(LLVMContext::MD_tbaa, TBAATag);


  // Nuke the old stores.
  EraseInstFromFunction(SI);
  EraseInstFromFunction(*OtherStore);
  return true;
}