LLVM API Documentation

InstCombineVectorOps.cpp
Go to the documentation of this file.
00001 //===- InstCombineVectorOps.cpp -------------------------------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file implements instcombine for ExtractElement, InsertElement and
00011 // ShuffleVector.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #include "InstCombine.h"
00016 #include "llvm/IR/PatternMatch.h"
00017 using namespace llvm;
00018 using namespace PatternMatch;
00019 
00020 /// CheapToScalarize - Return true if the value is cheaper to scalarize than it
00021 /// is to leave as a vector operation.  isConstant indicates whether we're
00022 /// extracting one known element.  If false we're extracting a variable index.
00023 static bool CheapToScalarize(Value *V, bool isConstant) {
00024   if (Constant *C = dyn_cast<Constant>(V)) {
00025     if (isConstant) return true;
00026 
00027     // If all elts are the same, we can extract it and use any of the values.
00028     if (Constant *Op0 = C->getAggregateElement(0U)) {
00029       for (unsigned i = 1, e = V->getType()->getVectorNumElements(); i != e;
00030            ++i)
00031         if (C->getAggregateElement(i) != Op0)
00032           return false;
00033       return true;
00034     }
00035   }
00036   Instruction *I = dyn_cast<Instruction>(V);
00037   if (!I) return false;
00038 
00039   // Insert element gets simplified to the inserted element or is deleted if
00040   // this is constant idx extract element and its a constant idx insertelt.
00041   if (I->getOpcode() == Instruction::InsertElement && isConstant &&
00042       isa<ConstantInt>(I->getOperand(2)))
00043     return true;
00044   if (I->getOpcode() == Instruction::Load && I->hasOneUse())
00045     return true;
00046   if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I))
00047     if (BO->hasOneUse() &&
00048         (CheapToScalarize(BO->getOperand(0), isConstant) ||
00049          CheapToScalarize(BO->getOperand(1), isConstant)))
00050       return true;
00051   if (CmpInst *CI = dyn_cast<CmpInst>(I))
00052     if (CI->hasOneUse() &&
00053         (CheapToScalarize(CI->getOperand(0), isConstant) ||
00054          CheapToScalarize(CI->getOperand(1), isConstant)))
00055       return true;
00056 
00057   return false;
00058 }
00059 
00060 /// FindScalarElement - Given a vector and an element number, see if the scalar
00061 /// value is already around as a register, for example if it were inserted then
00062 /// extracted from the vector.
00063 static Value *FindScalarElement(Value *V, unsigned EltNo) {
00064   assert(V->getType()->isVectorTy() && "Not looking at a vector?");
00065   VectorType *VTy = cast<VectorType>(V->getType());
00066   unsigned Width = VTy->getNumElements();
00067   if (EltNo >= Width)  // Out of range access.
00068     return UndefValue::get(VTy->getElementType());
00069 
00070   if (Constant *C = dyn_cast<Constant>(V))
00071     return C->getAggregateElement(EltNo);
00072 
00073   if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
00074     // If this is an insert to a variable element, we don't know what it is.
00075     if (!isa<ConstantInt>(III->getOperand(2)))
00076       return 0;
00077     unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
00078 
00079     // If this is an insert to the element we are looking for, return the
00080     // inserted value.
00081     if (EltNo == IIElt)
00082       return III->getOperand(1);
00083 
00084     // Otherwise, the insertelement doesn't modify the value, recurse on its
00085     // vector input.
00086     return FindScalarElement(III->getOperand(0), EltNo);
00087   }
00088 
00089   if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
00090     unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements();
00091     int InEl = SVI->getMaskValue(EltNo);
00092     if (InEl < 0)
00093       return UndefValue::get(VTy->getElementType());
00094     if (InEl < (int)LHSWidth)
00095       return FindScalarElement(SVI->getOperand(0), InEl);
00096     return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth);
00097   }
00098 
00099   // Extract a value from a vector add operation with a constant zero.
00100   Value *Val = 0; Constant *Con = 0;
00101   if (match(V, m_Add(m_Value(Val), m_Constant(Con)))) {
00102     if (Con->getAggregateElement(EltNo)->isNullValue())
00103       return FindScalarElement(Val, EltNo);
00104   }
00105 
00106   // Otherwise, we don't know.
00107   return 0;
00108 }
00109 
00110 // If we have a PHI node with a vector type that has only 2 uses: feed
00111 // itself and be an operand of extractelement at a constant location,
00112 // try to replace the PHI of the vector type with a PHI of a scalar type.
00113 Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
00114   // Verify that the PHI node has exactly 2 uses. Otherwise return NULL.
00115   if (!PN->hasNUses(2))
00116     return NULL;
00117 
00118   // If so, it's known at this point that one operand is PHI and the other is
00119   // an extractelement node. Find the PHI user that is not the extractelement
00120   // node.
00121   auto iu = PN->user_begin();
00122   Instruction *PHIUser = dyn_cast<Instruction>(*iu);
00123   if (PHIUser == cast<Instruction>(&EI))
00124     PHIUser = cast<Instruction>(*(++iu));
00125 
00126   // Verify that this PHI user has one use, which is the PHI itself,
00127   // and that it is a binary operation which is cheap to scalarize.
00128   // otherwise return NULL.
00129   if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
00130       !(isa<BinaryOperator>(PHIUser)) || !CheapToScalarize(PHIUser, true))
00131     return NULL;
00132 
00133   // Create a scalar PHI node that will replace the vector PHI node
00134   // just before the current PHI node.
00135   PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
00136       PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN));
00137   // Scalarize each PHI operand.
00138   for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
00139     Value *PHIInVal = PN->getIncomingValue(i);
00140     BasicBlock *inBB = PN->getIncomingBlock(i);
00141     Value *Elt = EI.getIndexOperand();
00142     // If the operand is the PHI induction variable:
00143     if (PHIInVal == PHIUser) {
00144       // Scalarize the binary operation. Its first operand is the
00145       // scalar PHI and the second operand is extracted from the other
00146       // vector operand.
00147       BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
00148       unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
00149       Value *Op = InsertNewInstWith(
00150           ExtractElementInst::Create(B0->getOperand(opId), Elt,
00151                                      B0->getOperand(opId)->getName() + ".Elt"),
00152           *B0);
00153       Value *newPHIUser = InsertNewInstWith(
00154           BinaryOperator::Create(B0->getOpcode(), scalarPHI, Op), *B0);
00155       scalarPHI->addIncoming(newPHIUser, inBB);
00156     } else {
00157       // Scalarize PHI input:
00158       Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
00159       // Insert the new instruction into the predecessor basic block.
00160       Instruction *pos = dyn_cast<Instruction>(PHIInVal);
00161       BasicBlock::iterator InsertPos;
00162       if (pos && !isa<PHINode>(pos)) {
00163         InsertPos = pos;
00164         ++InsertPos;
00165       } else {
00166         InsertPos = inBB->getFirstInsertionPt();
00167       }
00168 
00169       InsertNewInstWith(newEI, *InsertPos);
00170 
00171       scalarPHI->addIncoming(newEI, inBB);
00172     }
00173   }
00174   return ReplaceInstUsesWith(EI, scalarPHI);
00175 }
00176 
00177 Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
00178   // If vector val is constant with all elements the same, replace EI with
00179   // that element.  We handle a known element # below.
00180   if (Constant *C = dyn_cast<Constant>(EI.getOperand(0)))
00181     if (CheapToScalarize(C, false))
00182       return ReplaceInstUsesWith(EI, C->getAggregateElement(0U));
00183 
00184   // If extracting a specified index from the vector, see if we can recursively
00185   // find a previously computed scalar that was inserted into the vector.
00186   if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {
00187     unsigned IndexVal = IdxC->getZExtValue();
00188     unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();
00189 
00190     // If this is extracting an invalid index, turn this into undef, to avoid
00191     // crashing the code below.
00192     if (IndexVal >= VectorWidth)
00193       return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
00194 
00195     // This instruction only demands the single element from the input vector.
00196     // If the input vector has a single use, simplify it based on this use
00197     // property.
00198     if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) {
00199       APInt UndefElts(VectorWidth, 0);
00200       APInt DemandedMask(VectorWidth, 0);
00201       DemandedMask.setBit(IndexVal);
00202       if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0),
00203                                                 DemandedMask, UndefElts)) {
00204         EI.setOperand(0, V);
00205         return &EI;
00206       }
00207     }
00208 
00209     if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal))
00210       return ReplaceInstUsesWith(EI, Elt);
00211 
00212     // If the this extractelement is directly using a bitcast from a vector of
00213     // the same number of elements, see if we can find the source element from
00214     // it.  In this case, we will end up needing to bitcast the scalars.
00215     if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
00216       if (VectorType *VT = dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
00217         if (VT->getNumElements() == VectorWidth)
00218           if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal))
00219             return new BitCastInst(Elt, EI.getType());
00220     }
00221 
00222     // If there's a vector PHI feeding a scalar use through this extractelement
00223     // instruction, try to scalarize the PHI.
00224     if (PHINode *PN = dyn_cast<PHINode>(EI.getOperand(0))) {
00225       Instruction *scalarPHI = scalarizePHI(EI, PN);
00226       if (scalarPHI)
00227         return scalarPHI;
00228     }
00229   }
00230 
00231   if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
00232     // Push extractelement into predecessor operation if legal and
00233     // profitable to do so
00234     if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
00235       if (I->hasOneUse() &&
00236           CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
00237         Value *newEI0 =
00238           Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
00239                                         EI.getName()+".lhs");
00240         Value *newEI1 =
00241           Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
00242                                         EI.getName()+".rhs");
00243         return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1);
00244       }
00245     } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) {
00246       // Extracting the inserted element?
00247       if (IE->getOperand(2) == EI.getOperand(1))
00248         return ReplaceInstUsesWith(EI, IE->getOperand(1));
00249       // If the inserted and extracted elements are constants, they must not
00250       // be the same value, extract from the pre-inserted value instead.
00251       if (isa<Constant>(IE->getOperand(2)) && isa<Constant>(EI.getOperand(1))) {
00252         Worklist.AddValue(EI.getOperand(0));
00253         EI.setOperand(0, IE->getOperand(0));
00254         return &EI;
00255       }
00256     } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) {
00257       // If this is extracting an element from a shufflevector, figure out where
00258       // it came from and extract from the appropriate input element instead.
00259       if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) {
00260         int SrcIdx = SVI->getMaskValue(Elt->getZExtValue());
00261         Value *Src;
00262         unsigned LHSWidth =
00263           SVI->getOperand(0)->getType()->getVectorNumElements();
00264 
00265         if (SrcIdx < 0)
00266           return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
00267         if (SrcIdx < (int)LHSWidth)
00268           Src = SVI->getOperand(0);
00269         else {
00270           SrcIdx -= LHSWidth;
00271           Src = SVI->getOperand(1);
00272         }
00273         Type *Int32Ty = Type::getInt32Ty(EI.getContext());
00274         return ExtractElementInst::Create(Src,
00275                                           ConstantInt::get(Int32Ty,
00276                                                            SrcIdx, false));
00277       }
00278     } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
00279       // Canonicalize extractelement(cast) -> cast(extractelement)
00280       // bitcasts can change the number of vector elements and they cost nothing
00281       if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
00282         Value *EE = Builder->CreateExtractElement(CI->getOperand(0),
00283                                                   EI.getIndexOperand());
00284         Worklist.AddValue(EE);
00285         return CastInst::Create(CI->getOpcode(), EE, EI.getType());
00286       }
00287     } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
00288       if (SI->hasOneUse()) {
00289         // TODO: For a select on vectors, it might be useful to do this if it
00290         // has multiple extractelement uses. For vector select, that seems to
00291         // fight the vectorizer.
00292 
00293         // If we are extracting an element from a vector select or a select on
00294         // vectors, a select on the scalars extracted from the vector arguments.
00295         Value *TrueVal = SI->getTrueValue();
00296         Value *FalseVal = SI->getFalseValue();
00297 
00298         Value *Cond = SI->getCondition();
00299         if (Cond->getType()->isVectorTy()) {
00300           Cond = Builder->CreateExtractElement(Cond,
00301                                                EI.getIndexOperand(),
00302                                                Cond->getName() + ".elt");
00303         }
00304 
00305         Value *V1Elem
00306           = Builder->CreateExtractElement(TrueVal,
00307                                           EI.getIndexOperand(),
00308                                           TrueVal->getName() + ".elt");
00309 
00310         Value *V2Elem
00311           = Builder->CreateExtractElement(FalseVal,
00312                                           EI.getIndexOperand(),
00313                                           FalseVal->getName() + ".elt");
00314         return SelectInst::Create(Cond,
00315                                   V1Elem,
00316                                   V2Elem,
00317                                   SI->getName() + ".elt");
00318       }
00319     }
00320   }
00321   return 0;
00322 }
00323 
00324 /// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns
00325 /// elements from either LHS or RHS, return the shuffle mask and true.
00326 /// Otherwise, return false.
00327 static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
00328                                          SmallVectorImpl<Constant*> &Mask) {
00329   assert(LHS->getType() == RHS->getType() &&
00330          "Invalid CollectSingleShuffleElements");
00331   unsigned NumElts = V->getType()->getVectorNumElements();
00332 
00333   if (isa<UndefValue>(V)) {
00334     Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
00335     return true;
00336   }
00337 
00338   if (V == LHS) {
00339     for (unsigned i = 0; i != NumElts; ++i)
00340       Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
00341     return true;
00342   }
00343 
00344   if (V == RHS) {
00345     for (unsigned i = 0; i != NumElts; ++i)
00346       Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()),
00347                                       i+NumElts));
00348     return true;
00349   }
00350 
00351   if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
00352     // If this is an insert of an extract from some other vector, include it.
00353     Value *VecOp    = IEI->getOperand(0);
00354     Value *ScalarOp = IEI->getOperand(1);
00355     Value *IdxOp    = IEI->getOperand(2);
00356 
00357     if (!isa<ConstantInt>(IdxOp))
00358       return false;
00359     unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
00360 
00361     if (isa<UndefValue>(ScalarOp)) {  // inserting undef into vector.
00362       // Okay, we can handle this if the vector we are insertinting into is
00363       // transitively ok.
00364       if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
00365         // If so, update the mask to reflect the inserted undef.
00366         Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext()));
00367         return true;
00368       }
00369     } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
00370       if (isa<ConstantInt>(EI->getOperand(1))) {
00371         unsigned ExtractedIdx =
00372         cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
00373         unsigned NumLHSElts = LHS->getType()->getVectorNumElements();
00374 
00375         // This must be extracting from either LHS or RHS.
00376         if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
00377           // Okay, we can handle this if the vector we are insertinting into is
00378           // transitively ok.
00379           if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
00380             // If so, update the mask to reflect the inserted value.
00381             if (EI->getOperand(0) == LHS) {
00382               Mask[InsertedIdx % NumElts] =
00383               ConstantInt::get(Type::getInt32Ty(V->getContext()),
00384                                ExtractedIdx);
00385             } else {
00386               assert(EI->getOperand(0) == RHS);
00387               Mask[InsertedIdx % NumElts] =
00388               ConstantInt::get(Type::getInt32Ty(V->getContext()),
00389                                ExtractedIdx + NumLHSElts);
00390             }
00391             return true;
00392           }
00393         }
00394       }
00395     }
00396   }
00397 
00398   return false;
00399 }
00400 
00401 
00402 /// We are building a shuffle to create V, which is a sequence of insertelement,
00403 /// extractelement pairs. If PermittedRHS is set, then we must either use it or
00404 /// not rely on the second vector source. Return an std::pair containing the
00405 /// left and right vectors of the proposed shuffle (or 0), and set the Mask
00406 /// parameter as required.
00407 ///
00408 /// Note: we intentionally don't try to fold earlier shuffles since they have
00409 /// often been chosen carefully to be efficiently implementable on the target.
00410 typedef std::pair<Value *, Value *> ShuffleOps;
00411 
00412 static ShuffleOps CollectShuffleElements(Value *V,
00413                                          SmallVectorImpl<Constant *> &Mask,
00414                                          Value *PermittedRHS) {
00415   assert(V->getType()->isVectorTy() && "Invalid shuffle!");
00416   unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
00417 
00418   if (isa<UndefValue>(V)) {
00419     Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
00420     return std::make_pair(
00421         PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr);
00422   }
00423 
00424   if (isa<ConstantAggregateZero>(V)) {
00425     Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0));
00426     return std::make_pair(V, nullptr);
00427   }
00428 
00429   if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
00430     // If this is an insert of an extract from some other vector, include it.
00431     Value *VecOp    = IEI->getOperand(0);
00432     Value *ScalarOp = IEI->getOperand(1);
00433     Value *IdxOp    = IEI->getOperand(2);
00434 
00435     if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
00436       if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
00437         unsigned ExtractedIdx =
00438           cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
00439         unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
00440 
00441         // Either the extracted from or inserted into vector must be RHSVec,
00442         // otherwise we'd end up with a shuffle of three inputs.
00443         if (EI->getOperand(0) == PermittedRHS || PermittedRHS == 0) {
00444           Value *RHS = EI->getOperand(0);
00445           ShuffleOps LR = CollectShuffleElements(VecOp, Mask, RHS);
00446           assert(LR.second == 0 || LR.second == RHS);
00447 
00448           if (LR.first->getType() != RHS->getType()) {
00449             // We tried our best, but we can't find anything compatible with RHS
00450             // further up the chain. Return a trivial shuffle.
00451             for (unsigned i = 0; i < NumElts; ++i)
00452               Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()), i);
00453             return std::make_pair(V, nullptr);
00454           }
00455 
00456           unsigned NumLHSElts = RHS->getType()->getVectorNumElements();
00457           Mask[InsertedIdx % NumElts] =
00458             ConstantInt::get(Type::getInt32Ty(V->getContext()),
00459                              NumLHSElts+ExtractedIdx);
00460           return std::make_pair(LR.first, RHS);
00461         }
00462 
00463         if (VecOp == PermittedRHS) {
00464           // We've gone as far as we can: anything on the other side of the
00465           // extractelement will already have been converted into a shuffle.
00466           unsigned NumLHSElts =
00467               EI->getOperand(0)->getType()->getVectorNumElements();
00468           for (unsigned i = 0; i != NumElts; ++i)
00469             Mask.push_back(ConstantInt::get(
00470                 Type::getInt32Ty(V->getContext()),
00471                 i == InsertedIdx ? ExtractedIdx : NumLHSElts + i));
00472           return std::make_pair(EI->getOperand(0), PermittedRHS);
00473         }
00474 
00475         // If this insertelement is a chain that comes from exactly these two
00476         // vectors, return the vector and the effective shuffle.
00477         if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
00478             CollectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
00479                                          Mask))
00480           return std::make_pair(EI->getOperand(0), PermittedRHS);
00481       }
00482     }
00483   }
00484 
00485   // Otherwise, can't do anything fancy.  Return an identity vector.
00486   for (unsigned i = 0; i != NumElts; ++i)
00487     Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
00488   return std::make_pair(V, nullptr);
00489 }
00490 
00491 Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
00492   Value *VecOp    = IE.getOperand(0);
00493   Value *ScalarOp = IE.getOperand(1);
00494   Value *IdxOp    = IE.getOperand(2);
00495 
00496   // Inserting an undef or into an undefined place, remove this.
00497   if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp))
00498     ReplaceInstUsesWith(IE, VecOp);
00499 
00500   // If the inserted element was extracted from some other vector, and if the
00501   // indexes are constant, try to turn this into a shufflevector operation.
00502   if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
00503     if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
00504       unsigned NumInsertVectorElts = IE.getType()->getNumElements();
00505       unsigned NumExtractVectorElts =
00506           EI->getOperand(0)->getType()->getVectorNumElements();
00507       unsigned ExtractedIdx =
00508         cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
00509       unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
00510 
00511       if (ExtractedIdx >= NumExtractVectorElts) // Out of range extract.
00512         return ReplaceInstUsesWith(IE, VecOp);
00513 
00514       if (InsertedIdx >= NumInsertVectorElts)  // Out of range insert.
00515         return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType()));
00516 
00517       // If we are extracting a value from a vector, then inserting it right
00518       // back into the same place, just use the input vector.
00519       if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx)
00520         return ReplaceInstUsesWith(IE, VecOp);
00521 
00522       // If this insertelement isn't used by some other insertelement, turn it
00523       // (and any insertelements it points to), into one big shuffle.
00524       if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.user_back())) {
00525         SmallVector<Constant*, 16> Mask;
00526         ShuffleOps LR = CollectShuffleElements(&IE, Mask, 0);
00527 
00528         // The proposed shuffle may be trivial, in which case we shouldn't
00529         // perform the combine.
00530         if (LR.first != &IE && LR.second != &IE) {
00531           // We now have a shuffle of LHS, RHS, Mask.
00532           if (LR.second == 0) LR.second = UndefValue::get(LR.first->getType());
00533           return new ShuffleVectorInst(LR.first, LR.second,
00534                                        ConstantVector::get(Mask));
00535         }
00536       }
00537     }
00538   }
00539 
00540   unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements();
00541   APInt UndefElts(VWidth, 0);
00542   APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
00543   if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
00544     if (V != &IE)
00545       return ReplaceInstUsesWith(IE, V);
00546     return &IE;
00547   }
00548 
00549   return 0;
00550 }
00551 
00552 /// Return true if we can evaluate the specified expression tree if the vector
00553 /// elements were shuffled in a different order.
00554 static bool CanEvaluateShuffled(Value *V, ArrayRef<int> Mask,
00555                                 unsigned Depth = 5) {
00556   // We can always reorder the elements of a constant.
00557   if (isa<Constant>(V))
00558     return true;
00559 
00560   // We won't reorder vector arguments. No IPO here.
00561   Instruction *I = dyn_cast<Instruction>(V);
00562   if (!I) return false;
00563 
00564   // Two users may expect different orders of the elements. Don't try it.
00565   if (!I->hasOneUse())
00566     return false;
00567 
00568   if (Depth == 0) return false;
00569 
00570   switch (I->getOpcode()) {
00571     case Instruction::Add:
00572     case Instruction::FAdd:
00573     case Instruction::Sub:
00574     case Instruction::FSub:
00575     case Instruction::Mul:
00576     case Instruction::FMul:
00577     case Instruction::UDiv:
00578     case Instruction::SDiv:
00579     case Instruction::FDiv:
00580     case Instruction::URem:
00581     case Instruction::SRem:
00582     case Instruction::FRem:
00583     case Instruction::Shl:
00584     case Instruction::LShr:
00585     case Instruction::AShr:
00586     case Instruction::And:
00587     case Instruction::Or:
00588     case Instruction::Xor:
00589     case Instruction::ICmp:
00590     case Instruction::FCmp:
00591     case Instruction::Trunc:
00592     case Instruction::ZExt:
00593     case Instruction::SExt:
00594     case Instruction::FPToUI:
00595     case Instruction::FPToSI:
00596     case Instruction::UIToFP:
00597     case Instruction::SIToFP:
00598     case Instruction::FPTrunc:
00599     case Instruction::FPExt:
00600     case Instruction::GetElementPtr: {
00601       for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
00602         if (!CanEvaluateShuffled(I->getOperand(i), Mask, Depth-1))
00603           return false;
00604       }
00605       return true;
00606     }
00607     case Instruction::InsertElement: {
00608       ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
00609       if (!CI) return false;
00610       int ElementNumber = CI->getLimitedValue();
00611 
00612       // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
00613       // can't put an element into multiple indices.
00614       bool SeenOnce = false;
00615       for (int i = 0, e = Mask.size(); i != e; ++i) {
00616         if (Mask[i] == ElementNumber) {
00617           if (SeenOnce)
00618             return false;
00619           SeenOnce = true;
00620         }
00621       }
00622       return CanEvaluateShuffled(I->getOperand(0), Mask, Depth-1);
00623     }
00624   }
00625   return false;
00626 }
00627 
00628 /// Rebuild a new instruction just like 'I' but with the new operands given.
00629 /// In the event of type mismatch, the type of the operands is correct.
00630 static Value *BuildNew(Instruction *I, ArrayRef<Value*> NewOps) {
00631   // We don't want to use the IRBuilder here because we want the replacement
00632   // instructions to appear next to 'I', not the builder's insertion point.
00633   switch (I->getOpcode()) {
00634     case Instruction::Add:
00635     case Instruction::FAdd:
00636     case Instruction::Sub:
00637     case Instruction::FSub:
00638     case Instruction::Mul:
00639     case Instruction::FMul:
00640     case Instruction::UDiv:
00641     case Instruction::SDiv:
00642     case Instruction::FDiv:
00643     case Instruction::URem:
00644     case Instruction::SRem:
00645     case Instruction::FRem:
00646     case Instruction::Shl:
00647     case Instruction::LShr:
00648     case Instruction::AShr:
00649     case Instruction::And:
00650     case Instruction::Or:
00651     case Instruction::Xor: {
00652       BinaryOperator *BO = cast<BinaryOperator>(I);
00653       assert(NewOps.size() == 2 && "binary operator with #ops != 2");
00654       BinaryOperator *New =
00655           BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(),
00656                                  NewOps[0], NewOps[1], "", BO);
00657       if (isa<OverflowingBinaryOperator>(BO)) {
00658         New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
00659         New->setHasNoSignedWrap(BO->hasNoSignedWrap());
00660       }
00661       if (isa<PossiblyExactOperator>(BO)) {
00662         New->setIsExact(BO->isExact());
00663       }
00664       if (isa<FPMathOperator>(BO))
00665         New->copyFastMathFlags(I);
00666       return New;
00667     }
00668     case Instruction::ICmp:
00669       assert(NewOps.size() == 2 && "icmp with #ops != 2");
00670       return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(),
00671                           NewOps[0], NewOps[1]);
00672     case Instruction::FCmp:
00673       assert(NewOps.size() == 2 && "fcmp with #ops != 2");
00674       return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(),
00675                           NewOps[0], NewOps[1]);
00676     case Instruction::Trunc:
00677     case Instruction::ZExt:
00678     case Instruction::SExt:
00679     case Instruction::FPToUI:
00680     case Instruction::FPToSI:
00681     case Instruction::UIToFP:
00682     case Instruction::SIToFP:
00683     case Instruction::FPTrunc:
00684     case Instruction::FPExt: {
00685       // It's possible that the mask has a different number of elements from
00686       // the original cast. We recompute the destination type to match the mask.
00687       Type *DestTy =
00688           VectorType::get(I->getType()->getScalarType(),
00689                           NewOps[0]->getType()->getVectorNumElements());
00690       assert(NewOps.size() == 1 && "cast with #ops != 1");
00691       return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy,
00692                               "", I);
00693     }
00694     case Instruction::GetElementPtr: {
00695       Value *Ptr = NewOps[0];
00696       ArrayRef<Value*> Idx = NewOps.slice(1);
00697       GetElementPtrInst *GEP = GetElementPtrInst::Create(Ptr, Idx, "", I);
00698       GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
00699       return GEP;
00700     }
00701   }
00702   llvm_unreachable("failed to rebuild vector instructions");
00703 }
00704 
00705 Value *
00706 InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
00707   // Mask.size() does not need to be equal to the number of vector elements.
00708 
00709   assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
00710   if (isa<UndefValue>(V)) {
00711     return UndefValue::get(VectorType::get(V->getType()->getScalarType(),
00712                                            Mask.size()));
00713   }
00714   if (isa<ConstantAggregateZero>(V)) {
00715     return ConstantAggregateZero::get(
00716                VectorType::get(V->getType()->getScalarType(),
00717                                Mask.size()));
00718   }
00719   if (Constant *C = dyn_cast<Constant>(V)) {
00720     SmallVector<Constant *, 16> MaskValues;
00721     for (int i = 0, e = Mask.size(); i != e; ++i) {
00722       if (Mask[i] == -1)
00723         MaskValues.push_back(UndefValue::get(Builder->getInt32Ty()));
00724       else
00725         MaskValues.push_back(Builder->getInt32(Mask[i]));
00726     }
00727     return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()),
00728                                           ConstantVector::get(MaskValues));
00729   }
00730 
00731   Instruction *I = cast<Instruction>(V);
00732   switch (I->getOpcode()) {
00733     case Instruction::Add:
00734     case Instruction::FAdd:
00735     case Instruction::Sub:
00736     case Instruction::FSub:
00737     case Instruction::Mul:
00738     case Instruction::FMul:
00739     case Instruction::UDiv:
00740     case Instruction::SDiv:
00741     case Instruction::FDiv:
00742     case Instruction::URem:
00743     case Instruction::SRem:
00744     case Instruction::FRem:
00745     case Instruction::Shl:
00746     case Instruction::LShr:
00747     case Instruction::AShr:
00748     case Instruction::And:
00749     case Instruction::Or:
00750     case Instruction::Xor:
00751     case Instruction::ICmp:
00752     case Instruction::FCmp:
00753     case Instruction::Trunc:
00754     case Instruction::ZExt:
00755     case Instruction::SExt:
00756     case Instruction::FPToUI:
00757     case Instruction::FPToSI:
00758     case Instruction::UIToFP:
00759     case Instruction::SIToFP:
00760     case Instruction::FPTrunc:
00761     case Instruction::FPExt:
00762     case Instruction::Select:
00763     case Instruction::GetElementPtr: {
00764       SmallVector<Value*, 8> NewOps;
00765       bool NeedsRebuild = (Mask.size() != I->getType()->getVectorNumElements());
00766       for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
00767         Value *V = EvaluateInDifferentElementOrder(I->getOperand(i), Mask);
00768         NewOps.push_back(V);
00769         NeedsRebuild |= (V != I->getOperand(i));
00770       }
00771       if (NeedsRebuild) {
00772         return BuildNew(I, NewOps);
00773       }
00774       return I;
00775     }
00776     case Instruction::InsertElement: {
00777       int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
00778 
00779       // The insertelement was inserting at Element. Figure out which element
00780       // that becomes after shuffling. The answer is guaranteed to be unique
00781       // by CanEvaluateShuffled.
00782       bool Found = false;
00783       int Index = 0;
00784       for (int e = Mask.size(); Index != e; ++Index) {
00785         if (Mask[Index] == Element) {
00786           Found = true;
00787           break;
00788         }
00789       }
00790 
00791       // If element is not in Mask, no need to handle the operand 1 (element to
00792       // be inserted). Just evaluate values in operand 0 according to Mask.
00793       if (!Found)
00794         return EvaluateInDifferentElementOrder(I->getOperand(0), Mask);
00795 
00796       Value *V = EvaluateInDifferentElementOrder(I->getOperand(0), Mask);
00797       return InsertElementInst::Create(V, I->getOperand(1),
00798                                        Builder->getInt32(Index), "", I);
00799     }
00800   }
00801   llvm_unreachable("failed to reorder elements of vector instruction!");
00802 }
00803 
00804 Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
00805   Value *LHS = SVI.getOperand(0);
00806   Value *RHS = SVI.getOperand(1);
00807   SmallVector<int, 16> Mask = SVI.getShuffleMask();
00808 
00809   bool MadeChange = false;
00810 
00811   // Undefined shuffle mask -> undefined value.
00812   if (isa<UndefValue>(SVI.getOperand(2)))
00813     return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));
00814 
00815   unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements();
00816 
00817   APInt UndefElts(VWidth, 0);
00818   APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
00819   if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
00820     if (V != &SVI)
00821       return ReplaceInstUsesWith(SVI, V);
00822     LHS = SVI.getOperand(0);
00823     RHS = SVI.getOperand(1);
00824     MadeChange = true;
00825   }
00826 
00827   unsigned LHSWidth = cast<VectorType>(LHS->getType())->getNumElements();
00828 
00829   // Canonicalize shuffle(x    ,x,mask) -> shuffle(x, undef,mask')
00830   // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask').
00831   if (LHS == RHS || isa<UndefValue>(LHS)) {
00832     if (isa<UndefValue>(LHS) && LHS == RHS) {
00833       // shuffle(undef,undef,mask) -> undef.
00834       Value *Result = (VWidth == LHSWidth)
00835                       ? LHS : UndefValue::get(SVI.getType());
00836       return ReplaceInstUsesWith(SVI, Result);
00837     }
00838 
00839     // Remap any references to RHS to use LHS.
00840     SmallVector<Constant*, 16> Elts;
00841     for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) {
00842       if (Mask[i] < 0) {
00843         Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
00844         continue;
00845       }
00846 
00847       if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) ||
00848           (Mask[i] <  (int)e && isa<UndefValue>(LHS))) {
00849         Mask[i] = -1;     // Turn into undef.
00850         Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
00851       } else {
00852         Mask[i] = Mask[i] % e;  // Force to LHS.
00853         Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()),
00854                                         Mask[i]));
00855       }
00856     }
00857     SVI.setOperand(0, SVI.getOperand(1));
00858     SVI.setOperand(1, UndefValue::get(RHS->getType()));
00859     SVI.setOperand(2, ConstantVector::get(Elts));
00860     LHS = SVI.getOperand(0);
00861     RHS = SVI.getOperand(1);
00862     MadeChange = true;
00863   }
00864 
00865   if (VWidth == LHSWidth) {
00866     // Analyze the shuffle, are the LHS or RHS and identity shuffles?
00867     bool isLHSID = true, isRHSID = true;
00868 
00869     for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
00870       if (Mask[i] < 0) continue;  // Ignore undef values.
00871       // Is this an identity shuffle of the LHS value?
00872       isLHSID &= (Mask[i] == (int)i);
00873 
00874       // Is this an identity shuffle of the RHS value?
00875       isRHSID &= (Mask[i]-e == i);
00876     }
00877 
00878     // Eliminate identity shuffles.
00879     if (isLHSID) return ReplaceInstUsesWith(SVI, LHS);
00880     if (isRHSID) return ReplaceInstUsesWith(SVI, RHS);
00881   }
00882 
00883   if (isa<UndefValue>(RHS) && CanEvaluateShuffled(LHS, Mask)) {
00884     Value *V = EvaluateInDifferentElementOrder(LHS, Mask);
00885     return ReplaceInstUsesWith(SVI, V);
00886   }
00887 
00888   // If the LHS is a shufflevector itself, see if we can combine it with this
00889   // one without producing an unusual shuffle.
00890   // Cases that might be simplified:
00891   // 1.
00892   // x1=shuffle(v1,v2,mask1)
00893   //  x=shuffle(x1,undef,mask)
00894   //        ==>
00895   //  x=shuffle(v1,undef,newMask)
00896   // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
00897   // 2.
00898   // x1=shuffle(v1,undef,mask1)
00899   //  x=shuffle(x1,x2,mask)
00900   // where v1.size() == mask1.size()
00901   //        ==>
00902   //  x=shuffle(v1,x2,newMask)
00903   // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
00904   // 3.
00905   // x2=shuffle(v2,undef,mask2)
00906   //  x=shuffle(x1,x2,mask)
00907   // where v2.size() == mask2.size()
00908   //        ==>
00909   //  x=shuffle(x1,v2,newMask)
00910   // newMask[i] = (mask[i] < x1.size())
00911   //              ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
00912   // 4.
00913   // x1=shuffle(v1,undef,mask1)
00914   // x2=shuffle(v2,undef,mask2)
00915   //  x=shuffle(x1,x2,mask)
00916   // where v1.size() == v2.size()
00917   //        ==>
00918   //  x=shuffle(v1,v2,newMask)
00919   // newMask[i] = (mask[i] < x1.size())
00920   //              ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
00921   //
00922   // Here we are really conservative:
00923   // we are absolutely afraid of producing a shuffle mask not in the input
00924   // program, because the code gen may not be smart enough to turn a merged
00925   // shuffle into two specific shuffles: it may produce worse code.  As such,
00926   // we only merge two shuffles if the result is either a splat or one of the
00927   // input shuffle masks.  In this case, merging the shuffles just removes
00928   // one instruction, which we know is safe.  This is good for things like
00929   // turning: (splat(splat)) -> splat, or
00930   // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
00931   ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
00932   ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
00933   if (LHSShuffle)
00934     if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS))
00935       LHSShuffle = NULL;
00936   if (RHSShuffle)
00937     if (!isa<UndefValue>(RHSShuffle->getOperand(1)))
00938       RHSShuffle = NULL;
00939   if (!LHSShuffle && !RHSShuffle)
00940     return MadeChange ? &SVI : 0;
00941 
00942   Value* LHSOp0 = NULL;
00943   Value* LHSOp1 = NULL;
00944   Value* RHSOp0 = NULL;
00945   unsigned LHSOp0Width = 0;
00946   unsigned RHSOp0Width = 0;
00947   if (LHSShuffle) {
00948     LHSOp0 = LHSShuffle->getOperand(0);
00949     LHSOp1 = LHSShuffle->getOperand(1);
00950     LHSOp0Width = cast<VectorType>(LHSOp0->getType())->getNumElements();
00951   }
00952   if (RHSShuffle) {
00953     RHSOp0 = RHSShuffle->getOperand(0);
00954     RHSOp0Width = cast<VectorType>(RHSOp0->getType())->getNumElements();
00955   }
00956   Value* newLHS = LHS;
00957   Value* newRHS = RHS;
00958   if (LHSShuffle) {
00959     // case 1
00960     if (isa<UndefValue>(RHS)) {
00961       newLHS = LHSOp0;
00962       newRHS = LHSOp1;
00963     }
00964     // case 2 or 4
00965     else if (LHSOp0Width == LHSWidth) {
00966       newLHS = LHSOp0;
00967     }
00968   }
00969   // case 3 or 4
00970   if (RHSShuffle && RHSOp0Width == LHSWidth) {
00971     newRHS = RHSOp0;
00972   }
00973   // case 4
00974   if (LHSOp0 == RHSOp0) {
00975     newLHS = LHSOp0;
00976     newRHS = NULL;
00977   }
00978 
00979   if (newLHS == LHS && newRHS == RHS)
00980     return MadeChange ? &SVI : 0;
00981 
00982   SmallVector<int, 16> LHSMask;
00983   SmallVector<int, 16> RHSMask;
00984   if (newLHS != LHS)
00985     LHSMask = LHSShuffle->getShuffleMask();
00986   if (RHSShuffle && newRHS != RHS)
00987     RHSMask = RHSShuffle->getShuffleMask();
00988 
00989   unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
00990   SmallVector<int, 16> newMask;
00991   bool isSplat = true;
00992   int SplatElt = -1;
00993   // Create a new mask for the new ShuffleVectorInst so that the new
00994   // ShuffleVectorInst is equivalent to the original one.
00995   for (unsigned i = 0; i < VWidth; ++i) {
00996     int eltMask;
00997     if (Mask[i] < 0) {
00998       // This element is an undef value.
00999       eltMask = -1;
01000     } else if (Mask[i] < (int)LHSWidth) {
01001       // This element is from left hand side vector operand.
01002       //
01003       // If LHS is going to be replaced (case 1, 2, or 4), calculate the
01004       // new mask value for the element.
01005       if (newLHS != LHS) {
01006         eltMask = LHSMask[Mask[i]];
01007         // If the value selected is an undef value, explicitly specify it
01008         // with a -1 mask value.
01009         if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1))
01010           eltMask = -1;
01011       } else
01012         eltMask = Mask[i];
01013     } else {
01014       // This element is from right hand side vector operand
01015       //
01016       // If the value selected is an undef value, explicitly specify it
01017       // with a -1 mask value. (case 1)
01018       if (isa<UndefValue>(RHS))
01019         eltMask = -1;
01020       // If RHS is going to be replaced (case 3 or 4), calculate the
01021       // new mask value for the element.
01022       else if (newRHS != RHS) {
01023         eltMask = RHSMask[Mask[i]-LHSWidth];
01024         // If the value selected is an undef value, explicitly specify it
01025         // with a -1 mask value.
01026         if (eltMask >= (int)RHSOp0Width) {
01027           assert(isa<UndefValue>(RHSShuffle->getOperand(1))
01028                  && "should have been check above");
01029           eltMask = -1;
01030         }
01031       } else
01032         eltMask = Mask[i]-LHSWidth;
01033 
01034       // If LHS's width is changed, shift the mask value accordingly.
01035       // If newRHS == NULL, i.e. LHSOp0 == RHSOp0, we want to remap any
01036       // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
01037       // If newRHS == newLHS, we want to remap any references from newRHS to
01038       // newLHS so that we can properly identify splats that may occur due to
01039       // obfuscation across the two vectors.
01040       if (eltMask >= 0 && newRHS != NULL && newLHS != newRHS)
01041         eltMask += newLHSWidth;
01042     }
01043 
01044     // Check if this could still be a splat.
01045     if (eltMask >= 0) {
01046       if (SplatElt >= 0 && SplatElt != eltMask)
01047         isSplat = false;
01048       SplatElt = eltMask;
01049     }
01050 
01051     newMask.push_back(eltMask);
01052   }
01053 
01054   // If the result mask is equal to one of the original shuffle masks,
01055   // or is a splat, do the replacement.
01056   if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
01057     SmallVector<Constant*, 16> Elts;
01058     Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
01059     for (unsigned i = 0, e = newMask.size(); i != e; ++i) {
01060       if (newMask[i] < 0) {
01061         Elts.push_back(UndefValue::get(Int32Ty));
01062       } else {
01063         Elts.push_back(ConstantInt::get(Int32Ty, newMask[i]));
01064       }
01065     }
01066     if (newRHS == NULL)
01067       newRHS = UndefValue::get(newLHS->getType());
01068     return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts));
01069   }
01070 
01071   return MadeChange ? &SVI : 0;
01072 }