LLVM API Documentation

InstCombineSimplifyDemanded.cpp
Go to the documentation of this file.
00001 //===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file contains logic for simplifying instructions based on information
00011 // about how they are used.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #include "InstCombine.h"
00016 #include "llvm/IR/DataLayout.h"
00017 #include "llvm/IR/IntrinsicInst.h"
00018 #include "llvm/IR/PatternMatch.h"
00019 
00020 using namespace llvm;
00021 using namespace llvm::PatternMatch;
00022 
00023 #define DEBUG_TYPE "instcombine"
00024 
00025 /// ShrinkDemandedConstant - Check to see if the specified operand of the
00026 /// specified instruction is a constant integer.  If so, check to see if there
00027 /// are any bits set in the constant that are not demanded.  If so, shrink the
00028 /// constant and return true.
00029 static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
00030                                    APInt Demanded) {
00031   assert(I && "No instruction?");
00032   assert(OpNo < I->getNumOperands() && "Operand index too large");
00033 
00034   // If the operand is not a constant integer, nothing to do.
00035   ConstantInt *OpC = dyn_cast<ConstantInt>(I->getOperand(OpNo));
00036   if (!OpC) return false;
00037 
00038   // If there are no bits set that aren't demanded, nothing to do.
00039   Demanded = Demanded.zextOrTrunc(OpC->getValue().getBitWidth());
00040   if ((~Demanded & OpC->getValue()) == 0)
00041     return false;
00042 
00043   // This instruction is producing bits that are not demanded. Shrink the RHS.
00044   Demanded &= OpC->getValue();
00045   I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded));
00046   return true;
00047 }
00048 
00049 
00050 
00051 /// SimplifyDemandedInstructionBits - Inst is an integer instruction that
00052 /// SimplifyDemandedBits knows about.  See if the instruction has any
00053 /// properties that allow us to simplify its operands.
00054 bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
00055   unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
00056   APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
00057   APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
00058 
00059   Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask,
00060                                      KnownZero, KnownOne, 0);
00061   if (!V) return false;
00062   if (V == &Inst) return true;
00063   ReplaceInstUsesWith(Inst, V);
00064   return true;
00065 }
00066 
00067 /// SimplifyDemandedBits - This form of SimplifyDemandedBits simplifies the
00068 /// specified instruction operand if possible, updating it in place.  It returns
00069 /// true if it made any change and false otherwise.
00070 bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
00071                                         APInt &KnownZero, APInt &KnownOne,
00072                                         unsigned Depth) {
00073   Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask,
00074                                           KnownZero, KnownOne, Depth);
00075   if (!NewVal) return false;
00076   U = NewVal;
00077   return true;
00078 }
00079 
00080 
00081 /// SimplifyDemandedUseBits - This function attempts to replace V with a simpler
00082 /// value based on the demanded bits.  When this function is called, it is known
00083 /// that only the bits set in DemandedMask of the result of V are ever used
00084 /// downstream. Consequently, depending on the mask and V, it may be possible
00085 /// to replace V with a constant or one of its operands. In such cases, this
00086 /// function does the replacement and returns true. In all other cases, it
00087 /// returns false after analyzing the expression and setting KnownOne and known
00088 /// to be one in the expression.  KnownZero contains all the bits that are known
00089 /// to be zero in the expression. These are provided to potentially allow the
00090 /// caller (which might recursively be SimplifyDemandedBits itself) to simplify
00091 /// the expression. KnownOne and KnownZero always follow the invariant that
00092 /// KnownOne & KnownZero == 0. That is, a bit can't be both 1 and 0. Note that
00093 /// the bits in KnownOne and KnownZero may only be accurate for those bits set
00094 /// in DemandedMask. Note also that the bitwidth of V, DemandedMask, KnownZero
00095 /// and KnownOne must all be the same.
00096 ///
00097 /// This returns null if it did not change anything and it permits no
00098 /// simplification.  This returns V itself if it did some simplification of V's
00099 /// operands based on the information about what bits are demanded. This returns
00100 /// some other non-null value if it found out that V is equal to another value
00101 /// in the context where the specified bits are demanded, but not for all users.
00102 Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
00103                                              APInt &KnownZero, APInt &KnownOne,
00104                                              unsigned Depth) {
00105   assert(V != nullptr && "Null pointer of Value???");
00106   assert(Depth <= 6 && "Limit Search Depth");
00107   uint32_t BitWidth = DemandedMask.getBitWidth();
00108   Type *VTy = V->getType();
00109   assert((DL || !VTy->isPointerTy()) &&
00110          "SimplifyDemandedBits needs to know bit widths!");
00111   assert((!DL || DL->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) &&
00112          (!VTy->isIntOrIntVectorTy() ||
00113           VTy->getScalarSizeInBits() == BitWidth) &&
00114          KnownZero.getBitWidth() == BitWidth &&
00115          KnownOne.getBitWidth() == BitWidth &&
00116          "Value *V, DemandedMask, KnownZero and KnownOne "
00117          "must have same BitWidth");
00118   if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
00119     // We know all of the bits for a constant!
00120     KnownOne = CI->getValue() & DemandedMask;
00121     KnownZero = ~KnownOne & DemandedMask;
00122     return nullptr;
00123   }
00124   if (isa<ConstantPointerNull>(V)) {
00125     // We know all of the bits for a constant!
00126     KnownOne.clearAllBits();
00127     KnownZero = DemandedMask;
00128     return nullptr;
00129   }
00130 
00131   KnownZero.clearAllBits();
00132   KnownOne.clearAllBits();
00133   if (DemandedMask == 0) {   // Not demanding any bits from V.
00134     if (isa<UndefValue>(V))
00135       return nullptr;
00136     return UndefValue::get(VTy);
00137   }
00138 
00139   if (Depth == 6)        // Limit search depth.
00140     return nullptr;
00141 
00142   APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
00143   APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
00144 
00145   Instruction *I = dyn_cast<Instruction>(V);
00146   if (!I) {
00147     computeKnownBits(V, KnownZero, KnownOne, Depth);
00148     return nullptr;        // Only analyze instructions.
00149   }
00150 
00151   // If there are multiple uses of this value and we aren't at the root, then
00152   // we can't do any simplifications of the operands, because DemandedMask
00153   // only reflects the bits demanded by *one* of the users.
00154   if (Depth != 0 && !I->hasOneUse()) {
00155     // Despite the fact that we can't simplify this instruction in all User's
00156     // context, we can at least compute the knownzero/knownone bits, and we can
00157     // do simplifications that apply to *just* the one user if we know that
00158     // this instruction has a simpler value in that context.
00159     if (I->getOpcode() == Instruction::And) {
00160       // If either the LHS or the RHS are Zero, the result is zero.
00161       computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
00162       computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
00163 
00164       // If all of the demanded bits are known 1 on one side, return the other.
00165       // These bits cannot contribute to the result of the 'and' in this
00166       // context.
00167       if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
00168           (DemandedMask & ~LHSKnownZero))
00169         return I->getOperand(0);
00170       if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
00171           (DemandedMask & ~RHSKnownZero))
00172         return I->getOperand(1);
00173 
00174       // If all of the demanded bits in the inputs are known zeros, return zero.
00175       if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
00176         return Constant::getNullValue(VTy);
00177 
00178     } else if (I->getOpcode() == Instruction::Or) {
00179       // We can simplify (X|Y) -> X or Y in the user's context if we know that
00180       // only bits from X or Y are demanded.
00181 
00182       // If either the LHS or the RHS are One, the result is One.
00183       computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
00184       computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
00185 
00186       // If all of the demanded bits are known zero on one side, return the
00187       // other.  These bits cannot contribute to the result of the 'or' in this
00188       // context.
00189       if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
00190           (DemandedMask & ~LHSKnownOne))
00191         return I->getOperand(0);
00192       if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
00193           (DemandedMask & ~RHSKnownOne))
00194         return I->getOperand(1);
00195 
00196       // If all of the potentially set bits on one side are known to be set on
00197       // the other side, just use the 'other' side.
00198       if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
00199           (DemandedMask & (~RHSKnownZero)))
00200         return I->getOperand(0);
00201       if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
00202           (DemandedMask & (~LHSKnownZero)))
00203         return I->getOperand(1);
00204     } else if (I->getOpcode() == Instruction::Xor) {
00205       // We can simplify (X^Y) -> X or Y in the user's context if we know that
00206       // only bits from X or Y are demanded.
00207 
00208       computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
00209       computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
00210 
00211       // If all of the demanded bits are known zero on one side, return the
00212       // other.
00213       if ((DemandedMask & RHSKnownZero) == DemandedMask)
00214         return I->getOperand(0);
00215       if ((DemandedMask & LHSKnownZero) == DemandedMask)
00216         return I->getOperand(1);
00217     }
00218 
00219     // Compute the KnownZero/KnownOne bits to simplify things downstream.
00220     computeKnownBits(I, KnownZero, KnownOne, Depth);
00221     return nullptr;
00222   }
00223 
00224   // If this is the root being simplified, allow it to have multiple uses,
00225   // just set the DemandedMask to all bits so that we can try to simplify the
00226   // operands.  This allows visitTruncInst (for example) to simplify the
00227   // operand of a trunc without duplicating all the logic below.
00228   if (Depth == 0 && !V->hasOneUse())
00229     DemandedMask = APInt::getAllOnesValue(BitWidth);
00230 
00231   switch (I->getOpcode()) {
00232   default:
00233     computeKnownBits(I, KnownZero, KnownOne, Depth);
00234     break;
00235   case Instruction::And:
00236     // If either the LHS or the RHS are Zero, the result is zero.
00237     if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
00238                              RHSKnownZero, RHSKnownOne, Depth+1) ||
00239         SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero,
00240                              LHSKnownZero, LHSKnownOne, Depth+1))
00241       return I;
00242     assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
00243     assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
00244 
00245     // If all of the demanded bits are known 1 on one side, return the other.
00246     // These bits cannot contribute to the result of the 'and'.
00247     if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
00248         (DemandedMask & ~LHSKnownZero))
00249       return I->getOperand(0);
00250     if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
00251         (DemandedMask & ~RHSKnownZero))
00252       return I->getOperand(1);
00253 
00254     // If all of the demanded bits in the inputs are known zeros, return zero.
00255     if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
00256       return Constant::getNullValue(VTy);
00257 
00258     // If the RHS is a constant, see if we can simplify it.
00259     if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero))
00260       return I;
00261 
00262     // Output known-1 bits are only known if set in both the LHS & RHS.
00263     KnownOne = RHSKnownOne & LHSKnownOne;
00264     // Output known-0 are known to be clear if zero in either the LHS | RHS.
00265     KnownZero = RHSKnownZero | LHSKnownZero;
00266     break;
00267   case Instruction::Or:
00268     // If either the LHS or the RHS are One, the result is One.
00269     if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
00270                              RHSKnownZero, RHSKnownOne, Depth+1) ||
00271         SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne,
00272                              LHSKnownZero, LHSKnownOne, Depth+1))
00273       return I;
00274     assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
00275     assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
00276 
00277     // If all of the demanded bits are known zero on one side, return the other.
00278     // These bits cannot contribute to the result of the 'or'.
00279     if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
00280         (DemandedMask & ~LHSKnownOne))
00281       return I->getOperand(0);
00282     if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
00283         (DemandedMask & ~RHSKnownOne))
00284       return I->getOperand(1);
00285 
00286     // If all of the potentially set bits on one side are known to be set on
00287     // the other side, just use the 'other' side.
00288     if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
00289         (DemandedMask & (~RHSKnownZero)))
00290       return I->getOperand(0);
00291     if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
00292         (DemandedMask & (~LHSKnownZero)))
00293       return I->getOperand(1);
00294 
00295     // If the RHS is a constant, see if we can simplify it.
00296     if (ShrinkDemandedConstant(I, 1, DemandedMask))
00297       return I;
00298 
00299     // Output known-0 bits are only known if clear in both the LHS & RHS.
00300     KnownZero = RHSKnownZero & LHSKnownZero;
00301     // Output known-1 are known to be set if set in either the LHS | RHS.
00302     KnownOne = RHSKnownOne | LHSKnownOne;
00303     break;
00304   case Instruction::Xor: {
00305     if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
00306                              RHSKnownZero, RHSKnownOne, Depth+1) ||
00307         SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
00308                              LHSKnownZero, LHSKnownOne, Depth+1))
00309       return I;
00310     assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
00311     assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
00312 
00313     // If all of the demanded bits are known zero on one side, return the other.
00314     // These bits cannot contribute to the result of the 'xor'.
00315     if ((DemandedMask & RHSKnownZero) == DemandedMask)
00316       return I->getOperand(0);
00317     if ((DemandedMask & LHSKnownZero) == DemandedMask)
00318       return I->getOperand(1);
00319 
00320     // If all of the demanded bits are known to be zero on one side or the
00321     // other, turn this into an *inclusive* or.
00322     //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
00323     if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) {
00324       Instruction *Or =
00325         BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
00326                                  I->getName());
00327       return InsertNewInstWith(Or, *I);
00328     }
00329 
00330     // If all of the demanded bits on one side are known, and all of the set
00331     // bits on that side are also known to be set on the other side, turn this
00332     // into an AND, as we know the bits will be cleared.
00333     //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
00334     if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) {
00335       // all known
00336       if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) {
00337         Constant *AndC = Constant::getIntegerValue(VTy,
00338                                                    ~RHSKnownOne & DemandedMask);
00339         Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
00340         return InsertNewInstWith(And, *I);
00341       }
00342     }
00343 
00344     // If the RHS is a constant, see if we can simplify it.
00345     // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
00346     if (ShrinkDemandedConstant(I, 1, DemandedMask))
00347       return I;
00348 
00349     // If our LHS is an 'and' and if it has one use, and if any of the bits we
00350     // are flipping are known to be set, then the xor is just resetting those
00351     // bits to zero.  We can just knock out bits from the 'and' and the 'xor',
00352     // simplifying both of them.
00353     if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0)))
00354       if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
00355           isa<ConstantInt>(I->getOperand(1)) &&
00356           isa<ConstantInt>(LHSInst->getOperand(1)) &&
00357           (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) {
00358         ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1));
00359         ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1));
00360         APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask);
00361 
00362         Constant *AndC =
00363           ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
00364         Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
00365         InsertNewInstWith(NewAnd, *I);
00366 
00367         Constant *XorC =
00368           ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
00369         Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC);
00370         return InsertNewInstWith(NewXor, *I);
00371       }
00372 
00373     // Output known-0 bits are known if clear or set in both the LHS & RHS.
00374     KnownZero= (RHSKnownZero & LHSKnownZero) | (RHSKnownOne & LHSKnownOne);
00375     // Output known-1 are known to be set if set in only one of the LHS, RHS.
00376     KnownOne = (RHSKnownZero & LHSKnownOne) | (RHSKnownOne & LHSKnownZero);
00377     break;
00378   }
00379   case Instruction::Select:
00380     if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask,
00381                              RHSKnownZero, RHSKnownOne, Depth+1) ||
00382         SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
00383                              LHSKnownZero, LHSKnownOne, Depth+1))
00384       return I;
00385     assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
00386     assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
00387 
00388     // If the operands are constants, see if we can simplify them.
00389     if (ShrinkDemandedConstant(I, 1, DemandedMask) ||
00390         ShrinkDemandedConstant(I, 2, DemandedMask))
00391       return I;
00392 
00393     // Only known if known in both the LHS and RHS.
00394     KnownOne = RHSKnownOne & LHSKnownOne;
00395     KnownZero = RHSKnownZero & LHSKnownZero;
00396     break;
00397   case Instruction::Trunc: {
00398     unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits();
00399     DemandedMask = DemandedMask.zext(truncBf);
00400     KnownZero = KnownZero.zext(truncBf);
00401     KnownOne = KnownOne.zext(truncBf);
00402     if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
00403                              KnownZero, KnownOne, Depth+1))
00404       return I;
00405     DemandedMask = DemandedMask.trunc(BitWidth);
00406     KnownZero = KnownZero.trunc(BitWidth);
00407     KnownOne = KnownOne.trunc(BitWidth);
00408     assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
00409     break;
00410   }
00411   case Instruction::BitCast:
00412     if (!I->getOperand(0)->getType()->isIntOrIntVectorTy())
00413       return nullptr;  // vector->int or fp->int?
00414 
00415     if (VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
00416       if (VectorType *SrcVTy =
00417             dyn_cast<VectorType>(I->getOperand(0)->getType())) {
00418         if (DstVTy->getNumElements() != SrcVTy->getNumElements())
00419           // Don't touch a bitcast between vectors of different element counts.
00420           return nullptr;
00421       } else
00422         // Don't touch a scalar-to-vector bitcast.
00423         return nullptr;
00424     } else if (I->getOperand(0)->getType()->isVectorTy())
00425       // Don't touch a vector-to-scalar bitcast.
00426       return nullptr;
00427 
00428     if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
00429                              KnownZero, KnownOne, Depth+1))
00430       return I;
00431     assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
00432     break;
00433   case Instruction::ZExt: {
00434     // Compute the bits in the result that are not present in the input.
00435     unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
00436 
00437     DemandedMask = DemandedMask.trunc(SrcBitWidth);
00438     KnownZero = KnownZero.trunc(SrcBitWidth);
00439     KnownOne = KnownOne.trunc(SrcBitWidth);
00440     if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
00441                              KnownZero, KnownOne, Depth+1))
00442       return I;
00443     DemandedMask = DemandedMask.zext(BitWidth);
00444     KnownZero = KnownZero.zext(BitWidth);
00445     KnownOne = KnownOne.zext(BitWidth);
00446     assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
00447     // The top bits are known to be zero.
00448     KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
00449     break;
00450   }
00451   case Instruction::SExt: {
00452     // Compute the bits in the result that are not present in the input.
00453     unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
00454 
00455     APInt InputDemandedBits = DemandedMask &
00456                               APInt::getLowBitsSet(BitWidth, SrcBitWidth);
00457 
00458     APInt NewBits(APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth));
00459     // If any of the sign extended bits are demanded, we know that the sign
00460     // bit is demanded.
00461     if ((NewBits & DemandedMask) != 0)
00462       InputDemandedBits.setBit(SrcBitWidth-1);
00463 
00464     InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth);
00465     KnownZero = KnownZero.trunc(SrcBitWidth);
00466     KnownOne = KnownOne.trunc(SrcBitWidth);
00467     if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits,
00468                              KnownZero, KnownOne, Depth+1))
00469       return I;
00470     InputDemandedBits = InputDemandedBits.zext(BitWidth);
00471     KnownZero = KnownZero.zext(BitWidth);
00472     KnownOne = KnownOne.zext(BitWidth);
00473     assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
00474 
00475     // If the sign bit of the input is known set or clear, then we know the
00476     // top bits of the result.
00477 
00478     // If the input sign bit is known zero, or if the NewBits are not demanded
00479     // convert this into a zero extension.
00480     if (KnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) {
00481       // Convert to ZExt cast
00482       CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
00483       return InsertNewInstWith(NewCast, *I);
00484     } else if (KnownOne[SrcBitWidth-1]) {    // Input sign bit known set
00485       KnownOne |= NewBits;
00486     }
00487     break;
00488   }
00489   case Instruction::Add: {
00490     // Figure out what the input bits are.  If the top bits of the and result
00491     // are not demanded, then the add doesn't demand them from its input
00492     // either.
00493     unsigned NLZ = DemandedMask.countLeadingZeros();
00494 
00495     // If there is a constant on the RHS, there are a variety of xformations
00496     // we can do.
00497     if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
00498       // If null, this should be simplified elsewhere.  Some of the xforms here
00499       // won't work if the RHS is zero.
00500       if (RHS->isZero())
00501         break;
00502 
00503       // If the top bit of the output is demanded, demand everything from the
00504       // input.  Otherwise, we demand all the input bits except NLZ top bits.
00505       APInt InDemandedBits(APInt::getLowBitsSet(BitWidth, BitWidth - NLZ));
00506 
00507       // Find information about known zero/one bits in the input.
00508       if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits,
00509                                LHSKnownZero, LHSKnownOne, Depth+1))
00510         return I;
00511 
00512       // If the RHS of the add has bits set that can't affect the input, reduce
00513       // the constant.
00514       if (ShrinkDemandedConstant(I, 1, InDemandedBits))
00515         return I;
00516 
00517       // Avoid excess work.
00518       if (LHSKnownZero == 0 && LHSKnownOne == 0)
00519         break;
00520 
00521       // Turn it into OR if input bits are zero.
00522       if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) {
00523         Instruction *Or =
00524           BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
00525                                    I->getName());
00526         return InsertNewInstWith(Or, *I);
00527       }
00528 
00529       // We can say something about the output known-zero and known-one bits,
00530       // depending on potential carries from the input constant and the
00531       // unknowns.  For example if the LHS is known to have at most the 0x0F0F0
00532       // bits set and the RHS constant is 0x01001, then we know we have a known
00533       // one mask of 0x00001 and a known zero mask of 0xE0F0E.
00534 
00535       // To compute this, we first compute the potential carry bits.  These are
00536       // the bits which may be modified.  I'm not aware of a better way to do
00537       // this scan.
00538       const APInt &RHSVal = RHS->getValue();
00539       APInt CarryBits((~LHSKnownZero + RHSVal) ^ (~LHSKnownZero ^ RHSVal));
00540 
00541       // Now that we know which bits have carries, compute the known-1/0 sets.
00542 
00543       // Bits are known one if they are known zero in one operand and one in the
00544       // other, and there is no input carry.
00545       KnownOne = ((LHSKnownZero & RHSVal) |
00546                   (LHSKnownOne & ~RHSVal)) & ~CarryBits;
00547 
00548       // Bits are known zero if they are known zero in both operands and there
00549       // is no input carry.
00550       KnownZero = LHSKnownZero & ~RHSVal & ~CarryBits;
00551     } else {
00552       // If the high-bits of this ADD are not demanded, then it does not demand
00553       // the high bits of its LHS or RHS.
00554       if (DemandedMask[BitWidth-1] == 0) {
00555         // Right fill the mask of bits for this ADD to demand the most
00556         // significant bit and all those below it.
00557         APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
00558         if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
00559                                  LHSKnownZero, LHSKnownOne, Depth+1) ||
00560             SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
00561                                  LHSKnownZero, LHSKnownOne, Depth+1))
00562           return I;
00563       }
00564     }
00565     break;
00566   }
00567   case Instruction::Sub:
00568     // If the high-bits of this SUB are not demanded, then it does not demand
00569     // the high bits of its LHS or RHS.
00570     if (DemandedMask[BitWidth-1] == 0) {
00571       // Right fill the mask of bits for this SUB to demand the most
00572       // significant bit and all those below it.
00573       uint32_t NLZ = DemandedMask.countLeadingZeros();
00574       APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
00575       if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
00576                                LHSKnownZero, LHSKnownOne, Depth+1) ||
00577           SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
00578                                LHSKnownZero, LHSKnownOne, Depth+1))
00579         return I;
00580     }
00581 
00582     // Otherwise just hand the sub off to computeKnownBits to fill in
00583     // the known zeros and ones.
00584     computeKnownBits(V, KnownZero, KnownOne, Depth);
00585 
00586     // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
00587     // zero.
00588     if (ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(0))) {
00589       APInt I0 = C0->getValue();
00590       if ((I0 + 1).isPowerOf2() && (I0 | KnownZero).isAllOnesValue()) {
00591         Instruction *Xor = BinaryOperator::CreateXor(I->getOperand(1), C0);
00592         return InsertNewInstWith(Xor, *I);
00593       }
00594     }
00595     break;
00596   case Instruction::Shl:
00597     if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
00598       {
00599         Value *VarX; ConstantInt *C1;
00600         if (match(I->getOperand(0), m_Shr(m_Value(VarX), m_ConstantInt(C1)))) {
00601           Instruction *Shr = cast<Instruction>(I->getOperand(0));
00602           Value *R = SimplifyShrShlDemandedBits(Shr, I, DemandedMask,
00603                                                 KnownZero, KnownOne);
00604           if (R)
00605             return R;
00606         }
00607       }
00608 
00609       uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
00610       APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));
00611 
00612       // If the shift is NUW/NSW, then it does demand the high bits.
00613       ShlOperator *IOp = cast<ShlOperator>(I);
00614       if (IOp->hasNoSignedWrap())
00615         DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
00616       else if (IOp->hasNoUnsignedWrap())
00617         DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
00618 
00619       if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
00620                                KnownZero, KnownOne, Depth+1))
00621         return I;
00622       assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
00623       KnownZero <<= ShiftAmt;
00624       KnownOne  <<= ShiftAmt;
00625       // low bits known zero.
00626       if (ShiftAmt)
00627         KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
00628     }
00629     break;
00630   case Instruction::LShr:
00631     // For a logical shift right
00632     if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
00633       uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
00634 
00635       // Unsigned shift right.
00636       APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
00637 
00638       // If the shift is exact, then it does demand the low bits (and knows that
00639       // they are zero).
00640       if (cast<LShrOperator>(I)->isExact())
00641         DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
00642 
00643       if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
00644                                KnownZero, KnownOne, Depth+1))
00645         return I;
00646       assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
00647       KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
00648       KnownOne  = APIntOps::lshr(KnownOne, ShiftAmt);
00649       if (ShiftAmt) {
00650         // Compute the new bits that are at the top now.
00651         APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
00652         KnownZero |= HighBits;  // high bits known zero.
00653       }
00654     }
00655     break;
00656   case Instruction::AShr:
00657     // If this is an arithmetic shift right and only the low-bit is set, we can
00658     // always convert this into a logical shr, even if the shift amount is
00659     // variable.  The low bit of the shift cannot be an input sign bit unless
00660     // the shift amount is >= the size of the datatype, which is undefined.
00661     if (DemandedMask == 1) {
00662       // Perform the logical shift right.
00663       Instruction *NewVal = BinaryOperator::CreateLShr(
00664                         I->getOperand(0), I->getOperand(1), I->getName());
00665       return InsertNewInstWith(NewVal, *I);
00666     }
00667 
00668     // If the sign bit is the only bit demanded by this ashr, then there is no
00669     // need to do it, the shift doesn't change the high bit.
00670     if (DemandedMask.isSignBit())
00671       return I->getOperand(0);
00672 
00673     if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
00674       uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
00675 
00676       // Signed shift right.
00677       APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
00678       // If any of the "high bits" are demanded, we should set the sign bit as
00679       // demanded.
00680       if (DemandedMask.countLeadingZeros() <= ShiftAmt)
00681         DemandedMaskIn.setBit(BitWidth-1);
00682 
00683       // If the shift is exact, then it does demand the low bits (and knows that
00684       // they are zero).
00685       if (cast<AShrOperator>(I)->isExact())
00686         DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
00687 
00688       if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
00689                                KnownZero, KnownOne, Depth+1))
00690         return I;
00691       assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
00692       // Compute the new bits that are at the top now.
00693       APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
00694       KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
00695       KnownOne  = APIntOps::lshr(KnownOne, ShiftAmt);
00696 
00697       // Handle the sign bits.
00698       APInt SignBit(APInt::getSignBit(BitWidth));
00699       // Adjust to where it is now in the mask.
00700       SignBit = APIntOps::lshr(SignBit, ShiftAmt);
00701 
00702       // If the input sign bit is known to be zero, or if none of the top bits
00703       // are demanded, turn this into an unsigned shift right.
00704       if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] ||
00705           (HighBits & ~DemandedMask) == HighBits) {
00706         // Perform the logical shift right.
00707         BinaryOperator *NewVal = BinaryOperator::CreateLShr(I->getOperand(0),
00708                                                             SA, I->getName());
00709         NewVal->setIsExact(cast<BinaryOperator>(I)->isExact());
00710         return InsertNewInstWith(NewVal, *I);
00711       } else if ((KnownOne & SignBit) != 0) { // New bits are known one.
00712         KnownOne |= HighBits;
00713       }
00714     }
00715     break;
00716   case Instruction::SRem:
00717     if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
00718       // X % -1 demands all the bits because we don't want to introduce
00719       // INT_MIN % -1 (== undef) by accident.
00720       if (Rem->isAllOnesValue())
00721         break;
00722       APInt RA = Rem->getValue().abs();
00723       if (RA.isPowerOf2()) {
00724         if (DemandedMask.ult(RA))    // srem won't affect demanded bits
00725           return I->getOperand(0);
00726 
00727         APInt LowBits = RA - 1;
00728         APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
00729         if (SimplifyDemandedBits(I->getOperandUse(0), Mask2,
00730                                  LHSKnownZero, LHSKnownOne, Depth+1))
00731           return I;
00732 
00733         // The low bits of LHS are unchanged by the srem.
00734         KnownZero = LHSKnownZero & LowBits;
00735         KnownOne = LHSKnownOne & LowBits;
00736 
00737         // If LHS is non-negative or has all low bits zero, then the upper bits
00738         // are all zero.
00739         if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits))
00740           KnownZero |= ~LowBits;
00741 
00742         // If LHS is negative and not all low bits are zero, then the upper bits
00743         // are all one.
00744         if (LHSKnownOne[BitWidth-1] && ((LHSKnownOne & LowBits) != 0))
00745           KnownOne |= ~LowBits;
00746 
00747         assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
00748       }
00749     }
00750 
00751     // The sign bit is the LHS's sign bit, except when the result of the
00752     // remainder is zero.
00753     if (DemandedMask.isNegative() && KnownZero.isNonNegative()) {
00754       APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
00755       computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
00756       // If it's known zero, our sign bit is also zero.
00757       if (LHSKnownZero.isNegative())
00758         KnownZero.setBit(KnownZero.getBitWidth() - 1);
00759     }
00760     break;
00761   case Instruction::URem: {
00762     APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
00763     APInt AllOnes = APInt::getAllOnesValue(BitWidth);
00764     if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes,
00765                              KnownZero2, KnownOne2, Depth+1) ||
00766         SimplifyDemandedBits(I->getOperandUse(1), AllOnes,
00767                              KnownZero2, KnownOne2, Depth+1))
00768       return I;
00769 
00770     unsigned Leaders = KnownZero2.countLeadingOnes();
00771     Leaders = std::max(Leaders,
00772                        KnownZero2.countLeadingOnes());
00773     KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
00774     break;
00775   }
00776   case Instruction::Call:
00777     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
00778       switch (II->getIntrinsicID()) {
00779       default: break;
00780       case Intrinsic::bswap: {
00781         // If the only bits demanded come from one byte of the bswap result,
00782         // just shift the input byte into position to eliminate the bswap.
00783         unsigned NLZ = DemandedMask.countLeadingZeros();
00784         unsigned NTZ = DemandedMask.countTrailingZeros();
00785 
00786         // Round NTZ down to the next byte.  If we have 11 trailing zeros, then
00787         // we need all the bits down to bit 8.  Likewise, round NLZ.  If we
00788         // have 14 leading zeros, round to 8.
00789         NLZ &= ~7;
00790         NTZ &= ~7;
00791         // If we need exactly one byte, we can do this transformation.
00792         if (BitWidth-NLZ-NTZ == 8) {
00793           unsigned ResultBit = NTZ;
00794           unsigned InputBit = BitWidth-NTZ-8;
00795 
00796           // Replace this with either a left or right shift to get the byte into
00797           // the right place.
00798           Instruction *NewVal;
00799           if (InputBit > ResultBit)
00800             NewVal = BinaryOperator::CreateLShr(II->getArgOperand(0),
00801                     ConstantInt::get(I->getType(), InputBit-ResultBit));
00802           else
00803             NewVal = BinaryOperator::CreateShl(II->getArgOperand(0),
00804                     ConstantInt::get(I->getType(), ResultBit-InputBit));
00805           NewVal->takeName(I);
00806           return InsertNewInstWith(NewVal, *I);
00807         }
00808 
00809         // TODO: Could compute known zero/one bits based on the input.
00810         break;
00811       }
00812       case Intrinsic::x86_sse42_crc32_64_64:
00813         KnownZero = APInt::getHighBitsSet(64, 32);
00814         return nullptr;
00815       }
00816     }
00817     computeKnownBits(V, KnownZero, KnownOne, Depth);
00818     break;
00819   }
00820 
00821   // If the client is only demanding bits that we know, return the known
00822   // constant.
00823   if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask)
00824     return Constant::getIntegerValue(VTy, KnownOne);
00825   return nullptr;
00826 }
00827 
00828 /// Helper routine of SimplifyDemandedUseBits. It tries to simplify
00829 /// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into
00830 /// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign
00831 /// of "C2-C1".
00832 ///
00833 /// Suppose E1 and E2 are generally different in bits S={bm, bm+1,
00834 /// ..., bn}, without considering the specific value X is holding.
00835 /// This transformation is legal iff one of following conditions is hold:
00836 ///  1) All the bit in S are 0, in this case E1 == E2.
00837 ///  2) We don't care those bits in S, per the input DemandedMask.
00838 ///  3) Combination of 1) and 2). Some bits in S are 0, and we don't care the
00839 ///     rest bits.
00840 ///
00841 /// Currently we only test condition 2).
00842 ///
00843 /// As with SimplifyDemandedUseBits, it returns NULL if the simplification was
00844 /// not successful.
00845 Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
00846   Instruction *Shl, APInt DemandedMask, APInt &KnownZero, APInt &KnownOne) {
00847 
00848   const APInt &ShlOp1 = cast<ConstantInt>(Shl->getOperand(1))->getValue();
00849   const APInt &ShrOp1 = cast<ConstantInt>(Shr->getOperand(1))->getValue();
00850   if (!ShlOp1 || !ShrOp1)
00851       return nullptr; // Noop.
00852 
00853   Value *VarX = Shr->getOperand(0);
00854   Type *Ty = VarX->getType();
00855   unsigned BitWidth = Ty->getIntegerBitWidth();
00856   if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth))
00857     return nullptr; // Undef.
00858 
00859   unsigned ShlAmt = ShlOp1.getZExtValue();
00860   unsigned ShrAmt = ShrOp1.getZExtValue();
00861 
00862   KnownOne.clearAllBits();
00863   KnownZero = APInt::getBitsSet(KnownZero.getBitWidth(), 0, ShlAmt-1);
00864   KnownZero &= DemandedMask;
00865 
00866   APInt BitMask1(APInt::getAllOnesValue(BitWidth));
00867   APInt BitMask2(APInt::getAllOnesValue(BitWidth));
00868 
00869   bool isLshr = (Shr->getOpcode() == Instruction::LShr);
00870   BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) :
00871                       (BitMask1.ashr(ShrAmt) << ShlAmt);
00872 
00873   if (ShrAmt <= ShlAmt) {
00874     BitMask2 <<= (ShlAmt - ShrAmt);
00875   } else {
00876     BitMask2 = isLshr ? BitMask2.lshr(ShrAmt - ShlAmt):
00877                         BitMask2.ashr(ShrAmt - ShlAmt);
00878   }
00879 
00880   // Check if condition-2 (see the comment to this function) is satified.
00881   if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) {
00882     if (ShrAmt == ShlAmt)
00883       return VarX;
00884 
00885     if (!Shr->hasOneUse())
00886       return nullptr;
00887 
00888     BinaryOperator *New;
00889     if (ShrAmt < ShlAmt) {
00890       Constant *Amt = ConstantInt::get(VarX->getType(), ShlAmt - ShrAmt);
00891       New = BinaryOperator::CreateShl(VarX, Amt);
00892       BinaryOperator *Orig = cast<BinaryOperator>(Shl);
00893       New->setHasNoSignedWrap(Orig->hasNoSignedWrap());
00894       New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap());
00895     } else {
00896       Constant *Amt = ConstantInt::get(VarX->getType(), ShrAmt - ShlAmt);
00897       New = isLshr ? BinaryOperator::CreateLShr(VarX, Amt) :
00898                      BinaryOperator::CreateAShr(VarX, Amt);
00899       if (cast<BinaryOperator>(Shr)->isExact())
00900         New->setIsExact(true);
00901     }
00902 
00903     return InsertNewInstWith(New, *Shl);
00904   }
00905 
00906   return nullptr;
00907 }
00908 
00909 /// SimplifyDemandedVectorElts - The specified value produces a vector with
00910 /// any number of elements. DemandedElts contains the set of elements that are
00911 /// actually used by the caller.  This method analyzes which elements of the
00912 /// operand are undef and returns that information in UndefElts.
00913 ///
00914 /// If the information about demanded elements can be used to simplify the
00915 /// operation, the operation is simplified, then the resultant value is
00916 /// returned.  This returns null if no change was made.
00917 Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
00918                                                 APInt &UndefElts,
00919                                                 unsigned Depth) {
00920   unsigned VWidth = cast<VectorType>(V->getType())->getNumElements();
00921   APInt EltMask(APInt::getAllOnesValue(VWidth));
00922   assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
00923 
00924   if (isa<UndefValue>(V)) {
00925     // If the entire vector is undefined, just return this info.
00926     UndefElts = EltMask;
00927     return nullptr;
00928   }
00929 
00930   if (DemandedElts == 0) { // If nothing is demanded, provide undef.
00931     UndefElts = EltMask;
00932     return UndefValue::get(V->getType());
00933   }
00934 
00935   UndefElts = 0;
00936 
00937   // Handle ConstantAggregateZero, ConstantVector, ConstantDataSequential.
00938   if (Constant *C = dyn_cast<Constant>(V)) {
00939     // Check if this is identity. If so, return 0 since we are not simplifying
00940     // anything.
00941     if (DemandedElts.isAllOnesValue())
00942       return nullptr;
00943 
00944     Type *EltTy = cast<VectorType>(V->getType())->getElementType();
00945     Constant *Undef = UndefValue::get(EltTy);
00946 
00947     SmallVector<Constant*, 16> Elts;
00948     for (unsigned i = 0; i != VWidth; ++i) {
00949       if (!DemandedElts[i]) {   // If not demanded, set to undef.
00950         Elts.push_back(Undef);
00951         UndefElts.setBit(i);
00952         continue;
00953       }
00954 
00955       Constant *Elt = C->getAggregateElement(i);
00956       if (!Elt) return nullptr;
00957 
00958       if (isa<UndefValue>(Elt)) {   // Already undef.
00959         Elts.push_back(Undef);
00960         UndefElts.setBit(i);
00961       } else {                               // Otherwise, defined.
00962         Elts.push_back(Elt);
00963       }
00964     }
00965 
00966     // If we changed the constant, return it.
00967     Constant *NewCV = ConstantVector::get(Elts);
00968     return NewCV != C ? NewCV : nullptr;
00969   }
00970 
00971   // Limit search depth.
00972   if (Depth == 10)
00973     return nullptr;
00974 
00975   // If multiple users are using the root value, proceed with
00976   // simplification conservatively assuming that all elements
00977   // are needed.
00978   if (!V->hasOneUse()) {
00979     // Quit if we find multiple users of a non-root value though.
00980     // They'll be handled when it's their turn to be visited by
00981     // the main instcombine process.
00982     if (Depth != 0)
00983       // TODO: Just compute the UndefElts information recursively.
00984       return nullptr;
00985 
00986     // Conservatively assume that all elements are needed.
00987     DemandedElts = EltMask;
00988   }
00989 
00990   Instruction *I = dyn_cast<Instruction>(V);
00991   if (!I) return nullptr;        // Only analyze instructions.
00992 
00993   bool MadeChange = false;
00994   APInt UndefElts2(VWidth, 0);
00995   Value *TmpV;
00996   switch (I->getOpcode()) {
00997   default: break;
00998 
00999   case Instruction::InsertElement: {
01000     // If this is a variable index, we don't know which element it overwrites.
01001     // demand exactly the same input as we produce.
01002     ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
01003     if (!Idx) {
01004       // Note that we can't propagate undef elt info, because we don't know
01005       // which elt is getting updated.
01006       TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
01007                                         UndefElts2, Depth+1);
01008       if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
01009       break;
01010     }
01011 
01012     // If this is inserting an element that isn't demanded, remove this
01013     // insertelement.
01014     unsigned IdxNo = Idx->getZExtValue();
01015     if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
01016       Worklist.Add(I);
01017       return I->getOperand(0);
01018     }
01019 
01020     // Otherwise, the element inserted overwrites whatever was there, so the
01021     // input demanded set is simpler than the output set.
01022     APInt DemandedElts2 = DemandedElts;
01023     DemandedElts2.clearBit(IdxNo);
01024     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2,
01025                                       UndefElts, Depth+1);
01026     if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
01027 
01028     // The inserted element is defined.
01029     UndefElts.clearBit(IdxNo);
01030     break;
01031   }
01032   case Instruction::ShuffleVector: {
01033     ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
01034     uint64_t LHSVWidth =
01035       cast<VectorType>(Shuffle->getOperand(0)->getType())->getNumElements();
01036     APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0);
01037     for (unsigned i = 0; i < VWidth; i++) {
01038       if (DemandedElts[i]) {
01039         unsigned MaskVal = Shuffle->getMaskValue(i);
01040         if (MaskVal != -1u) {
01041           assert(MaskVal < LHSVWidth * 2 &&
01042                  "shufflevector mask index out of range!");
01043           if (MaskVal < LHSVWidth)
01044             LeftDemanded.setBit(MaskVal);
01045           else
01046             RightDemanded.setBit(MaskVal - LHSVWidth);
01047         }
01048       }
01049     }
01050 
01051     APInt UndefElts4(LHSVWidth, 0);
01052     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
01053                                       UndefElts4, Depth+1);
01054     if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
01055 
01056     APInt UndefElts3(LHSVWidth, 0);
01057     TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
01058                                       UndefElts3, Depth+1);
01059     if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
01060 
01061     bool NewUndefElts = false;
01062     for (unsigned i = 0; i < VWidth; i++) {
01063       unsigned MaskVal = Shuffle->getMaskValue(i);
01064       if (MaskVal == -1u) {
01065         UndefElts.setBit(i);
01066       } else if (!DemandedElts[i]) {
01067         NewUndefElts = true;
01068         UndefElts.setBit(i);
01069       } else if (MaskVal < LHSVWidth) {
01070         if (UndefElts4[MaskVal]) {
01071           NewUndefElts = true;
01072           UndefElts.setBit(i);
01073         }
01074       } else {
01075         if (UndefElts3[MaskVal - LHSVWidth]) {
01076           NewUndefElts = true;
01077           UndefElts.setBit(i);
01078         }
01079       }
01080     }
01081 
01082     if (NewUndefElts) {
01083       // Add additional discovered undefs.
01084       SmallVector<Constant*, 16> Elts;
01085       for (unsigned i = 0; i < VWidth; ++i) {
01086         if (UndefElts[i])
01087           Elts.push_back(UndefValue::get(Type::getInt32Ty(I->getContext())));
01088         else
01089           Elts.push_back(ConstantInt::get(Type::getInt32Ty(I->getContext()),
01090                                           Shuffle->getMaskValue(i)));
01091       }
01092       I->setOperand(2, ConstantVector::get(Elts));
01093       MadeChange = true;
01094     }
01095     break;
01096   }
01097   case Instruction::Select: {
01098     APInt LeftDemanded(DemandedElts), RightDemanded(DemandedElts);
01099     if (ConstantVector* CV = dyn_cast<ConstantVector>(I->getOperand(0))) {
01100       for (unsigned i = 0; i < VWidth; i++) {
01101         if (CV->getAggregateElement(i)->isNullValue())
01102           LeftDemanded.clearBit(i);
01103         else
01104           RightDemanded.clearBit(i);
01105       }
01106     }
01107 
01108     TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded,
01109                                       UndefElts, Depth+1);
01110     if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
01111 
01112     TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded,
01113                                       UndefElts2, Depth+1);
01114     if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; }
01115 
01116     // Output elements are undefined if both are undefined.
01117     UndefElts &= UndefElts2;
01118     break;
01119   }
01120   case Instruction::BitCast: {
01121     // Vector->vector casts only.
01122     VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
01123     if (!VTy) break;
01124     unsigned InVWidth = VTy->getNumElements();
01125     APInt InputDemandedElts(InVWidth, 0);
01126     unsigned Ratio;
01127 
01128     if (VWidth == InVWidth) {
01129       // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
01130       // elements as are demanded of us.
01131       Ratio = 1;
01132       InputDemandedElts = DemandedElts;
01133     } else if (VWidth > InVWidth) {
01134       // Untested so far.
01135       break;
01136 
01137       // If there are more elements in the result than there are in the source,
01138       // then an input element is live if any of the corresponding output
01139       // elements are live.
01140       Ratio = VWidth/InVWidth;
01141       for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
01142         if (DemandedElts[OutIdx])
01143           InputDemandedElts.setBit(OutIdx/Ratio);
01144       }
01145     } else {
01146       // Untested so far.
01147       break;
01148 
01149       // If there are more elements in the source than there are in the result,
01150       // then an input element is live if the corresponding output element is
01151       // live.
01152       Ratio = InVWidth/VWidth;
01153       for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
01154         if (DemandedElts[InIdx/Ratio])
01155           InputDemandedElts.setBit(InIdx);
01156     }
01157 
01158     // div/rem demand all inputs, because they don't want divide by zero.
01159     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
01160                                       UndefElts2, Depth+1);
01161     if (TmpV) {
01162       I->setOperand(0, TmpV);
01163       MadeChange = true;
01164     }
01165 
01166     UndefElts = UndefElts2;
01167     if (VWidth > InVWidth) {
01168       llvm_unreachable("Unimp");
01169       // If there are more elements in the result than there are in the source,
01170       // then an output element is undef if the corresponding input element is
01171       // undef.
01172       for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
01173         if (UndefElts2[OutIdx/Ratio])
01174           UndefElts.setBit(OutIdx);
01175     } else if (VWidth < InVWidth) {
01176       llvm_unreachable("Unimp");
01177       // If there are more elements in the source than there are in the result,
01178       // then a result element is undef if all of the corresponding input
01179       // elements are undef.
01180       UndefElts = ~0ULL >> (64-VWidth);  // Start out all undef.
01181       for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
01182         if (!UndefElts2[InIdx])            // Not undef?
01183           UndefElts.clearBit(InIdx/Ratio);    // Clear undef bit.
01184     }
01185     break;
01186   }
01187   case Instruction::And:
01188   case Instruction::Or:
01189   case Instruction::Xor:
01190   case Instruction::Add:
01191   case Instruction::Sub:
01192   case Instruction::Mul:
01193     // div/rem demand all inputs, because they don't want divide by zero.
01194     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
01195                                       UndefElts, Depth+1);
01196     if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
01197     TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
01198                                       UndefElts2, Depth+1);
01199     if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
01200 
01201     // Output elements are undefined if both are undefined.  Consider things
01202     // like undef&0.  The result is known zero, not undef.
01203     UndefElts &= UndefElts2;
01204     break;
01205   case Instruction::FPTrunc:
01206   case Instruction::FPExt:
01207     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
01208                                       UndefElts, Depth+1);
01209     if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
01210     break;
01211 
01212   case Instruction::Call: {
01213     IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
01214     if (!II) break;
01215     switch (II->getIntrinsicID()) {
01216     default: break;
01217 
01218     // Binary vector operations that work column-wise.  A dest element is a
01219     // function of the corresponding input elements from the two inputs.
01220     case Intrinsic::x86_sse_sub_ss:
01221     case Intrinsic::x86_sse_mul_ss:
01222     case Intrinsic::x86_sse_min_ss:
01223     case Intrinsic::x86_sse_max_ss:
01224     case Intrinsic::x86_sse2_sub_sd:
01225     case Intrinsic::x86_sse2_mul_sd:
01226     case Intrinsic::x86_sse2_min_sd:
01227     case Intrinsic::x86_sse2_max_sd:
01228       TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
01229                                         UndefElts, Depth+1);
01230       if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
01231       TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
01232                                         UndefElts2, Depth+1);
01233       if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
01234 
01235       // If only the low elt is demanded and this is a scalarizable intrinsic,
01236       // scalarize it now.
01237       if (DemandedElts == 1) {
01238         switch (II->getIntrinsicID()) {
01239         default: break;
01240         case Intrinsic::x86_sse_sub_ss:
01241         case Intrinsic::x86_sse_mul_ss:
01242         case Intrinsic::x86_sse2_sub_sd:
01243         case Intrinsic::x86_sse2_mul_sd:
01244           // TODO: Lower MIN/MAX/ABS/etc
01245           Value *LHS = II->getArgOperand(0);
01246           Value *RHS = II->getArgOperand(1);
01247           // Extract the element as scalars.
01248           LHS = InsertNewInstWith(ExtractElementInst::Create(LHS,
01249             ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
01250           RHS = InsertNewInstWith(ExtractElementInst::Create(RHS,
01251             ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
01252 
01253           switch (II->getIntrinsicID()) {
01254           default: llvm_unreachable("Case stmts out of sync!");
01255           case Intrinsic::x86_sse_sub_ss:
01256           case Intrinsic::x86_sse2_sub_sd:
01257             TmpV = InsertNewInstWith(BinaryOperator::CreateFSub(LHS, RHS,
01258                                                         II->getName()), *II);
01259             break;
01260           case Intrinsic::x86_sse_mul_ss:
01261           case Intrinsic::x86_sse2_mul_sd:
01262             TmpV = InsertNewInstWith(BinaryOperator::CreateFMul(LHS, RHS,
01263                                                          II->getName()), *II);
01264             break;
01265           }
01266 
01267           Instruction *New =
01268             InsertElementInst::Create(
01269               UndefValue::get(II->getType()), TmpV,
01270               ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U, false),
01271                                       II->getName());
01272           InsertNewInstWith(New, *II);
01273           return New;
01274         }
01275       }
01276 
01277       // Output elements are undefined if both are undefined.  Consider things
01278       // like undef&0.  The result is known zero, not undef.
01279       UndefElts &= UndefElts2;
01280       break;
01281     }
01282     break;
01283   }
01284   }
01285   return MadeChange ? I : nullptr;
01286 }