LLVM API Documentation

InstCombineSimplifyDemanded.cpp
Go to the documentation of this file.
00001 //===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file contains logic for simplifying instructions based on information
00011 // about how they are used.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #include "InstCombine.h"
00016 #include "llvm/IR/DataLayout.h"
00017 #include "llvm/IR/IntrinsicInst.h"
00018 #include "llvm/IR/PatternMatch.h"
00019 
00020 using namespace llvm;
00021 using namespace llvm::PatternMatch;
00022 
00023 #define DEBUG_TYPE "instcombine"
00024 
00025 /// ShrinkDemandedConstant - Check to see if the specified operand of the
00026 /// specified instruction is a constant integer.  If so, check to see if there
00027 /// are any bits set in the constant that are not demanded.  If so, shrink the
00028 /// constant and return true.
00029 static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
00030                                    APInt Demanded) {
00031   assert(I && "No instruction?");
00032   assert(OpNo < I->getNumOperands() && "Operand index too large");
00033 
00034   // If the operand is not a constant integer, nothing to do.
00035   ConstantInt *OpC = dyn_cast<ConstantInt>(I->getOperand(OpNo));
00036   if (!OpC) return false;
00037 
00038   // If there are no bits set that aren't demanded, nothing to do.
00039   Demanded = Demanded.zextOrTrunc(OpC->getValue().getBitWidth());
00040   if ((~Demanded & OpC->getValue()) == 0)
00041     return false;
00042 
00043   // This instruction is producing bits that are not demanded. Shrink the RHS.
00044   Demanded &= OpC->getValue();
00045   I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded));
00046 
00047   // If either 'nsw' or 'nuw' is set and the constant is negative,
00048   // removing *any* bits from the constant could make overflow occur.
00049   // Remove 'nsw' and 'nuw' from the instruction in this case.
00050   if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I)) {
00051     assert(OBO->getOpcode() == Instruction::Add);
00052     if (OBO->hasNoSignedWrap() || OBO->hasNoUnsignedWrap()) {
00053       if (OpC->getValue().isNegative()) {
00054         cast<BinaryOperator>(OBO)->setHasNoSignedWrap(false);
00055         cast<BinaryOperator>(OBO)->setHasNoUnsignedWrap(false);
00056       }
00057     }
00058   }
00059 
00060   return true;
00061 }
00062 
00063 
00064 
00065 /// SimplifyDemandedInstructionBits - Inst is an integer instruction that
00066 /// SimplifyDemandedBits knows about.  See if the instruction has any
00067 /// properties that allow us to simplify its operands.
00068 bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
00069   unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
00070   APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
00071   APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
00072 
00073   Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask,
00074                                      KnownZero, KnownOne, 0);
00075   if (!V) return false;
00076   if (V == &Inst) return true;
00077   ReplaceInstUsesWith(Inst, V);
00078   return true;
00079 }
00080 
00081 /// SimplifyDemandedBits - This form of SimplifyDemandedBits simplifies the
00082 /// specified instruction operand if possible, updating it in place.  It returns
00083 /// true if it made any change and false otherwise.
00084 bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
00085                                         APInt &KnownZero, APInt &KnownOne,
00086                                         unsigned Depth) {
00087   Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask,
00088                                           KnownZero, KnownOne, Depth);
00089   if (!NewVal) return false;
00090   U = NewVal;
00091   return true;
00092 }
00093 
00094 
00095 /// SimplifyDemandedUseBits - This function attempts to replace V with a simpler
00096 /// value based on the demanded bits.  When this function is called, it is known
00097 /// that only the bits set in DemandedMask of the result of V are ever used
00098 /// downstream. Consequently, depending on the mask and V, it may be possible
00099 /// to replace V with a constant or one of its operands. In such cases, this
00100 /// function does the replacement and returns true. In all other cases, it
00101 /// returns false after analyzing the expression and setting KnownOne and known
00102 /// to be one in the expression.  KnownZero contains all the bits that are known
00103 /// to be zero in the expression. These are provided to potentially allow the
00104 /// caller (which might recursively be SimplifyDemandedBits itself) to simplify
00105 /// the expression. KnownOne and KnownZero always follow the invariant that
00106 /// KnownOne & KnownZero == 0. That is, a bit can't be both 1 and 0. Note that
00107 /// the bits in KnownOne and KnownZero may only be accurate for those bits set
00108 /// in DemandedMask. Note also that the bitwidth of V, DemandedMask, KnownZero
00109 /// and KnownOne must all be the same.
00110 ///
00111 /// This returns null if it did not change anything and it permits no
00112 /// simplification.  This returns V itself if it did some simplification of V's
00113 /// operands based on the information about what bits are demanded. This returns
00114 /// some other non-null value if it found out that V is equal to another value
00115 /// in the context where the specified bits are demanded, but not for all users.
00116 Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
00117                                              APInt &KnownZero, APInt &KnownOne,
00118                                              unsigned Depth) {
00119   assert(V != nullptr && "Null pointer of Value???");
00120   assert(Depth <= 6 && "Limit Search Depth");
00121   uint32_t BitWidth = DemandedMask.getBitWidth();
00122   Type *VTy = V->getType();
00123   assert((DL || !VTy->isPointerTy()) &&
00124          "SimplifyDemandedBits needs to know bit widths!");
00125   assert((!DL || DL->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) &&
00126          (!VTy->isIntOrIntVectorTy() ||
00127           VTy->getScalarSizeInBits() == BitWidth) &&
00128          KnownZero.getBitWidth() == BitWidth &&
00129          KnownOne.getBitWidth() == BitWidth &&
00130          "Value *V, DemandedMask, KnownZero and KnownOne "
00131          "must have same BitWidth");
00132   if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
00133     // We know all of the bits for a constant!
00134     KnownOne = CI->getValue() & DemandedMask;
00135     KnownZero = ~KnownOne & DemandedMask;
00136     return nullptr;
00137   }
00138   if (isa<ConstantPointerNull>(V)) {
00139     // We know all of the bits for a constant!
00140     KnownOne.clearAllBits();
00141     KnownZero = DemandedMask;
00142     return nullptr;
00143   }
00144 
00145   KnownZero.clearAllBits();
00146   KnownOne.clearAllBits();
00147   if (DemandedMask == 0) {   // Not demanding any bits from V.
00148     if (isa<UndefValue>(V))
00149       return nullptr;
00150     return UndefValue::get(VTy);
00151   }
00152 
00153   if (Depth == 6)        // Limit search depth.
00154     return nullptr;
00155 
00156   APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
00157   APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
00158 
00159   Instruction *I = dyn_cast<Instruction>(V);
00160   if (!I) {
00161     computeKnownBits(V, KnownZero, KnownOne, Depth);
00162     return nullptr;        // Only analyze instructions.
00163   }
00164 
00165   // If there are multiple uses of this value and we aren't at the root, then
00166   // we can't do any simplifications of the operands, because DemandedMask
00167   // only reflects the bits demanded by *one* of the users.
00168   if (Depth != 0 && !I->hasOneUse()) {
00169     // Despite the fact that we can't simplify this instruction in all User's
00170     // context, we can at least compute the knownzero/knownone bits, and we can
00171     // do simplifications that apply to *just* the one user if we know that
00172     // this instruction has a simpler value in that context.
00173     if (I->getOpcode() == Instruction::And) {
00174       // If either the LHS or the RHS are Zero, the result is zero.
00175       computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
00176       computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
00177 
00178       // If all of the demanded bits are known 1 on one side, return the other.
00179       // These bits cannot contribute to the result of the 'and' in this
00180       // context.
00181       if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
00182           (DemandedMask & ~LHSKnownZero))
00183         return I->getOperand(0);
00184       if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
00185           (DemandedMask & ~RHSKnownZero))
00186         return I->getOperand(1);
00187 
00188       // If all of the demanded bits in the inputs are known zeros, return zero.
00189       if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
00190         return Constant::getNullValue(VTy);
00191 
00192     } else if (I->getOpcode() == Instruction::Or) {
00193       // We can simplify (X|Y) -> X or Y in the user's context if we know that
00194       // only bits from X or Y are demanded.
00195 
00196       // If either the LHS or the RHS are One, the result is One.
00197       computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
00198       computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
00199 
00200       // If all of the demanded bits are known zero on one side, return the
00201       // other.  These bits cannot contribute to the result of the 'or' in this
00202       // context.
00203       if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
00204           (DemandedMask & ~LHSKnownOne))
00205         return I->getOperand(0);
00206       if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
00207           (DemandedMask & ~RHSKnownOne))
00208         return I->getOperand(1);
00209 
00210       // If all of the potentially set bits on one side are known to be set on
00211       // the other side, just use the 'other' side.
00212       if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
00213           (DemandedMask & (~RHSKnownZero)))
00214         return I->getOperand(0);
00215       if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
00216           (DemandedMask & (~LHSKnownZero)))
00217         return I->getOperand(1);
00218     } else if (I->getOpcode() == Instruction::Xor) {
00219       // We can simplify (X^Y) -> X or Y in the user's context if we know that
00220       // only bits from X or Y are demanded.
00221 
00222       computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
00223       computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
00224 
00225       // If all of the demanded bits are known zero on one side, return the
00226       // other.
00227       if ((DemandedMask & RHSKnownZero) == DemandedMask)
00228         return I->getOperand(0);
00229       if ((DemandedMask & LHSKnownZero) == DemandedMask)
00230         return I->getOperand(1);
00231     }
00232 
00233     // Compute the KnownZero/KnownOne bits to simplify things downstream.
00234     computeKnownBits(I, KnownZero, KnownOne, Depth);
00235     return nullptr;
00236   }
00237 
00238   // If this is the root being simplified, allow it to have multiple uses,
00239   // just set the DemandedMask to all bits so that we can try to simplify the
00240   // operands.  This allows visitTruncInst (for example) to simplify the
00241   // operand of a trunc without duplicating all the logic below.
00242   if (Depth == 0 && !V->hasOneUse())
00243     DemandedMask = APInt::getAllOnesValue(BitWidth);
00244 
00245   switch (I->getOpcode()) {
00246   default:
00247     computeKnownBits(I, KnownZero, KnownOne, Depth);
00248     break;
00249   case Instruction::And:
00250     // If either the LHS or the RHS are Zero, the result is zero.
00251     if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
00252                              RHSKnownZero, RHSKnownOne, Depth+1) ||
00253         SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero,
00254                              LHSKnownZero, LHSKnownOne, Depth+1))
00255       return I;
00256     assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
00257     assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
00258 
00259     // If all of the demanded bits are known 1 on one side, return the other.
00260     // These bits cannot contribute to the result of the 'and'.
00261     if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
00262         (DemandedMask & ~LHSKnownZero))
00263       return I->getOperand(0);
00264     if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
00265         (DemandedMask & ~RHSKnownZero))
00266       return I->getOperand(1);
00267 
00268     // If all of the demanded bits in the inputs are known zeros, return zero.
00269     if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
00270       return Constant::getNullValue(VTy);
00271 
00272     // If the RHS is a constant, see if we can simplify it.
00273     if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero))
00274       return I;
00275 
00276     // Output known-1 bits are only known if set in both the LHS & RHS.
00277     KnownOne = RHSKnownOne & LHSKnownOne;
00278     // Output known-0 are known to be clear if zero in either the LHS | RHS.
00279     KnownZero = RHSKnownZero | LHSKnownZero;
00280     break;
00281   case Instruction::Or:
00282     // If either the LHS or the RHS are One, the result is One.
00283     if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
00284                              RHSKnownZero, RHSKnownOne, Depth+1) ||
00285         SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne,
00286                              LHSKnownZero, LHSKnownOne, Depth+1))
00287       return I;
00288     assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
00289     assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
00290 
00291     // If all of the demanded bits are known zero on one side, return the other.
00292     // These bits cannot contribute to the result of the 'or'.
00293     if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
00294         (DemandedMask & ~LHSKnownOne))
00295       return I->getOperand(0);
00296     if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
00297         (DemandedMask & ~RHSKnownOne))
00298       return I->getOperand(1);
00299 
00300     // If all of the potentially set bits on one side are known to be set on
00301     // the other side, just use the 'other' side.
00302     if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
00303         (DemandedMask & (~RHSKnownZero)))
00304       return I->getOperand(0);
00305     if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
00306         (DemandedMask & (~LHSKnownZero)))
00307       return I->getOperand(1);
00308 
00309     // If the RHS is a constant, see if we can simplify it.
00310     if (ShrinkDemandedConstant(I, 1, DemandedMask))
00311       return I;
00312 
00313     // Output known-0 bits are only known if clear in both the LHS & RHS.
00314     KnownZero = RHSKnownZero & LHSKnownZero;
00315     // Output known-1 are known to be set if set in either the LHS | RHS.
00316     KnownOne = RHSKnownOne | LHSKnownOne;
00317     break;
00318   case Instruction::Xor: {
00319     if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
00320                              RHSKnownZero, RHSKnownOne, Depth+1) ||
00321         SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
00322                              LHSKnownZero, LHSKnownOne, Depth+1))
00323       return I;
00324     assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
00325     assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
00326 
00327     // If all of the demanded bits are known zero on one side, return the other.
00328     // These bits cannot contribute to the result of the 'xor'.
00329     if ((DemandedMask & RHSKnownZero) == DemandedMask)
00330       return I->getOperand(0);
00331     if ((DemandedMask & LHSKnownZero) == DemandedMask)
00332       return I->getOperand(1);
00333 
00334     // If all of the demanded bits are known to be zero on one side or the
00335     // other, turn this into an *inclusive* or.
00336     //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
00337     if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) {
00338       Instruction *Or =
00339         BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
00340                                  I->getName());
00341       return InsertNewInstWith(Or, *I);
00342     }
00343 
00344     // If all of the demanded bits on one side are known, and all of the set
00345     // bits on that side are also known to be set on the other side, turn this
00346     // into an AND, as we know the bits will be cleared.
00347     //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
00348     if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) {
00349       // all known
00350       if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) {
00351         Constant *AndC = Constant::getIntegerValue(VTy,
00352                                                    ~RHSKnownOne & DemandedMask);
00353         Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
00354         return InsertNewInstWith(And, *I);
00355       }
00356     }
00357 
00358     // If the RHS is a constant, see if we can simplify it.
00359     // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
00360     if (ShrinkDemandedConstant(I, 1, DemandedMask))
00361       return I;
00362 
00363     // If our LHS is an 'and' and if it has one use, and if any of the bits we
00364     // are flipping are known to be set, then the xor is just resetting those
00365     // bits to zero.  We can just knock out bits from the 'and' and the 'xor',
00366     // simplifying both of them.
00367     if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0)))
00368       if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
00369           isa<ConstantInt>(I->getOperand(1)) &&
00370           isa<ConstantInt>(LHSInst->getOperand(1)) &&
00371           (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) {
00372         ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1));
00373         ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1));
00374         APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask);
00375 
00376         Constant *AndC =
00377           ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
00378         Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
00379         InsertNewInstWith(NewAnd, *I);
00380 
00381         Constant *XorC =
00382           ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
00383         Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC);
00384         return InsertNewInstWith(NewXor, *I);
00385       }
00386 
00387     // Output known-0 bits are known if clear or set in both the LHS & RHS.
00388     KnownZero= (RHSKnownZero & LHSKnownZero) | (RHSKnownOne & LHSKnownOne);
00389     // Output known-1 are known to be set if set in only one of the LHS, RHS.
00390     KnownOne = (RHSKnownZero & LHSKnownOne) | (RHSKnownOne & LHSKnownZero);
00391     break;
00392   }
00393   case Instruction::Select:
00394     if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask,
00395                              RHSKnownZero, RHSKnownOne, Depth+1) ||
00396         SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
00397                              LHSKnownZero, LHSKnownOne, Depth+1))
00398       return I;
00399     assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
00400     assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
00401 
00402     // If the operands are constants, see if we can simplify them.
00403     if (ShrinkDemandedConstant(I, 1, DemandedMask) ||
00404         ShrinkDemandedConstant(I, 2, DemandedMask))
00405       return I;
00406 
00407     // Only known if known in both the LHS and RHS.
00408     KnownOne = RHSKnownOne & LHSKnownOne;
00409     KnownZero = RHSKnownZero & LHSKnownZero;
00410     break;
00411   case Instruction::Trunc: {
00412     unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits();
00413     DemandedMask = DemandedMask.zext(truncBf);
00414     KnownZero = KnownZero.zext(truncBf);
00415     KnownOne = KnownOne.zext(truncBf);
00416     if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
00417                              KnownZero, KnownOne, Depth+1))
00418       return I;
00419     DemandedMask = DemandedMask.trunc(BitWidth);
00420     KnownZero = KnownZero.trunc(BitWidth);
00421     KnownOne = KnownOne.trunc(BitWidth);
00422     assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
00423     break;
00424   }
00425   case Instruction::BitCast:
00426     if (!I->getOperand(0)->getType()->isIntOrIntVectorTy())
00427       return nullptr;  // vector->int or fp->int?
00428 
00429     if (VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
00430       if (VectorType *SrcVTy =
00431             dyn_cast<VectorType>(I->getOperand(0)->getType())) {
00432         if (DstVTy->getNumElements() != SrcVTy->getNumElements())
00433           // Don't touch a bitcast between vectors of different element counts.
00434           return nullptr;
00435       } else
00436         // Don't touch a scalar-to-vector bitcast.
00437         return nullptr;
00438     } else if (I->getOperand(0)->getType()->isVectorTy())
00439       // Don't touch a vector-to-scalar bitcast.
00440       return nullptr;
00441 
00442     if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
00443                              KnownZero, KnownOne, Depth+1))
00444       return I;
00445     assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
00446     break;
00447   case Instruction::ZExt: {
00448     // Compute the bits in the result that are not present in the input.
00449     unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
00450 
00451     DemandedMask = DemandedMask.trunc(SrcBitWidth);
00452     KnownZero = KnownZero.trunc(SrcBitWidth);
00453     KnownOne = KnownOne.trunc(SrcBitWidth);
00454     if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
00455                              KnownZero, KnownOne, Depth+1))
00456       return I;
00457     DemandedMask = DemandedMask.zext(BitWidth);
00458     KnownZero = KnownZero.zext(BitWidth);
00459     KnownOne = KnownOne.zext(BitWidth);
00460     assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
00461     // The top bits are known to be zero.
00462     KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
00463     break;
00464   }
00465   case Instruction::SExt: {
00466     // Compute the bits in the result that are not present in the input.
00467     unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
00468 
00469     APInt InputDemandedBits = DemandedMask &
00470                               APInt::getLowBitsSet(BitWidth, SrcBitWidth);
00471 
00472     APInt NewBits(APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth));
00473     // If any of the sign extended bits are demanded, we know that the sign
00474     // bit is demanded.
00475     if ((NewBits & DemandedMask) != 0)
00476       InputDemandedBits.setBit(SrcBitWidth-1);
00477 
00478     InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth);
00479     KnownZero = KnownZero.trunc(SrcBitWidth);
00480     KnownOne = KnownOne.trunc(SrcBitWidth);
00481     if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits,
00482                              KnownZero, KnownOne, Depth+1))
00483       return I;
00484     InputDemandedBits = InputDemandedBits.zext(BitWidth);
00485     KnownZero = KnownZero.zext(BitWidth);
00486     KnownOne = KnownOne.zext(BitWidth);
00487     assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
00488 
00489     // If the sign bit of the input is known set or clear, then we know the
00490     // top bits of the result.
00491 
00492     // If the input sign bit is known zero, or if the NewBits are not demanded
00493     // convert this into a zero extension.
00494     if (KnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) {
00495       // Convert to ZExt cast
00496       CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
00497       return InsertNewInstWith(NewCast, *I);
00498     } else if (KnownOne[SrcBitWidth-1]) {    // Input sign bit known set
00499       KnownOne |= NewBits;
00500     }
00501     break;
00502   }
00503   case Instruction::Add: {
00504     // Figure out what the input bits are.  If the top bits of the and result
00505     // are not demanded, then the add doesn't demand them from its input
00506     // either.
00507     unsigned NLZ = DemandedMask.countLeadingZeros();
00508 
00509     // If there is a constant on the RHS, there are a variety of xformations
00510     // we can do.
00511     if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
00512       // If null, this should be simplified elsewhere.  Some of the xforms here
00513       // won't work if the RHS is zero.
00514       if (RHS->isZero())
00515         break;
00516 
00517       // If the top bit of the output is demanded, demand everything from the
00518       // input.  Otherwise, we demand all the input bits except NLZ top bits.
00519       APInt InDemandedBits(APInt::getLowBitsSet(BitWidth, BitWidth - NLZ));
00520 
00521       // Find information about known zero/one bits in the input.
00522       if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits,
00523                                LHSKnownZero, LHSKnownOne, Depth+1))
00524         return I;
00525 
00526       // If the RHS of the add has bits set that can't affect the input, reduce
00527       // the constant.
00528       if (ShrinkDemandedConstant(I, 1, InDemandedBits))
00529         return I;
00530 
00531       // Avoid excess work.
00532       if (LHSKnownZero == 0 && LHSKnownOne == 0)
00533         break;
00534 
00535       // Turn it into OR if input bits are zero.
00536       if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) {
00537         Instruction *Or =
00538           BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
00539                                    I->getName());
00540         return InsertNewInstWith(Or, *I);
00541       }
00542 
00543       // We can say something about the output known-zero and known-one bits,
00544       // depending on potential carries from the input constant and the
00545       // unknowns.  For example if the LHS is known to have at most the 0x0F0F0
00546       // bits set and the RHS constant is 0x01001, then we know we have a known
00547       // one mask of 0x00001 and a known zero mask of 0xE0F0E.
00548 
00549       // To compute this, we first compute the potential carry bits.  These are
00550       // the bits which may be modified.  I'm not aware of a better way to do
00551       // this scan.
00552       const APInt &RHSVal = RHS->getValue();
00553       APInt CarryBits((~LHSKnownZero + RHSVal) ^ (~LHSKnownZero ^ RHSVal));
00554 
00555       // Now that we know which bits have carries, compute the known-1/0 sets.
00556 
00557       // Bits are known one if they are known zero in one operand and one in the
00558       // other, and there is no input carry.
00559       KnownOne = ((LHSKnownZero & RHSVal) |
00560                   (LHSKnownOne & ~RHSVal)) & ~CarryBits;
00561 
00562       // Bits are known zero if they are known zero in both operands and there
00563       // is no input carry.
00564       KnownZero = LHSKnownZero & ~RHSVal & ~CarryBits;
00565     } else {
00566       // If the high-bits of this ADD are not demanded, then it does not demand
00567       // the high bits of its LHS or RHS.
00568       if (DemandedMask[BitWidth-1] == 0) {
00569         // Right fill the mask of bits for this ADD to demand the most
00570         // significant bit and all those below it.
00571         APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
00572         if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
00573                                  LHSKnownZero, LHSKnownOne, Depth+1) ||
00574             SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
00575                                  LHSKnownZero, LHSKnownOne, Depth+1))
00576           return I;
00577       }
00578     }
00579     break;
00580   }
00581   case Instruction::Sub:
00582     // If the high-bits of this SUB are not demanded, then it does not demand
00583     // the high bits of its LHS or RHS.
00584     if (DemandedMask[BitWidth-1] == 0) {
00585       // Right fill the mask of bits for this SUB to demand the most
00586       // significant bit and all those below it.
00587       uint32_t NLZ = DemandedMask.countLeadingZeros();
00588       APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
00589       if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
00590                                LHSKnownZero, LHSKnownOne, Depth+1) ||
00591           SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
00592                                LHSKnownZero, LHSKnownOne, Depth+1))
00593         return I;
00594     }
00595 
00596     // Otherwise just hand the sub off to computeKnownBits to fill in
00597     // the known zeros and ones.
00598     computeKnownBits(V, KnownZero, KnownOne, Depth);
00599 
00600     // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
00601     // zero.
00602     if (ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(0))) {
00603       APInt I0 = C0->getValue();
00604       if ((I0 + 1).isPowerOf2() && (I0 | KnownZero).isAllOnesValue()) {
00605         Instruction *Xor = BinaryOperator::CreateXor(I->getOperand(1), C0);
00606         return InsertNewInstWith(Xor, *I);
00607       }
00608     }
00609     break;
00610   case Instruction::Shl:
00611     if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
00612       {
00613         Value *VarX; ConstantInt *C1;
00614         if (match(I->getOperand(0), m_Shr(m_Value(VarX), m_ConstantInt(C1)))) {
00615           Instruction *Shr = cast<Instruction>(I->getOperand(0));
00616           Value *R = SimplifyShrShlDemandedBits(Shr, I, DemandedMask,
00617                                                 KnownZero, KnownOne);
00618           if (R)
00619             return R;
00620         }
00621       }
00622 
00623       uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
00624       APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));
00625 
00626       // If the shift is NUW/NSW, then it does demand the high bits.
00627       ShlOperator *IOp = cast<ShlOperator>(I);
00628       if (IOp->hasNoSignedWrap())
00629         DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
00630       else if (IOp->hasNoUnsignedWrap())
00631         DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
00632 
00633       if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
00634                                KnownZero, KnownOne, Depth+1))
00635         return I;
00636       assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
00637       KnownZero <<= ShiftAmt;
00638       KnownOne  <<= ShiftAmt;
00639       // low bits known zero.
00640       if (ShiftAmt)
00641         KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
00642     }
00643     break;
00644   case Instruction::LShr:
00645     // For a logical shift right
00646     if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
00647       uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
00648 
00649       // Unsigned shift right.
00650       APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
00651 
00652       // If the shift is exact, then it does demand the low bits (and knows that
00653       // they are zero).
00654       if (cast<LShrOperator>(I)->isExact())
00655         DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
00656 
00657       if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
00658                                KnownZero, KnownOne, Depth+1))
00659         return I;
00660       assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
00661       KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
00662       KnownOne  = APIntOps::lshr(KnownOne, ShiftAmt);
00663       if (ShiftAmt) {
00664         // Compute the new bits that are at the top now.
00665         APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
00666         KnownZero |= HighBits;  // high bits known zero.
00667       }
00668     }
00669     break;
00670   case Instruction::AShr:
00671     // If this is an arithmetic shift right and only the low-bit is set, we can
00672     // always convert this into a logical shr, even if the shift amount is
00673     // variable.  The low bit of the shift cannot be an input sign bit unless
00674     // the shift amount is >= the size of the datatype, which is undefined.
00675     if (DemandedMask == 1) {
00676       // Perform the logical shift right.
00677       Instruction *NewVal = BinaryOperator::CreateLShr(
00678                         I->getOperand(0), I->getOperand(1), I->getName());
00679       return InsertNewInstWith(NewVal, *I);
00680     }
00681 
00682     // If the sign bit is the only bit demanded by this ashr, then there is no
00683     // need to do it, the shift doesn't change the high bit.
00684     if (DemandedMask.isSignBit())
00685       return I->getOperand(0);
00686 
00687     if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
00688       uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
00689 
00690       // Signed shift right.
00691       APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
00692       // If any of the "high bits" are demanded, we should set the sign bit as
00693       // demanded.
00694       if (DemandedMask.countLeadingZeros() <= ShiftAmt)
00695         DemandedMaskIn.setBit(BitWidth-1);
00696 
00697       // If the shift is exact, then it does demand the low bits (and knows that
00698       // they are zero).
00699       if (cast<AShrOperator>(I)->isExact())
00700         DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
00701 
00702       if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
00703                                KnownZero, KnownOne, Depth+1))
00704         return I;
00705       assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
00706       // Compute the new bits that are at the top now.
00707       APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
00708       KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
00709       KnownOne  = APIntOps::lshr(KnownOne, ShiftAmt);
00710 
00711       // Handle the sign bits.
00712       APInt SignBit(APInt::getSignBit(BitWidth));
00713       // Adjust to where it is now in the mask.
00714       SignBit = APIntOps::lshr(SignBit, ShiftAmt);
00715 
00716       // If the input sign bit is known to be zero, or if none of the top bits
00717       // are demanded, turn this into an unsigned shift right.
00718       if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] ||
00719           (HighBits & ~DemandedMask) == HighBits) {
00720         // Perform the logical shift right.
00721         BinaryOperator *NewVal = BinaryOperator::CreateLShr(I->getOperand(0),
00722                                                             SA, I->getName());
00723         NewVal->setIsExact(cast<BinaryOperator>(I)->isExact());
00724         return InsertNewInstWith(NewVal, *I);
00725       } else if ((KnownOne & SignBit) != 0) { // New bits are known one.
00726         KnownOne |= HighBits;
00727       }
00728     }
00729     break;
00730   case Instruction::SRem:
00731     if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
00732       // X % -1 demands all the bits because we don't want to introduce
00733       // INT_MIN % -1 (== undef) by accident.
00734       if (Rem->isAllOnesValue())
00735         break;
00736       APInt RA = Rem->getValue().abs();
00737       if (RA.isPowerOf2()) {
00738         if (DemandedMask.ult(RA))    // srem won't affect demanded bits
00739           return I->getOperand(0);
00740 
00741         APInt LowBits = RA - 1;
00742         APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
00743         if (SimplifyDemandedBits(I->getOperandUse(0), Mask2,
00744                                  LHSKnownZero, LHSKnownOne, Depth+1))
00745           return I;
00746 
00747         // The low bits of LHS are unchanged by the srem.
00748         KnownZero = LHSKnownZero & LowBits;
00749         KnownOne = LHSKnownOne & LowBits;
00750 
00751         // If LHS is non-negative or has all low bits zero, then the upper bits
00752         // are all zero.
00753         if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits))
00754           KnownZero |= ~LowBits;
00755 
00756         // If LHS is negative and not all low bits are zero, then the upper bits
00757         // are all one.
00758         if (LHSKnownOne[BitWidth-1] && ((LHSKnownOne & LowBits) != 0))
00759           KnownOne |= ~LowBits;
00760 
00761         assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
00762       }
00763     }
00764 
00765     // The sign bit is the LHS's sign bit, except when the result of the
00766     // remainder is zero.
00767     if (DemandedMask.isNegative() && KnownZero.isNonNegative()) {
00768       APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
00769       computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
00770       // If it's known zero, our sign bit is also zero.
00771       if (LHSKnownZero.isNegative())
00772         KnownZero.setBit(KnownZero.getBitWidth() - 1);
00773     }
00774     break;
00775   case Instruction::URem: {
00776     APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
00777     APInt AllOnes = APInt::getAllOnesValue(BitWidth);
00778     if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes,
00779                              KnownZero2, KnownOne2, Depth+1) ||
00780         SimplifyDemandedBits(I->getOperandUse(1), AllOnes,
00781                              KnownZero2, KnownOne2, Depth+1))
00782       return I;
00783 
00784     unsigned Leaders = KnownZero2.countLeadingOnes();
00785     Leaders = std::max(Leaders,
00786                        KnownZero2.countLeadingOnes());
00787     KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
00788     break;
00789   }
00790   case Instruction::Call:
00791     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
00792       switch (II->getIntrinsicID()) {
00793       default: break;
00794       case Intrinsic::bswap: {
00795         // If the only bits demanded come from one byte of the bswap result,
00796         // just shift the input byte into position to eliminate the bswap.
00797         unsigned NLZ = DemandedMask.countLeadingZeros();
00798         unsigned NTZ = DemandedMask.countTrailingZeros();
00799 
00800         // Round NTZ down to the next byte.  If we have 11 trailing zeros, then
00801         // we need all the bits down to bit 8.  Likewise, round NLZ.  If we
00802         // have 14 leading zeros, round to 8.
00803         NLZ &= ~7;
00804         NTZ &= ~7;
00805         // If we need exactly one byte, we can do this transformation.
00806         if (BitWidth-NLZ-NTZ == 8) {
00807           unsigned ResultBit = NTZ;
00808           unsigned InputBit = BitWidth-NTZ-8;
00809 
00810           // Replace this with either a left or right shift to get the byte into
00811           // the right place.
00812           Instruction *NewVal;
00813           if (InputBit > ResultBit)
00814             NewVal = BinaryOperator::CreateLShr(II->getArgOperand(0),
00815                     ConstantInt::get(I->getType(), InputBit-ResultBit));
00816           else
00817             NewVal = BinaryOperator::CreateShl(II->getArgOperand(0),
00818                     ConstantInt::get(I->getType(), ResultBit-InputBit));
00819           NewVal->takeName(I);
00820           return InsertNewInstWith(NewVal, *I);
00821         }
00822 
00823         // TODO: Could compute known zero/one bits based on the input.
00824         break;
00825       }
00826       case Intrinsic::x86_sse42_crc32_64_64:
00827         KnownZero = APInt::getHighBitsSet(64, 32);
00828         return nullptr;
00829       }
00830     }
00831     computeKnownBits(V, KnownZero, KnownOne, Depth);
00832     break;
00833   }
00834 
00835   // If the client is only demanding bits that we know, return the known
00836   // constant.
00837   if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask)
00838     return Constant::getIntegerValue(VTy, KnownOne);
00839   return nullptr;
00840 }
00841 
00842 /// Helper routine of SimplifyDemandedUseBits. It tries to simplify
00843 /// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into
00844 /// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign
00845 /// of "C2-C1".
00846 ///
00847 /// Suppose E1 and E2 are generally different in bits S={bm, bm+1,
00848 /// ..., bn}, without considering the specific value X is holding.
00849 /// This transformation is legal iff one of following conditions is hold:
00850 ///  1) All the bit in S are 0, in this case E1 == E2.
00851 ///  2) We don't care those bits in S, per the input DemandedMask.
00852 ///  3) Combination of 1) and 2). Some bits in S are 0, and we don't care the
00853 ///     rest bits.
00854 ///
00855 /// Currently we only test condition 2).
00856 ///
00857 /// As with SimplifyDemandedUseBits, it returns NULL if the simplification was
00858 /// not successful.
00859 Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
00860   Instruction *Shl, APInt DemandedMask, APInt &KnownZero, APInt &KnownOne) {
00861 
00862   const APInt &ShlOp1 = cast<ConstantInt>(Shl->getOperand(1))->getValue();
00863   const APInt &ShrOp1 = cast<ConstantInt>(Shr->getOperand(1))->getValue();
00864   if (!ShlOp1 || !ShrOp1)
00865       return nullptr; // Noop.
00866 
00867   Value *VarX = Shr->getOperand(0);
00868   Type *Ty = VarX->getType();
00869   unsigned BitWidth = Ty->getIntegerBitWidth();
00870   if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth))
00871     return nullptr; // Undef.
00872 
00873   unsigned ShlAmt = ShlOp1.getZExtValue();
00874   unsigned ShrAmt = ShrOp1.getZExtValue();
00875 
00876   KnownOne.clearAllBits();
00877   KnownZero = APInt::getBitsSet(KnownZero.getBitWidth(), 0, ShlAmt-1);
00878   KnownZero &= DemandedMask;
00879 
00880   APInt BitMask1(APInt::getAllOnesValue(BitWidth));
00881   APInt BitMask2(APInt::getAllOnesValue(BitWidth));
00882 
00883   bool isLshr = (Shr->getOpcode() == Instruction::LShr);
00884   BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) :
00885                       (BitMask1.ashr(ShrAmt) << ShlAmt);
00886 
00887   if (ShrAmt <= ShlAmt) {
00888     BitMask2 <<= (ShlAmt - ShrAmt);
00889   } else {
00890     BitMask2 = isLshr ? BitMask2.lshr(ShrAmt - ShlAmt):
00891                         BitMask2.ashr(ShrAmt - ShlAmt);
00892   }
00893 
00894   // Check if condition-2 (see the comment to this function) is satified.
00895   if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) {
00896     if (ShrAmt == ShlAmt)
00897       return VarX;
00898 
00899     if (!Shr->hasOneUse())
00900       return nullptr;
00901 
00902     BinaryOperator *New;
00903     if (ShrAmt < ShlAmt) {
00904       Constant *Amt = ConstantInt::get(VarX->getType(), ShlAmt - ShrAmt);
00905       New = BinaryOperator::CreateShl(VarX, Amt);
00906       BinaryOperator *Orig = cast<BinaryOperator>(Shl);
00907       New->setHasNoSignedWrap(Orig->hasNoSignedWrap());
00908       New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap());
00909     } else {
00910       Constant *Amt = ConstantInt::get(VarX->getType(), ShrAmt - ShlAmt);
00911       New = isLshr ? BinaryOperator::CreateLShr(VarX, Amt) :
00912                      BinaryOperator::CreateAShr(VarX, Amt);
00913       if (cast<BinaryOperator>(Shr)->isExact())
00914         New->setIsExact(true);
00915     }
00916 
00917     return InsertNewInstWith(New, *Shl);
00918   }
00919 
00920   return nullptr;
00921 }
00922 
00923 /// SimplifyDemandedVectorElts - The specified value produces a vector with
00924 /// any number of elements. DemandedElts contains the set of elements that are
00925 /// actually used by the caller.  This method analyzes which elements of the
00926 /// operand are undef and returns that information in UndefElts.
00927 ///
00928 /// If the information about demanded elements can be used to simplify the
00929 /// operation, the operation is simplified, then the resultant value is
00930 /// returned.  This returns null if no change was made.
00931 Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
00932                                                 APInt &UndefElts,
00933                                                 unsigned Depth) {
00934   unsigned VWidth = cast<VectorType>(V->getType())->getNumElements();
00935   APInt EltMask(APInt::getAllOnesValue(VWidth));
00936   assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
00937 
00938   if (isa<UndefValue>(V)) {
00939     // If the entire vector is undefined, just return this info.
00940     UndefElts = EltMask;
00941     return nullptr;
00942   }
00943 
00944   if (DemandedElts == 0) { // If nothing is demanded, provide undef.
00945     UndefElts = EltMask;
00946     return UndefValue::get(V->getType());
00947   }
00948 
00949   UndefElts = 0;
00950 
00951   // Handle ConstantAggregateZero, ConstantVector, ConstantDataSequential.
00952   if (Constant *C = dyn_cast<Constant>(V)) {
00953     // Check if this is identity. If so, return 0 since we are not simplifying
00954     // anything.
00955     if (DemandedElts.isAllOnesValue())
00956       return nullptr;
00957 
00958     Type *EltTy = cast<VectorType>(V->getType())->getElementType();
00959     Constant *Undef = UndefValue::get(EltTy);
00960 
00961     SmallVector<Constant*, 16> Elts;
00962     for (unsigned i = 0; i != VWidth; ++i) {
00963       if (!DemandedElts[i]) {   // If not demanded, set to undef.
00964         Elts.push_back(Undef);
00965         UndefElts.setBit(i);
00966         continue;
00967       }
00968 
00969       Constant *Elt = C->getAggregateElement(i);
00970       if (!Elt) return nullptr;
00971 
00972       if (isa<UndefValue>(Elt)) {   // Already undef.
00973         Elts.push_back(Undef);
00974         UndefElts.setBit(i);
00975       } else {                               // Otherwise, defined.
00976         Elts.push_back(Elt);
00977       }
00978     }
00979 
00980     // If we changed the constant, return it.
00981     Constant *NewCV = ConstantVector::get(Elts);
00982     return NewCV != C ? NewCV : nullptr;
00983   }
00984 
00985   // Limit search depth.
00986   if (Depth == 10)
00987     return nullptr;
00988 
00989   // If multiple users are using the root value, proceed with
00990   // simplification conservatively assuming that all elements
00991   // are needed.
00992   if (!V->hasOneUse()) {
00993     // Quit if we find multiple users of a non-root value though.
00994     // They'll be handled when it's their turn to be visited by
00995     // the main instcombine process.
00996     if (Depth != 0)
00997       // TODO: Just compute the UndefElts information recursively.
00998       return nullptr;
00999 
01000     // Conservatively assume that all elements are needed.
01001     DemandedElts = EltMask;
01002   }
01003 
01004   Instruction *I = dyn_cast<Instruction>(V);
01005   if (!I) return nullptr;        // Only analyze instructions.
01006 
01007   bool MadeChange = false;
01008   APInt UndefElts2(VWidth, 0);
01009   Value *TmpV;
01010   switch (I->getOpcode()) {
01011   default: break;
01012 
01013   case Instruction::InsertElement: {
01014     // If this is a variable index, we don't know which element it overwrites.
01015     // demand exactly the same input as we produce.
01016     ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
01017     if (!Idx) {
01018       // Note that we can't propagate undef elt info, because we don't know
01019       // which elt is getting updated.
01020       TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
01021                                         UndefElts2, Depth+1);
01022       if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
01023       break;
01024     }
01025 
01026     // If this is inserting an element that isn't demanded, remove this
01027     // insertelement.
01028     unsigned IdxNo = Idx->getZExtValue();
01029     if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
01030       Worklist.Add(I);
01031       return I->getOperand(0);
01032     }
01033 
01034     // Otherwise, the element inserted overwrites whatever was there, so the
01035     // input demanded set is simpler than the output set.
01036     APInt DemandedElts2 = DemandedElts;
01037     DemandedElts2.clearBit(IdxNo);
01038     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2,
01039                                       UndefElts, Depth+1);
01040     if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
01041 
01042     // The inserted element is defined.
01043     UndefElts.clearBit(IdxNo);
01044     break;
01045   }
01046   case Instruction::ShuffleVector: {
01047     ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
01048     uint64_t LHSVWidth =
01049       cast<VectorType>(Shuffle->getOperand(0)->getType())->getNumElements();
01050     APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0);
01051     for (unsigned i = 0; i < VWidth; i++) {
01052       if (DemandedElts[i]) {
01053         unsigned MaskVal = Shuffle->getMaskValue(i);
01054         if (MaskVal != -1u) {
01055           assert(MaskVal < LHSVWidth * 2 &&
01056                  "shufflevector mask index out of range!");
01057           if (MaskVal < LHSVWidth)
01058             LeftDemanded.setBit(MaskVal);
01059           else
01060             RightDemanded.setBit(MaskVal - LHSVWidth);
01061         }
01062       }
01063     }
01064 
01065     APInt UndefElts4(LHSVWidth, 0);
01066     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
01067                                       UndefElts4, Depth+1);
01068     if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
01069 
01070     APInt UndefElts3(LHSVWidth, 0);
01071     TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
01072                                       UndefElts3, Depth+1);
01073     if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
01074 
01075     bool NewUndefElts = false;
01076     for (unsigned i = 0; i < VWidth; i++) {
01077       unsigned MaskVal = Shuffle->getMaskValue(i);
01078       if (MaskVal == -1u) {
01079         UndefElts.setBit(i);
01080       } else if (!DemandedElts[i]) {
01081         NewUndefElts = true;
01082         UndefElts.setBit(i);
01083       } else if (MaskVal < LHSVWidth) {
01084         if (UndefElts4[MaskVal]) {
01085           NewUndefElts = true;
01086           UndefElts.setBit(i);
01087         }
01088       } else {
01089         if (UndefElts3[MaskVal - LHSVWidth]) {
01090           NewUndefElts = true;
01091           UndefElts.setBit(i);
01092         }
01093       }
01094     }
01095 
01096     if (NewUndefElts) {
01097       // Add additional discovered undefs.
01098       SmallVector<Constant*, 16> Elts;
01099       for (unsigned i = 0; i < VWidth; ++i) {
01100         if (UndefElts[i])
01101           Elts.push_back(UndefValue::get(Type::getInt32Ty(I->getContext())));
01102         else
01103           Elts.push_back(ConstantInt::get(Type::getInt32Ty(I->getContext()),
01104                                           Shuffle->getMaskValue(i)));
01105       }
01106       I->setOperand(2, ConstantVector::get(Elts));
01107       MadeChange = true;
01108     }
01109     break;
01110   }
01111   case Instruction::Select: {
01112     APInt LeftDemanded(DemandedElts), RightDemanded(DemandedElts);
01113     if (ConstantVector* CV = dyn_cast<ConstantVector>(I->getOperand(0))) {
01114       for (unsigned i = 0; i < VWidth; i++) {
01115         if (CV->getAggregateElement(i)->isNullValue())
01116           LeftDemanded.clearBit(i);
01117         else
01118           RightDemanded.clearBit(i);
01119       }
01120     }
01121 
01122     TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded,
01123                                       UndefElts, Depth+1);
01124     if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
01125 
01126     TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded,
01127                                       UndefElts2, Depth+1);
01128     if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; }
01129 
01130     // Output elements are undefined if both are undefined.
01131     UndefElts &= UndefElts2;
01132     break;
01133   }
01134   case Instruction::BitCast: {
01135     // Vector->vector casts only.
01136     VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
01137     if (!VTy) break;
01138     unsigned InVWidth = VTy->getNumElements();
01139     APInt InputDemandedElts(InVWidth, 0);
01140     unsigned Ratio;
01141 
01142     if (VWidth == InVWidth) {
01143       // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
01144       // elements as are demanded of us.
01145       Ratio = 1;
01146       InputDemandedElts = DemandedElts;
01147     } else if (VWidth > InVWidth) {
01148       // Untested so far.
01149       break;
01150 
01151       // If there are more elements in the result than there are in the source,
01152       // then an input element is live if any of the corresponding output
01153       // elements are live.
01154       Ratio = VWidth/InVWidth;
01155       for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
01156         if (DemandedElts[OutIdx])
01157           InputDemandedElts.setBit(OutIdx/Ratio);
01158       }
01159     } else {
01160       // Untested so far.
01161       break;
01162 
01163       // If there are more elements in the source than there are in the result,
01164       // then an input element is live if the corresponding output element is
01165       // live.
01166       Ratio = InVWidth/VWidth;
01167       for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
01168         if (DemandedElts[InIdx/Ratio])
01169           InputDemandedElts.setBit(InIdx);
01170     }
01171 
01172     // div/rem demand all inputs, because they don't want divide by zero.
01173     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
01174                                       UndefElts2, Depth+1);
01175     if (TmpV) {
01176       I->setOperand(0, TmpV);
01177       MadeChange = true;
01178     }
01179 
01180     UndefElts = UndefElts2;
01181     if (VWidth > InVWidth) {
01182       llvm_unreachable("Unimp");
01183       // If there are more elements in the result than there are in the source,
01184       // then an output element is undef if the corresponding input element is
01185       // undef.
01186       for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
01187         if (UndefElts2[OutIdx/Ratio])
01188           UndefElts.setBit(OutIdx);
01189     } else if (VWidth < InVWidth) {
01190       llvm_unreachable("Unimp");
01191       // If there are more elements in the source than there are in the result,
01192       // then a result element is undef if all of the corresponding input
01193       // elements are undef.
01194       UndefElts = ~0ULL >> (64-VWidth);  // Start out all undef.
01195       for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
01196         if (!UndefElts2[InIdx])            // Not undef?
01197           UndefElts.clearBit(InIdx/Ratio);    // Clear undef bit.
01198     }
01199     break;
01200   }
01201   case Instruction::And:
01202   case Instruction::Or:
01203   case Instruction::Xor:
01204   case Instruction::Add:
01205   case Instruction::Sub:
01206   case Instruction::Mul:
01207     // div/rem demand all inputs, because they don't want divide by zero.
01208     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
01209                                       UndefElts, Depth+1);
01210     if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
01211     TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
01212                                       UndefElts2, Depth+1);
01213     if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
01214 
01215     // Output elements are undefined if both are undefined.  Consider things
01216     // like undef&0.  The result is known zero, not undef.
01217     UndefElts &= UndefElts2;
01218     break;
01219   case Instruction::FPTrunc:
01220   case Instruction::FPExt:
01221     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
01222                                       UndefElts, Depth+1);
01223     if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
01224     break;
01225 
01226   case Instruction::Call: {
01227     IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
01228     if (!II) break;
01229     switch (II->getIntrinsicID()) {
01230     default: break;
01231 
01232     // Binary vector operations that work column-wise.  A dest element is a
01233     // function of the corresponding input elements from the two inputs.
01234     case Intrinsic::x86_sse_sub_ss:
01235     case Intrinsic::x86_sse_mul_ss:
01236     case Intrinsic::x86_sse_min_ss:
01237     case Intrinsic::x86_sse_max_ss:
01238     case Intrinsic::x86_sse2_sub_sd:
01239     case Intrinsic::x86_sse2_mul_sd:
01240     case Intrinsic::x86_sse2_min_sd:
01241     case Intrinsic::x86_sse2_max_sd:
01242       TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
01243                                         UndefElts, Depth+1);
01244       if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
01245       TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
01246                                         UndefElts2, Depth+1);
01247       if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
01248 
01249       // If only the low elt is demanded and this is a scalarizable intrinsic,
01250       // scalarize it now.
01251       if (DemandedElts == 1) {
01252         switch (II->getIntrinsicID()) {
01253         default: break;
01254         case Intrinsic::x86_sse_sub_ss:
01255         case Intrinsic::x86_sse_mul_ss:
01256         case Intrinsic::x86_sse2_sub_sd:
01257         case Intrinsic::x86_sse2_mul_sd:
01258           // TODO: Lower MIN/MAX/ABS/etc
01259           Value *LHS = II->getArgOperand(0);
01260           Value *RHS = II->getArgOperand(1);
01261           // Extract the element as scalars.
01262           LHS = InsertNewInstWith(ExtractElementInst::Create(LHS,
01263             ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
01264           RHS = InsertNewInstWith(ExtractElementInst::Create(RHS,
01265             ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
01266 
01267           switch (II->getIntrinsicID()) {
01268           default: llvm_unreachable("Case stmts out of sync!");
01269           case Intrinsic::x86_sse_sub_ss:
01270           case Intrinsic::x86_sse2_sub_sd:
01271             TmpV = InsertNewInstWith(BinaryOperator::CreateFSub(LHS, RHS,
01272                                                         II->getName()), *II);
01273             break;
01274           case Intrinsic::x86_sse_mul_ss:
01275           case Intrinsic::x86_sse2_mul_sd:
01276             TmpV = InsertNewInstWith(BinaryOperator::CreateFMul(LHS, RHS,
01277                                                          II->getName()), *II);
01278             break;
01279           }
01280 
01281           Instruction *New =
01282             InsertElementInst::Create(
01283               UndefValue::get(II->getType()), TmpV,
01284               ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U, false),
01285                                       II->getName());
01286           InsertNewInstWith(New, *II);
01287           return New;
01288         }
01289       }
01290 
01291       // Output elements are undefined if both are undefined.  Consider things
01292       // like undef&0.  The result is known zero, not undef.
01293       UndefElts &= UndefElts2;
01294       break;
01295     }
01296     break;
01297   }
01298   }
01299   return MadeChange ? I : nullptr;
01300 }