LLVM  mainline
InstCombineSimplifyDemanded.cpp
Go to the documentation of this file.
00001 //===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file contains logic for simplifying instructions based on information
00011 // about how they are used.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #include "InstCombineInternal.h"
00016 #include "llvm/IR/IntrinsicInst.h"
00017 #include "llvm/IR/PatternMatch.h"
00018 
00019 using namespace llvm;
00020 using namespace llvm::PatternMatch;
00021 
00022 #define DEBUG_TYPE "instcombine"
00023 
00024 /// ShrinkDemandedConstant - Check to see if the specified operand of the
00025 /// specified instruction is a constant integer.  If so, check to see if there
00026 /// are any bits set in the constant that are not demanded.  If so, shrink the
00027 /// constant and return true.
00028 static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
00029                                    APInt Demanded) {
00030   assert(I && "No instruction?");
00031   assert(OpNo < I->getNumOperands() && "Operand index too large");
00032 
00033   // If the operand is not a constant integer, nothing to do.
00034   ConstantInt *OpC = dyn_cast<ConstantInt>(I->getOperand(OpNo));
00035   if (!OpC) return false;
00036 
00037   // If there are no bits set that aren't demanded, nothing to do.
00038   Demanded = Demanded.zextOrTrunc(OpC->getValue().getBitWidth());
00039   if ((~Demanded & OpC->getValue()) == 0)
00040     return false;
00041 
00042   // This instruction is producing bits that are not demanded. Shrink the RHS.
00043   Demanded &= OpC->getValue();
00044   I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded));
00045 
00046   // If either 'nsw' or 'nuw' is set and the constant is negative,
00047   // removing *any* bits from the constant could make overflow occur.
00048   // Remove 'nsw' and 'nuw' from the instruction in this case.
00049   if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I)) {
00050     assert(OBO->getOpcode() == Instruction::Add);
00051     if (OBO->hasNoSignedWrap() || OBO->hasNoUnsignedWrap()) {
00052       if (OpC->getValue().isNegative()) {
00053         cast<BinaryOperator>(OBO)->setHasNoSignedWrap(false);
00054         cast<BinaryOperator>(OBO)->setHasNoUnsignedWrap(false);
00055       }
00056     }
00057   }
00058 
00059   return true;
00060 }
00061 
00062 
00063 
00064 /// SimplifyDemandedInstructionBits - Inst is an integer instruction that
00065 /// SimplifyDemandedBits knows about.  See if the instruction has any
00066 /// properties that allow us to simplify its operands.
00067 bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
00068   unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
00069   APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
00070   APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
00071 
00072   Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, KnownZero, KnownOne,
00073                                      0, &Inst);
00074   if (!V) return false;
00075   if (V == &Inst) return true;
00076   ReplaceInstUsesWith(Inst, V);
00077   return true;
00078 }
00079 
00080 /// SimplifyDemandedBits - This form of SimplifyDemandedBits simplifies the
00081 /// specified instruction operand if possible, updating it in place.  It returns
00082 /// true if it made any change and false otherwise.
00083 bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
00084                                         APInt &KnownZero, APInt &KnownOne,
00085                                         unsigned Depth) {
00086   auto *UserI = dyn_cast<Instruction>(U.getUser());
00087   Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, KnownZero,
00088                                           KnownOne, Depth, UserI);
00089   if (!NewVal) return false;
00090   U = NewVal;
00091   return true;
00092 }
00093 
00094 
00095 /// SimplifyDemandedUseBits - This function attempts to replace V with a simpler
00096 /// value based on the demanded bits.  When this function is called, it is known
00097 /// that only the bits set in DemandedMask of the result of V are ever used
00098 /// downstream. Consequently, depending on the mask and V, it may be possible
00099 /// to replace V with a constant or one of its operands. In such cases, this
00100 /// function does the replacement and returns true. In all other cases, it
00101 /// returns false after analyzing the expression and setting KnownOne and known
00102 /// to be one in the expression.  KnownZero contains all the bits that are known
00103 /// to be zero in the expression. These are provided to potentially allow the
00104 /// caller (which might recursively be SimplifyDemandedBits itself) to simplify
00105 /// the expression. KnownOne and KnownZero always follow the invariant that
00106 /// KnownOne & KnownZero == 0. That is, a bit can't be both 1 and 0. Note that
00107 /// the bits in KnownOne and KnownZero may only be accurate for those bits set
00108 /// in DemandedMask. Note also that the bitwidth of V, DemandedMask, KnownZero
00109 /// and KnownOne must all be the same.
00110 ///
00111 /// This returns null if it did not change anything and it permits no
00112 /// simplification.  This returns V itself if it did some simplification of V's
00113 /// operands based on the information about what bits are demanded. This returns
00114 /// some other non-null value if it found out that V is equal to another value
00115 /// in the context where the specified bits are demanded, but not for all users.
00116 Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
00117                                              APInt &KnownZero, APInt &KnownOne,
00118                                              unsigned Depth,
00119                                              Instruction *CxtI) {
00120   assert(V != nullptr && "Null pointer of Value???");
00121   assert(Depth <= 6 && "Limit Search Depth");
00122   uint32_t BitWidth = DemandedMask.getBitWidth();
00123   Type *VTy = V->getType();
00124   assert(
00125       (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) &&
00126       KnownZero.getBitWidth() == BitWidth &&
00127       KnownOne.getBitWidth() == BitWidth &&
00128       "Value *V, DemandedMask, KnownZero and KnownOne "
00129       "must have same BitWidth");
00130   if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
00131     // We know all of the bits for a constant!
00132     KnownOne = CI->getValue() & DemandedMask;
00133     KnownZero = ~KnownOne & DemandedMask;
00134     return nullptr;
00135   }
00136   if (isa<ConstantPointerNull>(V)) {
00137     // We know all of the bits for a constant!
00138     KnownOne.clearAllBits();
00139     KnownZero = DemandedMask;
00140     return nullptr;
00141   }
00142 
00143   KnownZero.clearAllBits();
00144   KnownOne.clearAllBits();
00145   if (DemandedMask == 0) {   // Not demanding any bits from V.
00146     if (isa<UndefValue>(V))
00147       return nullptr;
00148     return UndefValue::get(VTy);
00149   }
00150 
00151   if (Depth == 6)        // Limit search depth.
00152     return nullptr;
00153 
00154   APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
00155   APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
00156 
00157   Instruction *I = dyn_cast<Instruction>(V);
00158   if (!I) {
00159     computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
00160     return nullptr;        // Only analyze instructions.
00161   }
00162 
00163   // If there are multiple uses of this value and we aren't at the root, then
00164   // we can't do any simplifications of the operands, because DemandedMask
00165   // only reflects the bits demanded by *one* of the users.
00166   if (Depth != 0 && !I->hasOneUse()) {
00167     // Despite the fact that we can't simplify this instruction in all User's
00168     // context, we can at least compute the knownzero/knownone bits, and we can
00169     // do simplifications that apply to *just* the one user if we know that
00170     // this instruction has a simpler value in that context.
00171     if (I->getOpcode() == Instruction::And) {
00172       // If either the LHS or the RHS are Zero, the result is zero.
00173       computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
00174                        CxtI);
00175       computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
00176                        CxtI);
00177 
00178       // If all of the demanded bits are known 1 on one side, return the other.
00179       // These bits cannot contribute to the result of the 'and' in this
00180       // context.
00181       if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
00182           (DemandedMask & ~LHSKnownZero))
00183         return I->getOperand(0);
00184       if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
00185           (DemandedMask & ~RHSKnownZero))
00186         return I->getOperand(1);
00187 
00188       // If all of the demanded bits in the inputs are known zeros, return zero.
00189       if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
00190         return Constant::getNullValue(VTy);
00191 
00192     } else if (I->getOpcode() == Instruction::Or) {
00193       // We can simplify (X|Y) -> X or Y in the user's context if we know that
00194       // only bits from X or Y are demanded.
00195 
00196       // If either the LHS or the RHS are One, the result is One.
00197       computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
00198                        CxtI);
00199       computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
00200                        CxtI);
00201 
00202       // If all of the demanded bits are known zero on one side, return the
00203       // other.  These bits cannot contribute to the result of the 'or' in this
00204       // context.
00205       if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
00206           (DemandedMask & ~LHSKnownOne))
00207         return I->getOperand(0);
00208       if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
00209           (DemandedMask & ~RHSKnownOne))
00210         return I->getOperand(1);
00211 
00212       // If all of the potentially set bits on one side are known to be set on
00213       // the other side, just use the 'other' side.
00214       if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
00215           (DemandedMask & (~RHSKnownZero)))
00216         return I->getOperand(0);
00217       if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
00218           (DemandedMask & (~LHSKnownZero)))
00219         return I->getOperand(1);
00220     } else if (I->getOpcode() == Instruction::Xor) {
00221       // We can simplify (X^Y) -> X or Y in the user's context if we know that
00222       // only bits from X or Y are demanded.
00223 
00224       computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
00225                        CxtI);
00226       computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
00227                        CxtI);
00228 
00229       // If all of the demanded bits are known zero on one side, return the
00230       // other.
00231       if ((DemandedMask & RHSKnownZero) == DemandedMask)
00232         return I->getOperand(0);
00233       if ((DemandedMask & LHSKnownZero) == DemandedMask)
00234         return I->getOperand(1);
00235     }
00236 
00237     // Compute the KnownZero/KnownOne bits to simplify things downstream.
00238     computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI);
00239     return nullptr;
00240   }
00241 
00242   // If this is the root being simplified, allow it to have multiple uses,
00243   // just set the DemandedMask to all bits so that we can try to simplify the
00244   // operands.  This allows visitTruncInst (for example) to simplify the
00245   // operand of a trunc without duplicating all the logic below.
00246   if (Depth == 0 && !V->hasOneUse())
00247     DemandedMask = APInt::getAllOnesValue(BitWidth);
00248 
00249   switch (I->getOpcode()) {
00250   default:
00251     computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI);
00252     break;
00253   case Instruction::And:
00254     // If either the LHS or the RHS are Zero, the result is zero.
00255     if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
00256                              RHSKnownOne, Depth + 1) ||
00257         SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero,
00258                              LHSKnownZero, LHSKnownOne, Depth + 1))
00259       return I;
00260     assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
00261     assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
00262 
00263     // If the client is only demanding bits that we know, return the known
00264     // constant.
00265     if ((DemandedMask & ((RHSKnownZero | LHSKnownZero)|
00266                          (RHSKnownOne & LHSKnownOne))) == DemandedMask)
00267       return Constant::getIntegerValue(VTy, RHSKnownOne & LHSKnownOne);
00268 
00269     // If all of the demanded bits are known 1 on one side, return the other.
00270     // These bits cannot contribute to the result of the 'and'.
00271     if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
00272         (DemandedMask & ~LHSKnownZero))
00273       return I->getOperand(0);
00274     if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
00275         (DemandedMask & ~RHSKnownZero))
00276       return I->getOperand(1);
00277 
00278     // If all of the demanded bits in the inputs are known zeros, return zero.
00279     if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
00280       return Constant::getNullValue(VTy);
00281 
00282     // If the RHS is a constant, see if we can simplify it.
00283     if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero))
00284       return I;
00285 
00286     // Output known-1 bits are only known if set in both the LHS & RHS.
00287     KnownOne = RHSKnownOne & LHSKnownOne;
00288     // Output known-0 are known to be clear if zero in either the LHS | RHS.
00289     KnownZero = RHSKnownZero | LHSKnownZero;
00290     break;
00291   case Instruction::Or:
00292     // If either the LHS or the RHS are One, the result is One.
00293     if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
00294                              RHSKnownOne, Depth + 1) ||
00295         SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne,
00296                              LHSKnownZero, LHSKnownOne, Depth + 1))
00297       return I;
00298     assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
00299     assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
00300 
00301     // If the client is only demanding bits that we know, return the known
00302     // constant.
00303     if ((DemandedMask & ((RHSKnownZero & LHSKnownZero)|
00304                          (RHSKnownOne | LHSKnownOne))) == DemandedMask)
00305       return Constant::getIntegerValue(VTy, RHSKnownOne | LHSKnownOne);
00306 
00307     // If all of the demanded bits are known zero on one side, return the other.
00308     // These bits cannot contribute to the result of the 'or'.
00309     if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
00310         (DemandedMask & ~LHSKnownOne))
00311       return I->getOperand(0);
00312     if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
00313         (DemandedMask & ~RHSKnownOne))
00314       return I->getOperand(1);
00315 
00316     // If all of the potentially set bits on one side are known to be set on
00317     // the other side, just use the 'other' side.
00318     if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
00319         (DemandedMask & (~RHSKnownZero)))
00320       return I->getOperand(0);
00321     if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
00322         (DemandedMask & (~LHSKnownZero)))
00323       return I->getOperand(1);
00324 
00325     // If the RHS is a constant, see if we can simplify it.
00326     if (ShrinkDemandedConstant(I, 1, DemandedMask))
00327       return I;
00328 
00329     // Output known-0 bits are only known if clear in both the LHS & RHS.
00330     KnownZero = RHSKnownZero & LHSKnownZero;
00331     // Output known-1 are known to be set if set in either the LHS | RHS.
00332     KnownOne = RHSKnownOne | LHSKnownOne;
00333     break;
00334   case Instruction::Xor: {
00335     if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
00336                              RHSKnownOne, Depth + 1) ||
00337         SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, LHSKnownZero,
00338                              LHSKnownOne, Depth + 1))
00339       return I;
00340     assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
00341     assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
00342 
00343     // Output known-0 bits are known if clear or set in both the LHS & RHS.
00344     APInt IKnownZero = (RHSKnownZero & LHSKnownZero) |
00345                        (RHSKnownOne & LHSKnownOne);
00346     // Output known-1 are known to be set if set in only one of the LHS, RHS.
00347     APInt IKnownOne =  (RHSKnownZero & LHSKnownOne) |
00348                        (RHSKnownOne & LHSKnownZero);
00349 
00350     // If the client is only demanding bits that we know, return the known
00351     // constant.
00352     if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask)
00353       return Constant::getIntegerValue(VTy, IKnownOne);
00354 
00355     // If all of the demanded bits are known zero on one side, return the other.
00356     // These bits cannot contribute to the result of the 'xor'.
00357     if ((DemandedMask & RHSKnownZero) == DemandedMask)
00358       return I->getOperand(0);
00359     if ((DemandedMask & LHSKnownZero) == DemandedMask)
00360       return I->getOperand(1);
00361 
00362     // If all of the demanded bits are known to be zero on one side or the
00363     // other, turn this into an *inclusive* or.
00364     //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
00365     if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) {
00366       Instruction *Or =
00367         BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
00368                                  I->getName());
00369       return InsertNewInstWith(Or, *I);
00370     }
00371 
00372     // If all of the demanded bits on one side are known, and all of the set
00373     // bits on that side are also known to be set on the other side, turn this
00374     // into an AND, as we know the bits will be cleared.
00375     //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
00376     if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) {
00377       // all known
00378       if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) {
00379         Constant *AndC = Constant::getIntegerValue(VTy,
00380                                                    ~RHSKnownOne & DemandedMask);
00381         Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
00382         return InsertNewInstWith(And, *I);
00383       }
00384     }
00385 
00386     // If the RHS is a constant, see if we can simplify it.
00387     // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
00388     if (ShrinkDemandedConstant(I, 1, DemandedMask))
00389       return I;
00390 
00391     // If our LHS is an 'and' and if it has one use, and if any of the bits we
00392     // are flipping are known to be set, then the xor is just resetting those
00393     // bits to zero.  We can just knock out bits from the 'and' and the 'xor',
00394     // simplifying both of them.
00395     if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0)))
00396       if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
00397           isa<ConstantInt>(I->getOperand(1)) &&
00398           isa<ConstantInt>(LHSInst->getOperand(1)) &&
00399           (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) {
00400         ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1));
00401         ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1));
00402         APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask);
00403 
00404         Constant *AndC =
00405           ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
00406         Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
00407         InsertNewInstWith(NewAnd, *I);
00408 
00409         Constant *XorC =
00410           ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
00411         Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC);
00412         return InsertNewInstWith(NewXor, *I);
00413       }
00414 
00415     // Output known-0 bits are known if clear or set in both the LHS & RHS.
00416     KnownZero= (RHSKnownZero & LHSKnownZero) | (RHSKnownOne & LHSKnownOne);
00417     // Output known-1 are known to be set if set in only one of the LHS, RHS.
00418     KnownOne = (RHSKnownZero & LHSKnownOne) | (RHSKnownOne & LHSKnownZero);
00419     break;
00420   }
00421   case Instruction::Select:
00422     if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask, RHSKnownZero,
00423                              RHSKnownOne, Depth + 1) ||
00424         SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, LHSKnownZero,
00425                              LHSKnownOne, Depth + 1))
00426       return I;
00427     assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
00428     assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
00429 
00430     // If the operands are constants, see if we can simplify them.
00431     if (ShrinkDemandedConstant(I, 1, DemandedMask) ||
00432         ShrinkDemandedConstant(I, 2, DemandedMask))
00433       return I;
00434 
00435     // Only known if known in both the LHS and RHS.
00436     KnownOne = RHSKnownOne & LHSKnownOne;
00437     KnownZero = RHSKnownZero & LHSKnownZero;
00438     break;
00439   case Instruction::Trunc: {
00440     unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits();
00441     DemandedMask = DemandedMask.zext(truncBf);
00442     KnownZero = KnownZero.zext(truncBf);
00443     KnownOne = KnownOne.zext(truncBf);
00444     if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
00445                              KnownOne, Depth + 1))
00446       return I;
00447     DemandedMask = DemandedMask.trunc(BitWidth);
00448     KnownZero = KnownZero.trunc(BitWidth);
00449     KnownOne = KnownOne.trunc(BitWidth);
00450     assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
00451     break;
00452   }
00453   case Instruction::BitCast:
00454     if (!I->getOperand(0)->getType()->isIntOrIntVectorTy())
00455       return nullptr;  // vector->int or fp->int?
00456 
00457     if (VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
00458       if (VectorType *SrcVTy =
00459             dyn_cast<VectorType>(I->getOperand(0)->getType())) {
00460         if (DstVTy->getNumElements() != SrcVTy->getNumElements())
00461           // Don't touch a bitcast between vectors of different element counts.
00462           return nullptr;
00463       } else
00464         // Don't touch a scalar-to-vector bitcast.
00465         return nullptr;
00466     } else if (I->getOperand(0)->getType()->isVectorTy())
00467       // Don't touch a vector-to-scalar bitcast.
00468       return nullptr;
00469 
00470     if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
00471                              KnownOne, Depth + 1))
00472       return I;
00473     assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
00474     break;
00475   case Instruction::ZExt: {
00476     // Compute the bits in the result that are not present in the input.
00477     unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
00478 
00479     DemandedMask = DemandedMask.trunc(SrcBitWidth);
00480     KnownZero = KnownZero.trunc(SrcBitWidth);
00481     KnownOne = KnownOne.trunc(SrcBitWidth);
00482     if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
00483                              KnownOne, Depth + 1))
00484       return I;
00485     DemandedMask = DemandedMask.zext(BitWidth);
00486     KnownZero = KnownZero.zext(BitWidth);
00487     KnownOne = KnownOne.zext(BitWidth);
00488     assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
00489     // The top bits are known to be zero.
00490     KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
00491     break;
00492   }
00493   case Instruction::SExt: {
00494     // Compute the bits in the result that are not present in the input.
00495     unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
00496 
00497     APInt InputDemandedBits = DemandedMask &
00498                               APInt::getLowBitsSet(BitWidth, SrcBitWidth);
00499 
00500     APInt NewBits(APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth));
00501     // If any of the sign extended bits are demanded, we know that the sign
00502     // bit is demanded.
00503     if ((NewBits & DemandedMask) != 0)
00504       InputDemandedBits.setBit(SrcBitWidth-1);
00505 
00506     InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth);
00507     KnownZero = KnownZero.trunc(SrcBitWidth);
00508     KnownOne = KnownOne.trunc(SrcBitWidth);
00509     if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits, KnownZero,
00510                              KnownOne, Depth + 1))
00511       return I;
00512     InputDemandedBits = InputDemandedBits.zext(BitWidth);
00513     KnownZero = KnownZero.zext(BitWidth);
00514     KnownOne = KnownOne.zext(BitWidth);
00515     assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
00516 
00517     // If the sign bit of the input is known set or clear, then we know the
00518     // top bits of the result.
00519 
00520     // If the input sign bit is known zero, or if the NewBits are not demanded
00521     // convert this into a zero extension.
00522     if (KnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) {
00523       // Convert to ZExt cast
00524       CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
00525       return InsertNewInstWith(NewCast, *I);
00526     } else if (KnownOne[SrcBitWidth-1]) {    // Input sign bit known set
00527       KnownOne |= NewBits;
00528     }
00529     break;
00530   }
00531   case Instruction::Add: {
00532     // Figure out what the input bits are.  If the top bits of the and result
00533     // are not demanded, then the add doesn't demand them from its input
00534     // either.
00535     unsigned NLZ = DemandedMask.countLeadingZeros();
00536 
00537     // If there is a constant on the RHS, there are a variety of xformations
00538     // we can do.
00539     if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
00540       // If null, this should be simplified elsewhere.  Some of the xforms here
00541       // won't work if the RHS is zero.
00542       if (RHS->isZero())
00543         break;
00544 
00545       // If the top bit of the output is demanded, demand everything from the
00546       // input.  Otherwise, we demand all the input bits except NLZ top bits.
00547       APInt InDemandedBits(APInt::getLowBitsSet(BitWidth, BitWidth - NLZ));
00548 
00549       // Find information about known zero/one bits in the input.
00550       if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits,
00551                                LHSKnownZero, LHSKnownOne, Depth + 1))
00552         return I;
00553 
00554       // If the RHS of the add has bits set that can't affect the input, reduce
00555       // the constant.
00556       if (ShrinkDemandedConstant(I, 1, InDemandedBits))
00557         return I;
00558 
00559       // Avoid excess work.
00560       if (LHSKnownZero == 0 && LHSKnownOne == 0)
00561         break;
00562 
00563       // Turn it into OR if input bits are zero.
00564       if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) {
00565         Instruction *Or =
00566           BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
00567                                    I->getName());
00568         return InsertNewInstWith(Or, *I);
00569       }
00570 
00571       // We can say something about the output known-zero and known-one bits,
00572       // depending on potential carries from the input constant and the
00573       // unknowns.  For example if the LHS is known to have at most the 0x0F0F0
00574       // bits set and the RHS constant is 0x01001, then we know we have a known
00575       // one mask of 0x00001 and a known zero mask of 0xE0F0E.
00576 
00577       // To compute this, we first compute the potential carry bits.  These are
00578       // the bits which may be modified.  I'm not aware of a better way to do
00579       // this scan.
00580       const APInt &RHSVal = RHS->getValue();
00581       APInt CarryBits((~LHSKnownZero + RHSVal) ^ (~LHSKnownZero ^ RHSVal));
00582 
00583       // Now that we know which bits have carries, compute the known-1/0 sets.
00584 
00585       // Bits are known one if they are known zero in one operand and one in the
00586       // other, and there is no input carry.
00587       KnownOne = ((LHSKnownZero & RHSVal) |
00588                   (LHSKnownOne & ~RHSVal)) & ~CarryBits;
00589 
00590       // Bits are known zero if they are known zero in both operands and there
00591       // is no input carry.
00592       KnownZero = LHSKnownZero & ~RHSVal & ~CarryBits;
00593     } else {
00594       // If the high-bits of this ADD are not demanded, then it does not demand
00595       // the high bits of its LHS or RHS.
00596       if (DemandedMask[BitWidth-1] == 0) {
00597         // Right fill the mask of bits for this ADD to demand the most
00598         // significant bit and all those below it.
00599         APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
00600         if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
00601                                  LHSKnownZero, LHSKnownOne, Depth + 1) ||
00602             SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
00603                                  LHSKnownZero, LHSKnownOne, Depth + 1)) {
00604           cast<BinaryOperator>(I)->setHasNoSignedWrap(false);
00605           cast<BinaryOperator>(I)->setHasNoUnsignedWrap(false);
00606           return I;
00607         }
00608       }
00609     }
00610     break;
00611   }
00612   case Instruction::Sub:
00613     // If the high-bits of this SUB are not demanded, then it does not demand
00614     // the high bits of its LHS or RHS.
00615     if (DemandedMask[BitWidth-1] == 0) {
00616       // Right fill the mask of bits for this SUB to demand the most
00617       // significant bit and all those below it.
00618       uint32_t NLZ = DemandedMask.countLeadingZeros();
00619       APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
00620       if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
00621                                LHSKnownZero, LHSKnownOne, Depth + 1) ||
00622           SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
00623                                LHSKnownZero, LHSKnownOne, Depth + 1)) {
00624         cast<BinaryOperator>(I)->setHasNoSignedWrap(false);
00625         cast<BinaryOperator>(I)->setHasNoUnsignedWrap(false);
00626         return I;
00627       }
00628     }
00629 
00630     // Otherwise just hand the sub off to computeKnownBits to fill in
00631     // the known zeros and ones.
00632     computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
00633 
00634     // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
00635     // zero.
00636     if (ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(0))) {
00637       APInt I0 = C0->getValue();
00638       if ((I0 + 1).isPowerOf2() && (I0 | KnownZero).isAllOnesValue()) {
00639         Instruction *Xor = BinaryOperator::CreateXor(I->getOperand(1), C0);
00640         return InsertNewInstWith(Xor, *I);
00641       }
00642     }
00643     break;
00644   case Instruction::Shl:
00645     if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
00646       {
00647         Value *VarX; ConstantInt *C1;
00648         if (match(I->getOperand(0), m_Shr(m_Value(VarX), m_ConstantInt(C1)))) {
00649           Instruction *Shr = cast<Instruction>(I->getOperand(0));
00650           Value *R = SimplifyShrShlDemandedBits(Shr, I, DemandedMask,
00651                                                 KnownZero, KnownOne);
00652           if (R)
00653             return R;
00654         }
00655       }
00656 
00657       uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
00658       APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));
00659 
00660       // If the shift is NUW/NSW, then it does demand the high bits.
00661       ShlOperator *IOp = cast<ShlOperator>(I);
00662       if (IOp->hasNoSignedWrap())
00663         DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
00664       else if (IOp->hasNoUnsignedWrap())
00665         DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
00666 
00667       if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
00668                                KnownOne, Depth + 1))
00669         return I;
00670       assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
00671       KnownZero <<= ShiftAmt;
00672       KnownOne  <<= ShiftAmt;
00673       // low bits known zero.
00674       if (ShiftAmt)
00675         KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
00676     }
00677     break;
00678   case Instruction::LShr:
00679     // For a logical shift right
00680     if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
00681       uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
00682 
00683       // Unsigned shift right.
00684       APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
00685 
00686       // If the shift is exact, then it does demand the low bits (and knows that
00687       // they are zero).
00688       if (cast<LShrOperator>(I)->isExact())
00689         DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
00690 
00691       if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
00692                                KnownOne, Depth + 1))
00693         return I;
00694       assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
00695       KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
00696       KnownOne  = APIntOps::lshr(KnownOne, ShiftAmt);
00697       if (ShiftAmt) {
00698         // Compute the new bits that are at the top now.
00699         APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
00700         KnownZero |= HighBits;  // high bits known zero.
00701       }
00702     }
00703     break;
00704   case Instruction::AShr:
00705     // If this is an arithmetic shift right and only the low-bit is set, we can
00706     // always convert this into a logical shr, even if the shift amount is
00707     // variable.  The low bit of the shift cannot be an input sign bit unless
00708     // the shift amount is >= the size of the datatype, which is undefined.
00709     if (DemandedMask == 1) {
00710       // Perform the logical shift right.
00711       Instruction *NewVal = BinaryOperator::CreateLShr(
00712                         I->getOperand(0), I->getOperand(1), I->getName());
00713       return InsertNewInstWith(NewVal, *I);
00714     }
00715 
00716     // If the sign bit is the only bit demanded by this ashr, then there is no
00717     // need to do it, the shift doesn't change the high bit.
00718     if (DemandedMask.isSignBit())
00719       return I->getOperand(0);
00720 
00721     if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
00722       uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
00723 
00724       // Signed shift right.
00725       APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
00726       // If any of the "high bits" are demanded, we should set the sign bit as
00727       // demanded.
00728       if (DemandedMask.countLeadingZeros() <= ShiftAmt)
00729         DemandedMaskIn.setBit(BitWidth-1);
00730 
00731       // If the shift is exact, then it does demand the low bits (and knows that
00732       // they are zero).
00733       if (cast<AShrOperator>(I)->isExact())
00734         DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
00735 
00736       if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
00737                                KnownOne, Depth + 1))
00738         return I;
00739       assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
00740       // Compute the new bits that are at the top now.
00741       APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
00742       KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
00743       KnownOne  = APIntOps::lshr(KnownOne, ShiftAmt);
00744 
00745       // Handle the sign bits.
00746       APInt SignBit(APInt::getSignBit(BitWidth));
00747       // Adjust to where it is now in the mask.
00748       SignBit = APIntOps::lshr(SignBit, ShiftAmt);
00749 
00750       // If the input sign bit is known to be zero, or if none of the top bits
00751       // are demanded, turn this into an unsigned shift right.
00752       if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] ||
00753           (HighBits & ~DemandedMask) == HighBits) {
00754         // Perform the logical shift right.
00755         BinaryOperator *NewVal = BinaryOperator::CreateLShr(I->getOperand(0),
00756                                                             SA, I->getName());
00757         NewVal->setIsExact(cast<BinaryOperator>(I)->isExact());
00758         return InsertNewInstWith(NewVal, *I);
00759       } else if ((KnownOne & SignBit) != 0) { // New bits are known one.
00760         KnownOne |= HighBits;
00761       }
00762     }
00763     break;
00764   case Instruction::SRem:
00765     if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
00766       // X % -1 demands all the bits because we don't want to introduce
00767       // INT_MIN % -1 (== undef) by accident.
00768       if (Rem->isAllOnesValue())
00769         break;
00770       APInt RA = Rem->getValue().abs();
00771       if (RA.isPowerOf2()) {
00772         if (DemandedMask.ult(RA))    // srem won't affect demanded bits
00773           return I->getOperand(0);
00774 
00775         APInt LowBits = RA - 1;
00776         APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
00777         if (SimplifyDemandedBits(I->getOperandUse(0), Mask2, LHSKnownZero,
00778                                  LHSKnownOne, Depth + 1))
00779           return I;
00780 
00781         // The low bits of LHS are unchanged by the srem.
00782         KnownZero = LHSKnownZero & LowBits;
00783         KnownOne = LHSKnownOne & LowBits;
00784 
00785         // If LHS is non-negative or has all low bits zero, then the upper bits
00786         // are all zero.
00787         if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits))
00788           KnownZero |= ~LowBits;
00789 
00790         // If LHS is negative and not all low bits are zero, then the upper bits
00791         // are all one.
00792         if (LHSKnownOne[BitWidth-1] && ((LHSKnownOne & LowBits) != 0))
00793           KnownOne |= ~LowBits;
00794 
00795         assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
00796       }
00797     }
00798 
00799     // The sign bit is the LHS's sign bit, except when the result of the
00800     // remainder is zero.
00801     if (DemandedMask.isNegative() && KnownZero.isNonNegative()) {
00802       APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
00803       computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
00804                        CxtI);
00805       // If it's known zero, our sign bit is also zero.
00806       if (LHSKnownZero.isNegative())
00807         KnownZero.setBit(KnownZero.getBitWidth() - 1);
00808     }
00809     break;
00810   case Instruction::URem: {
00811     APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
00812     APInt AllOnes = APInt::getAllOnesValue(BitWidth);
00813     if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes, KnownZero2,
00814                              KnownOne2, Depth + 1) ||
00815         SimplifyDemandedBits(I->getOperandUse(1), AllOnes, KnownZero2,
00816                              KnownOne2, Depth + 1))
00817       return I;
00818 
00819     unsigned Leaders = KnownZero2.countLeadingOnes();
00820     Leaders = std::max(Leaders,
00821                        KnownZero2.countLeadingOnes());
00822     KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
00823     break;
00824   }
00825   case Instruction::Call:
00826     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
00827       switch (II->getIntrinsicID()) {
00828       default: break;
00829       case Intrinsic::bswap: {
00830         // If the only bits demanded come from one byte of the bswap result,
00831         // just shift the input byte into position to eliminate the bswap.
00832         unsigned NLZ = DemandedMask.countLeadingZeros();
00833         unsigned NTZ = DemandedMask.countTrailingZeros();
00834 
00835         // Round NTZ down to the next byte.  If we have 11 trailing zeros, then
00836         // we need all the bits down to bit 8.  Likewise, round NLZ.  If we
00837         // have 14 leading zeros, round to 8.
00838         NLZ &= ~7;
00839         NTZ &= ~7;
00840         // If we need exactly one byte, we can do this transformation.
00841         if (BitWidth-NLZ-NTZ == 8) {
00842           unsigned ResultBit = NTZ;
00843           unsigned InputBit = BitWidth-NTZ-8;
00844 
00845           // Replace this with either a left or right shift to get the byte into
00846           // the right place.
00847           Instruction *NewVal;
00848           if (InputBit > ResultBit)
00849             NewVal = BinaryOperator::CreateLShr(II->getArgOperand(0),
00850                     ConstantInt::get(I->getType(), InputBit-ResultBit));
00851           else
00852             NewVal = BinaryOperator::CreateShl(II->getArgOperand(0),
00853                     ConstantInt::get(I->getType(), ResultBit-InputBit));
00854           NewVal->takeName(I);
00855           return InsertNewInstWith(NewVal, *I);
00856         }
00857 
00858         // TODO: Could compute known zero/one bits based on the input.
00859         break;
00860       }
00861       case Intrinsic::x86_sse42_crc32_64_64:
00862         KnownZero = APInt::getHighBitsSet(64, 32);
00863         return nullptr;
00864       }
00865     }
00866     computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
00867     break;
00868   }
00869 
00870   // If the client is only demanding bits that we know, return the known
00871   // constant.
00872   if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask)
00873     return Constant::getIntegerValue(VTy, KnownOne);
00874   return nullptr;
00875 }
00876 
00877 /// Helper routine of SimplifyDemandedUseBits. It tries to simplify
00878 /// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into
00879 /// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign
00880 /// of "C2-C1".
00881 ///
00882 /// Suppose E1 and E2 are generally different in bits S={bm, bm+1,
00883 /// ..., bn}, without considering the specific value X is holding.
00884 /// This transformation is legal iff one of following conditions is hold:
00885 ///  1) All the bit in S are 0, in this case E1 == E2.
00886 ///  2) We don't care those bits in S, per the input DemandedMask.
00887 ///  3) Combination of 1) and 2). Some bits in S are 0, and we don't care the
00888 ///     rest bits.
00889 ///
00890 /// Currently we only test condition 2).
00891 ///
00892 /// As with SimplifyDemandedUseBits, it returns NULL if the simplification was
00893 /// not successful.
00894 Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
00895   Instruction *Shl, APInt DemandedMask, APInt &KnownZero, APInt &KnownOne) {
00896 
00897   const APInt &ShlOp1 = cast<ConstantInt>(Shl->getOperand(1))->getValue();
00898   const APInt &ShrOp1 = cast<ConstantInt>(Shr->getOperand(1))->getValue();
00899   if (!ShlOp1 || !ShrOp1)
00900       return nullptr; // Noop.
00901 
00902   Value *VarX = Shr->getOperand(0);
00903   Type *Ty = VarX->getType();
00904   unsigned BitWidth = Ty->getIntegerBitWidth();
00905   if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth))
00906     return nullptr; // Undef.
00907 
00908   unsigned ShlAmt = ShlOp1.getZExtValue();
00909   unsigned ShrAmt = ShrOp1.getZExtValue();
00910 
00911   KnownOne.clearAllBits();
00912   KnownZero = APInt::getBitsSet(KnownZero.getBitWidth(), 0, ShlAmt-1);
00913   KnownZero &= DemandedMask;
00914 
00915   APInt BitMask1(APInt::getAllOnesValue(BitWidth));
00916   APInt BitMask2(APInt::getAllOnesValue(BitWidth));
00917 
00918   bool isLshr = (Shr->getOpcode() == Instruction::LShr);
00919   BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) :
00920                       (BitMask1.ashr(ShrAmt) << ShlAmt);
00921 
00922   if (ShrAmt <= ShlAmt) {
00923     BitMask2 <<= (ShlAmt - ShrAmt);
00924   } else {
00925     BitMask2 = isLshr ? BitMask2.lshr(ShrAmt - ShlAmt):
00926                         BitMask2.ashr(ShrAmt - ShlAmt);
00927   }
00928 
00929   // Check if condition-2 (see the comment to this function) is satified.
00930   if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) {
00931     if (ShrAmt == ShlAmt)
00932       return VarX;
00933 
00934     if (!Shr->hasOneUse())
00935       return nullptr;
00936 
00937     BinaryOperator *New;
00938     if (ShrAmt < ShlAmt) {
00939       Constant *Amt = ConstantInt::get(VarX->getType(), ShlAmt - ShrAmt);
00940       New = BinaryOperator::CreateShl(VarX, Amt);
00941       BinaryOperator *Orig = cast<BinaryOperator>(Shl);
00942       New->setHasNoSignedWrap(Orig->hasNoSignedWrap());
00943       New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap());
00944     } else {
00945       Constant *Amt = ConstantInt::get(VarX->getType(), ShrAmt - ShlAmt);
00946       New = isLshr ? BinaryOperator::CreateLShr(VarX, Amt) :
00947                      BinaryOperator::CreateAShr(VarX, Amt);
00948       if (cast<BinaryOperator>(Shr)->isExact())
00949         New->setIsExact(true);
00950     }
00951 
00952     return InsertNewInstWith(New, *Shl);
00953   }
00954 
00955   return nullptr;
00956 }
00957 
00958 /// SimplifyDemandedVectorElts - The specified value produces a vector with
00959 /// any number of elements. DemandedElts contains the set of elements that are
00960 /// actually used by the caller.  This method analyzes which elements of the
00961 /// operand are undef and returns that information in UndefElts.
00962 ///
00963 /// If the information about demanded elements can be used to simplify the
00964 /// operation, the operation is simplified, then the resultant value is
00965 /// returned.  This returns null if no change was made.
00966 Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
00967                                                 APInt &UndefElts,
00968                                                 unsigned Depth) {
00969   unsigned VWidth = cast<VectorType>(V->getType())->getNumElements();
00970   APInt EltMask(APInt::getAllOnesValue(VWidth));
00971   assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
00972 
00973   if (isa<UndefValue>(V)) {
00974     // If the entire vector is undefined, just return this info.
00975     UndefElts = EltMask;
00976     return nullptr;
00977   }
00978 
00979   if (DemandedElts == 0) { // If nothing is demanded, provide undef.
00980     UndefElts = EltMask;
00981     return UndefValue::get(V->getType());
00982   }
00983 
00984   UndefElts = 0;
00985 
00986   // Handle ConstantAggregateZero, ConstantVector, ConstantDataSequential.
00987   if (Constant *C = dyn_cast<Constant>(V)) {
00988     // Check if this is identity. If so, return 0 since we are not simplifying
00989     // anything.
00990     if (DemandedElts.isAllOnesValue())
00991       return nullptr;
00992 
00993     Type *EltTy = cast<VectorType>(V->getType())->getElementType();
00994     Constant *Undef = UndefValue::get(EltTy);
00995 
00996     SmallVector<Constant*, 16> Elts;
00997     for (unsigned i = 0; i != VWidth; ++i) {
00998       if (!DemandedElts[i]) {   // If not demanded, set to undef.
00999         Elts.push_back(Undef);
01000         UndefElts.setBit(i);
01001         continue;
01002       }
01003 
01004       Constant *Elt = C->getAggregateElement(i);
01005       if (!Elt) return nullptr;
01006 
01007       if (isa<UndefValue>(Elt)) {   // Already undef.
01008         Elts.push_back(Undef);
01009         UndefElts.setBit(i);
01010       } else {                               // Otherwise, defined.
01011         Elts.push_back(Elt);
01012       }
01013     }
01014 
01015     // If we changed the constant, return it.
01016     Constant *NewCV = ConstantVector::get(Elts);
01017     return NewCV != C ? NewCV : nullptr;
01018   }
01019 
01020   // Limit search depth.
01021   if (Depth == 10)
01022     return nullptr;
01023 
01024   // If multiple users are using the root value, proceed with
01025   // simplification conservatively assuming that all elements
01026   // are needed.
01027   if (!V->hasOneUse()) {
01028     // Quit if we find multiple users of a non-root value though.
01029     // They'll be handled when it's their turn to be visited by
01030     // the main instcombine process.
01031     if (Depth != 0)
01032       // TODO: Just compute the UndefElts information recursively.
01033       return nullptr;
01034 
01035     // Conservatively assume that all elements are needed.
01036     DemandedElts = EltMask;
01037   }
01038 
01039   Instruction *I = dyn_cast<Instruction>(V);
01040   if (!I) return nullptr;        // Only analyze instructions.
01041 
01042   bool MadeChange = false;
01043   APInt UndefElts2(VWidth, 0);
01044   Value *TmpV;
01045   switch (I->getOpcode()) {
01046   default: break;
01047 
01048   case Instruction::InsertElement: {
01049     // If this is a variable index, we don't know which element it overwrites.
01050     // demand exactly the same input as we produce.
01051     ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
01052     if (!Idx) {
01053       // Note that we can't propagate undef elt info, because we don't know
01054       // which elt is getting updated.
01055       TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
01056                                         UndefElts2, Depth + 1);
01057       if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
01058       break;
01059     }
01060 
01061     // If this is inserting an element that isn't demanded, remove this
01062     // insertelement.
01063     unsigned IdxNo = Idx->getZExtValue();
01064     if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
01065       Worklist.Add(I);
01066       return I->getOperand(0);
01067     }
01068 
01069     // Otherwise, the element inserted overwrites whatever was there, so the
01070     // input demanded set is simpler than the output set.
01071     APInt DemandedElts2 = DemandedElts;
01072     DemandedElts2.clearBit(IdxNo);
01073     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2,
01074                                       UndefElts, Depth + 1);
01075     if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
01076 
01077     // The inserted element is defined.
01078     UndefElts.clearBit(IdxNo);
01079     break;
01080   }
01081   case Instruction::ShuffleVector: {
01082     ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
01083     uint64_t LHSVWidth =
01084       cast<VectorType>(Shuffle->getOperand(0)->getType())->getNumElements();
01085     APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0);
01086     for (unsigned i = 0; i < VWidth; i++) {
01087       if (DemandedElts[i]) {
01088         unsigned MaskVal = Shuffle->getMaskValue(i);
01089         if (MaskVal != -1u) {
01090           assert(MaskVal < LHSVWidth * 2 &&
01091                  "shufflevector mask index out of range!");
01092           if (MaskVal < LHSVWidth)
01093             LeftDemanded.setBit(MaskVal);
01094           else
01095             RightDemanded.setBit(MaskVal - LHSVWidth);
01096         }
01097       }
01098     }
01099 
01100     APInt UndefElts4(LHSVWidth, 0);
01101     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
01102                                       UndefElts4, Depth + 1);
01103     if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
01104 
01105     APInt UndefElts3(LHSVWidth, 0);
01106     TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
01107                                       UndefElts3, Depth + 1);
01108     if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
01109 
01110     bool NewUndefElts = false;
01111     for (unsigned i = 0; i < VWidth; i++) {
01112       unsigned MaskVal = Shuffle->getMaskValue(i);
01113       if (MaskVal == -1u) {
01114         UndefElts.setBit(i);
01115       } else if (!DemandedElts[i]) {
01116         NewUndefElts = true;
01117         UndefElts.setBit(i);
01118       } else if (MaskVal < LHSVWidth) {
01119         if (UndefElts4[MaskVal]) {
01120           NewUndefElts = true;
01121           UndefElts.setBit(i);
01122         }
01123       } else {
01124         if (UndefElts3[MaskVal - LHSVWidth]) {
01125           NewUndefElts = true;
01126           UndefElts.setBit(i);
01127         }
01128       }
01129     }
01130 
01131     if (NewUndefElts) {
01132       // Add additional discovered undefs.
01133       SmallVector<Constant*, 16> Elts;
01134       for (unsigned i = 0; i < VWidth; ++i) {
01135         if (UndefElts[i])
01136           Elts.push_back(UndefValue::get(Type::getInt32Ty(I->getContext())));
01137         else
01138           Elts.push_back(ConstantInt::get(Type::getInt32Ty(I->getContext()),
01139                                           Shuffle->getMaskValue(i)));
01140       }
01141       I->setOperand(2, ConstantVector::get(Elts));
01142       MadeChange = true;
01143     }
01144     break;
01145   }
01146   case Instruction::Select: {
01147     APInt LeftDemanded(DemandedElts), RightDemanded(DemandedElts);
01148     if (ConstantVector* CV = dyn_cast<ConstantVector>(I->getOperand(0))) {
01149       for (unsigned i = 0; i < VWidth; i++) {
01150         if (CV->getAggregateElement(i)->isNullValue())
01151           LeftDemanded.clearBit(i);
01152         else
01153           RightDemanded.clearBit(i);
01154       }
01155     }
01156 
01157     TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded, UndefElts,
01158                                       Depth + 1);
01159     if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
01160 
01161     TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded,
01162                                       UndefElts2, Depth + 1);
01163     if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; }
01164 
01165     // Output elements are undefined if both are undefined.
01166     UndefElts &= UndefElts2;
01167     break;
01168   }
01169   case Instruction::BitCast: {
01170     // Vector->vector casts only.
01171     VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
01172     if (!VTy) break;
01173     unsigned InVWidth = VTy->getNumElements();
01174     APInt InputDemandedElts(InVWidth, 0);
01175     unsigned Ratio;
01176 
01177     if (VWidth == InVWidth) {
01178       // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
01179       // elements as are demanded of us.
01180       Ratio = 1;
01181       InputDemandedElts = DemandedElts;
01182     } else if (VWidth > InVWidth) {
01183       // Untested so far.
01184       break;
01185 
01186       // If there are more elements in the result than there are in the source,
01187       // then an input element is live if any of the corresponding output
01188       // elements are live.
01189       Ratio = VWidth/InVWidth;
01190       for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
01191         if (DemandedElts[OutIdx])
01192           InputDemandedElts.setBit(OutIdx/Ratio);
01193       }
01194     } else {
01195       // Untested so far.
01196       break;
01197 
01198       // If there are more elements in the source than there are in the result,
01199       // then an input element is live if the corresponding output element is
01200       // live.
01201       Ratio = InVWidth/VWidth;
01202       for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
01203         if (DemandedElts[InIdx/Ratio])
01204           InputDemandedElts.setBit(InIdx);
01205     }
01206 
01207     // div/rem demand all inputs, because they don't want divide by zero.
01208     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
01209                                       UndefElts2, Depth + 1);
01210     if (TmpV) {
01211       I->setOperand(0, TmpV);
01212       MadeChange = true;
01213     }
01214 
01215     UndefElts = UndefElts2;
01216     if (VWidth > InVWidth) {
01217       llvm_unreachable("Unimp");
01218       // If there are more elements in the result than there are in the source,
01219       // then an output element is undef if the corresponding input element is
01220       // undef.
01221       for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
01222         if (UndefElts2[OutIdx/Ratio])
01223           UndefElts.setBit(OutIdx);
01224     } else if (VWidth < InVWidth) {
01225       llvm_unreachable("Unimp");
01226       // If there are more elements in the source than there are in the result,
01227       // then a result element is undef if all of the corresponding input
01228       // elements are undef.
01229       UndefElts = ~0ULL >> (64-VWidth);  // Start out all undef.
01230       for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
01231         if (!UndefElts2[InIdx])            // Not undef?
01232           UndefElts.clearBit(InIdx/Ratio);    // Clear undef bit.
01233     }
01234     break;
01235   }
01236   case Instruction::And:
01237   case Instruction::Or:
01238   case Instruction::Xor:
01239   case Instruction::Add:
01240   case Instruction::Sub:
01241   case Instruction::Mul:
01242     // div/rem demand all inputs, because they don't want divide by zero.
01243     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
01244                                       Depth + 1);
01245     if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
01246     TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
01247                                       UndefElts2, Depth + 1);
01248     if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
01249 
01250     // Output elements are undefined if both are undefined.  Consider things
01251     // like undef&0.  The result is known zero, not undef.
01252     UndefElts &= UndefElts2;
01253     break;
01254   case Instruction::FPTrunc:
01255   case Instruction::FPExt:
01256     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
01257                                       Depth + 1);
01258     if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
01259     break;
01260 
01261   case Instruction::Call: {
01262     IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
01263     if (!II) break;
01264     switch (II->getIntrinsicID()) {
01265     default: break;
01266 
01267     // Binary vector operations that work column-wise.  A dest element is a
01268     // function of the corresponding input elements from the two inputs.
01269     case Intrinsic::x86_sse_sub_ss:
01270     case Intrinsic::x86_sse_mul_ss:
01271     case Intrinsic::x86_sse_min_ss:
01272     case Intrinsic::x86_sse_max_ss:
01273     case Intrinsic::x86_sse2_sub_sd:
01274     case Intrinsic::x86_sse2_mul_sd:
01275     case Intrinsic::x86_sse2_min_sd:
01276     case Intrinsic::x86_sse2_max_sd:
01277       TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
01278                                         UndefElts, Depth + 1);
01279       if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
01280       TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
01281                                         UndefElts2, Depth + 1);
01282       if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
01283 
01284       // If only the low elt is demanded and this is a scalarizable intrinsic,
01285       // scalarize it now.
01286       if (DemandedElts == 1) {
01287         switch (II->getIntrinsicID()) {
01288         default: break;
01289         case Intrinsic::x86_sse_sub_ss:
01290         case Intrinsic::x86_sse_mul_ss:
01291         case Intrinsic::x86_sse2_sub_sd:
01292         case Intrinsic::x86_sse2_mul_sd:
01293           // TODO: Lower MIN/MAX/ABS/etc
01294           Value *LHS = II->getArgOperand(0);
01295           Value *RHS = II->getArgOperand(1);
01296           // Extract the element as scalars.
01297           LHS = InsertNewInstWith(ExtractElementInst::Create(LHS,
01298             ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
01299           RHS = InsertNewInstWith(ExtractElementInst::Create(RHS,
01300             ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
01301 
01302           switch (II->getIntrinsicID()) {
01303           default: llvm_unreachable("Case stmts out of sync!");
01304           case Intrinsic::x86_sse_sub_ss:
01305           case Intrinsic::x86_sse2_sub_sd:
01306             TmpV = InsertNewInstWith(BinaryOperator::CreateFSub(LHS, RHS,
01307                                                         II->getName()), *II);
01308             break;
01309           case Intrinsic::x86_sse_mul_ss:
01310           case Intrinsic::x86_sse2_mul_sd:
01311             TmpV = InsertNewInstWith(BinaryOperator::CreateFMul(LHS, RHS,
01312                                                          II->getName()), *II);
01313             break;
01314           }
01315 
01316           Instruction *New =
01317             InsertElementInst::Create(
01318               UndefValue::get(II->getType()), TmpV,
01319               ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U, false),
01320                                       II->getName());
01321           InsertNewInstWith(New, *II);
01322           return New;
01323         }
01324       }
01325 
01326       // Output elements are undefined if both are undefined.  Consider things
01327       // like undef&0.  The result is known zero, not undef.
01328       UndefElts &= UndefElts2;
01329       break;
01330     }
01331     break;
01332   }
01333   }
01334   return MadeChange ? I : nullptr;
01335 }