LLVM  mainline
InstCombineSimplifyDemanded.cpp
Go to the documentation of this file.
00001 //===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file contains logic for simplifying instructions based on information
00011 // about how they are used.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #include "InstCombineInternal.h"
00016 #include "llvm/IR/IntrinsicInst.h"
00017 #include "llvm/IR/PatternMatch.h"
00018 
00019 using namespace llvm;
00020 using namespace llvm::PatternMatch;
00021 
00022 #define DEBUG_TYPE "instcombine"
00023 
00024 /// ShrinkDemandedConstant - Check to see if the specified operand of the
00025 /// specified instruction is a constant integer.  If so, check to see if there
00026 /// are any bits set in the constant that are not demanded.  If so, shrink the
00027 /// constant and return true.
00028 static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
00029                                    APInt Demanded) {
00030   assert(I && "No instruction?");
00031   assert(OpNo < I->getNumOperands() && "Operand index too large");
00032 
00033   // If the operand is not a constant integer, nothing to do.
00034   ConstantInt *OpC = dyn_cast<ConstantInt>(I->getOperand(OpNo));
00035   if (!OpC) return false;
00036 
00037   // If there are no bits set that aren't demanded, nothing to do.
00038   Demanded = Demanded.zextOrTrunc(OpC->getValue().getBitWidth());
00039   if ((~Demanded & OpC->getValue()) == 0)
00040     return false;
00041 
00042   // This instruction is producing bits that are not demanded. Shrink the RHS.
00043   Demanded &= OpC->getValue();
00044   I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded));
00045 
00046   // If either 'nsw' or 'nuw' is set and the constant is negative,
00047   // removing *any* bits from the constant could make overflow occur.
00048   // Remove 'nsw' and 'nuw' from the instruction in this case.
00049   if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I)) {
00050     assert(OBO->getOpcode() == Instruction::Add);
00051     if (OBO->hasNoSignedWrap() || OBO->hasNoUnsignedWrap()) {
00052       if (OpC->getValue().isNegative()) {
00053         cast<BinaryOperator>(OBO)->setHasNoSignedWrap(false);
00054         cast<BinaryOperator>(OBO)->setHasNoUnsignedWrap(false);
00055       }
00056     }
00057   }
00058 
00059   return true;
00060 }
00061 
00062 
00063 
00064 /// SimplifyDemandedInstructionBits - Inst is an integer instruction that
00065 /// SimplifyDemandedBits knows about.  See if the instruction has any
00066 /// properties that allow us to simplify its operands.
00067 bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
00068   unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
00069   APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
00070   APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
00071 
00072   Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, KnownZero, KnownOne,
00073                                      0, &Inst);
00074   if (!V) return false;
00075   if (V == &Inst) return true;
00076   ReplaceInstUsesWith(Inst, V);
00077   return true;
00078 }
00079 
00080 /// SimplifyDemandedBits - This form of SimplifyDemandedBits simplifies the
00081 /// specified instruction operand if possible, updating it in place.  It returns
00082 /// true if it made any change and false otherwise.
00083 bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
00084                                         APInt &KnownZero, APInt &KnownOne,
00085                                         unsigned Depth) {
00086   Value *NewVal =
00087       SimplifyDemandedUseBits(U.get(), DemandedMask, KnownZero, KnownOne, Depth,
00088                               dyn_cast<Instruction>(U.getUser()));
00089   if (!NewVal) return false;
00090   U = NewVal;
00091   return true;
00092 }
00093 
00094 
00095 /// SimplifyDemandedUseBits - This function attempts to replace V with a simpler
00096 /// value based on the demanded bits.  When this function is called, it is known
00097 /// that only the bits set in DemandedMask of the result of V are ever used
00098 /// downstream. Consequently, depending on the mask and V, it may be possible
00099 /// to replace V with a constant or one of its operands. In such cases, this
00100 /// function does the replacement and returns true. In all other cases, it
00101 /// returns false after analyzing the expression and setting KnownOne and known
00102 /// to be one in the expression.  KnownZero contains all the bits that are known
00103 /// to be zero in the expression. These are provided to potentially allow the
00104 /// caller (which might recursively be SimplifyDemandedBits itself) to simplify
00105 /// the expression. KnownOne and KnownZero always follow the invariant that
00106 /// KnownOne & KnownZero == 0. That is, a bit can't be both 1 and 0. Note that
00107 /// the bits in KnownOne and KnownZero may only be accurate for those bits set
00108 /// in DemandedMask. Note also that the bitwidth of V, DemandedMask, KnownZero
00109 /// and KnownOne must all be the same.
00110 ///
00111 /// This returns null if it did not change anything and it permits no
00112 /// simplification.  This returns V itself if it did some simplification of V's
00113 /// operands based on the information about what bits are demanded. This returns
00114 /// some other non-null value if it found out that V is equal to another value
00115 /// in the context where the specified bits are demanded, but not for all users.
00116 Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
00117                                              APInt &KnownZero, APInt &KnownOne,
00118                                              unsigned Depth,
00119                                              Instruction *CxtI) {
00120   assert(V != nullptr && "Null pointer of Value???");
00121   assert(Depth <= 6 && "Limit Search Depth");
00122   uint32_t BitWidth = DemandedMask.getBitWidth();
00123   Type *VTy = V->getType();
00124   assert(
00125       (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) &&
00126       KnownZero.getBitWidth() == BitWidth &&
00127       KnownOne.getBitWidth() == BitWidth &&
00128       "Value *V, DemandedMask, KnownZero and KnownOne "
00129       "must have same BitWidth");
00130   if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
00131     // We know all of the bits for a constant!
00132     KnownOne = CI->getValue() & DemandedMask;
00133     KnownZero = ~KnownOne & DemandedMask;
00134     return nullptr;
00135   }
00136   if (isa<ConstantPointerNull>(V)) {
00137     // We know all of the bits for a constant!
00138     KnownOne.clearAllBits();
00139     KnownZero = DemandedMask;
00140     return nullptr;
00141   }
00142 
00143   KnownZero.clearAllBits();
00144   KnownOne.clearAllBits();
00145   if (DemandedMask == 0) {   // Not demanding any bits from V.
00146     if (isa<UndefValue>(V))
00147       return nullptr;
00148     return UndefValue::get(VTy);
00149   }
00150 
00151   if (Depth == 6)        // Limit search depth.
00152     return nullptr;
00153 
00154   APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
00155   APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
00156 
00157   Instruction *I = dyn_cast<Instruction>(V);
00158   if (!I) {
00159     computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
00160     return nullptr;        // Only analyze instructions.
00161   }
00162 
00163   // If there are multiple uses of this value and we aren't at the root, then
00164   // we can't do any simplifications of the operands, because DemandedMask
00165   // only reflects the bits demanded by *one* of the users.
00166   if (Depth != 0 && !I->hasOneUse()) {
00167     // Despite the fact that we can't simplify this instruction in all User's
00168     // context, we can at least compute the knownzero/knownone bits, and we can
00169     // do simplifications that apply to *just* the one user if we know that
00170     // this instruction has a simpler value in that context.
00171     if (I->getOpcode() == Instruction::And) {
00172       // If either the LHS or the RHS are Zero, the result is zero.
00173       computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
00174                        CxtI);
00175       computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
00176                        CxtI);
00177 
00178       // If all of the demanded bits are known 1 on one side, return the other.
00179       // These bits cannot contribute to the result of the 'and' in this
00180       // context.
00181       if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
00182           (DemandedMask & ~LHSKnownZero))
00183         return I->getOperand(0);
00184       if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
00185           (DemandedMask & ~RHSKnownZero))
00186         return I->getOperand(1);
00187 
00188       // If all of the demanded bits in the inputs are known zeros, return zero.
00189       if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
00190         return Constant::getNullValue(VTy);
00191 
00192     } else if (I->getOpcode() == Instruction::Or) {
00193       // We can simplify (X|Y) -> X or Y in the user's context if we know that
00194       // only bits from X or Y are demanded.
00195 
00196       // If either the LHS or the RHS are One, the result is One.
00197       computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
00198                        CxtI);
00199       computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
00200                        CxtI);
00201 
00202       // If all of the demanded bits are known zero on one side, return the
00203       // other.  These bits cannot contribute to the result of the 'or' in this
00204       // context.
00205       if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
00206           (DemandedMask & ~LHSKnownOne))
00207         return I->getOperand(0);
00208       if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
00209           (DemandedMask & ~RHSKnownOne))
00210         return I->getOperand(1);
00211 
00212       // If all of the potentially set bits on one side are known to be set on
00213       // the other side, just use the 'other' side.
00214       if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
00215           (DemandedMask & (~RHSKnownZero)))
00216         return I->getOperand(0);
00217       if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
00218           (DemandedMask & (~LHSKnownZero)))
00219         return I->getOperand(1);
00220     } else if (I->getOpcode() == Instruction::Xor) {
00221       // We can simplify (X^Y) -> X or Y in the user's context if we know that
00222       // only bits from X or Y are demanded.
00223 
00224       computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
00225                        CxtI);
00226       computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
00227                        CxtI);
00228 
00229       // If all of the demanded bits are known zero on one side, return the
00230       // other.
00231       if ((DemandedMask & RHSKnownZero) == DemandedMask)
00232         return I->getOperand(0);
00233       if ((DemandedMask & LHSKnownZero) == DemandedMask)
00234         return I->getOperand(1);
00235     }
00236 
00237     // Compute the KnownZero/KnownOne bits to simplify things downstream.
00238     computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI);
00239     return nullptr;
00240   }
00241 
00242   // If this is the root being simplified, allow it to have multiple uses,
00243   // just set the DemandedMask to all bits so that we can try to simplify the
00244   // operands.  This allows visitTruncInst (for example) to simplify the
00245   // operand of a trunc without duplicating all the logic below.
00246   if (Depth == 0 && !V->hasOneUse())
00247     DemandedMask = APInt::getAllOnesValue(BitWidth);
00248 
00249   switch (I->getOpcode()) {
00250   default:
00251     computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI);
00252     break;
00253   case Instruction::And:
00254     // If either the LHS or the RHS are Zero, the result is zero.
00255     if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
00256                              RHSKnownOne, Depth + 1) ||
00257         SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero,
00258                              LHSKnownZero, LHSKnownOne, Depth + 1))
00259       return I;
00260     assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
00261     assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
00262 
00263     // If the client is only demanding bits that we know, return the known
00264     // constant.
00265     if ((DemandedMask & ((RHSKnownZero | LHSKnownZero)|
00266                          (RHSKnownOne & LHSKnownOne))) == DemandedMask)
00267       return Constant::getIntegerValue(VTy, RHSKnownOne & LHSKnownOne);
00268 
00269     // If all of the demanded bits are known 1 on one side, return the other.
00270     // These bits cannot contribute to the result of the 'and'.
00271     if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
00272         (DemandedMask & ~LHSKnownZero))
00273       return I->getOperand(0);
00274     if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
00275         (DemandedMask & ~RHSKnownZero))
00276       return I->getOperand(1);
00277 
00278     // If all of the demanded bits in the inputs are known zeros, return zero.
00279     if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
00280       return Constant::getNullValue(VTy);
00281 
00282     // If the RHS is a constant, see if we can simplify it.
00283     if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero))
00284       return I;
00285 
00286     // Output known-1 bits are only known if set in both the LHS & RHS.
00287     KnownOne = RHSKnownOne & LHSKnownOne;
00288     // Output known-0 are known to be clear if zero in either the LHS | RHS.
00289     KnownZero = RHSKnownZero | LHSKnownZero;
00290     break;
00291   case Instruction::Or:
00292     // If either the LHS or the RHS are One, the result is One.
00293     if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
00294                              RHSKnownOne, Depth + 1) ||
00295         SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne,
00296                              LHSKnownZero, LHSKnownOne, Depth + 1))
00297       return I;
00298     assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
00299     assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
00300 
00301     // If the client is only demanding bits that we know, return the known
00302     // constant.
00303     if ((DemandedMask & ((RHSKnownZero & LHSKnownZero)|
00304                          (RHSKnownOne | LHSKnownOne))) == DemandedMask)
00305       return Constant::getIntegerValue(VTy, RHSKnownOne | LHSKnownOne);
00306 
00307     // If all of the demanded bits are known zero on one side, return the other.
00308     // These bits cannot contribute to the result of the 'or'.
00309     if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
00310         (DemandedMask & ~LHSKnownOne))
00311       return I->getOperand(0);
00312     if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
00313         (DemandedMask & ~RHSKnownOne))
00314       return I->getOperand(1);
00315 
00316     // If all of the potentially set bits on one side are known to be set on
00317     // the other side, just use the 'other' side.
00318     if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
00319         (DemandedMask & (~RHSKnownZero)))
00320       return I->getOperand(0);
00321     if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
00322         (DemandedMask & (~LHSKnownZero)))
00323       return I->getOperand(1);
00324 
00325     // If the RHS is a constant, see if we can simplify it.
00326     if (ShrinkDemandedConstant(I, 1, DemandedMask))
00327       return I;
00328 
00329     // Output known-0 bits are only known if clear in both the LHS & RHS.
00330     KnownZero = RHSKnownZero & LHSKnownZero;
00331     // Output known-1 are known to be set if set in either the LHS | RHS.
00332     KnownOne = RHSKnownOne | LHSKnownOne;
00333     break;
00334   case Instruction::Xor: {
00335     if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
00336                              RHSKnownOne, Depth + 1) ||
00337         SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, LHSKnownZero,
00338                              LHSKnownOne, Depth + 1))
00339       return I;
00340     assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
00341     assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
00342 
00343     // Output known-0 bits are known if clear or set in both the LHS & RHS.
00344     APInt IKnownZero = (RHSKnownZero & LHSKnownZero) |
00345                        (RHSKnownOne & LHSKnownOne);
00346     // Output known-1 are known to be set if set in only one of the LHS, RHS.
00347     APInt IKnownOne =  (RHSKnownZero & LHSKnownOne) |
00348                        (RHSKnownOne & LHSKnownZero);
00349 
00350     // If the client is only demanding bits that we know, return the known
00351     // constant.
00352     if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask)
00353       return Constant::getIntegerValue(VTy, IKnownOne);
00354 
00355     // If all of the demanded bits are known zero on one side, return the other.
00356     // These bits cannot contribute to the result of the 'xor'.
00357     if ((DemandedMask & RHSKnownZero) == DemandedMask)
00358       return I->getOperand(0);
00359     if ((DemandedMask & LHSKnownZero) == DemandedMask)
00360       return I->getOperand(1);
00361 
00362     // If all of the demanded bits are known to be zero on one side or the
00363     // other, turn this into an *inclusive* or.
00364     //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
00365     if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) {
00366       Instruction *Or =
00367         BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
00368                                  I->getName());
00369       return InsertNewInstWith(Or, *I);
00370     }
00371 
00372     // If all of the demanded bits on one side are known, and all of the set
00373     // bits on that side are also known to be set on the other side, turn this
00374     // into an AND, as we know the bits will be cleared.
00375     //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
00376     if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) {
00377       // all known
00378       if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) {
00379         Constant *AndC = Constant::getIntegerValue(VTy,
00380                                                    ~RHSKnownOne & DemandedMask);
00381         Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
00382         return InsertNewInstWith(And, *I);
00383       }
00384     }
00385 
00386     // If the RHS is a constant, see if we can simplify it.
00387     // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
00388     if (ShrinkDemandedConstant(I, 1, DemandedMask))
00389       return I;
00390 
00391     // If our LHS is an 'and' and if it has one use, and if any of the bits we
00392     // are flipping are known to be set, then the xor is just resetting those
00393     // bits to zero.  We can just knock out bits from the 'and' and the 'xor',
00394     // simplifying both of them.
00395     if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0)))
00396       if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
00397           isa<ConstantInt>(I->getOperand(1)) &&
00398           isa<ConstantInt>(LHSInst->getOperand(1)) &&
00399           (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) {
00400         ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1));
00401         ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1));
00402         APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask);
00403 
00404         Constant *AndC =
00405           ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
00406         Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
00407         InsertNewInstWith(NewAnd, *I);
00408 
00409         Constant *XorC =
00410           ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
00411         Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC);
00412         return InsertNewInstWith(NewXor, *I);
00413       }
00414 
00415     // Output known-0 bits are known if clear or set in both the LHS & RHS.
00416     KnownZero= (RHSKnownZero & LHSKnownZero) | (RHSKnownOne & LHSKnownOne);
00417     // Output known-1 are known to be set if set in only one of the LHS, RHS.
00418     KnownOne = (RHSKnownZero & LHSKnownOne) | (RHSKnownOne & LHSKnownZero);
00419     break;
00420   }
00421   case Instruction::Select:
00422     if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask, RHSKnownZero,
00423                              RHSKnownOne, Depth + 1) ||
00424         SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, LHSKnownZero,
00425                              LHSKnownOne, Depth + 1))
00426       return I;
00427     assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
00428     assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
00429 
00430     // If the operands are constants, see if we can simplify them.
00431     if (ShrinkDemandedConstant(I, 1, DemandedMask) ||
00432         ShrinkDemandedConstant(I, 2, DemandedMask))
00433       return I;
00434 
00435     // Only known if known in both the LHS and RHS.
00436     KnownOne = RHSKnownOne & LHSKnownOne;
00437     KnownZero = RHSKnownZero & LHSKnownZero;
00438     break;
00439   case Instruction::Trunc: {
00440     unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits();
00441     DemandedMask = DemandedMask.zext(truncBf);
00442     KnownZero = KnownZero.zext(truncBf);
00443     KnownOne = KnownOne.zext(truncBf);
00444     if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
00445                              KnownOne, Depth + 1))
00446       return I;
00447     DemandedMask = DemandedMask.trunc(BitWidth);
00448     KnownZero = KnownZero.trunc(BitWidth);
00449     KnownOne = KnownOne.trunc(BitWidth);
00450     assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
00451     break;
00452   }
00453   case Instruction::BitCast:
00454     if (!I->getOperand(0)->getType()->isIntOrIntVectorTy())
00455       return nullptr;  // vector->int or fp->int?
00456 
00457     if (VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
00458       if (VectorType *SrcVTy =
00459             dyn_cast<VectorType>(I->getOperand(0)->getType())) {
00460         if (DstVTy->getNumElements() != SrcVTy->getNumElements())
00461           // Don't touch a bitcast between vectors of different element counts.
00462           return nullptr;
00463       } else
00464         // Don't touch a scalar-to-vector bitcast.
00465         return nullptr;
00466     } else if (I->getOperand(0)->getType()->isVectorTy())
00467       // Don't touch a vector-to-scalar bitcast.
00468       return nullptr;
00469 
00470     if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
00471                              KnownOne, Depth + 1))
00472       return I;
00473     assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
00474     break;
00475   case Instruction::ZExt: {
00476     // Compute the bits in the result that are not present in the input.
00477     unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
00478 
00479     DemandedMask = DemandedMask.trunc(SrcBitWidth);
00480     KnownZero = KnownZero.trunc(SrcBitWidth);
00481     KnownOne = KnownOne.trunc(SrcBitWidth);
00482     if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
00483                              KnownOne, Depth + 1))
00484       return I;
00485     DemandedMask = DemandedMask.zext(BitWidth);
00486     KnownZero = KnownZero.zext(BitWidth);
00487     KnownOne = KnownOne.zext(BitWidth);
00488     assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
00489     // The top bits are known to be zero.
00490     KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
00491     break;
00492   }
00493   case Instruction::SExt: {
00494     // Compute the bits in the result that are not present in the input.
00495     unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
00496 
00497     APInt InputDemandedBits = DemandedMask &
00498                               APInt::getLowBitsSet(BitWidth, SrcBitWidth);
00499 
00500     APInt NewBits(APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth));
00501     // If any of the sign extended bits are demanded, we know that the sign
00502     // bit is demanded.
00503     if ((NewBits & DemandedMask) != 0)
00504       InputDemandedBits.setBit(SrcBitWidth-1);
00505 
00506     InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth);
00507     KnownZero = KnownZero.trunc(SrcBitWidth);
00508     KnownOne = KnownOne.trunc(SrcBitWidth);
00509     if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits, KnownZero,
00510                              KnownOne, Depth + 1))
00511       return I;
00512     InputDemandedBits = InputDemandedBits.zext(BitWidth);
00513     KnownZero = KnownZero.zext(BitWidth);
00514     KnownOne = KnownOne.zext(BitWidth);
00515     assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
00516 
00517     // If the sign bit of the input is known set or clear, then we know the
00518     // top bits of the result.
00519 
00520     // If the input sign bit is known zero, or if the NewBits are not demanded
00521     // convert this into a zero extension.
00522     if (KnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) {
00523       // Convert to ZExt cast
00524       CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
00525       return InsertNewInstWith(NewCast, *I);
00526     } else if (KnownOne[SrcBitWidth-1]) {    // Input sign bit known set
00527       KnownOne |= NewBits;
00528     }
00529     break;
00530   }
00531   case Instruction::Add: {
00532     // Figure out what the input bits are.  If the top bits of the and result
00533     // are not demanded, then the add doesn't demand them from its input
00534     // either.
00535     unsigned NLZ = DemandedMask.countLeadingZeros();
00536 
00537     // If there is a constant on the RHS, there are a variety of xformations
00538     // we can do.
00539     if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
00540       // If null, this should be simplified elsewhere.  Some of the xforms here
00541       // won't work if the RHS is zero.
00542       if (RHS->isZero())
00543         break;
00544 
00545       // If the top bit of the output is demanded, demand everything from the
00546       // input.  Otherwise, we demand all the input bits except NLZ top bits.
00547       APInt InDemandedBits(APInt::getLowBitsSet(BitWidth, BitWidth - NLZ));
00548 
00549       // Find information about known zero/one bits in the input.
00550       if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits,
00551                                LHSKnownZero, LHSKnownOne, Depth + 1))
00552         return I;
00553 
00554       // If the RHS of the add has bits set that can't affect the input, reduce
00555       // the constant.
00556       if (ShrinkDemandedConstant(I, 1, InDemandedBits))
00557         return I;
00558 
00559       // Avoid excess work.
00560       if (LHSKnownZero == 0 && LHSKnownOne == 0)
00561         break;
00562 
00563       // Turn it into OR if input bits are zero.
00564       if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) {
00565         Instruction *Or =
00566           BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
00567                                    I->getName());
00568         return InsertNewInstWith(Or, *I);
00569       }
00570 
00571       // We can say something about the output known-zero and known-one bits,
00572       // depending on potential carries from the input constant and the
00573       // unknowns.  For example if the LHS is known to have at most the 0x0F0F0
00574       // bits set and the RHS constant is 0x01001, then we know we have a known
00575       // one mask of 0x00001 and a known zero mask of 0xE0F0E.
00576 
00577       // To compute this, we first compute the potential carry bits.  These are
00578       // the bits which may be modified.  I'm not aware of a better way to do
00579       // this scan.
00580       const APInt &RHSVal = RHS->getValue();
00581       APInt CarryBits((~LHSKnownZero + RHSVal) ^ (~LHSKnownZero ^ RHSVal));
00582 
00583       // Now that we know which bits have carries, compute the known-1/0 sets.
00584 
00585       // Bits are known one if they are known zero in one operand and one in the
00586       // other, and there is no input carry.
00587       KnownOne = ((LHSKnownZero & RHSVal) |
00588                   (LHSKnownOne & ~RHSVal)) & ~CarryBits;
00589 
00590       // Bits are known zero if they are known zero in both operands and there
00591       // is no input carry.
00592       KnownZero = LHSKnownZero & ~RHSVal & ~CarryBits;
00593     } else {
00594       // If the high-bits of this ADD are not demanded, then it does not demand
00595       // the high bits of its LHS or RHS.
00596       if (DemandedMask[BitWidth-1] == 0) {
00597         // Right fill the mask of bits for this ADD to demand the most
00598         // significant bit and all those below it.
00599         APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
00600         if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
00601                                  LHSKnownZero, LHSKnownOne, Depth + 1) ||
00602             SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
00603                                  LHSKnownZero, LHSKnownOne, Depth + 1))
00604           return I;
00605       }
00606     }
00607     break;
00608   }
00609   case Instruction::Sub:
00610     // If the high-bits of this SUB are not demanded, then it does not demand
00611     // the high bits of its LHS or RHS.
00612     if (DemandedMask[BitWidth-1] == 0) {
00613       // Right fill the mask of bits for this SUB to demand the most
00614       // significant bit and all those below it.
00615       uint32_t NLZ = DemandedMask.countLeadingZeros();
00616       APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
00617       if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
00618                                LHSKnownZero, LHSKnownOne, Depth + 1) ||
00619           SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
00620                                LHSKnownZero, LHSKnownOne, Depth + 1))
00621         return I;
00622     }
00623 
00624     // Otherwise just hand the sub off to computeKnownBits to fill in
00625     // the known zeros and ones.
00626     computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
00627 
00628     // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
00629     // zero.
00630     if (ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(0))) {
00631       APInt I0 = C0->getValue();
00632       if ((I0 + 1).isPowerOf2() && (I0 | KnownZero).isAllOnesValue()) {
00633         Instruction *Xor = BinaryOperator::CreateXor(I->getOperand(1), C0);
00634         return InsertNewInstWith(Xor, *I);
00635       }
00636     }
00637     break;
00638   case Instruction::Shl:
00639     if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
00640       {
00641         Value *VarX; ConstantInt *C1;
00642         if (match(I->getOperand(0), m_Shr(m_Value(VarX), m_ConstantInt(C1)))) {
00643           Instruction *Shr = cast<Instruction>(I->getOperand(0));
00644           Value *R = SimplifyShrShlDemandedBits(Shr, I, DemandedMask,
00645                                                 KnownZero, KnownOne);
00646           if (R)
00647             return R;
00648         }
00649       }
00650 
00651       uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
00652       APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));
00653 
00654       // If the shift is NUW/NSW, then it does demand the high bits.
00655       ShlOperator *IOp = cast<ShlOperator>(I);
00656       if (IOp->hasNoSignedWrap())
00657         DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
00658       else if (IOp->hasNoUnsignedWrap())
00659         DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
00660 
00661       if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
00662                                KnownOne, Depth + 1))
00663         return I;
00664       assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
00665       KnownZero <<= ShiftAmt;
00666       KnownOne  <<= ShiftAmt;
00667       // low bits known zero.
00668       if (ShiftAmt)
00669         KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
00670     }
00671     break;
00672   case Instruction::LShr:
00673     // For a logical shift right
00674     if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
00675       uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
00676 
00677       // Unsigned shift right.
00678       APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
00679 
00680       // If the shift is exact, then it does demand the low bits (and knows that
00681       // they are zero).
00682       if (cast<LShrOperator>(I)->isExact())
00683         DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
00684 
00685       if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
00686                                KnownOne, Depth + 1))
00687         return I;
00688       assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
00689       KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
00690       KnownOne  = APIntOps::lshr(KnownOne, ShiftAmt);
00691       if (ShiftAmt) {
00692         // Compute the new bits that are at the top now.
00693         APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
00694         KnownZero |= HighBits;  // high bits known zero.
00695       }
00696     }
00697     break;
00698   case Instruction::AShr:
00699     // If this is an arithmetic shift right and only the low-bit is set, we can
00700     // always convert this into a logical shr, even if the shift amount is
00701     // variable.  The low bit of the shift cannot be an input sign bit unless
00702     // the shift amount is >= the size of the datatype, which is undefined.
00703     if (DemandedMask == 1) {
00704       // Perform the logical shift right.
00705       Instruction *NewVal = BinaryOperator::CreateLShr(
00706                         I->getOperand(0), I->getOperand(1), I->getName());
00707       return InsertNewInstWith(NewVal, *I);
00708     }
00709 
00710     // If the sign bit is the only bit demanded by this ashr, then there is no
00711     // need to do it, the shift doesn't change the high bit.
00712     if (DemandedMask.isSignBit())
00713       return I->getOperand(0);
00714 
00715     if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
00716       uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
00717 
00718       // Signed shift right.
00719       APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
00720       // If any of the "high bits" are demanded, we should set the sign bit as
00721       // demanded.
00722       if (DemandedMask.countLeadingZeros() <= ShiftAmt)
00723         DemandedMaskIn.setBit(BitWidth-1);
00724 
00725       // If the shift is exact, then it does demand the low bits (and knows that
00726       // they are zero).
00727       if (cast<AShrOperator>(I)->isExact())
00728         DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
00729 
00730       if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
00731                                KnownOne, Depth + 1))
00732         return I;
00733       assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
00734       // Compute the new bits that are at the top now.
00735       APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
00736       KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
00737       KnownOne  = APIntOps::lshr(KnownOne, ShiftAmt);
00738 
00739       // Handle the sign bits.
00740       APInt SignBit(APInt::getSignBit(BitWidth));
00741       // Adjust to where it is now in the mask.
00742       SignBit = APIntOps::lshr(SignBit, ShiftAmt);
00743 
00744       // If the input sign bit is known to be zero, or if none of the top bits
00745       // are demanded, turn this into an unsigned shift right.
00746       if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] ||
00747           (HighBits & ~DemandedMask) == HighBits) {
00748         // Perform the logical shift right.
00749         BinaryOperator *NewVal = BinaryOperator::CreateLShr(I->getOperand(0),
00750                                                             SA, I->getName());
00751         NewVal->setIsExact(cast<BinaryOperator>(I)->isExact());
00752         return InsertNewInstWith(NewVal, *I);
00753       } else if ((KnownOne & SignBit) != 0) { // New bits are known one.
00754         KnownOne |= HighBits;
00755       }
00756     }
00757     break;
00758   case Instruction::SRem:
00759     if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
00760       // X % -1 demands all the bits because we don't want to introduce
00761       // INT_MIN % -1 (== undef) by accident.
00762       if (Rem->isAllOnesValue())
00763         break;
00764       APInt RA = Rem->getValue().abs();
00765       if (RA.isPowerOf2()) {
00766         if (DemandedMask.ult(RA))    // srem won't affect demanded bits
00767           return I->getOperand(0);
00768 
00769         APInt LowBits = RA - 1;
00770         APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
00771         if (SimplifyDemandedBits(I->getOperandUse(0), Mask2, LHSKnownZero,
00772                                  LHSKnownOne, Depth + 1))
00773           return I;
00774 
00775         // The low bits of LHS are unchanged by the srem.
00776         KnownZero = LHSKnownZero & LowBits;
00777         KnownOne = LHSKnownOne & LowBits;
00778 
00779         // If LHS is non-negative or has all low bits zero, then the upper bits
00780         // are all zero.
00781         if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits))
00782           KnownZero |= ~LowBits;
00783 
00784         // If LHS is negative and not all low bits are zero, then the upper bits
00785         // are all one.
00786         if (LHSKnownOne[BitWidth-1] && ((LHSKnownOne & LowBits) != 0))
00787           KnownOne |= ~LowBits;
00788 
00789         assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
00790       }
00791     }
00792 
00793     // The sign bit is the LHS's sign bit, except when the result of the
00794     // remainder is zero.
00795     if (DemandedMask.isNegative() && KnownZero.isNonNegative()) {
00796       APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
00797       computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
00798                        CxtI);
00799       // If it's known zero, our sign bit is also zero.
00800       if (LHSKnownZero.isNegative())
00801         KnownZero.setBit(KnownZero.getBitWidth() - 1);
00802     }
00803     break;
00804   case Instruction::URem: {
00805     APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
00806     APInt AllOnes = APInt::getAllOnesValue(BitWidth);
00807     if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes, KnownZero2,
00808                              KnownOne2, Depth + 1) ||
00809         SimplifyDemandedBits(I->getOperandUse(1), AllOnes, KnownZero2,
00810                              KnownOne2, Depth + 1))
00811       return I;
00812 
00813     unsigned Leaders = KnownZero2.countLeadingOnes();
00814     Leaders = std::max(Leaders,
00815                        KnownZero2.countLeadingOnes());
00816     KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
00817     break;
00818   }
00819   case Instruction::Call:
00820     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
00821       switch (II->getIntrinsicID()) {
00822       default: break;
00823       case Intrinsic::bswap: {
00824         // If the only bits demanded come from one byte of the bswap result,
00825         // just shift the input byte into position to eliminate the bswap.
00826         unsigned NLZ = DemandedMask.countLeadingZeros();
00827         unsigned NTZ = DemandedMask.countTrailingZeros();
00828 
00829         // Round NTZ down to the next byte.  If we have 11 trailing zeros, then
00830         // we need all the bits down to bit 8.  Likewise, round NLZ.  If we
00831         // have 14 leading zeros, round to 8.
00832         NLZ &= ~7;
00833         NTZ &= ~7;
00834         // If we need exactly one byte, we can do this transformation.
00835         if (BitWidth-NLZ-NTZ == 8) {
00836           unsigned ResultBit = NTZ;
00837           unsigned InputBit = BitWidth-NTZ-8;
00838 
00839           // Replace this with either a left or right shift to get the byte into
00840           // the right place.
00841           Instruction *NewVal;
00842           if (InputBit > ResultBit)
00843             NewVal = BinaryOperator::CreateLShr(II->getArgOperand(0),
00844                     ConstantInt::get(I->getType(), InputBit-ResultBit));
00845           else
00846             NewVal = BinaryOperator::CreateShl(II->getArgOperand(0),
00847                     ConstantInt::get(I->getType(), ResultBit-InputBit));
00848           NewVal->takeName(I);
00849           return InsertNewInstWith(NewVal, *I);
00850         }
00851 
00852         // TODO: Could compute known zero/one bits based on the input.
00853         break;
00854       }
00855       case Intrinsic::x86_sse42_crc32_64_64:
00856         KnownZero = APInt::getHighBitsSet(64, 32);
00857         return nullptr;
00858       }
00859     }
00860     computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
00861     break;
00862   }
00863 
00864   // If the client is only demanding bits that we know, return the known
00865   // constant.
00866   if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask)
00867     return Constant::getIntegerValue(VTy, KnownOne);
00868   return nullptr;
00869 }
00870 
00871 /// Helper routine of SimplifyDemandedUseBits. It tries to simplify
00872 /// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into
00873 /// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign
00874 /// of "C2-C1".
00875 ///
00876 /// Suppose E1 and E2 are generally different in bits S={bm, bm+1,
00877 /// ..., bn}, without considering the specific value X is holding.
00878 /// This transformation is legal iff one of following conditions is hold:
00879 ///  1) All the bit in S are 0, in this case E1 == E2.
00880 ///  2) We don't care those bits in S, per the input DemandedMask.
00881 ///  3) Combination of 1) and 2). Some bits in S are 0, and we don't care the
00882 ///     rest bits.
00883 ///
00884 /// Currently we only test condition 2).
00885 ///
00886 /// As with SimplifyDemandedUseBits, it returns NULL if the simplification was
00887 /// not successful.
00888 Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
00889   Instruction *Shl, APInt DemandedMask, APInt &KnownZero, APInt &KnownOne) {
00890 
00891   const APInt &ShlOp1 = cast<ConstantInt>(Shl->getOperand(1))->getValue();
00892   const APInt &ShrOp1 = cast<ConstantInt>(Shr->getOperand(1))->getValue();
00893   if (!ShlOp1 || !ShrOp1)
00894       return nullptr; // Noop.
00895 
00896   Value *VarX = Shr->getOperand(0);
00897   Type *Ty = VarX->getType();
00898   unsigned BitWidth = Ty->getIntegerBitWidth();
00899   if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth))
00900     return nullptr; // Undef.
00901 
00902   unsigned ShlAmt = ShlOp1.getZExtValue();
00903   unsigned ShrAmt = ShrOp1.getZExtValue();
00904 
00905   KnownOne.clearAllBits();
00906   KnownZero = APInt::getBitsSet(KnownZero.getBitWidth(), 0, ShlAmt-1);
00907   KnownZero &= DemandedMask;
00908 
00909   APInt BitMask1(APInt::getAllOnesValue(BitWidth));
00910   APInt BitMask2(APInt::getAllOnesValue(BitWidth));
00911 
00912   bool isLshr = (Shr->getOpcode() == Instruction::LShr);
00913   BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) :
00914                       (BitMask1.ashr(ShrAmt) << ShlAmt);
00915 
00916   if (ShrAmt <= ShlAmt) {
00917     BitMask2 <<= (ShlAmt - ShrAmt);
00918   } else {
00919     BitMask2 = isLshr ? BitMask2.lshr(ShrAmt - ShlAmt):
00920                         BitMask2.ashr(ShrAmt - ShlAmt);
00921   }
00922 
00923   // Check if condition-2 (see the comment to this function) is satified.
00924   if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) {
00925     if (ShrAmt == ShlAmt)
00926       return VarX;
00927 
00928     if (!Shr->hasOneUse())
00929       return nullptr;
00930 
00931     BinaryOperator *New;
00932     if (ShrAmt < ShlAmt) {
00933       Constant *Amt = ConstantInt::get(VarX->getType(), ShlAmt - ShrAmt);
00934       New = BinaryOperator::CreateShl(VarX, Amt);
00935       BinaryOperator *Orig = cast<BinaryOperator>(Shl);
00936       New->setHasNoSignedWrap(Orig->hasNoSignedWrap());
00937       New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap());
00938     } else {
00939       Constant *Amt = ConstantInt::get(VarX->getType(), ShrAmt - ShlAmt);
00940       New = isLshr ? BinaryOperator::CreateLShr(VarX, Amt) :
00941                      BinaryOperator::CreateAShr(VarX, Amt);
00942       if (cast<BinaryOperator>(Shr)->isExact())
00943         New->setIsExact(true);
00944     }
00945 
00946     return InsertNewInstWith(New, *Shl);
00947   }
00948 
00949   return nullptr;
00950 }
00951 
00952 /// SimplifyDemandedVectorElts - The specified value produces a vector with
00953 /// any number of elements. DemandedElts contains the set of elements that are
00954 /// actually used by the caller.  This method analyzes which elements of the
00955 /// operand are undef and returns that information in UndefElts.
00956 ///
00957 /// If the information about demanded elements can be used to simplify the
00958 /// operation, the operation is simplified, then the resultant value is
00959 /// returned.  This returns null if no change was made.
00960 Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
00961                                                 APInt &UndefElts,
00962                                                 unsigned Depth) {
00963   unsigned VWidth = cast<VectorType>(V->getType())->getNumElements();
00964   APInt EltMask(APInt::getAllOnesValue(VWidth));
00965   assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
00966 
00967   if (isa<UndefValue>(V)) {
00968     // If the entire vector is undefined, just return this info.
00969     UndefElts = EltMask;
00970     return nullptr;
00971   }
00972 
00973   if (DemandedElts == 0) { // If nothing is demanded, provide undef.
00974     UndefElts = EltMask;
00975     return UndefValue::get(V->getType());
00976   }
00977 
00978   UndefElts = 0;
00979 
00980   // Handle ConstantAggregateZero, ConstantVector, ConstantDataSequential.
00981   if (Constant *C = dyn_cast<Constant>(V)) {
00982     // Check if this is identity. If so, return 0 since we are not simplifying
00983     // anything.
00984     if (DemandedElts.isAllOnesValue())
00985       return nullptr;
00986 
00987     Type *EltTy = cast<VectorType>(V->getType())->getElementType();
00988     Constant *Undef = UndefValue::get(EltTy);
00989 
00990     SmallVector<Constant*, 16> Elts;
00991     for (unsigned i = 0; i != VWidth; ++i) {
00992       if (!DemandedElts[i]) {   // If not demanded, set to undef.
00993         Elts.push_back(Undef);
00994         UndefElts.setBit(i);
00995         continue;
00996       }
00997 
00998       Constant *Elt = C->getAggregateElement(i);
00999       if (!Elt) return nullptr;
01000 
01001       if (isa<UndefValue>(Elt)) {   // Already undef.
01002         Elts.push_back(Undef);
01003         UndefElts.setBit(i);
01004       } else {                               // Otherwise, defined.
01005         Elts.push_back(Elt);
01006       }
01007     }
01008 
01009     // If we changed the constant, return it.
01010     Constant *NewCV = ConstantVector::get(Elts);
01011     return NewCV != C ? NewCV : nullptr;
01012   }
01013 
01014   // Limit search depth.
01015   if (Depth == 10)
01016     return nullptr;
01017 
01018   // If multiple users are using the root value, proceed with
01019   // simplification conservatively assuming that all elements
01020   // are needed.
01021   if (!V->hasOneUse()) {
01022     // Quit if we find multiple users of a non-root value though.
01023     // They'll be handled when it's their turn to be visited by
01024     // the main instcombine process.
01025     if (Depth != 0)
01026       // TODO: Just compute the UndefElts information recursively.
01027       return nullptr;
01028 
01029     // Conservatively assume that all elements are needed.
01030     DemandedElts = EltMask;
01031   }
01032 
01033   Instruction *I = dyn_cast<Instruction>(V);
01034   if (!I) return nullptr;        // Only analyze instructions.
01035 
01036   bool MadeChange = false;
01037   APInt UndefElts2(VWidth, 0);
01038   Value *TmpV;
01039   switch (I->getOpcode()) {
01040   default: break;
01041 
01042   case Instruction::InsertElement: {
01043     // If this is a variable index, we don't know which element it overwrites.
01044     // demand exactly the same input as we produce.
01045     ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
01046     if (!Idx) {
01047       // Note that we can't propagate undef elt info, because we don't know
01048       // which elt is getting updated.
01049       TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
01050                                         UndefElts2, Depth + 1);
01051       if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
01052       break;
01053     }
01054 
01055     // If this is inserting an element that isn't demanded, remove this
01056     // insertelement.
01057     unsigned IdxNo = Idx->getZExtValue();
01058     if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
01059       Worklist.Add(I);
01060       return I->getOperand(0);
01061     }
01062 
01063     // Otherwise, the element inserted overwrites whatever was there, so the
01064     // input demanded set is simpler than the output set.
01065     APInt DemandedElts2 = DemandedElts;
01066     DemandedElts2.clearBit(IdxNo);
01067     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2,
01068                                       UndefElts, Depth + 1);
01069     if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
01070 
01071     // The inserted element is defined.
01072     UndefElts.clearBit(IdxNo);
01073     break;
01074   }
01075   case Instruction::ShuffleVector: {
01076     ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
01077     uint64_t LHSVWidth =
01078       cast<VectorType>(Shuffle->getOperand(0)->getType())->getNumElements();
01079     APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0);
01080     for (unsigned i = 0; i < VWidth; i++) {
01081       if (DemandedElts[i]) {
01082         unsigned MaskVal = Shuffle->getMaskValue(i);
01083         if (MaskVal != -1u) {
01084           assert(MaskVal < LHSVWidth * 2 &&
01085                  "shufflevector mask index out of range!");
01086           if (MaskVal < LHSVWidth)
01087             LeftDemanded.setBit(MaskVal);
01088           else
01089             RightDemanded.setBit(MaskVal - LHSVWidth);
01090         }
01091       }
01092     }
01093 
01094     APInt UndefElts4(LHSVWidth, 0);
01095     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
01096                                       UndefElts4, Depth + 1);
01097     if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
01098 
01099     APInt UndefElts3(LHSVWidth, 0);
01100     TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
01101                                       UndefElts3, Depth + 1);
01102     if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
01103 
01104     bool NewUndefElts = false;
01105     for (unsigned i = 0; i < VWidth; i++) {
01106       unsigned MaskVal = Shuffle->getMaskValue(i);
01107       if (MaskVal == -1u) {
01108         UndefElts.setBit(i);
01109       } else if (!DemandedElts[i]) {
01110         NewUndefElts = true;
01111         UndefElts.setBit(i);
01112       } else if (MaskVal < LHSVWidth) {
01113         if (UndefElts4[MaskVal]) {
01114           NewUndefElts = true;
01115           UndefElts.setBit(i);
01116         }
01117       } else {
01118         if (UndefElts3[MaskVal - LHSVWidth]) {
01119           NewUndefElts = true;
01120           UndefElts.setBit(i);
01121         }
01122       }
01123     }
01124 
01125     if (NewUndefElts) {
01126       // Add additional discovered undefs.
01127       SmallVector<Constant*, 16> Elts;
01128       for (unsigned i = 0; i < VWidth; ++i) {
01129         if (UndefElts[i])
01130           Elts.push_back(UndefValue::get(Type::getInt32Ty(I->getContext())));
01131         else
01132           Elts.push_back(ConstantInt::get(Type::getInt32Ty(I->getContext()),
01133                                           Shuffle->getMaskValue(i)));
01134       }
01135       I->setOperand(2, ConstantVector::get(Elts));
01136       MadeChange = true;
01137     }
01138     break;
01139   }
01140   case Instruction::Select: {
01141     APInt LeftDemanded(DemandedElts), RightDemanded(DemandedElts);
01142     if (ConstantVector* CV = dyn_cast<ConstantVector>(I->getOperand(0))) {
01143       for (unsigned i = 0; i < VWidth; i++) {
01144         if (CV->getAggregateElement(i)->isNullValue())
01145           LeftDemanded.clearBit(i);
01146         else
01147           RightDemanded.clearBit(i);
01148       }
01149     }
01150 
01151     TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded, UndefElts,
01152                                       Depth + 1);
01153     if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
01154 
01155     TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded,
01156                                       UndefElts2, Depth + 1);
01157     if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; }
01158 
01159     // Output elements are undefined if both are undefined.
01160     UndefElts &= UndefElts2;
01161     break;
01162   }
01163   case Instruction::BitCast: {
01164     // Vector->vector casts only.
01165     VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
01166     if (!VTy) break;
01167     unsigned InVWidth = VTy->getNumElements();
01168     APInt InputDemandedElts(InVWidth, 0);
01169     unsigned Ratio;
01170 
01171     if (VWidth == InVWidth) {
01172       // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
01173       // elements as are demanded of us.
01174       Ratio = 1;
01175       InputDemandedElts = DemandedElts;
01176     } else if (VWidth > InVWidth) {
01177       // Untested so far.
01178       break;
01179 
01180       // If there are more elements in the result than there are in the source,
01181       // then an input element is live if any of the corresponding output
01182       // elements are live.
01183       Ratio = VWidth/InVWidth;
01184       for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
01185         if (DemandedElts[OutIdx])
01186           InputDemandedElts.setBit(OutIdx/Ratio);
01187       }
01188     } else {
01189       // Untested so far.
01190       break;
01191 
01192       // If there are more elements in the source than there are in the result,
01193       // then an input element is live if the corresponding output element is
01194       // live.
01195       Ratio = InVWidth/VWidth;
01196       for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
01197         if (DemandedElts[InIdx/Ratio])
01198           InputDemandedElts.setBit(InIdx);
01199     }
01200 
01201     // div/rem demand all inputs, because they don't want divide by zero.
01202     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
01203                                       UndefElts2, Depth + 1);
01204     if (TmpV) {
01205       I->setOperand(0, TmpV);
01206       MadeChange = true;
01207     }
01208 
01209     UndefElts = UndefElts2;
01210     if (VWidth > InVWidth) {
01211       llvm_unreachable("Unimp");
01212       // If there are more elements in the result than there are in the source,
01213       // then an output element is undef if the corresponding input element is
01214       // undef.
01215       for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
01216         if (UndefElts2[OutIdx/Ratio])
01217           UndefElts.setBit(OutIdx);
01218     } else if (VWidth < InVWidth) {
01219       llvm_unreachable("Unimp");
01220       // If there are more elements in the source than there are in the result,
01221       // then a result element is undef if all of the corresponding input
01222       // elements are undef.
01223       UndefElts = ~0ULL >> (64-VWidth);  // Start out all undef.
01224       for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
01225         if (!UndefElts2[InIdx])            // Not undef?
01226           UndefElts.clearBit(InIdx/Ratio);    // Clear undef bit.
01227     }
01228     break;
01229   }
01230   case Instruction::And:
01231   case Instruction::Or:
01232   case Instruction::Xor:
01233   case Instruction::Add:
01234   case Instruction::Sub:
01235   case Instruction::Mul:
01236     // div/rem demand all inputs, because they don't want divide by zero.
01237     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
01238                                       Depth + 1);
01239     if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
01240     TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
01241                                       UndefElts2, Depth + 1);
01242     if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
01243 
01244     // Output elements are undefined if both are undefined.  Consider things
01245     // like undef&0.  The result is known zero, not undef.
01246     UndefElts &= UndefElts2;
01247     break;
01248   case Instruction::FPTrunc:
01249   case Instruction::FPExt:
01250     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
01251                                       Depth + 1);
01252     if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
01253     break;
01254 
01255   case Instruction::Call: {
01256     IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
01257     if (!II) break;
01258     switch (II->getIntrinsicID()) {
01259     default: break;
01260 
01261     // Binary vector operations that work column-wise.  A dest element is a
01262     // function of the corresponding input elements from the two inputs.
01263     case Intrinsic::x86_sse_sub_ss:
01264     case Intrinsic::x86_sse_mul_ss:
01265     case Intrinsic::x86_sse_min_ss:
01266     case Intrinsic::x86_sse_max_ss:
01267     case Intrinsic::x86_sse2_sub_sd:
01268     case Intrinsic::x86_sse2_mul_sd:
01269     case Intrinsic::x86_sse2_min_sd:
01270     case Intrinsic::x86_sse2_max_sd:
01271       TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
01272                                         UndefElts, Depth + 1);
01273       if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
01274       TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
01275                                         UndefElts2, Depth + 1);
01276       if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
01277 
01278       // If only the low elt is demanded and this is a scalarizable intrinsic,
01279       // scalarize it now.
01280       if (DemandedElts == 1) {
01281         switch (II->getIntrinsicID()) {
01282         default: break;
01283         case Intrinsic::x86_sse_sub_ss:
01284         case Intrinsic::x86_sse_mul_ss:
01285         case Intrinsic::x86_sse2_sub_sd:
01286         case Intrinsic::x86_sse2_mul_sd:
01287           // TODO: Lower MIN/MAX/ABS/etc
01288           Value *LHS = II->getArgOperand(0);
01289           Value *RHS = II->getArgOperand(1);
01290           // Extract the element as scalars.
01291           LHS = InsertNewInstWith(ExtractElementInst::Create(LHS,
01292             ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
01293           RHS = InsertNewInstWith(ExtractElementInst::Create(RHS,
01294             ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
01295 
01296           switch (II->getIntrinsicID()) {
01297           default: llvm_unreachable("Case stmts out of sync!");
01298           case Intrinsic::x86_sse_sub_ss:
01299           case Intrinsic::x86_sse2_sub_sd:
01300             TmpV = InsertNewInstWith(BinaryOperator::CreateFSub(LHS, RHS,
01301                                                         II->getName()), *II);
01302             break;
01303           case Intrinsic::x86_sse_mul_ss:
01304           case Intrinsic::x86_sse2_mul_sd:
01305             TmpV = InsertNewInstWith(BinaryOperator::CreateFMul(LHS, RHS,
01306                                                          II->getName()), *II);
01307             break;
01308           }
01309 
01310           Instruction *New =
01311             InsertElementInst::Create(
01312               UndefValue::get(II->getType()), TmpV,
01313               ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U, false),
01314                                       II->getName());
01315           InsertNewInstWith(New, *II);
01316           return New;
01317         }
01318       }
01319 
01320       // Output elements are undefined if both are undefined.  Consider things
01321       // like undef&0.  The result is known zero, not undef.
01322       UndefElts &= UndefElts2;
01323       break;
01324     }
01325     break;
01326   }
01327   }
01328   return MadeChange ? I : nullptr;
01329 }