LLVM API Documentation
00001 //===- InstCombineAddSub.cpp ----------------------------------------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file implements the visit functions for add, fadd, sub, and fsub. 00011 // 00012 //===----------------------------------------------------------------------===// 00013 00014 #include "InstCombine.h" 00015 #include "llvm/Analysis/InstructionSimplify.h" 00016 #include "llvm/IR/DataLayout.h" 00017 #include "llvm/Support/GetElementPtrTypeIterator.h" 00018 #include "llvm/Support/PatternMatch.h" 00019 using namespace llvm; 00020 using namespace PatternMatch; 00021 00022 namespace { 00023 00024 /// Class representing coefficient of floating-point addend. 00025 /// This class needs to be highly efficient, which is especially true for 00026 /// the constructor. As of I write this comment, the cost of the default 00027 /// constructor is merely 4-byte-store-zero (Assuming compiler is able to 00028 /// perform write-merging). 00029 /// 00030 class FAddendCoef { 00031 public: 00032 // The constructor has to initialize a APFloat, which is uncessary for 00033 // most addends which have coefficient either 1 or -1. So, the constructor 00034 // is expensive. In order to avoid the cost of the constructor, we should 00035 // reuse some instances whenever possible. The pre-created instances 00036 // FAddCombine::Add[0-5] embodies this idea. 00037 // 00038 FAddendCoef() : IsFp(false), BufHasFpVal(false), IntVal(0) {} 00039 ~FAddendCoef(); 00040 00041 void set(short C) { 00042 assert(!insaneIntVal(C) && "Insane coefficient"); 00043 IsFp = false; IntVal = C; 00044 } 00045 00046 void set(const APFloat& C); 00047 00048 void negate(); 00049 00050 bool isZero() const { return isInt() ? !IntVal : getFpVal().isZero(); } 00051 Value *getValue(Type *) const; 00052 00053 // If possible, don't define operator+/operator- etc because these 00054 // operators inevitably call FAddendCoef's constructor which is not cheap. 00055 void operator=(const FAddendCoef &A); 00056 void operator+=(const FAddendCoef &A); 00057 void operator-=(const FAddendCoef &A); 00058 void operator*=(const FAddendCoef &S); 00059 00060 bool isOne() const { return isInt() && IntVal == 1; } 00061 bool isTwo() const { return isInt() && IntVal == 2; } 00062 bool isMinusOne() const { return isInt() && IntVal == -1; } 00063 bool isMinusTwo() const { return isInt() && IntVal == -2; } 00064 00065 private: 00066 bool insaneIntVal(int V) { return V > 4 || V < -4; } 00067 APFloat *getFpValPtr(void) 00068 { return reinterpret_cast<APFloat*>(&FpValBuf.buffer[0]); } 00069 const APFloat *getFpValPtr(void) const 00070 { return reinterpret_cast<const APFloat*>(&FpValBuf.buffer[0]); } 00071 00072 const APFloat &getFpVal(void) const { 00073 assert(IsFp && BufHasFpVal && "Incorret state"); 00074 return *getFpValPtr(); 00075 } 00076 00077 APFloat &getFpVal(void) { 00078 assert(IsFp && BufHasFpVal && "Incorret state"); 00079 return *getFpValPtr(); 00080 } 00081 00082 bool isInt() const { return !IsFp; } 00083 00084 // If the coefficient is represented by an integer, promote it to a 00085 // floating point. 00086 void convertToFpType(const fltSemantics &Sem); 00087 00088 // Construct an APFloat from a signed integer. 00089 // TODO: We should get rid of this function when APFloat can be constructed 00090 // from an *SIGNED* integer. 00091 APFloat createAPFloatFromInt(const fltSemantics &Sem, int Val); 00092 private: 00093 00094 bool IsFp; 00095 00096 // True iff FpValBuf contains an instance of APFloat. 00097 bool BufHasFpVal; 00098 00099 // The integer coefficient of an individual addend is either 1 or -1, 00100 // and we try to simplify at most 4 addends from neighboring at most 00101 // two instructions. So the range of <IntVal> falls in [-4, 4]. APInt 00102 // is overkill of this end. 00103 short IntVal; 00104 00105 AlignedCharArrayUnion<APFloat> FpValBuf; 00106 }; 00107 00108 /// FAddend is used to represent floating-point addend. An addend is 00109 /// represented as <C, V>, where the V is a symbolic value, and C is a 00110 /// constant coefficient. A constant addend is represented as <C, 0>. 00111 /// 00112 class FAddend { 00113 public: 00114 FAddend() { Val = 0; } 00115 00116 Value *getSymVal (void) const { return Val; } 00117 const FAddendCoef &getCoef(void) const { return Coeff; } 00118 00119 bool isConstant() const { return Val == 0; } 00120 bool isZero() const { return Coeff.isZero(); } 00121 00122 void set(short Coefficient, Value *V) { Coeff.set(Coefficient), Val = V; } 00123 void set(const APFloat& Coefficient, Value *V) 00124 { Coeff.set(Coefficient); Val = V; } 00125 void set(const ConstantFP* Coefficient, Value *V) 00126 { Coeff.set(Coefficient->getValueAPF()); Val = V; } 00127 00128 void negate() { Coeff.negate(); } 00129 00130 /// Drill down the U-D chain one step to find the definition of V, and 00131 /// try to break the definition into one or two addends. 00132 static unsigned drillValueDownOneStep(Value* V, FAddend &A0, FAddend &A1); 00133 00134 /// Similar to FAddend::drillDownOneStep() except that the value being 00135 /// splitted is the addend itself. 00136 unsigned drillAddendDownOneStep(FAddend &Addend0, FAddend &Addend1) const; 00137 00138 void operator+=(const FAddend &T) { 00139 assert((Val == T.Val) && "Symbolic-values disagree"); 00140 Coeff += T.Coeff; 00141 } 00142 00143 private: 00144 void Scale(const FAddendCoef& ScaleAmt) { Coeff *= ScaleAmt; } 00145 00146 // This addend has the value of "Coeff * Val". 00147 Value *Val; 00148 FAddendCoef Coeff; 00149 }; 00150 00151 /// FAddCombine is the class for optimizing an unsafe fadd/fsub along 00152 /// with its neighboring at most two instructions. 00153 /// 00154 class FAddCombine { 00155 public: 00156 FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(0) {} 00157 Value *simplify(Instruction *FAdd); 00158 00159 private: 00160 typedef SmallVector<const FAddend*, 4> AddendVect; 00161 00162 Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota); 00163 00164 Value *performFactorization(Instruction *I); 00165 00166 /// Convert given addend to a Value 00167 Value *createAddendVal(const FAddend &A, bool& NeedNeg); 00168 00169 /// Return the number of instructions needed to emit the N-ary addition. 00170 unsigned calcInstrNumber(const AddendVect& Vect); 00171 Value *createFSub(Value *Opnd0, Value *Opnd1); 00172 Value *createFAdd(Value *Opnd0, Value *Opnd1); 00173 Value *createFMul(Value *Opnd0, Value *Opnd1); 00174 Value *createFDiv(Value *Opnd0, Value *Opnd1); 00175 Value *createFNeg(Value *V); 00176 Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota); 00177 void createInstPostProc(Instruction *NewInst); 00178 00179 InstCombiner::BuilderTy *Builder; 00180 Instruction *Instr; 00181 00182 private: 00183 // Debugging stuff are clustered here. 00184 #ifndef NDEBUG 00185 unsigned CreateInstrNum; 00186 void initCreateInstNum() { CreateInstrNum = 0; } 00187 void incCreateInstNum() { CreateInstrNum++; } 00188 #else 00189 void initCreateInstNum() {} 00190 void incCreateInstNum() {} 00191 #endif 00192 }; 00193 } 00194 00195 //===----------------------------------------------------------------------===// 00196 // 00197 // Implementation of 00198 // {FAddendCoef, FAddend, FAddition, FAddCombine}. 00199 // 00200 //===----------------------------------------------------------------------===// 00201 FAddendCoef::~FAddendCoef() { 00202 if (BufHasFpVal) 00203 getFpValPtr()->~APFloat(); 00204 } 00205 00206 void FAddendCoef::set(const APFloat& C) { 00207 APFloat *P = getFpValPtr(); 00208 00209 if (isInt()) { 00210 // As the buffer is meanless byte stream, we cannot call 00211 // APFloat::operator=(). 00212 new(P) APFloat(C); 00213 } else 00214 *P = C; 00215 00216 IsFp = BufHasFpVal = true; 00217 } 00218 00219 void FAddendCoef::convertToFpType(const fltSemantics &Sem) { 00220 if (!isInt()) 00221 return; 00222 00223 APFloat *P = getFpValPtr(); 00224 if (IntVal > 0) 00225 new(P) APFloat(Sem, IntVal); 00226 else { 00227 new(P) APFloat(Sem, 0 - IntVal); 00228 P->changeSign(); 00229 } 00230 IsFp = BufHasFpVal = true; 00231 } 00232 00233 APFloat FAddendCoef::createAPFloatFromInt(const fltSemantics &Sem, int Val) { 00234 if (Val >= 0) 00235 return APFloat(Sem, Val); 00236 00237 APFloat T(Sem, 0 - Val); 00238 T.changeSign(); 00239 00240 return T; 00241 } 00242 00243 void FAddendCoef::operator=(const FAddendCoef &That) { 00244 if (That.isInt()) 00245 set(That.IntVal); 00246 else 00247 set(That.getFpVal()); 00248 } 00249 00250 void FAddendCoef::operator+=(const FAddendCoef &That) { 00251 enum APFloat::roundingMode RndMode = APFloat::rmNearestTiesToEven; 00252 if (isInt() == That.isInt()) { 00253 if (isInt()) 00254 IntVal += That.IntVal; 00255 else 00256 getFpVal().add(That.getFpVal(), RndMode); 00257 return; 00258 } 00259 00260 if (isInt()) { 00261 const APFloat &T = That.getFpVal(); 00262 convertToFpType(T.getSemantics()); 00263 getFpVal().add(T, RndMode); 00264 return; 00265 } 00266 00267 APFloat &T = getFpVal(); 00268 T.add(createAPFloatFromInt(T.getSemantics(), That.IntVal), RndMode); 00269 } 00270 00271 void FAddendCoef::operator-=(const FAddendCoef &That) { 00272 enum APFloat::roundingMode RndMode = APFloat::rmNearestTiesToEven; 00273 if (isInt() == That.isInt()) { 00274 if (isInt()) 00275 IntVal -= That.IntVal; 00276 else 00277 getFpVal().subtract(That.getFpVal(), RndMode); 00278 return; 00279 } 00280 00281 if (isInt()) { 00282 const APFloat &T = That.getFpVal(); 00283 convertToFpType(T.getSemantics()); 00284 getFpVal().subtract(T, RndMode); 00285 return; 00286 } 00287 00288 APFloat &T = getFpVal(); 00289 T.subtract(createAPFloatFromInt(T.getSemantics(), IntVal), RndMode); 00290 } 00291 00292 void FAddendCoef::operator*=(const FAddendCoef &That) { 00293 if (That.isOne()) 00294 return; 00295 00296 if (That.isMinusOne()) { 00297 negate(); 00298 return; 00299 } 00300 00301 if (isInt() && That.isInt()) { 00302 int Res = IntVal * (int)That.IntVal; 00303 assert(!insaneIntVal(Res) && "Insane int value"); 00304 IntVal = Res; 00305 return; 00306 } 00307 00308 const fltSemantics &Semantic = 00309 isInt() ? That.getFpVal().getSemantics() : getFpVal().getSemantics(); 00310 00311 if (isInt()) 00312 convertToFpType(Semantic); 00313 APFloat &F0 = getFpVal(); 00314 00315 if (That.isInt()) 00316 F0.multiply(createAPFloatFromInt(Semantic, That.IntVal), 00317 APFloat::rmNearestTiesToEven); 00318 else 00319 F0.multiply(That.getFpVal(), APFloat::rmNearestTiesToEven); 00320 00321 return; 00322 } 00323 00324 void FAddendCoef::negate() { 00325 if (isInt()) 00326 IntVal = 0 - IntVal; 00327 else 00328 getFpVal().changeSign(); 00329 } 00330 00331 Value *FAddendCoef::getValue(Type *Ty) const { 00332 return isInt() ? 00333 ConstantFP::get(Ty, float(IntVal)) : 00334 ConstantFP::get(Ty->getContext(), getFpVal()); 00335 } 00336 00337 // The definition of <Val> Addends 00338 // ========================================= 00339 // A + B <1, A>, <1,B> 00340 // A - B <1, A>, <1,B> 00341 // 0 - B <-1, B> 00342 // C * A, <C, A> 00343 // A + C <1, A> <C, NULL> 00344 // 0 +/- 0 <0, NULL> (corner case) 00345 // 00346 // Legend: A and B are not constant, C is constant 00347 // 00348 unsigned FAddend::drillValueDownOneStep 00349 (Value *Val, FAddend &Addend0, FAddend &Addend1) { 00350 Instruction *I = 0; 00351 if (Val == 0 || !(I = dyn_cast<Instruction>(Val))) 00352 return 0; 00353 00354 unsigned Opcode = I->getOpcode(); 00355 00356 if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub) { 00357 ConstantFP *C0, *C1; 00358 Value *Opnd0 = I->getOperand(0); 00359 Value *Opnd1 = I->getOperand(1); 00360 if ((C0 = dyn_cast<ConstantFP>(Opnd0)) && C0->isZero()) 00361 Opnd0 = 0; 00362 00363 if ((C1 = dyn_cast<ConstantFP>(Opnd1)) && C1->isZero()) 00364 Opnd1 = 0; 00365 00366 if (Opnd0) { 00367 if (!C0) 00368 Addend0.set(1, Opnd0); 00369 else 00370 Addend0.set(C0, 0); 00371 } 00372 00373 if (Opnd1) { 00374 FAddend &Addend = Opnd0 ? Addend1 : Addend0; 00375 if (!C1) 00376 Addend.set(1, Opnd1); 00377 else 00378 Addend.set(C1, 0); 00379 if (Opcode == Instruction::FSub) 00380 Addend.negate(); 00381 } 00382 00383 if (Opnd0 || Opnd1) 00384 return Opnd0 && Opnd1 ? 2 : 1; 00385 00386 // Both operands are zero. Weird! 00387 Addend0.set(APFloat(C0->getValueAPF().getSemantics()), 0); 00388 return 1; 00389 } 00390 00391 if (I->getOpcode() == Instruction::FMul) { 00392 Value *V0 = I->getOperand(0); 00393 Value *V1 = I->getOperand(1); 00394 if (ConstantFP *C = dyn_cast<ConstantFP>(V0)) { 00395 Addend0.set(C, V1); 00396 return 1; 00397 } 00398 00399 if (ConstantFP *C = dyn_cast<ConstantFP>(V1)) { 00400 Addend0.set(C, V0); 00401 return 1; 00402 } 00403 } 00404 00405 return 0; 00406 } 00407 00408 // Try to break *this* addend into two addends. e.g. Suppose this addend is 00409 // <2.3, V>, and V = X + Y, by calling this function, we obtain two addends, 00410 // i.e. <2.3, X> and <2.3, Y>. 00411 // 00412 unsigned FAddend::drillAddendDownOneStep 00413 (FAddend &Addend0, FAddend &Addend1) const { 00414 if (isConstant()) 00415 return 0; 00416 00417 unsigned BreakNum = FAddend::drillValueDownOneStep(Val, Addend0, Addend1); 00418 if (!BreakNum || Coeff.isOne()) 00419 return BreakNum; 00420 00421 Addend0.Scale(Coeff); 00422 00423 if (BreakNum == 2) 00424 Addend1.Scale(Coeff); 00425 00426 return BreakNum; 00427 } 00428 00429 // Try to perform following optimization on the input instruction I. Return the 00430 // simplified expression if was successful; otherwise, return 0. 00431 // 00432 // Instruction "I" is Simplified into 00433 // ------------------------------------------------------- 00434 // (x * y) +/- (x * z) x * (y +/- z) 00435 // (y / x) +/- (z / x) (y +/- z) / x 00436 // 00437 Value *FAddCombine::performFactorization(Instruction *I) { 00438 assert((I->getOpcode() == Instruction::FAdd || 00439 I->getOpcode() == Instruction::FSub) && "Expect add/sub"); 00440 00441 Instruction *I0 = dyn_cast<Instruction>(I->getOperand(0)); 00442 Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1)); 00443 00444 if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode()) 00445 return 0; 00446 00447 bool isMpy = false; 00448 if (I0->getOpcode() == Instruction::FMul) 00449 isMpy = true; 00450 else if (I0->getOpcode() != Instruction::FDiv) 00451 return 0; 00452 00453 Value *Opnd0_0 = I0->getOperand(0); 00454 Value *Opnd0_1 = I0->getOperand(1); 00455 Value *Opnd1_0 = I1->getOperand(0); 00456 Value *Opnd1_1 = I1->getOperand(1); 00457 00458 // Input Instr I Factor AddSub0 AddSub1 00459 // ---------------------------------------------- 00460 // (x*y) +/- (x*z) x y z 00461 // (y/x) +/- (z/x) x y z 00462 // 00463 Value *Factor = 0; 00464 Value *AddSub0 = 0, *AddSub1 = 0; 00465 00466 if (isMpy) { 00467 if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1) 00468 Factor = Opnd0_0; 00469 else if (Opnd0_1 == Opnd1_0 || Opnd0_1 == Opnd1_1) 00470 Factor = Opnd0_1; 00471 00472 if (Factor) { 00473 AddSub0 = (Factor == Opnd0_0) ? Opnd0_1 : Opnd0_0; 00474 AddSub1 = (Factor == Opnd1_0) ? Opnd1_1 : Opnd1_0; 00475 } 00476 } else if (Opnd0_1 == Opnd1_1) { 00477 Factor = Opnd0_1; 00478 AddSub0 = Opnd0_0; 00479 AddSub1 = Opnd1_0; 00480 } 00481 00482 if (!Factor) 00483 return 0; 00484 00485 // Create expression "NewAddSub = AddSub0 +/- AddsSub1" 00486 Value *NewAddSub = (I->getOpcode() == Instruction::FAdd) ? 00487 createFAdd(AddSub0, AddSub1) : 00488 createFSub(AddSub0, AddSub1); 00489 if (ConstantFP *CFP = dyn_cast<ConstantFP>(NewAddSub)) { 00490 const APFloat &F = CFP->getValueAPF(); 00491 if (!F.isNormal() || F.isDenormal()) 00492 return 0; 00493 } 00494 00495 if (isMpy) 00496 return createFMul(Factor, NewAddSub); 00497 00498 return createFDiv(NewAddSub, Factor); 00499 } 00500 00501 Value *FAddCombine::simplify(Instruction *I) { 00502 assert(I->hasUnsafeAlgebra() && "Should be in unsafe mode"); 00503 00504 // Currently we are not able to handle vector type. 00505 if (I->getType()->isVectorTy()) 00506 return 0; 00507 00508 assert((I->getOpcode() == Instruction::FAdd || 00509 I->getOpcode() == Instruction::FSub) && "Expect add/sub"); 00510 00511 // Save the instruction before calling other member-functions. 00512 Instr = I; 00513 00514 FAddend Opnd0, Opnd1, Opnd0_0, Opnd0_1, Opnd1_0, Opnd1_1; 00515 00516 unsigned OpndNum = FAddend::drillValueDownOneStep(I, Opnd0, Opnd1); 00517 00518 // Step 1: Expand the 1st addend into Opnd0_0 and Opnd0_1. 00519 unsigned Opnd0_ExpNum = 0; 00520 unsigned Opnd1_ExpNum = 0; 00521 00522 if (!Opnd0.isConstant()) 00523 Opnd0_ExpNum = Opnd0.drillAddendDownOneStep(Opnd0_0, Opnd0_1); 00524 00525 // Step 2: Expand the 2nd addend into Opnd1_0 and Opnd1_1. 00526 if (OpndNum == 2 && !Opnd1.isConstant()) 00527 Opnd1_ExpNum = Opnd1.drillAddendDownOneStep(Opnd1_0, Opnd1_1); 00528 00529 // Step 3: Try to optimize Opnd0_0 + Opnd0_1 + Opnd1_0 + Opnd1_1 00530 if (Opnd0_ExpNum && Opnd1_ExpNum) { 00531 AddendVect AllOpnds; 00532 AllOpnds.push_back(&Opnd0_0); 00533 AllOpnds.push_back(&Opnd1_0); 00534 if (Opnd0_ExpNum == 2) 00535 AllOpnds.push_back(&Opnd0_1); 00536 if (Opnd1_ExpNum == 2) 00537 AllOpnds.push_back(&Opnd1_1); 00538 00539 // Compute instruction quota. We should save at least one instruction. 00540 unsigned InstQuota = 0; 00541 00542 Value *V0 = I->getOperand(0); 00543 Value *V1 = I->getOperand(1); 00544 InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) && 00545 (!isa<Constant>(V1) && V1->hasOneUse())) ? 2 : 1; 00546 00547 if (Value *R = simplifyFAdd(AllOpnds, InstQuota)) 00548 return R; 00549 } 00550 00551 if (OpndNum != 2) { 00552 // The input instruction is : "I=0.0 +/- V". If the "V" were able to be 00553 // splitted into two addends, say "V = X - Y", the instruction would have 00554 // been optimized into "I = Y - X" in the previous steps. 00555 // 00556 const FAddendCoef &CE = Opnd0.getCoef(); 00557 return CE.isOne() ? Opnd0.getSymVal() : 0; 00558 } 00559 00560 // step 4: Try to optimize Opnd0 + Opnd1_0 [+ Opnd1_1] 00561 if (Opnd1_ExpNum) { 00562 AddendVect AllOpnds; 00563 AllOpnds.push_back(&Opnd0); 00564 AllOpnds.push_back(&Opnd1_0); 00565 if (Opnd1_ExpNum == 2) 00566 AllOpnds.push_back(&Opnd1_1); 00567 00568 if (Value *R = simplifyFAdd(AllOpnds, 1)) 00569 return R; 00570 } 00571 00572 // step 5: Try to optimize Opnd1 + Opnd0_0 [+ Opnd0_1] 00573 if (Opnd0_ExpNum) { 00574 AddendVect AllOpnds; 00575 AllOpnds.push_back(&Opnd1); 00576 AllOpnds.push_back(&Opnd0_0); 00577 if (Opnd0_ExpNum == 2) 00578 AllOpnds.push_back(&Opnd0_1); 00579 00580 if (Value *R = simplifyFAdd(AllOpnds, 1)) 00581 return R; 00582 } 00583 00584 // step 6: Try factorization as the last resort, 00585 return performFactorization(I); 00586 } 00587 00588 Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { 00589 00590 unsigned AddendNum = Addends.size(); 00591 assert(AddendNum <= 4 && "Too many addends"); 00592 00593 // For saving intermediate results; 00594 unsigned NextTmpIdx = 0; 00595 FAddend TmpResult[3]; 00596 00597 // Points to the constant addend of the resulting simplified expression. 00598 // If the resulting expr has constant-addend, this constant-addend is 00599 // desirable to reside at the top of the resulting expression tree. Placing 00600 // constant close to supper-expr(s) will potentially reveal some optimization 00601 // opportunities in super-expr(s). 00602 // 00603 const FAddend *ConstAdd = 0; 00604 00605 // Simplified addends are placed <SimpVect>. 00606 AddendVect SimpVect; 00607 00608 // The outer loop works on one symbolic-value at a time. Suppose the input 00609 // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ... 00610 // The symbolic-values will be processed in this order: x, y, z. 00611 // 00612 for (unsigned SymIdx = 0; SymIdx < AddendNum; SymIdx++) { 00613 00614 const FAddend *ThisAddend = Addends[SymIdx]; 00615 if (!ThisAddend) { 00616 // This addend was processed before. 00617 continue; 00618 } 00619 00620 Value *Val = ThisAddend->getSymVal(); 00621 unsigned StartIdx = SimpVect.size(); 00622 SimpVect.push_back(ThisAddend); 00623 00624 // The inner loop collects addends sharing same symbolic-value, and these 00625 // addends will be later on folded into a single addend. Following above 00626 // example, if the symbolic value "y" is being processed, the inner loop 00627 // will collect two addends "<b1,y>" and "<b2,Y>". These two addends will 00628 // be later on folded into "<b1+b2, y>". 00629 // 00630 for (unsigned SameSymIdx = SymIdx + 1; 00631 SameSymIdx < AddendNum; SameSymIdx++) { 00632 const FAddend *T = Addends[SameSymIdx]; 00633 if (T && T->getSymVal() == Val) { 00634 // Set null such that next iteration of the outer loop will not process 00635 // this addend again. 00636 Addends[SameSymIdx] = 0; 00637 SimpVect.push_back(T); 00638 } 00639 } 00640 00641 // If multiple addends share same symbolic value, fold them together. 00642 if (StartIdx + 1 != SimpVect.size()) { 00643 FAddend &R = TmpResult[NextTmpIdx ++]; 00644 R = *SimpVect[StartIdx]; 00645 for (unsigned Idx = StartIdx + 1; Idx < SimpVect.size(); Idx++) 00646 R += *SimpVect[Idx]; 00647 00648 // Pop all addends being folded and push the resulting folded addend. 00649 SimpVect.resize(StartIdx); 00650 if (Val != 0) { 00651 if (!R.isZero()) { 00652 SimpVect.push_back(&R); 00653 } 00654 } else { 00655 // Don't push constant addend at this time. It will be the last element 00656 // of <SimpVect>. 00657 ConstAdd = &R; 00658 } 00659 } 00660 } 00661 00662 assert((NextTmpIdx <= sizeof(TmpResult)/sizeof(TmpResult[0]) + 1) && 00663 "out-of-bound access"); 00664 00665 if (ConstAdd) 00666 SimpVect.push_back(ConstAdd); 00667 00668 Value *Result; 00669 if (!SimpVect.empty()) 00670 Result = createNaryFAdd(SimpVect, InstrQuota); 00671 else { 00672 // The addition is folded to 0.0. 00673 Result = ConstantFP::get(Instr->getType(), 0.0); 00674 } 00675 00676 return Result; 00677 } 00678 00679 Value *FAddCombine::createNaryFAdd 00680 (const AddendVect &Opnds, unsigned InstrQuota) { 00681 assert(!Opnds.empty() && "Expect at least one addend"); 00682 00683 // Step 1: Check if the # of instructions needed exceeds the quota. 00684 // 00685 unsigned InstrNeeded = calcInstrNumber(Opnds); 00686 if (InstrNeeded > InstrQuota) 00687 return 0; 00688 00689 initCreateInstNum(); 00690 00691 // step 2: Emit the N-ary addition. 00692 // Note that at most three instructions are involved in Fadd-InstCombine: the 00693 // addition in question, and at most two neighboring instructions. 00694 // The resulting optimized addition should have at least one less instruction 00695 // than the original addition expression tree. This implies that the resulting 00696 // N-ary addition has at most two instructions, and we don't need to worry 00697 // about tree-height when constructing the N-ary addition. 00698 00699 Value *LastVal = 0; 00700 bool LastValNeedNeg = false; 00701 00702 // Iterate the addends, creating fadd/fsub using adjacent two addends. 00703 for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end(); 00704 I != E; I++) { 00705 bool NeedNeg; 00706 Value *V = createAddendVal(**I, NeedNeg); 00707 if (!LastVal) { 00708 LastVal = V; 00709 LastValNeedNeg = NeedNeg; 00710 continue; 00711 } 00712 00713 if (LastValNeedNeg == NeedNeg) { 00714 LastVal = createFAdd(LastVal, V); 00715 continue; 00716 } 00717 00718 if (LastValNeedNeg) 00719 LastVal = createFSub(V, LastVal); 00720 else 00721 LastVal = createFSub(LastVal, V); 00722 00723 LastValNeedNeg = false; 00724 } 00725 00726 if (LastValNeedNeg) { 00727 LastVal = createFNeg(LastVal); 00728 } 00729 00730 #ifndef NDEBUG 00731 assert(CreateInstrNum == InstrNeeded && 00732 "Inconsistent in instruction numbers"); 00733 #endif 00734 00735 return LastVal; 00736 } 00737 00738 Value *FAddCombine::createFSub 00739 (Value *Opnd0, Value *Opnd1) { 00740 Value *V = Builder->CreateFSub(Opnd0, Opnd1); 00741 if (Instruction *I = dyn_cast<Instruction>(V)) 00742 createInstPostProc(I); 00743 return V; 00744 } 00745 00746 Value *FAddCombine::createFNeg(Value *V) { 00747 Value *Zero = cast<Value>(ConstantFP::get(V->getType(), 0.0)); 00748 return createFSub(Zero, V); 00749 } 00750 00751 Value *FAddCombine::createFAdd 00752 (Value *Opnd0, Value *Opnd1) { 00753 Value *V = Builder->CreateFAdd(Opnd0, Opnd1); 00754 if (Instruction *I = dyn_cast<Instruction>(V)) 00755 createInstPostProc(I); 00756 return V; 00757 } 00758 00759 Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) { 00760 Value *V = Builder->CreateFMul(Opnd0, Opnd1); 00761 if (Instruction *I = dyn_cast<Instruction>(V)) 00762 createInstPostProc(I); 00763 return V; 00764 } 00765 00766 Value *FAddCombine::createFDiv(Value *Opnd0, Value *Opnd1) { 00767 Value *V = Builder->CreateFDiv(Opnd0, Opnd1); 00768 if (Instruction *I = dyn_cast<Instruction>(V)) 00769 createInstPostProc(I); 00770 return V; 00771 } 00772 00773 void FAddCombine::createInstPostProc(Instruction *NewInstr) { 00774 NewInstr->setDebugLoc(Instr->getDebugLoc()); 00775 00776 // Keep track of the number of instruction created. 00777 incCreateInstNum(); 00778 00779 // Propagate fast-math flags 00780 NewInstr->setFastMathFlags(Instr->getFastMathFlags()); 00781 } 00782 00783 // Return the number of instruction needed to emit the N-ary addition. 00784 // NOTE: Keep this function in sync with createAddendVal(). 00785 unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) { 00786 unsigned OpndNum = Opnds.size(); 00787 unsigned InstrNeeded = OpndNum - 1; 00788 00789 // The number of addends in the form of "(-1)*x". 00790 unsigned NegOpndNum = 0; 00791 00792 // Adjust the number of instructions needed to emit the N-ary add. 00793 for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end(); 00794 I != E; I++) { 00795 const FAddend *Opnd = *I; 00796 if (Opnd->isConstant()) 00797 continue; 00798 00799 const FAddendCoef &CE = Opnd->getCoef(); 00800 if (CE.isMinusOne() || CE.isMinusTwo()) 00801 NegOpndNum++; 00802 00803 // Let the addend be "c * x". If "c == +/-1", the value of the addend 00804 // is immediately available; otherwise, it needs exactly one instruction 00805 // to evaluate the value. 00806 if (!CE.isMinusOne() && !CE.isOne()) 00807 InstrNeeded++; 00808 } 00809 if (NegOpndNum == OpndNum) 00810 InstrNeeded++; 00811 return InstrNeeded; 00812 } 00813 00814 // Input Addend Value NeedNeg(output) 00815 // ================================================================ 00816 // Constant C C false 00817 // <+/-1, V> V coefficient is -1 00818 // <2/-2, V> "fadd V, V" coefficient is -2 00819 // <C, V> "fmul V, C" false 00820 // 00821 // NOTE: Keep this function in sync with FAddCombine::calcInstrNumber. 00822 Value *FAddCombine::createAddendVal 00823 (const FAddend &Opnd, bool &NeedNeg) { 00824 const FAddendCoef &Coeff = Opnd.getCoef(); 00825 00826 if (Opnd.isConstant()) { 00827 NeedNeg = false; 00828 return Coeff.getValue(Instr->getType()); 00829 } 00830 00831 Value *OpndVal = Opnd.getSymVal(); 00832 00833 if (Coeff.isMinusOne() || Coeff.isOne()) { 00834 NeedNeg = Coeff.isMinusOne(); 00835 return OpndVal; 00836 } 00837 00838 if (Coeff.isTwo() || Coeff.isMinusTwo()) { 00839 NeedNeg = Coeff.isMinusTwo(); 00840 return createFAdd(OpndVal, OpndVal); 00841 } 00842 00843 NeedNeg = false; 00844 return createFMul(OpndVal, Coeff.getValue(Instr->getType())); 00845 } 00846 00847 /// AddOne - Add one to a ConstantInt. 00848 static Constant *AddOne(Constant *C) { 00849 return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1)); 00850 } 00851 00852 /// SubOne - Subtract one from a ConstantInt. 00853 static Constant *SubOne(ConstantInt *C) { 00854 return ConstantInt::get(C->getContext(), C->getValue()-1); 00855 } 00856 00857 00858 // dyn_castFoldableMul - If this value is a multiply that can be folded into 00859 // other computations (because it has a constant operand), return the 00860 // non-constant operand of the multiply, and set CST to point to the multiplier. 00861 // Otherwise, return null. 00862 // 00863 static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) { 00864 if (!V->hasOneUse() || !V->getType()->isIntegerTy()) 00865 return 0; 00866 00867 Instruction *I = dyn_cast<Instruction>(V); 00868 if (I == 0) return 0; 00869 00870 if (I->getOpcode() == Instruction::Mul) 00871 if ((CST = dyn_cast<ConstantInt>(I->getOperand(1)))) 00872 return I->getOperand(0); 00873 if (I->getOpcode() == Instruction::Shl) 00874 if ((CST = dyn_cast<ConstantInt>(I->getOperand(1)))) { 00875 // The multiplier is really 1 << CST. 00876 uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth(); 00877 uint32_t CSTVal = CST->getLimitedValue(BitWidth); 00878 CST = ConstantInt::get(V->getType()->getContext(), 00879 APInt(BitWidth, 1).shl(CSTVal)); 00880 return I->getOperand(0); 00881 } 00882 return 0; 00883 } 00884 00885 00886 /// WillNotOverflowSignedAdd - Return true if we can prove that: 00887 /// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS)) 00888 /// This basically requires proving that the add in the original type would not 00889 /// overflow to change the sign bit or have a carry out. 00890 bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) { 00891 // There are different heuristics we can use for this. Here are some simple 00892 // ones. 00893 00894 // Add has the property that adding any two 2's complement numbers can only 00895 // have one carry bit which can change a sign. As such, if LHS and RHS each 00896 // have at least two sign bits, we know that the addition of the two values 00897 // will sign extend fine. 00898 if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1) 00899 return true; 00900 00901 00902 // If one of the operands only has one non-zero bit, and if the other operand 00903 // has a known-zero bit in a more significant place than it (not including the 00904 // sign bit) the ripple may go up to and fill the zero, but won't change the 00905 // sign. For example, (X & ~4) + 1. 00906 00907 // TODO: Implement. 00908 00909 return false; 00910 } 00911 00912 Instruction *InstCombiner::visitAdd(BinaryOperator &I) { 00913 bool Changed = SimplifyAssociativeOrCommutative(I); 00914 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); 00915 00916 if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), 00917 I.hasNoUnsignedWrap(), TD)) 00918 return ReplaceInstUsesWith(I, V); 00919 00920 // (A*B)+(A*C) -> A*(B+C) etc 00921 if (Value *V = SimplifyUsingDistributiveLaws(I)) 00922 return ReplaceInstUsesWith(I, V); 00923 00924 if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { 00925 // X + (signbit) --> X ^ signbit 00926 const APInt &Val = CI->getValue(); 00927 if (Val.isSignBit()) 00928 return BinaryOperator::CreateXor(LHS, RHS); 00929 00930 // See if SimplifyDemandedBits can simplify this. This handles stuff like 00931 // (X & 254)+1 -> (X&254)|1 00932 if (SimplifyDemandedInstructionBits(I)) 00933 return &I; 00934 00935 // zext(bool) + C -> bool ? C + 1 : C 00936 if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS)) 00937 if (ZI->getSrcTy()->isIntegerTy(1)) 00938 return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI); 00939 00940 Value *XorLHS = 0; ConstantInt *XorRHS = 0; 00941 if (match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) { 00942 uint32_t TySizeBits = I.getType()->getScalarSizeInBits(); 00943 const APInt &RHSVal = CI->getValue(); 00944 unsigned ExtendAmt = 0; 00945 // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext. 00946 // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext. 00947 if (XorRHS->getValue() == -RHSVal) { 00948 if (RHSVal.isPowerOf2()) 00949 ExtendAmt = TySizeBits - RHSVal.logBase2() - 1; 00950 else if (XorRHS->getValue().isPowerOf2()) 00951 ExtendAmt = TySizeBits - XorRHS->getValue().logBase2() - 1; 00952 } 00953 00954 if (ExtendAmt) { 00955 APInt Mask = APInt::getHighBitsSet(TySizeBits, ExtendAmt); 00956 if (!MaskedValueIsZero(XorLHS, Mask)) 00957 ExtendAmt = 0; 00958 } 00959 00960 if (ExtendAmt) { 00961 Constant *ShAmt = ConstantInt::get(I.getType(), ExtendAmt); 00962 Value *NewShl = Builder->CreateShl(XorLHS, ShAmt, "sext"); 00963 return BinaryOperator::CreateAShr(NewShl, ShAmt); 00964 } 00965 00966 // If this is a xor that was canonicalized from a sub, turn it back into 00967 // a sub and fuse this add with it. 00968 if (LHS->hasOneUse() && (XorRHS->getValue()+1).isPowerOf2()) { 00969 IntegerType *IT = cast<IntegerType>(I.getType()); 00970 APInt LHSKnownOne(IT->getBitWidth(), 0); 00971 APInt LHSKnownZero(IT->getBitWidth(), 0); 00972 ComputeMaskedBits(XorLHS, LHSKnownZero, LHSKnownOne); 00973 if ((XorRHS->getValue() | LHSKnownZero).isAllOnesValue()) 00974 return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI), 00975 XorLHS); 00976 } 00977 // (X + signbit) + C could have gotten canonicalized to (X ^ signbit) + C, 00978 // transform them into (X + (signbit ^ C)) 00979 if (XorRHS->getValue().isSignBit()) 00980 return BinaryOperator::CreateAdd(XorLHS, 00981 ConstantExpr::getXor(XorRHS, CI)); 00982 } 00983 } 00984 00985 if (isa<Constant>(RHS) && isa<PHINode>(LHS)) 00986 if (Instruction *NV = FoldOpIntoPhi(I)) 00987 return NV; 00988 00989 if (I.getType()->isIntegerTy(1)) 00990 return BinaryOperator::CreateXor(LHS, RHS); 00991 00992 // X + X --> X << 1 00993 if (LHS == RHS) { 00994 BinaryOperator *New = 00995 BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1)); 00996 New->setHasNoSignedWrap(I.hasNoSignedWrap()); 00997 New->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); 00998 return New; 00999 } 01000 01001 // -A + B --> B - A 01002 // -A + -B --> -(A + B) 01003 if (Value *LHSV = dyn_castNegVal(LHS)) { 01004 if (!isa<Constant>(RHS)) 01005 if (Value *RHSV = dyn_castNegVal(RHS)) { 01006 Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum"); 01007 return BinaryOperator::CreateNeg(NewAdd); 01008 } 01009 01010 return BinaryOperator::CreateSub(RHS, LHSV); 01011 } 01012 01013 // A + -B --> A - B 01014 if (!isa<Constant>(RHS)) 01015 if (Value *V = dyn_castNegVal(RHS)) 01016 return BinaryOperator::CreateSub(LHS, V); 01017 01018 01019 ConstantInt *C2; 01020 if (Value *X = dyn_castFoldableMul(LHS, C2)) { 01021 if (X == RHS) // X*C + X --> X * (C+1) 01022 return BinaryOperator::CreateMul(RHS, AddOne(C2)); 01023 01024 // X*C1 + X*C2 --> X * (C1+C2) 01025 ConstantInt *C1; 01026 if (X == dyn_castFoldableMul(RHS, C1)) 01027 return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2)); 01028 } 01029 01030 // X + X*C --> X * (C+1) 01031 if (dyn_castFoldableMul(RHS, C2) == LHS) 01032 return BinaryOperator::CreateMul(LHS, AddOne(C2)); 01033 01034 // A+B --> A|B iff A and B have no bits set in common. 01035 if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) { 01036 APInt LHSKnownOne(IT->getBitWidth(), 0); 01037 APInt LHSKnownZero(IT->getBitWidth(), 0); 01038 ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne); 01039 if (LHSKnownZero != 0) { 01040 APInt RHSKnownOne(IT->getBitWidth(), 0); 01041 APInt RHSKnownZero(IT->getBitWidth(), 0); 01042 ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne); 01043 01044 // No bits in common -> bitwise or. 01045 if ((LHSKnownZero|RHSKnownZero).isAllOnesValue()) 01046 return BinaryOperator::CreateOr(LHS, RHS); 01047 } 01048 } 01049 01050 // W*X + Y*Z --> W * (X+Z) iff W == Y 01051 { 01052 Value *W, *X, *Y, *Z; 01053 if (match(LHS, m_Mul(m_Value(W), m_Value(X))) && 01054 match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) { 01055 if (W != Y) { 01056 if (W == Z) { 01057 std::swap(Y, Z); 01058 } else if (Y == X) { 01059 std::swap(W, X); 01060 } else if (X == Z) { 01061 std::swap(Y, Z); 01062 std::swap(W, X); 01063 } 01064 } 01065 01066 if (W == Y) { 01067 Value *NewAdd = Builder->CreateAdd(X, Z, LHS->getName()); 01068 return BinaryOperator::CreateMul(W, NewAdd); 01069 } 01070 } 01071 } 01072 01073 if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) { 01074 Value *X = 0; 01075 if (match(LHS, m_Not(m_Value(X)))) // ~X + C --> (C-1) - X 01076 return BinaryOperator::CreateSub(SubOne(CRHS), X); 01077 01078 // (X & FF00) + xx00 -> (X+xx00) & FF00 01079 if (LHS->hasOneUse() && 01080 match(LHS, m_And(m_Value(X), m_ConstantInt(C2))) && 01081 CRHS->getValue() == (CRHS->getValue() & C2->getValue())) { 01082 // See if all bits from the first bit set in the Add RHS up are included 01083 // in the mask. First, get the rightmost bit. 01084 const APInt &AddRHSV = CRHS->getValue(); 01085 01086 // Form a mask of all bits from the lowest bit added through the top. 01087 APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1)); 01088 01089 // See if the and mask includes all of these bits. 01090 APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue()); 01091 01092 if (AddRHSHighBits == AddRHSHighBitsAnd) { 01093 // Okay, the xform is safe. Insert the new add pronto. 01094 Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName()); 01095 return BinaryOperator::CreateAnd(NewAdd, C2); 01096 } 01097 } 01098 01099 // Try to fold constant add into select arguments. 01100 if (SelectInst *SI = dyn_cast<SelectInst>(LHS)) 01101 if (Instruction *R = FoldOpIntoSelect(I, SI)) 01102 return R; 01103 } 01104 01105 // add (select X 0 (sub n A)) A --> select X A n 01106 { 01107 SelectInst *SI = dyn_cast<SelectInst>(LHS); 01108 Value *A = RHS; 01109 if (!SI) { 01110 SI = dyn_cast<SelectInst>(RHS); 01111 A = LHS; 01112 } 01113 if (SI && SI->hasOneUse()) { 01114 Value *TV = SI->getTrueValue(); 01115 Value *FV = SI->getFalseValue(); 01116 Value *N; 01117 01118 // Can we fold the add into the argument of the select? 01119 // We check both true and false select arguments for a matching subtract. 01120 if (match(FV, m_Zero()) && match(TV, m_Sub(m_Value(N), m_Specific(A)))) 01121 // Fold the add into the true select value. 01122 return SelectInst::Create(SI->getCondition(), N, A); 01123 01124 if (match(TV, m_Zero()) && match(FV, m_Sub(m_Value(N), m_Specific(A)))) 01125 // Fold the add into the false select value. 01126 return SelectInst::Create(SI->getCondition(), A, N); 01127 } 01128 } 01129 01130 // Check for (add (sext x), y), see if we can merge this into an 01131 // integer add followed by a sext. 01132 if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) { 01133 // (add (sext x), cst) --> (sext (add x, cst')) 01134 if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) { 01135 Constant *CI = 01136 ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType()); 01137 if (LHSConv->hasOneUse() && 01138 ConstantExpr::getSExt(CI, I.getType()) == RHSC && 01139 WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) { 01140 // Insert the new, smaller add. 01141 Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), 01142 CI, "addconv"); 01143 return new SExtInst(NewAdd, I.getType()); 01144 } 01145 } 01146 01147 // (add (sext x), (sext y)) --> (sext (add int x, y)) 01148 if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) { 01149 // Only do this if x/y have the same type, if at last one of them has a 01150 // single use (so we don't increase the number of sexts), and if the 01151 // integer add will not overflow. 01152 if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&& 01153 (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && 01154 WillNotOverflowSignedAdd(LHSConv->getOperand(0), 01155 RHSConv->getOperand(0))) { 01156 // Insert the new integer add. 01157 Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), 01158 RHSConv->getOperand(0), "addconv"); 01159 return new SExtInst(NewAdd, I.getType()); 01160 } 01161 } 01162 } 01163 01164 // Check for (x & y) + (x ^ y) 01165 { 01166 Value *A = 0, *B = 0; 01167 if (match(RHS, m_Xor(m_Value(A), m_Value(B))) && 01168 (match(LHS, m_And(m_Specific(A), m_Specific(B))) || 01169 match(LHS, m_And(m_Specific(B), m_Specific(A))))) 01170 return BinaryOperator::CreateOr(A, B); 01171 01172 if (match(LHS, m_Xor(m_Value(A), m_Value(B))) && 01173 (match(RHS, m_And(m_Specific(A), m_Specific(B))) || 01174 match(RHS, m_And(m_Specific(B), m_Specific(A))))) 01175 return BinaryOperator::CreateOr(A, B); 01176 } 01177 01178 return Changed ? &I : 0; 01179 } 01180 01181 Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { 01182 bool Changed = SimplifyAssociativeOrCommutative(I); 01183 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); 01184 01185 if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), TD)) 01186 return ReplaceInstUsesWith(I, V); 01187 01188 if (isa<Constant>(RHS) && isa<PHINode>(LHS)) 01189 if (Instruction *NV = FoldOpIntoPhi(I)) 01190 return NV; 01191 01192 // -A + B --> B - A 01193 // -A + -B --> -(A + B) 01194 if (Value *LHSV = dyn_castFNegVal(LHS)) 01195 return BinaryOperator::CreateFSub(RHS, LHSV); 01196 01197 // A + -B --> A - B 01198 if (!isa<Constant>(RHS)) 01199 if (Value *V = dyn_castFNegVal(RHS)) 01200 return BinaryOperator::CreateFSub(LHS, V); 01201 01202 // Check for (fadd double (sitofp x), y), see if we can merge this into an 01203 // integer add followed by a promotion. 01204 if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) { 01205 // (fadd double (sitofp x), fpcst) --> (sitofp (add int x, intcst)) 01206 // ... if the constant fits in the integer value. This is useful for things 01207 // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer 01208 // requires a constant pool load, and generally allows the add to be better 01209 // instcombined. 01210 if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) { 01211 Constant *CI = 01212 ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType()); 01213 if (LHSConv->hasOneUse() && 01214 ConstantExpr::getSIToFP(CI, I.getType()) == CFP && 01215 WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) { 01216 // Insert the new integer add. 01217 Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), 01218 CI, "addconv"); 01219 return new SIToFPInst(NewAdd, I.getType()); 01220 } 01221 } 01222 01223 // (fadd double (sitofp x), (sitofp y)) --> (sitofp (add int x, y)) 01224 if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) { 01225 // Only do this if x/y have the same type, if at last one of them has a 01226 // single use (so we don't increase the number of int->fp conversions), 01227 // and if the integer add will not overflow. 01228 if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&& 01229 (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && 01230 WillNotOverflowSignedAdd(LHSConv->getOperand(0), 01231 RHSConv->getOperand(0))) { 01232 // Insert the new integer add. 01233 Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), 01234 RHSConv->getOperand(0),"addconv"); 01235 return new SIToFPInst(NewAdd, I.getType()); 01236 } 01237 } 01238 } 01239 01240 // select C, 0, B + select C, A, 0 -> select C, A, B 01241 { 01242 Value *A1, *B1, *C1, *A2, *B2, *C2; 01243 if (match(LHS, m_Select(m_Value(C1), m_Value(A1), m_Value(B1))) && 01244 match(RHS, m_Select(m_Value(C2), m_Value(A2), m_Value(B2)))) { 01245 if (C1 == C2) { 01246 Constant *Z1=0, *Z2=0; 01247 Value *A, *B, *C=C1; 01248 if (match(A1, m_AnyZero()) && match(B2, m_AnyZero())) { 01249 Z1 = dyn_cast<Constant>(A1); A = A2; 01250 Z2 = dyn_cast<Constant>(B2); B = B1; 01251 } else if (match(B1, m_AnyZero()) && match(A2, m_AnyZero())) { 01252 Z1 = dyn_cast<Constant>(B1); B = B2; 01253 Z2 = dyn_cast<Constant>(A2); A = A1; 01254 } 01255 01256 if (Z1 && Z2 && 01257 (I.hasNoSignedZeros() || 01258 (Z1->isNegativeZeroValue() && Z2->isNegativeZeroValue()))) { 01259 return SelectInst::Create(C, A, B); 01260 } 01261 } 01262 } 01263 } 01264 01265 if (I.hasUnsafeAlgebra()) { 01266 if (Value *V = FAddCombine(Builder).simplify(&I)) 01267 return ReplaceInstUsesWith(I, V); 01268 } 01269 01270 return Changed ? &I : 0; 01271 } 01272 01273 01274 /// Optimize pointer differences into the same array into a size. Consider: 01275 /// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer 01276 /// operands to the ptrtoint instructions for the LHS/RHS of the subtract. 01277 /// 01278 Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, 01279 Type *Ty) { 01280 assert(TD && "Must have target data info for this"); 01281 01282 // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize 01283 // this. 01284 bool Swapped = false; 01285 GEPOperator *GEP1 = 0, *GEP2 = 0; 01286 01287 // For now we require one side to be the base pointer "A" or a constant 01288 // GEP derived from it. 01289 if (GEPOperator *LHSGEP = dyn_cast<GEPOperator>(LHS)) { 01290 // (gep X, ...) - X 01291 if (LHSGEP->getOperand(0) == RHS) { 01292 GEP1 = LHSGEP; 01293 Swapped = false; 01294 } else if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) { 01295 // (gep X, ...) - (gep X, ...) 01296 if (LHSGEP->getOperand(0)->stripPointerCasts() == 01297 RHSGEP->getOperand(0)->stripPointerCasts()) { 01298 GEP2 = RHSGEP; 01299 GEP1 = LHSGEP; 01300 Swapped = false; 01301 } 01302 } 01303 } 01304 01305 if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) { 01306 // X - (gep X, ...) 01307 if (RHSGEP->getOperand(0) == LHS) { 01308 GEP1 = RHSGEP; 01309 Swapped = true; 01310 } else if (GEPOperator *LHSGEP = dyn_cast<GEPOperator>(LHS)) { 01311 // (gep X, ...) - (gep X, ...) 01312 if (RHSGEP->getOperand(0)->stripPointerCasts() == 01313 LHSGEP->getOperand(0)->stripPointerCasts()) { 01314 GEP2 = LHSGEP; 01315 GEP1 = RHSGEP; 01316 Swapped = true; 01317 } 01318 } 01319 } 01320 01321 // Avoid duplicating the arithmetic if GEP2 has non-constant indices and 01322 // multiple users. 01323 if (GEP1 == 0 || 01324 (GEP2 != 0 && !GEP2->hasAllConstantIndices() && !GEP2->hasOneUse())) 01325 return 0; 01326 01327 // Emit the offset of the GEP and an intptr_t. 01328 Value *Result = EmitGEPOffset(GEP1); 01329 01330 // If we had a constant expression GEP on the other side offsetting the 01331 // pointer, subtract it from the offset we have. 01332 if (GEP2) { 01333 Value *Offset = EmitGEPOffset(GEP2); 01334 Result = Builder->CreateSub(Result, Offset); 01335 } 01336 01337 // If we have p - gep(p, ...) then we have to negate the result. 01338 if (Swapped) 01339 Result = Builder->CreateNeg(Result, "diff.neg"); 01340 01341 return Builder->CreateIntCast(Result, Ty, true); 01342 } 01343 01344 01345 Instruction *InstCombiner::visitSub(BinaryOperator &I) { 01346 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 01347 01348 if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(), 01349 I.hasNoUnsignedWrap(), TD)) 01350 return ReplaceInstUsesWith(I, V); 01351 01352 // (A*B)-(A*C) -> A*(B-C) etc 01353 if (Value *V = SimplifyUsingDistributiveLaws(I)) 01354 return ReplaceInstUsesWith(I, V); 01355 01356 // If this is a 'B = x-(-A)', change to B = x+A. This preserves NSW/NUW. 01357 if (Value *V = dyn_castNegVal(Op1)) { 01358 BinaryOperator *Res = BinaryOperator::CreateAdd(Op0, V); 01359 Res->setHasNoSignedWrap(I.hasNoSignedWrap()); 01360 Res->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); 01361 return Res; 01362 } 01363 01364 if (I.getType()->isIntegerTy(1)) 01365 return BinaryOperator::CreateXor(Op0, Op1); 01366 01367 // Replace (-1 - A) with (~A). 01368 if (match(Op0, m_AllOnes())) 01369 return BinaryOperator::CreateNot(Op1); 01370 01371 if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) { 01372 // C - ~X == X + (1+C) 01373 Value *X = 0; 01374 if (match(Op1, m_Not(m_Value(X)))) 01375 return BinaryOperator::CreateAdd(X, AddOne(C)); 01376 01377 // -(X >>u 31) -> (X >>s 31) 01378 // -(X >>s 31) -> (X >>u 31) 01379 if (C->isZero()) { 01380 Value *X; ConstantInt *CI; 01381 if (match(Op1, m_LShr(m_Value(X), m_ConstantInt(CI))) && 01382 // Verify we are shifting out everything but the sign bit. 01383 CI->getValue() == I.getType()->getPrimitiveSizeInBits()-1) 01384 return BinaryOperator::CreateAShr(X, CI); 01385 01386 if (match(Op1, m_AShr(m_Value(X), m_ConstantInt(CI))) && 01387 // Verify we are shifting out everything but the sign bit. 01388 CI->getValue() == I.getType()->getPrimitiveSizeInBits()-1) 01389 return BinaryOperator::CreateLShr(X, CI); 01390 } 01391 01392 // Try to fold constant sub into select arguments. 01393 if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) 01394 if (Instruction *R = FoldOpIntoSelect(I, SI)) 01395 return R; 01396 01397 // C-(X+C2) --> (C-C2)-X 01398 ConstantInt *C2; 01399 if (match(Op1, m_Add(m_Value(X), m_ConstantInt(C2)))) 01400 return BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X); 01401 01402 if (SimplifyDemandedInstructionBits(I)) 01403 return &I; 01404 01405 // Fold (sub 0, (zext bool to B)) --> (sext bool to B) 01406 if (C->isZero() && match(Op1, m_ZExt(m_Value(X)))) 01407 if (X->getType()->isIntegerTy(1)) 01408 return CastInst::CreateSExtOrBitCast(X, Op1->getType()); 01409 01410 // Fold (sub 0, (sext bool to B)) --> (zext bool to B) 01411 if (C->isZero() && match(Op1, m_SExt(m_Value(X)))) 01412 if (X->getType()->isIntegerTy(1)) 01413 return CastInst::CreateZExtOrBitCast(X, Op1->getType()); 01414 } 01415 01416 01417 { Value *Y; 01418 // X-(X+Y) == -Y X-(Y+X) == -Y 01419 if (match(Op1, m_Add(m_Specific(Op0), m_Value(Y))) || 01420 match(Op1, m_Add(m_Value(Y), m_Specific(Op0)))) 01421 return BinaryOperator::CreateNeg(Y); 01422 01423 // (X-Y)-X == -Y 01424 if (match(Op0, m_Sub(m_Specific(Op1), m_Value(Y)))) 01425 return BinaryOperator::CreateNeg(Y); 01426 } 01427 01428 if (Op1->hasOneUse()) { 01429 Value *X = 0, *Y = 0, *Z = 0; 01430 Constant *C = 0; 01431 ConstantInt *CI = 0; 01432 01433 // (X - (Y - Z)) --> (X + (Z - Y)). 01434 if (match(Op1, m_Sub(m_Value(Y), m_Value(Z)))) 01435 return BinaryOperator::CreateAdd(Op0, 01436 Builder->CreateSub(Z, Y, Op1->getName())); 01437 01438 // (X - (X & Y)) --> (X & ~Y) 01439 // 01440 if (match(Op1, m_And(m_Value(Y), m_Specific(Op0))) || 01441 match(Op1, m_And(m_Specific(Op0), m_Value(Y)))) 01442 return BinaryOperator::CreateAnd(Op0, 01443 Builder->CreateNot(Y, Y->getName() + ".not")); 01444 01445 // 0 - (X sdiv C) -> (X sdiv -C) 01446 if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) && 01447 match(Op0, m_Zero())) 01448 return BinaryOperator::CreateSDiv(X, ConstantExpr::getNeg(C)); 01449 01450 // 0 - (X << Y) -> (-X << Y) when X is freely negatable. 01451 if (match(Op1, m_Shl(m_Value(X), m_Value(Y))) && match(Op0, m_Zero())) 01452 if (Value *XNeg = dyn_castNegVal(X)) 01453 return BinaryOperator::CreateShl(XNeg, Y); 01454 01455 // X - X*C --> X * (1-C) 01456 if (match(Op1, m_Mul(m_Specific(Op0), m_ConstantInt(CI)))) { 01457 Constant *CP1 = ConstantExpr::getSub(ConstantInt::get(I.getType(),1), CI); 01458 return BinaryOperator::CreateMul(Op0, CP1); 01459 } 01460 01461 // X - X<<C --> X * (1-(1<<C)) 01462 if (match(Op1, m_Shl(m_Specific(Op0), m_ConstantInt(CI)))) { 01463 Constant *One = ConstantInt::get(I.getType(), 1); 01464 C = ConstantExpr::getSub(One, ConstantExpr::getShl(One, CI)); 01465 return BinaryOperator::CreateMul(Op0, C); 01466 } 01467 01468 // X - A*-B -> X + A*B 01469 // X - -A*B -> X + A*B 01470 Value *A, *B; 01471 if (match(Op1, m_Mul(m_Value(A), m_Neg(m_Value(B)))) || 01472 match(Op1, m_Mul(m_Neg(m_Value(A)), m_Value(B)))) 01473 return BinaryOperator::CreateAdd(Op0, Builder->CreateMul(A, B)); 01474 01475 // X - A*CI -> X + A*-CI 01476 // X - CI*A -> X + A*-CI 01477 if (match(Op1, m_Mul(m_Value(A), m_ConstantInt(CI))) || 01478 match(Op1, m_Mul(m_ConstantInt(CI), m_Value(A)))) { 01479 Value *NewMul = Builder->CreateMul(A, ConstantExpr::getNeg(CI)); 01480 return BinaryOperator::CreateAdd(Op0, NewMul); 01481 } 01482 } 01483 01484 ConstantInt *C1; 01485 if (Value *X = dyn_castFoldableMul(Op0, C1)) { 01486 if (X == Op1) // X*C - X --> X * (C-1) 01487 return BinaryOperator::CreateMul(Op1, SubOne(C1)); 01488 01489 ConstantInt *C2; // X*C1 - X*C2 -> X * (C1-C2) 01490 if (X == dyn_castFoldableMul(Op1, C2)) 01491 return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2)); 01492 } 01493 01494 // Optimize pointer differences into the same array into a size. Consider: 01495 // &A[10] - &A[0]: we should compile this to "10". 01496 if (TD) { 01497 Value *LHSOp, *RHSOp; 01498 if (match(Op0, m_PtrToInt(m_Value(LHSOp))) && 01499 match(Op1, m_PtrToInt(m_Value(RHSOp)))) 01500 if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) 01501 return ReplaceInstUsesWith(I, Res); 01502 01503 // trunc(p)-trunc(q) -> trunc(p-q) 01504 if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) && 01505 match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp))))) 01506 if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) 01507 return ReplaceInstUsesWith(I, Res); 01508 } 01509 01510 return 0; 01511 } 01512 01513 Instruction *InstCombiner::visitFSub(BinaryOperator &I) { 01514 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 01515 01516 if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), TD)) 01517 return ReplaceInstUsesWith(I, V); 01518 01519 // If this is a 'B = x-(-A)', change to B = x+A... 01520 if (Value *V = dyn_castFNegVal(Op1)) 01521 return BinaryOperator::CreateFAdd(Op0, V); 01522 01523 if (I.hasUnsafeAlgebra()) { 01524 if (Value *V = FAddCombine(Builder).simplify(&I)) 01525 return ReplaceInstUsesWith(I, V); 01526 } 01527 01528 return 0; 01529 }