// LLVM API Documentation
00001 //===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run 00011 // both before and after the DAG is legalized. 00012 // 00013 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is 00014 // primarily intended to handle simplification opportunities that are implicit 00015 // in the LLVM IR and exposed by the various codegen lowering phases. 00016 // 00017 //===----------------------------------------------------------------------===// 00018 00019 #define DEBUG_TYPE "dagcombine" 00020 #include "llvm/CodeGen/SelectionDAG.h" 00021 #include "llvm/ADT/SmallPtrSet.h" 00022 #include "llvm/ADT/Statistic.h" 00023 #include "llvm/Analysis/AliasAnalysis.h" 00024 #include "llvm/CodeGen/MachineFrameInfo.h" 00025 #include "llvm/CodeGen/MachineFunction.h" 00026 #include "llvm/IR/DataLayout.h" 00027 #include "llvm/IR/DerivedTypes.h" 00028 #include "llvm/IR/Function.h" 00029 #include "llvm/IR/LLVMContext.h" 00030 #include "llvm/Support/CommandLine.h" 00031 #include "llvm/Support/Debug.h" 00032 #include "llvm/Support/ErrorHandling.h" 00033 #include "llvm/Support/MathExtras.h" 00034 #include "llvm/Support/raw_ostream.h" 00035 #include "llvm/Target/TargetLowering.h" 00036 #include "llvm/Target/TargetMachine.h" 00037 #include "llvm/Target/TargetOptions.h" 00038 #include <algorithm> 00039 using namespace llvm; 00040 00041 STATISTIC(NodesCombined , "Number of dag nodes combined"); 00042 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); 00043 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); 00044 STATISTIC(OpsNarrowed , "Number of 
load/op/store narrowed"); 00045 STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int"); 00046 00047 namespace { 00048 static cl::opt<bool> 00049 CombinerAA("combiner-alias-analysis", cl::Hidden, 00050 cl::desc("Turn on alias analysis during testing")); 00051 00052 static cl::opt<bool> 00053 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, 00054 cl::desc("Include global information in alias analysis")); 00055 00056 //------------------------------ DAGCombiner ---------------------------------// 00057 00058 class DAGCombiner { 00059 SelectionDAG &DAG; 00060 const TargetLowering &TLI; 00061 CombineLevel Level; 00062 CodeGenOpt::Level OptLevel; 00063 bool LegalOperations; 00064 bool LegalTypes; 00065 00066 // Worklist of all of the nodes that need to be simplified. 00067 // 00068 // This has the semantics that when adding to the worklist, 00069 // the item added must be next to be processed. It should 00070 // also only appear once. The naive approach to this takes 00071 // linear time. 00072 // 00073 // To reduce the insert/remove time to logarithmic, we use 00074 // a set and a vector to maintain our worklist. 00075 // 00076 // The set contains the items on the worklist, but does not 00077 // maintain the order they should be visited. 00078 // 00079 // The vector maintains the order nodes should be visited, but may 00080 // contain duplicate or removed nodes. When choosing a node to 00081 // visit, we pop off the order stack until we find an item that is 00082 // also in the contents set. All operations are O(log N). 00083 SmallPtrSet<SDNode*, 64> WorkListContents; 00084 SmallVector<SDNode*, 64> WorkListOrder; 00085 00086 // AA - Used for DAG load/store alias analysis. 00087 AliasAnalysis &AA; 00088 00089 /// AddUsersToWorkList - When an instruction is simplified, add all users of 00090 /// the instruction to the work lists because they might get more simplified 00091 /// now. 
00092 /// 00093 void AddUsersToWorkList(SDNode *N) { 00094 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); 00095 UI != UE; ++UI) 00096 AddToWorkList(*UI); 00097 } 00098 00099 /// visit - call the node-specific routine that knows how to fold each 00100 /// particular type of node. 00101 SDValue visit(SDNode *N); 00102 00103 public: 00104 /// AddToWorkList - Add to the work list making sure its instance is at the 00105 /// back (next to be processed.) 00106 void AddToWorkList(SDNode *N) { 00107 WorkListContents.insert(N); 00108 WorkListOrder.push_back(N); 00109 } 00110 00111 /// removeFromWorkList - remove all instances of N from the worklist. 00112 /// 00113 void removeFromWorkList(SDNode *N) { 00114 WorkListContents.erase(N); 00115 } 00116 00117 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, 00118 bool AddTo = true); 00119 00120 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) { 00121 return CombineTo(N, &Res, 1, AddTo); 00122 } 00123 00124 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, 00125 bool AddTo = true) { 00126 SDValue To[] = { Res0, Res1 }; 00127 return CombineTo(N, To, 2, AddTo); 00128 } 00129 00130 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO); 00131 00132 private: 00133 00134 /// SimplifyDemandedBits - Check the specified integer node value to see if 00135 /// it can be simplified or if things it uses can be simplified by bit 00136 /// propagation. If so, return true. 
00137 bool SimplifyDemandedBits(SDValue Op) { 00138 unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); 00139 APInt Demanded = APInt::getAllOnesValue(BitWidth); 00140 return SimplifyDemandedBits(Op, Demanded); 00141 } 00142 00143 bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded); 00144 00145 bool CombineToPreIndexedLoadStore(SDNode *N); 00146 bool CombineToPostIndexedLoadStore(SDNode *N); 00147 00148 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); 00149 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace); 00150 SDValue SExtPromoteOperand(SDValue Op, EVT PVT); 00151 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT); 00152 SDValue PromoteIntBinOp(SDValue Op); 00153 SDValue PromoteIntShiftOp(SDValue Op); 00154 SDValue PromoteExtend(SDValue Op); 00155 bool PromoteLoad(SDValue Op); 00156 00157 void ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs, 00158 SDValue Trunc, SDValue ExtLoad, SDLoc DL, 00159 ISD::NodeType ExtType); 00160 00161 /// combine - call the node-specific routine that knows how to fold each 00162 /// particular type of node. If that doesn't do anything, try the 00163 /// target-specific DAG combines. 00164 SDValue combine(SDNode *N); 00165 00166 // Visitation implementation - Implement dag node combining for different 00167 // node types. The semantics are as follows: 00168 // Return Value: 00169 // SDValue.getNode() == 0 - No change was made 00170 // SDValue.getNode() == N - N was replaced, is dead and has been handled. 00171 // otherwise - N should be replaced by the returned Operand. 
00172 // 00173 SDValue visitTokenFactor(SDNode *N); 00174 SDValue visitMERGE_VALUES(SDNode *N); 00175 SDValue visitADD(SDNode *N); 00176 SDValue visitSUB(SDNode *N); 00177 SDValue visitADDC(SDNode *N); 00178 SDValue visitSUBC(SDNode *N); 00179 SDValue visitADDE(SDNode *N); 00180 SDValue visitSUBE(SDNode *N); 00181 SDValue visitMUL(SDNode *N); 00182 SDValue visitSDIV(SDNode *N); 00183 SDValue visitUDIV(SDNode *N); 00184 SDValue visitSREM(SDNode *N); 00185 SDValue visitUREM(SDNode *N); 00186 SDValue visitMULHU(SDNode *N); 00187 SDValue visitMULHS(SDNode *N); 00188 SDValue visitSMUL_LOHI(SDNode *N); 00189 SDValue visitUMUL_LOHI(SDNode *N); 00190 SDValue visitSMULO(SDNode *N); 00191 SDValue visitUMULO(SDNode *N); 00192 SDValue visitSDIVREM(SDNode *N); 00193 SDValue visitUDIVREM(SDNode *N); 00194 SDValue visitAND(SDNode *N); 00195 SDValue visitOR(SDNode *N); 00196 SDValue visitXOR(SDNode *N); 00197 SDValue SimplifyVBinOp(SDNode *N); 00198 SDValue SimplifyVUnaryOp(SDNode *N); 00199 SDValue visitSHL(SDNode *N); 00200 SDValue visitSRA(SDNode *N); 00201 SDValue visitSRL(SDNode *N); 00202 SDValue visitCTLZ(SDNode *N); 00203 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N); 00204 SDValue visitCTTZ(SDNode *N); 00205 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N); 00206 SDValue visitCTPOP(SDNode *N); 00207 SDValue visitSELECT(SDNode *N); 00208 SDValue visitVSELECT(SDNode *N); 00209 SDValue visitSELECT_CC(SDNode *N); 00210 SDValue visitSETCC(SDNode *N); 00211 SDValue visitSIGN_EXTEND(SDNode *N); 00212 SDValue visitZERO_EXTEND(SDNode *N); 00213 SDValue visitANY_EXTEND(SDNode *N); 00214 SDValue visitSIGN_EXTEND_INREG(SDNode *N); 00215 SDValue visitTRUNCATE(SDNode *N); 00216 SDValue visitBITCAST(SDNode *N); 00217 SDValue visitBUILD_PAIR(SDNode *N); 00218 SDValue visitFADD(SDNode *N); 00219 SDValue visitFSUB(SDNode *N); 00220 SDValue visitFMUL(SDNode *N); 00221 SDValue visitFMA(SDNode *N); 00222 SDValue visitFDIV(SDNode *N); 00223 SDValue visitFREM(SDNode *N); 00224 SDValue visitFCOPYSIGN(SDNode 
*N); 00225 SDValue visitSINT_TO_FP(SDNode *N); 00226 SDValue visitUINT_TO_FP(SDNode *N); 00227 SDValue visitFP_TO_SINT(SDNode *N); 00228 SDValue visitFP_TO_UINT(SDNode *N); 00229 SDValue visitFP_ROUND(SDNode *N); 00230 SDValue visitFP_ROUND_INREG(SDNode *N); 00231 SDValue visitFP_EXTEND(SDNode *N); 00232 SDValue visitFNEG(SDNode *N); 00233 SDValue visitFABS(SDNode *N); 00234 SDValue visitFCEIL(SDNode *N); 00235 SDValue visitFTRUNC(SDNode *N); 00236 SDValue visitFFLOOR(SDNode *N); 00237 SDValue visitBRCOND(SDNode *N); 00238 SDValue visitBR_CC(SDNode *N); 00239 SDValue visitLOAD(SDNode *N); 00240 SDValue visitSTORE(SDNode *N); 00241 SDValue visitINSERT_VECTOR_ELT(SDNode *N); 00242 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N); 00243 SDValue visitBUILD_VECTOR(SDNode *N); 00244 SDValue visitCONCAT_VECTORS(SDNode *N); 00245 SDValue visitEXTRACT_SUBVECTOR(SDNode *N); 00246 SDValue visitVECTOR_SHUFFLE(SDNode *N); 00247 00248 SDValue XformToShuffleWithZero(SDNode *N); 00249 SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS); 00250 00251 SDValue visitShiftByConstant(SDNode *N, unsigned Amt); 00252 00253 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); 00254 SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); 00255 SDValue SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2); 00256 SDValue SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue N2, 00257 SDValue N3, ISD::CondCode CC, 00258 bool NotExtCompare = false); 00259 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, 00260 SDLoc DL, bool foldBooleans = true); 00261 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, 00262 unsigned HiOp); 00263 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); 00264 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); 00265 SDValue BuildSDIV(SDNode *N); 00266 SDValue BuildUDIV(SDNode *N); 00267 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, 00268 bool DemandHighBits = true); 00269 
SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); 00270 SDNode *MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL); 00271 SDValue ReduceLoadWidth(SDNode *N); 00272 SDValue ReduceLoadOpStoreWidth(SDNode *N); 00273 SDValue TransformFPLoadStorePair(SDNode *N); 00274 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); 00275 SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N); 00276 00277 SDValue GetDemandedBits(SDValue V, const APInt &Mask); 00278 00279 /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, 00280 /// looking for aliasing nodes and adding them to the Aliases vector. 00281 void GatherAllAliases(SDNode *N, SDValue OriginalChain, 00282 SmallVector<SDValue, 8> &Aliases); 00283 00284 /// isAlias - Return true if there is any possibility that the two addresses 00285 /// overlap. 00286 bool isAlias(SDValue Ptr1, int64_t Size1, 00287 const Value *SrcValue1, int SrcValueOffset1, 00288 unsigned SrcValueAlign1, 00289 const MDNode *TBAAInfo1, 00290 SDValue Ptr2, int64_t Size2, 00291 const Value *SrcValue2, int SrcValueOffset2, 00292 unsigned SrcValueAlign2, 00293 const MDNode *TBAAInfo2) const; 00294 00295 /// isAlias - Return true if there is any possibility that the two addresses 00296 /// overlap. 00297 bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1); 00298 00299 /// FindAliasInfo - Extracts the relevant alias information from the memory 00300 /// node. Returns true if the operand was a load. 00301 bool FindAliasInfo(SDNode *N, 00302 SDValue &Ptr, int64_t &Size, 00303 const Value *&SrcValue, int &SrcValueOffset, 00304 unsigned &SrcValueAlignment, 00305 const MDNode *&TBAAInfo) const; 00306 00307 /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, 00308 /// looking for a better chain (aliasing node.) 00309 SDValue FindBetterChain(SDNode *N, SDValue Chain); 00310 00311 /// Merge consecutive store operations into a wide store. 00312 /// This optimization uses wide integers or vectors when possible. 
00313 /// \return True if some memory operations were changed. 00314 bool MergeConsecutiveStores(StoreSDNode *N); 00315 00316 public: 00317 DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) 00318 : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), 00319 OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {} 00320 00321 /// Run - runs the dag combiner on all nodes in the work list 00322 void Run(CombineLevel AtLevel); 00323 00324 SelectionDAG &getDAG() const { return DAG; } 00325 00326 /// getShiftAmountTy - Returns a type large enough to hold any valid 00327 /// shift amount - before type legalization these can be huge. 00328 EVT getShiftAmountTy(EVT LHSTy) { 00329 return LegalTypes ? TLI.getShiftAmountTy(LHSTy) : TLI.getPointerTy(); 00330 } 00331 00332 /// isTypeLegal - This method returns true if we are running before type 00333 /// legalization or if the specified VT is legal. 00334 bool isTypeLegal(const EVT &VT) { 00335 if (!LegalTypes) return true; 00336 return TLI.isTypeLegal(VT); 00337 } 00338 00339 /// getSetCCResultType - Convenience wrapper around 00340 /// TargetLowering::getSetCCResultType 00341 EVT getSetCCResultType(EVT VT) const { 00342 return TLI.getSetCCResultType(*DAG.getContext(), VT); 00343 } 00344 }; 00345 } 00346 00347 00348 namespace { 00349 /// WorkListRemover - This class is a DAGUpdateListener that removes any deleted 00350 /// nodes from the worklist. 
00351 class WorkListRemover : public SelectionDAG::DAGUpdateListener { 00352 DAGCombiner &DC; 00353 public: 00354 explicit WorkListRemover(DAGCombiner &dc) 00355 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {} 00356 00357 virtual void NodeDeleted(SDNode *N, SDNode *E) { 00358 DC.removeFromWorkList(N); 00359 } 00360 }; 00361 } 00362 00363 //===----------------------------------------------------------------------===// 00364 // TargetLowering::DAGCombinerInfo implementation 00365 //===----------------------------------------------------------------------===// 00366 00367 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { 00368 ((DAGCombiner*)DC)->AddToWorkList(N); 00369 } 00370 00371 void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) { 00372 ((DAGCombiner*)DC)->removeFromWorkList(N); 00373 } 00374 00375 SDValue TargetLowering::DAGCombinerInfo:: 00376 CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) { 00377 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); 00378 } 00379 00380 SDValue TargetLowering::DAGCombinerInfo:: 00381 CombineTo(SDNode *N, SDValue Res, bool AddTo) { 00382 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo); 00383 } 00384 00385 00386 SDValue TargetLowering::DAGCombinerInfo:: 00387 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) { 00388 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo); 00389 } 00390 00391 void TargetLowering::DAGCombinerInfo:: 00392 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { 00393 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO); 00394 } 00395 00396 //===----------------------------------------------------------------------===// 00397 // Helper Functions 00398 //===----------------------------------------------------------------------===// 00399 00400 /// isNegatibleForFree - Return 1 if we can compute the negated form of the 00401 /// specified expression for the same cost as the expression itself, or 
2 if we 00402 /// can compute the negated form more cheaply than the expression itself. 00403 static char isNegatibleForFree(SDValue Op, bool LegalOperations, 00404 const TargetLowering &TLI, 00405 const TargetOptions *Options, 00406 unsigned Depth = 0) { 00407 // fneg is removable even if it has multiple uses. 00408 if (Op.getOpcode() == ISD::FNEG) return 2; 00409 00410 // Don't allow anything with multiple uses. 00411 if (!Op.hasOneUse()) return 0; 00412 00413 // Don't recurse exponentially. 00414 if (Depth > 6) return 0; 00415 00416 switch (Op.getOpcode()) { 00417 default: return false; 00418 case ISD::ConstantFP: 00419 // Don't invert constant FP values after legalize. The negated constant 00420 // isn't necessarily legal. 00421 return LegalOperations ? 0 : 1; 00422 case ISD::FADD: 00423 // FIXME: determine better conditions for this xform. 00424 if (!Options->UnsafeFPMath) return 0; 00425 00426 // After operation legalization, it might not be legal to create new FSUBs. 00427 if (LegalOperations && 00428 !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) 00429 return 0; 00430 00431 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) 00432 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, 00433 Options, Depth + 1)) 00434 return V; 00435 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) 00436 return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, 00437 Depth + 1); 00438 case ISD::FSUB: 00439 // We can't turn -(A-B) into B-A when we honor signed zeros. 
00440 if (!Options->UnsafeFPMath) return 0; 00441 00442 // fold (fneg (fsub A, B)) -> (fsub B, A) 00443 return 1; 00444 00445 case ISD::FMUL: 00446 case ISD::FDIV: 00447 if (Options->HonorSignDependentRoundingFPMath()) return 0; 00448 00449 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) 00450 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, 00451 Options, Depth + 1)) 00452 return V; 00453 00454 return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, 00455 Depth + 1); 00456 00457 case ISD::FP_EXTEND: 00458 case ISD::FP_ROUND: 00459 case ISD::FSIN: 00460 return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, 00461 Depth + 1); 00462 } 00463 } 00464 00465 /// GetNegatedExpression - If isNegatibleForFree returns true, this function 00466 /// returns the newly negated expression. 00467 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, 00468 bool LegalOperations, unsigned Depth = 0) { 00469 // fneg is removable even if it has multiple uses. 00470 if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0); 00471 00472 // Don't allow anything with multiple uses. 00473 assert(Op.hasOneUse() && "Unknown reuse!"); 00474 00475 assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); 00476 switch (Op.getOpcode()) { 00477 default: llvm_unreachable("Unknown code"); 00478 case ISD::ConstantFP: { 00479 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF(); 00480 V.changeSign(); 00481 return DAG.getConstantFP(V, Op.getValueType()); 00482 } 00483 case ISD::FADD: 00484 // FIXME: determine better conditions for this xform. 
00485 assert(DAG.getTarget().Options.UnsafeFPMath); 00486 00487 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) 00488 if (isNegatibleForFree(Op.getOperand(0), LegalOperations, 00489 DAG.getTargetLoweringInfo(), 00490 &DAG.getTarget().Options, Depth+1)) 00491 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), 00492 GetNegatedExpression(Op.getOperand(0), DAG, 00493 LegalOperations, Depth+1), 00494 Op.getOperand(1)); 00495 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) 00496 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), 00497 GetNegatedExpression(Op.getOperand(1), DAG, 00498 LegalOperations, Depth+1), 00499 Op.getOperand(0)); 00500 case ISD::FSUB: 00501 // We can't turn -(A-B) into B-A when we honor signed zeros. 00502 assert(DAG.getTarget().Options.UnsafeFPMath); 00503 00504 // fold (fneg (fsub 0, B)) -> B 00505 if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0))) 00506 if (N0CFP->getValueAPF().isZero()) 00507 return Op.getOperand(1); 00508 00509 // fold (fneg (fsub A, B)) -> (fsub B, A) 00510 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), 00511 Op.getOperand(1), Op.getOperand(0)); 00512 00513 case ISD::FMUL: 00514 case ISD::FDIV: 00515 assert(!DAG.getTarget().Options.HonorSignDependentRoundingFPMath()); 00516 00517 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) 00518 if (isNegatibleForFree(Op.getOperand(0), LegalOperations, 00519 DAG.getTargetLoweringInfo(), 00520 &DAG.getTarget().Options, Depth+1)) 00521 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), 00522 GetNegatedExpression(Op.getOperand(0), DAG, 00523 LegalOperations, Depth+1), 00524 Op.getOperand(1)); 00525 00526 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) 00527 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), 00528 Op.getOperand(0), 00529 GetNegatedExpression(Op.getOperand(1), DAG, 00530 LegalOperations, Depth+1)); 00531 00532 case ISD::FP_EXTEND: 00533 case ISD::FSIN: 00534 return 
DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), 00535 GetNegatedExpression(Op.getOperand(0), DAG, 00536 LegalOperations, Depth+1)); 00537 case ISD::FP_ROUND: 00538 return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(), 00539 GetNegatedExpression(Op.getOperand(0), DAG, 00540 LegalOperations, Depth+1), 00541 Op.getOperand(1)); 00542 } 00543 } 00544 00545 00546 // isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc 00547 // that selects between the values 1 and 0, making it equivalent to a setcc. 00548 // Also, set the incoming LHS, RHS, and CC references to the appropriate 00549 // nodes based on the type of node we are checking. This simplifies life a 00550 // bit for the callers. 00551 static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, 00552 SDValue &CC) { 00553 if (N.getOpcode() == ISD::SETCC) { 00554 LHS = N.getOperand(0); 00555 RHS = N.getOperand(1); 00556 CC = N.getOperand(2); 00557 return true; 00558 } 00559 if (N.getOpcode() == ISD::SELECT_CC && 00560 N.getOperand(2).getOpcode() == ISD::Constant && 00561 N.getOperand(3).getOpcode() == ISD::Constant && 00562 cast<ConstantSDNode>(N.getOperand(2))->getAPIntValue() == 1 && 00563 cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) { 00564 LHS = N.getOperand(0); 00565 RHS = N.getOperand(1); 00566 CC = N.getOperand(4); 00567 return true; 00568 } 00569 return false; 00570 } 00571 00572 // isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only 00573 // one use. If this is true, it allows the users to invert the operation for 00574 // free when it is profitable to do so. 
00575 static bool isOneUseSetCC(SDValue N) { 00576 SDValue N0, N1, N2; 00577 if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse()) 00578 return true; 00579 return false; 00580 } 00581 00582 SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, 00583 SDValue N0, SDValue N1) { 00584 EVT VT = N0.getValueType(); 00585 if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) { 00586 if (isa<ConstantSDNode>(N1)) { 00587 // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) 00588 SDValue OpNode = 00589 DAG.FoldConstantArithmetic(Opc, VT, 00590 cast<ConstantSDNode>(N0.getOperand(1)), 00591 cast<ConstantSDNode>(N1)); 00592 return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); 00593 } 00594 if (N0.hasOneUse()) { 00595 // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use 00596 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, 00597 N0.getOperand(0), N1); 00598 AddToWorkList(OpNode.getNode()); 00599 return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); 00600 } 00601 } 00602 00603 if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) { 00604 if (isa<ConstantSDNode>(N0)) { 00605 // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) 00606 SDValue OpNode = 00607 DAG.FoldConstantArithmetic(Opc, VT, 00608 cast<ConstantSDNode>(N1.getOperand(1)), 00609 cast<ConstantSDNode>(N0)); 00610 return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); 00611 } 00612 if (N1.hasOneUse()) { 00613 // reassoc. 
(op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use 00614 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, 00615 N1.getOperand(0), N0); 00616 AddToWorkList(OpNode.getNode()); 00617 return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1)); 00618 } 00619 } 00620 00621 return SDValue(); 00622 } 00623 00624 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, 00625 bool AddTo) { 00626 assert(N->getNumValues() == NumTo && "Broken CombineTo call!"); 00627 ++NodesCombined; 00628 DEBUG(dbgs() << "\nReplacing.1 "; 00629 N->dump(&DAG); 00630 dbgs() << "\nWith: "; 00631 To[0].getNode()->dump(&DAG); 00632 dbgs() << " and " << NumTo-1 << " other values\n"; 00633 for (unsigned i = 0, e = NumTo; i != e; ++i) 00634 assert((!To[i].getNode() || 00635 N->getValueType(i) == To[i].getValueType()) && 00636 "Cannot combine value to value of different type!")); 00637 WorkListRemover DeadNodes(*this); 00638 DAG.ReplaceAllUsesWith(N, To); 00639 if (AddTo) { 00640 // Push the new nodes and any users onto the worklist 00641 for (unsigned i = 0, e = NumTo; i != e; ++i) { 00642 if (To[i].getNode()) { 00643 AddToWorkList(To[i].getNode()); 00644 AddUsersToWorkList(To[i].getNode()); 00645 } 00646 } 00647 } 00648 00649 // Finally, if the node is now dead, remove it from the graph. The node 00650 // may not be dead if the replacement process recursively simplified to 00651 // something else needing this node. 00652 if (N->use_empty()) { 00653 // Nodes can be reintroduced into the worklist. Make sure we do not 00654 // process a node that has been replaced. 00655 removeFromWorkList(N); 00656 00657 // Finally, since the node is now dead, remove it from the graph. 00658 DAG.DeleteNode(N); 00659 } 00660 return SDValue(N, 0); 00661 } 00662 00663 void DAGCombiner:: 00664 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { 00665 // Replace all uses. 
If any nodes become isomorphic to other nodes and 00666 // are deleted, make sure to remove them from our worklist. 00667 WorkListRemover DeadNodes(*this); 00668 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New); 00669 00670 // Push the new node and any (possibly new) users onto the worklist. 00671 AddToWorkList(TLO.New.getNode()); 00672 AddUsersToWorkList(TLO.New.getNode()); 00673 00674 // Finally, if the node is now dead, remove it from the graph. The node 00675 // may not be dead if the replacement process recursively simplified to 00676 // something else needing this node. 00677 if (TLO.Old.getNode()->use_empty()) { 00678 removeFromWorkList(TLO.Old.getNode()); 00679 00680 // If the operands of this node are only used by the node, they will now 00681 // be dead. Make sure to visit them first to delete dead nodes early. 00682 for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i) 00683 if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse()) 00684 AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode()); 00685 00686 DAG.DeleteNode(TLO.Old.getNode()); 00687 } 00688 } 00689 00690 /// SimplifyDemandedBits - Check the specified integer node value to see if 00691 /// it can be simplified or if things it uses can be simplified by bit 00692 /// propagation. If so, return true. 00693 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { 00694 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); 00695 APInt KnownZero, KnownOne; 00696 if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) 00697 return false; 00698 00699 // Revisit the node. 00700 AddToWorkList(Op.getNode()); 00701 00702 // Replace the old value with the new one. 
00703 ++NodesCombined; 00704 DEBUG(dbgs() << "\nReplacing.2 "; 00705 TLO.Old.getNode()->dump(&DAG); 00706 dbgs() << "\nWith: "; 00707 TLO.New.getNode()->dump(&DAG); 00708 dbgs() << '\n'); 00709 00710 CommitTargetLoweringOpt(TLO); 00711 return true; 00712 } 00713 00714 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { 00715 SDLoc dl(Load); 00716 EVT VT = Load->getValueType(0); 00717 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0)); 00718 00719 DEBUG(dbgs() << "\nReplacing.9 "; 00720 Load->dump(&DAG); 00721 dbgs() << "\nWith: "; 00722 Trunc.getNode()->dump(&DAG); 00723 dbgs() << '\n'); 00724 WorkListRemover DeadNodes(*this); 00725 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc); 00726 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1)); 00727 removeFromWorkList(Load); 00728 DAG.DeleteNode(Load); 00729 AddToWorkList(Trunc.getNode()); 00730 } 00731 00732 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { 00733 Replace = false; 00734 SDLoc dl(Op); 00735 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { 00736 EVT MemVT = LD->getMemoryVT(); 00737 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) 00738 ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? 
ISD::ZEXTLOAD 00739 : ISD::EXTLOAD) 00740 : LD->getExtensionType(); 00741 Replace = true; 00742 return DAG.getExtLoad(ExtType, dl, PVT, 00743 LD->getChain(), LD->getBasePtr(), 00744 LD->getPointerInfo(), 00745 MemVT, LD->isVolatile(), 00746 LD->isNonTemporal(), LD->getAlignment()); 00747 } 00748 00749 unsigned Opc = Op.getOpcode(); 00750 switch (Opc) { 00751 default: break; 00752 case ISD::AssertSext: 00753 return DAG.getNode(ISD::AssertSext, dl, PVT, 00754 SExtPromoteOperand(Op.getOperand(0), PVT), 00755 Op.getOperand(1)); 00756 case ISD::AssertZext: 00757 return DAG.getNode(ISD::AssertZext, dl, PVT, 00758 ZExtPromoteOperand(Op.getOperand(0), PVT), 00759 Op.getOperand(1)); 00760 case ISD::Constant: { 00761 unsigned ExtOpc = 00762 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 00763 return DAG.getNode(ExtOpc, dl, PVT, Op); 00764 } 00765 } 00766 00767 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT)) 00768 return SDValue(); 00769 return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op); 00770 } 00771 00772 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { 00773 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT)) 00774 return SDValue(); 00775 EVT OldVT = Op.getValueType(); 00776 SDLoc dl(Op); 00777 bool Replace = false; 00778 SDValue NewOp = PromoteOperand(Op, PVT, Replace); 00779 if (NewOp.getNode() == 0) 00780 return SDValue(); 00781 AddToWorkList(NewOp.getNode()); 00782 00783 if (Replace) 00784 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); 00785 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp, 00786 DAG.getValueType(OldVT)); 00787 } 00788 00789 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { 00790 EVT OldVT = Op.getValueType(); 00791 SDLoc dl(Op); 00792 bool Replace = false; 00793 SDValue NewOp = PromoteOperand(Op, PVT, Replace); 00794 if (NewOp.getNode() == 0) 00795 return SDValue(); 00796 AddToWorkList(NewOp.getNode()); 00797 00798 if (Replace) 00799 
ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); 00800 return DAG.getZeroExtendInReg(NewOp, dl, OldVT); 00801 } 00802 00803 /// PromoteIntBinOp - Promote the specified integer binary operation if the 00804 /// target indicates it is beneficial. e.g. On x86, it's usually better to 00805 /// promote i16 operations to i32 since i16 instructions are longer. 00806 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { 00807 if (!LegalOperations) 00808 return SDValue(); 00809 00810 EVT VT = Op.getValueType(); 00811 if (VT.isVector() || !VT.isInteger()) 00812 return SDValue(); 00813 00814 // If operation type is 'undesirable', e.g. i16 on x86, consider 00815 // promoting it. 00816 unsigned Opc = Op.getOpcode(); 00817 if (TLI.isTypeDesirableForOp(Opc, VT)) 00818 return SDValue(); 00819 00820 EVT PVT = VT; 00821 // Consult target whether it is a good idea to promote this operation and 00822 // what's the right type to promote it to. 00823 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 00824 assert(PVT != VT && "Don't know what type to promote to!"); 00825 00826 bool Replace0 = false; 00827 SDValue N0 = Op.getOperand(0); 00828 SDValue NN0 = PromoteOperand(N0, PVT, Replace0); 00829 if (NN0.getNode() == 0) 00830 return SDValue(); 00831 00832 bool Replace1 = false; 00833 SDValue N1 = Op.getOperand(1); 00834 SDValue NN1; 00835 if (N0 == N1) 00836 NN1 = NN0; 00837 else { 00838 NN1 = PromoteOperand(N1, PVT, Replace1); 00839 if (NN1.getNode() == 0) 00840 return SDValue(); 00841 } 00842 00843 AddToWorkList(NN0.getNode()); 00844 if (NN1.getNode()) 00845 AddToWorkList(NN1.getNode()); 00846 00847 if (Replace0) 00848 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode()); 00849 if (Replace1) 00850 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode()); 00851 00852 DEBUG(dbgs() << "\nPromoting "; 00853 Op.getNode()->dump(&DAG)); 00854 SDLoc dl(Op); 00855 return DAG.getNode(ISD::TRUNCATE, dl, VT, 00856 DAG.getNode(Opc, dl, PVT, NN0, NN1)); 00857 } 00858 return SDValue(); 00859 } 
00860 00861 /// PromoteIntShiftOp - Promote the specified integer shift operation if the 00862 /// target indicates it is beneficial. e.g. On x86, it's usually better to 00863 /// promote i16 operations to i32 since i16 instructions are longer. 00864 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { 00865 if (!LegalOperations) 00866 return SDValue(); 00867 00868 EVT VT = Op.getValueType(); 00869 if (VT.isVector() || !VT.isInteger()) 00870 return SDValue(); 00871 00872 // If operation type is 'undesirable', e.g. i16 on x86, consider 00873 // promoting it. 00874 unsigned Opc = Op.getOpcode(); 00875 if (TLI.isTypeDesirableForOp(Opc, VT)) 00876 return SDValue(); 00877 00878 EVT PVT = VT; 00879 // Consult target whether it is a good idea to promote this operation and 00880 // what's the right type to promote it to. 00881 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 00882 assert(PVT != VT && "Don't know what type to promote to!"); 00883 00884 bool Replace = false; 00885 SDValue N0 = Op.getOperand(0); 00886 if (Opc == ISD::SRA) 00887 N0 = SExtPromoteOperand(Op.getOperand(0), PVT); 00888 else if (Opc == ISD::SRL) 00889 N0 = ZExtPromoteOperand(Op.getOperand(0), PVT); 00890 else 00891 N0 = PromoteOperand(N0, PVT, Replace); 00892 if (N0.getNode() == 0) 00893 return SDValue(); 00894 00895 AddToWorkList(N0.getNode()); 00896 if (Replace) 00897 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode()); 00898 00899 DEBUG(dbgs() << "\nPromoting "; 00900 Op.getNode()->dump(&DAG)); 00901 SDLoc dl(Op); 00902 return DAG.getNode(ISD::TRUNCATE, dl, VT, 00903 DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1))); 00904 } 00905 return SDValue(); 00906 } 00907 00908 SDValue DAGCombiner::PromoteExtend(SDValue Op) { 00909 if (!LegalOperations) 00910 return SDValue(); 00911 00912 EVT VT = Op.getValueType(); 00913 if (VT.isVector() || !VT.isInteger()) 00914 return SDValue(); 00915 00916 // If operation type is 'undesirable', e.g. i16 on x86, consider 00917 // promoting it. 
00918 unsigned Opc = Op.getOpcode(); 00919 if (TLI.isTypeDesirableForOp(Opc, VT)) 00920 return SDValue(); 00921 00922 EVT PVT = VT; 00923 // Consult target whether it is a good idea to promote this operation and 00924 // what's the right type to promote it to. 00925 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 00926 assert(PVT != VT && "Don't know what type to promote to!"); 00927 // fold (aext (aext x)) -> (aext x) 00928 // fold (aext (zext x)) -> (zext x) 00929 // fold (aext (sext x)) -> (sext x) 00930 DEBUG(dbgs() << "\nPromoting "; 00931 Op.getNode()->dump(&DAG)); 00932 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0)); 00933 } 00934 return SDValue(); 00935 } 00936 00937 bool DAGCombiner::PromoteLoad(SDValue Op) { 00938 if (!LegalOperations) 00939 return false; 00940 00941 EVT VT = Op.getValueType(); 00942 if (VT.isVector() || !VT.isInteger()) 00943 return false; 00944 00945 // If operation type is 'undesirable', e.g. i16 on x86, consider 00946 // promoting it. 00947 unsigned Opc = Op.getOpcode(); 00948 if (TLI.isTypeDesirableForOp(Opc, VT)) 00949 return false; 00950 00951 EVT PVT = VT; 00952 // Consult target whether it is a good idea to promote this operation and 00953 // what's the right type to promote it to. 00954 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 00955 assert(PVT != VT && "Don't know what type to promote to!"); 00956 00957 SDLoc dl(Op); 00958 SDNode *N = Op.getNode(); 00959 LoadSDNode *LD = cast<LoadSDNode>(N); 00960 EVT MemVT = LD->getMemoryVT(); 00961 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) 00962 ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? 
ISD::ZEXTLOAD 00963 : ISD::EXTLOAD) 00964 : LD->getExtensionType(); 00965 SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, 00966 LD->getChain(), LD->getBasePtr(), 00967 LD->getPointerInfo(), 00968 MemVT, LD->isVolatile(), 00969 LD->isNonTemporal(), LD->getAlignment()); 00970 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD); 00971 00972 DEBUG(dbgs() << "\nPromoting "; 00973 N->dump(&DAG); 00974 dbgs() << "\nTo: "; 00975 Result.getNode()->dump(&DAG); 00976 dbgs() << '\n'); 00977 WorkListRemover DeadNodes(*this); 00978 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); 00979 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1)); 00980 removeFromWorkList(N); 00981 DAG.DeleteNode(N); 00982 AddToWorkList(Result.getNode()); 00983 return true; 00984 } 00985 return false; 00986 } 00987 00988 00989 //===----------------------------------------------------------------------===// 00990 // Main DAG Combiner implementation 00991 //===----------------------------------------------------------------------===// 00992 00993 void DAGCombiner::Run(CombineLevel AtLevel) { 00994 // set the instance variables, so that the various visit routines may use it. 00995 Level = AtLevel; 00996 LegalOperations = Level >= AfterLegalizeVectorOps; 00997 LegalTypes = Level >= AfterLegalizeTypes; 00998 00999 // Add all the dag nodes to the worklist. 01000 for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), 01001 E = DAG.allnodes_end(); I != E; ++I) 01002 AddToWorkList(I); 01003 01004 // Create a dummy node (which is not added to allnodes), that adds a reference 01005 // to the root node, preventing it from being deleted, and tracking any 01006 // changes of the root. 01007 HandleSDNode Dummy(DAG.getRoot()); 01008 01009 // The root of the dag may dangle to deleted nodes until the dag combiner is 01010 // done. Set it to null to avoid confusion. 01011 DAG.setRoot(SDValue()); 01012 01013 // while the worklist isn't empty, find a node and 01014 // try and combine it. 
01015 while (!WorkListContents.empty()) { 01016 SDNode *N; 01017 // The WorkListOrder holds the SDNodes in order, but it may contain duplicates. 01018 // In order to avoid a linear scan, we use a set (O(log N)) to hold what the 01019 // worklist *should* contain, and check the node we want to visit is should 01020 // actually be visited. 01021 do { 01022 N = WorkListOrder.pop_back_val(); 01023 } while (!WorkListContents.erase(N)); 01024 01025 // If N has no uses, it is dead. Make sure to revisit all N's operands once 01026 // N is deleted from the DAG, since they too may now be dead or may have a 01027 // reduced number of uses, allowing other xforms. 01028 if (N->use_empty() && N != &Dummy) { 01029 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) 01030 AddToWorkList(N->getOperand(i).getNode()); 01031 01032 DAG.DeleteNode(N); 01033 continue; 01034 } 01035 01036 SDValue RV = combine(N); 01037 01038 if (RV.getNode() == 0) 01039 continue; 01040 01041 ++NodesCombined; 01042 01043 // If we get back the same node we passed in, rather than a new node or 01044 // zero, we know that the node must have defined multiple values and 01045 // CombineTo was used. Since CombineTo takes care of the worklist 01046 // mechanics for us, we have no work to do in this case. 01047 if (RV.getNode() == N) 01048 continue; 01049 01050 assert(N->getOpcode() != ISD::DELETED_NODE && 01051 RV.getNode()->getOpcode() != ISD::DELETED_NODE && 01052 "Node was deleted but visit returned new node!"); 01053 01054 DEBUG(dbgs() << "\nReplacing.3 "; 01055 N->dump(&DAG); 01056 dbgs() << "\nWith: "; 01057 RV.getNode()->dump(&DAG); 01058 dbgs() << '\n'); 01059 01060 // Transfer debug value. 
01061 DAG.TransferDbgValues(SDValue(N, 0), RV); 01062 WorkListRemover DeadNodes(*this); 01063 if (N->getNumValues() == RV.getNode()->getNumValues()) 01064 DAG.ReplaceAllUsesWith(N, RV.getNode()); 01065 else { 01066 assert(N->getValueType(0) == RV.getValueType() && 01067 N->getNumValues() == 1 && "Type mismatch"); 01068 SDValue OpV = RV; 01069 DAG.ReplaceAllUsesWith(N, &OpV); 01070 } 01071 01072 // Push the new node and any users onto the worklist 01073 AddToWorkList(RV.getNode()); 01074 AddUsersToWorkList(RV.getNode()); 01075 01076 // Add any uses of the old node to the worklist in case this node is the 01077 // last one that uses them. They may become dead after this node is 01078 // deleted. 01079 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) 01080 AddToWorkList(N->getOperand(i).getNode()); 01081 01082 // Finally, if the node is now dead, remove it from the graph. The node 01083 // may not be dead if the replacement process recursively simplified to 01084 // something else needing this node. 01085 if (N->use_empty()) { 01086 // Nodes can be reintroduced into the worklist. Make sure we do not 01087 // process a node that has been replaced. 01088 removeFromWorkList(N); 01089 01090 // Finally, since the node is now dead, remove it from the graph. 01091 DAG.DeleteNode(N); 01092 } 01093 } 01094 01095 // If the root changed (e.g. it was a dead load, update the root). 
01096 DAG.setRoot(Dummy.getValue()); 01097 DAG.RemoveDeadNodes(); 01098 } 01099 01100 SDValue DAGCombiner::visit(SDNode *N) { 01101 switch (N->getOpcode()) { 01102 default: break; 01103 case ISD::TokenFactor: return visitTokenFactor(N); 01104 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N); 01105 case ISD::ADD: return visitADD(N); 01106 case ISD::SUB: return visitSUB(N); 01107 case ISD::ADDC: return visitADDC(N); 01108 case ISD::SUBC: return visitSUBC(N); 01109 case ISD::ADDE: return visitADDE(N); 01110 case ISD::SUBE: return visitSUBE(N); 01111 case ISD::MUL: return visitMUL(N); 01112 case ISD::SDIV: return visitSDIV(N); 01113 case ISD::UDIV: return visitUDIV(N); 01114 case ISD::SREM: return visitSREM(N); 01115 case ISD::UREM: return visitUREM(N); 01116 case ISD::MULHU: return visitMULHU(N); 01117 case ISD::MULHS: return visitMULHS(N); 01118 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N); 01119 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N); 01120 case ISD::SMULO: return visitSMULO(N); 01121 case ISD::UMULO: return visitUMULO(N); 01122 case ISD::SDIVREM: return visitSDIVREM(N); 01123 case ISD::UDIVREM: return visitUDIVREM(N); 01124 case ISD::AND: return visitAND(N); 01125 case ISD::OR: return visitOR(N); 01126 case ISD::XOR: return visitXOR(N); 01127 case ISD::SHL: return visitSHL(N); 01128 case ISD::SRA: return visitSRA(N); 01129 case ISD::SRL: return visitSRL(N); 01130 case ISD::CTLZ: return visitCTLZ(N); 01131 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N); 01132 case ISD::CTTZ: return visitCTTZ(N); 01133 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N); 01134 case ISD::CTPOP: return visitCTPOP(N); 01135 case ISD::SELECT: return visitSELECT(N); 01136 case ISD::VSELECT: return visitVSELECT(N); 01137 case ISD::SELECT_CC: return visitSELECT_CC(N); 01138 case ISD::SETCC: return visitSETCC(N); 01139 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); 01140 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); 01141 case ISD::ANY_EXTEND: return 
visitANY_EXTEND(N); 01142 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); 01143 case ISD::TRUNCATE: return visitTRUNCATE(N); 01144 case ISD::BITCAST: return visitBITCAST(N); 01145 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); 01146 case ISD::FADD: return visitFADD(N); 01147 case ISD::FSUB: return visitFSUB(N); 01148 case ISD::FMUL: return visitFMUL(N); 01149 case ISD::FMA: return visitFMA(N); 01150 case ISD::FDIV: return visitFDIV(N); 01151 case ISD::FREM: return visitFREM(N); 01152 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N); 01153 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N); 01154 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N); 01155 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N); 01156 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N); 01157 case ISD::FP_ROUND: return visitFP_ROUND(N); 01158 case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N); 01159 case ISD::FP_EXTEND: return visitFP_EXTEND(N); 01160 case ISD::FNEG: return visitFNEG(N); 01161 case ISD::FABS: return visitFABS(N); 01162 case ISD::FFLOOR: return visitFFLOOR(N); 01163 case ISD::FCEIL: return visitFCEIL(N); 01164 case ISD::FTRUNC: return visitFTRUNC(N); 01165 case ISD::BRCOND: return visitBRCOND(N); 01166 case ISD::BR_CC: return visitBR_CC(N); 01167 case ISD::LOAD: return visitLOAD(N); 01168 case ISD::STORE: return visitSTORE(N); 01169 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N); 01170 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N); 01171 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N); 01172 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); 01173 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); 01174 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); 01175 } 01176 return SDValue(); 01177 } 01178 01179 SDValue DAGCombiner::combine(SDNode *N) { 01180 SDValue RV = visit(N); 01181 01182 // If nothing happened, try a target-specific DAG combine. 
01183 if (RV.getNode() == 0) { 01184 assert(N->getOpcode() != ISD::DELETED_NODE && 01185 "Node was deleted but visit returned NULL!"); 01186 01187 if (N->getOpcode() >= ISD::BUILTIN_OP_END || 01188 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) { 01189 01190 // Expose the DAG combiner to the target combiner impls. 01191 TargetLowering::DAGCombinerInfo 01192 DagCombineInfo(DAG, Level, false, this); 01193 01194 RV = TLI.PerformDAGCombine(N, DagCombineInfo); 01195 } 01196 } 01197 01198 // If nothing happened still, try promoting the operation. 01199 if (RV.getNode() == 0) { 01200 switch (N->getOpcode()) { 01201 default: break; 01202 case ISD::ADD: 01203 case ISD::SUB: 01204 case ISD::MUL: 01205 case ISD::AND: 01206 case ISD::OR: 01207 case ISD::XOR: 01208 RV = PromoteIntBinOp(SDValue(N, 0)); 01209 break; 01210 case ISD::SHL: 01211 case ISD::SRA: 01212 case ISD::SRL: 01213 RV = PromoteIntShiftOp(SDValue(N, 0)); 01214 break; 01215 case ISD::SIGN_EXTEND: 01216 case ISD::ZERO_EXTEND: 01217 case ISD::ANY_EXTEND: 01218 RV = PromoteExtend(SDValue(N, 0)); 01219 break; 01220 case ISD::LOAD: 01221 if (PromoteLoad(SDValue(N, 0))) 01222 RV = SDValue(N, 0); 01223 break; 01224 } 01225 } 01226 01227 // If N is a commutative binary node, try commuting it to enable more 01228 // sdisel CSE. 01229 if (RV.getNode() == 0 && 01230 SelectionDAG::isCommutativeBinOp(N->getOpcode()) && 01231 N->getNumValues() == 1) { 01232 SDValue N0 = N->getOperand(0); 01233 SDValue N1 = N->getOperand(1); 01234 01235 // Constant operands are canonicalized to RHS. 01236 if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) { 01237 SDValue Ops[] = { N1, N0 }; 01238 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), 01239 Ops, 2); 01240 if (CSENode) 01241 return SDValue(CSENode, 0); 01242 } 01243 } 01244 01245 return RV; 01246 } 01247 01248 /// getInputChainForNode - Given a node, return its input chain if it has one, 01249 /// otherwise return a null sd operand. 
01250 static SDValue getInputChainForNode(SDNode *N) { 01251 if (unsigned NumOps = N->getNumOperands()) { 01252 if (N->getOperand(0).getValueType() == MVT::Other) 01253 return N->getOperand(0); 01254 else if (N->getOperand(NumOps-1).getValueType() == MVT::Other) 01255 return N->getOperand(NumOps-1); 01256 for (unsigned i = 1; i < NumOps-1; ++i) 01257 if (N->getOperand(i).getValueType() == MVT::Other) 01258 return N->getOperand(i); 01259 } 01260 return SDValue(); 01261 } 01262 01263 SDValue DAGCombiner::visitTokenFactor(SDNode *N) { 01264 // If N has two operands, where one has an input chain equal to the other, 01265 // the 'other' chain is redundant. 01266 if (N->getNumOperands() == 2) { 01267 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1)) 01268 return N->getOperand(0); 01269 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0)) 01270 return N->getOperand(1); 01271 } 01272 01273 SmallVector<SDNode *, 8> TFs; // List of token factors to visit. 01274 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor. 01275 SmallPtrSet<SDNode*, 16> SeenOps; 01276 bool Changed = false; // If we should replace this token factor. 01277 01278 // Start out with this token factor. 01279 TFs.push_back(N); 01280 01281 // Iterate through token factors. The TFs grows when new token factors are 01282 // encountered. 01283 for (unsigned i = 0; i < TFs.size(); ++i) { 01284 SDNode *TF = TFs[i]; 01285 01286 // Check each of the operands. 01287 for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) { 01288 SDValue Op = TF->getOperand(i); 01289 01290 switch (Op.getOpcode()) { 01291 case ISD::EntryToken: 01292 // Entry tokens don't need to be added to the list. They are 01293 // rededundant. 01294 Changed = true; 01295 break; 01296 01297 case ISD::TokenFactor: 01298 if (Op.hasOneUse() && 01299 std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) { 01300 // Queue up for processing. 
01301 TFs.push_back(Op.getNode()); 01302 // Clean up in case the token factor is removed. 01303 AddToWorkList(Op.getNode()); 01304 Changed = true; 01305 break; 01306 } 01307 // Fall thru 01308 01309 default: 01310 // Only add if it isn't already in the list. 01311 if (SeenOps.insert(Op.getNode())) 01312 Ops.push_back(Op); 01313 else 01314 Changed = true; 01315 break; 01316 } 01317 } 01318 } 01319 01320 SDValue Result; 01321 01322 // If we've change things around then replace token factor. 01323 if (Changed) { 01324 if (Ops.empty()) { 01325 // The entry token is the only possible outcome. 01326 Result = DAG.getEntryNode(); 01327 } else { 01328 // New and improved token factor. 01329 Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), 01330 MVT::Other, &Ops[0], Ops.size()); 01331 } 01332 01333 // Don't add users to work list. 01334 return CombineTo(N, Result, false); 01335 } 01336 01337 return Result; 01338 } 01339 01340 /// MERGE_VALUES can always be eliminated. 01341 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { 01342 WorkListRemover DeadNodes(*this); 01343 // Replacing results may cause a different MERGE_VALUES to suddenly 01344 // be CSE'd with N, and carry its uses with it. Iterate until no 01345 // uses remain, to ensure that the node can be safely deleted. 01346 // First add the users of this node to the work list so that they 01347 // can be tried again once they have new operands. 01348 AddUsersToWorkList(N); 01349 do { 01350 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) 01351 DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i)); 01352 } while (!N->use_empty()); 01353 removeFromWorkList(N); 01354 DAG.DeleteNode(N); 01355 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
01356 } 01357 01358 static 01359 SDValue combineShlAddConstant(SDLoc DL, SDValue N0, SDValue N1, 01360 SelectionDAG &DAG) { 01361 EVT VT = N0.getValueType(); 01362 SDValue N00 = N0.getOperand(0); 01363 SDValue N01 = N0.getOperand(1); 01364 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01); 01365 01366 if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() && 01367 isa<ConstantSDNode>(N00.getOperand(1))) { 01368 // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), ) 01369 N0 = DAG.getNode(ISD::ADD, SDLoc(N0), VT, 01370 DAG.getNode(ISD::SHL, SDLoc(N00), VT, 01371 N00.getOperand(0), N01), 01372 DAG.getNode(ISD::SHL, SDLoc(N01), VT, 01373 N00.getOperand(1), N01)); 01374 return DAG.getNode(ISD::ADD, DL, VT, N0, N1); 01375 } 01376 01377 return SDValue(); 01378 } 01379 01380 SDValue DAGCombiner::visitADD(SDNode *N) { 01381 SDValue N0 = N->getOperand(0); 01382 SDValue N1 = N->getOperand(1); 01383 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 01384 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 01385 EVT VT = N0.getValueType(); 01386 01387 // fold vector ops 01388 if (VT.isVector()) { 01389 SDValue FoldedVOp = SimplifyVBinOp(N); 01390 if (FoldedVOp.getNode()) return FoldedVOp; 01391 01392 // fold (add x, 0) -> x, vector edition 01393 if (ISD::isBuildVectorAllZeros(N1.getNode())) 01394 return N0; 01395 if (ISD::isBuildVectorAllZeros(N0.getNode())) 01396 return N1; 01397 } 01398 01399 // fold (add x, undef) -> undef 01400 if (N0.getOpcode() == ISD::UNDEF) 01401 return N0; 01402 if (N1.getOpcode() == ISD::UNDEF) 01403 return N1; 01404 // fold (add c1, c2) -> c1+c2 01405 if (N0C && N1C) 01406 return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C); 01407 // canonicalize constant to RHS 01408 if (N0C && !N1C) 01409 return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0); 01410 // fold (add x, 0) -> x 01411 if (N1C && N1C->isNullValue()) 01412 return N0; 01413 // fold (add Sym, c) -> Sym+c 01414 if (GlobalAddressSDNode *GA = 
dyn_cast<GlobalAddressSDNode>(N0)) 01415 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C && 01416 GA->getOpcode() == ISD::GlobalAddress) 01417 return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT, 01418 GA->getOffset() + 01419 (uint64_t)N1C->getSExtValue()); 01420 // fold ((c1-A)+c2) -> (c1+c2)-A 01421 if (N1C && N0.getOpcode() == ISD::SUB) 01422 if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0))) 01423 return DAG.getNode(ISD::SUB, SDLoc(N), VT, 01424 DAG.getConstant(N1C->getAPIntValue()+ 01425 N0C->getAPIntValue(), VT), 01426 N0.getOperand(1)); 01427 // reassociate add 01428 SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1); 01429 if (RADD.getNode() != 0) 01430 return RADD; 01431 // fold ((0-A) + B) -> B-A 01432 if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) && 01433 cast<ConstantSDNode>(N0.getOperand(0))->isNullValue()) 01434 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1)); 01435 // fold (A + (0-B)) -> A-B 01436 if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) && 01437 cast<ConstantSDNode>(N1.getOperand(0))->isNullValue()) 01438 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1)); 01439 // fold (A+(B-A)) -> B 01440 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1)) 01441 return N1.getOperand(0); 01442 // fold ((B-A)+A) -> B 01443 if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1)) 01444 return N0.getOperand(0); 01445 // fold (A+(B-(A+C))) to (B-C) 01446 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && 01447 N0 == N1.getOperand(1).getOperand(0)) 01448 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0), 01449 N1.getOperand(1).getOperand(1)); 01450 // fold (A+(B-(C+A))) to (B-C) 01451 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && 01452 N0 == N1.getOperand(1).getOperand(1)) 01453 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0), 01454 
N1.getOperand(1).getOperand(0)); 01455 // fold (A+((B-A)+or-C)) to (B+or-C) 01456 if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) && 01457 N1.getOperand(0).getOpcode() == ISD::SUB && 01458 N0 == N1.getOperand(0).getOperand(1)) 01459 return DAG.getNode(N1.getOpcode(), SDLoc(N), VT, 01460 N1.getOperand(0).getOperand(0), N1.getOperand(1)); 01461 01462 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant 01463 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) { 01464 SDValue N00 = N0.getOperand(0); 01465 SDValue N01 = N0.getOperand(1); 01466 SDValue N10 = N1.getOperand(0); 01467 SDValue N11 = N1.getOperand(1); 01468 01469 if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10)) 01470 return DAG.getNode(ISD::SUB, SDLoc(N), VT, 01471 DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10), 01472 DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11)); 01473 } 01474 01475 if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0))) 01476 return SDValue(N, 0); 01477 01478 // fold (a+b) -> (a|b) iff a and b share no bits. 01479 if (VT.isInteger() && !VT.isVector()) { 01480 APInt LHSZero, LHSOne; 01481 APInt RHSZero, RHSOne; 01482 DAG.ComputeMaskedBits(N0, LHSZero, LHSOne); 01483 01484 if (LHSZero.getBoolValue()) { 01485 DAG.ComputeMaskedBits(N1, RHSZero, RHSOne); 01486 01487 // If all possibly-set bits on the LHS are clear on the RHS, return an OR. 01488 // If all possibly-set bits on the RHS are clear on the LHS, return an OR. 
01489 if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) 01490 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1); 01491 } 01492 } 01493 01494 // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), ) 01495 if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) { 01496 SDValue Result = combineShlAddConstant(SDLoc(N), N0, N1, DAG); 01497 if (Result.getNode()) return Result; 01498 } 01499 if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) { 01500 SDValue Result = combineShlAddConstant(SDLoc(N), N1, N0, DAG); 01501 if (Result.getNode()) return Result; 01502 } 01503 01504 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) 01505 if (N1.getOpcode() == ISD::SHL && 01506 N1.getOperand(0).getOpcode() == ISD::SUB) 01507 if (ConstantSDNode *C = 01508 dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0))) 01509 if (C->getAPIntValue() == 0) 01510 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, 01511 DAG.getNode(ISD::SHL, SDLoc(N), VT, 01512 N1.getOperand(0).getOperand(1), 01513 N1.getOperand(1))); 01514 if (N0.getOpcode() == ISD::SHL && 01515 N0.getOperand(0).getOpcode() == ISD::SUB) 01516 if (ConstantSDNode *C = 01517 dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0))) 01518 if (C->getAPIntValue() == 0) 01519 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, 01520 DAG.getNode(ISD::SHL, SDLoc(N), VT, 01521 N0.getOperand(0).getOperand(1), 01522 N0.getOperand(1))); 01523 01524 if (N1.getOpcode() == ISD::AND) { 01525 SDValue AndOp0 = N1.getOperand(0); 01526 ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1)); 01527 unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0); 01528 unsigned DestBits = VT.getScalarType().getSizeInBits(); 01529 01530 // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x)) 01531 // and similar xforms where the inner op is either ~0 or 0. 
01532 if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) { 01533 SDLoc DL(N); 01534 return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0); 01535 } 01536 } 01537 01538 // add (sext i1), X -> sub X, (zext i1) 01539 if (N0.getOpcode() == ISD::SIGN_EXTEND && 01540 N0.getOperand(0).getValueType() == MVT::i1 && 01541 !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) { 01542 SDLoc DL(N); 01543 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); 01544 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); 01545 } 01546 01547 return SDValue(); 01548 } 01549 01550 SDValue DAGCombiner::visitADDC(SDNode *N) { 01551 SDValue N0 = N->getOperand(0); 01552 SDValue N1 = N->getOperand(1); 01553 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 01554 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 01555 EVT VT = N0.getValueType(); 01556 01557 // If the flag result is dead, turn this into an ADD. 01558 if (!N->hasAnyUseOfValue(1)) 01559 return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1), 01560 DAG.getNode(ISD::CARRY_FALSE, 01561 SDLoc(N), MVT::Glue)); 01562 01563 // canonicalize constant to RHS. 01564 if (N0C && !N1C) 01565 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0); 01566 01567 // fold (addc x, 0) -> x + no carry out 01568 if (N1C && N1C->isNullValue()) 01569 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, 01570 SDLoc(N), MVT::Glue)); 01571 01572 // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits. 01573 APInt LHSZero, LHSOne; 01574 APInt RHSZero, RHSOne; 01575 DAG.ComputeMaskedBits(N0, LHSZero, LHSOne); 01576 01577 if (LHSZero.getBoolValue()) { 01578 DAG.ComputeMaskedBits(N1, RHSZero, RHSOne); 01579 01580 // If all possibly-set bits on the LHS are clear on the RHS, return an OR. 01581 // If all possibly-set bits on the RHS are clear on the LHS, return an OR. 
01582 if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) 01583 return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1), 01584 DAG.getNode(ISD::CARRY_FALSE, 01585 SDLoc(N), MVT::Glue)); 01586 } 01587 01588 return SDValue(); 01589 } 01590 01591 SDValue DAGCombiner::visitADDE(SDNode *N) { 01592 SDValue N0 = N->getOperand(0); 01593 SDValue N1 = N->getOperand(1); 01594 SDValue CarryIn = N->getOperand(2); 01595 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 01596 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 01597 01598 // canonicalize constant to RHS 01599 if (N0C && !N1C) 01600 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(), 01601 N1, N0, CarryIn); 01602 01603 // fold (adde x, y, false) -> (addc x, y) 01604 if (CarryIn.getOpcode() == ISD::CARRY_FALSE) 01605 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1); 01606 01607 return SDValue(); 01608 } 01609 01610 // Since it may not be valid to emit a fold to zero for vector initializers 01611 // check if we can before folding. 01612 static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT, 01613 SelectionDAG &DAG, bool LegalOperations) { 01614 if (!VT.isVector()) { 01615 return DAG.getConstant(0, VT); 01616 } 01617 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) { 01618 // Produce a vector of zeros. 01619 SDValue El = DAG.getConstant(0, VT.getVectorElementType()); 01620 std::vector<SDValue> Ops(VT.getVectorNumElements(), El); 01621 return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, 01622 &Ops[0], Ops.size()); 01623 } 01624 return SDValue(); 01625 } 01626 01627 SDValue DAGCombiner::visitSUB(SDNode *N) { 01628 SDValue N0 = N->getOperand(0); 01629 SDValue N1 = N->getOperand(1); 01630 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); 01631 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); 01632 ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? 
0 : 01633 dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode()); 01634 EVT VT = N0.getValueType(); 01635 01636 // fold vector ops 01637 if (VT.isVector()) { 01638 SDValue FoldedVOp = SimplifyVBinOp(N); 01639 if (FoldedVOp.getNode()) return FoldedVOp; 01640 01641 // fold (sub x, 0) -> x, vector edition 01642 if (ISD::isBuildVectorAllZeros(N1.getNode())) 01643 return N0; 01644 } 01645 01646 // fold (sub x, x) -> 0 01647 // FIXME: Refactor this and xor and other similar operations together. 01648 if (N0 == N1) 01649 return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations); 01650 // fold (sub c1, c2) -> c1-c2 01651 if (N0C && N1C) 01652 return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C); 01653 // fold (sub x, c) -> (add x, -c) 01654 if (N1C) 01655 return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, 01656 DAG.getConstant(-N1C->getAPIntValue(), VT)); 01657 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) 01658 if (N0C && N0C->isAllOnesValue()) 01659 return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); 01660 // fold A-(A-B) -> B 01661 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0)) 01662 return N1.getOperand(1); 01663 // fold (A+B)-A -> B 01664 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1) 01665 return N0.getOperand(1); 01666 // fold (A+B)-B -> A 01667 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1) 01668 return N0.getOperand(0); 01669 // fold C2-(A+C1) -> (C2-C1)-A 01670 if (N1.getOpcode() == ISD::ADD && N0C && N1C1) { 01671 SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(), 01672 VT); 01673 return DAG.getNode(ISD::SUB, SDLoc(N), VT, NewC, 01674 N1.getOperand(0)); 01675 } 01676 // fold ((A+(B+or-C))-B) -> A+or-C 01677 if (N0.getOpcode() == ISD::ADD && 01678 (N0.getOperand(1).getOpcode() == ISD::SUB || 01679 N0.getOperand(1).getOpcode() == ISD::ADD) && 01680 N0.getOperand(1).getOperand(0) == N1) 01681 return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT, 01682 N0.getOperand(0), 
N0.getOperand(1).getOperand(1)); 01683 // fold ((A+(C+B))-B) -> A+C 01684 if (N0.getOpcode() == ISD::ADD && 01685 N0.getOperand(1).getOpcode() == ISD::ADD && 01686 N0.getOperand(1).getOperand(1) == N1) 01687 return DAG.getNode(ISD::ADD, SDLoc(N), VT, 01688 N0.getOperand(0), N0.getOperand(1).getOperand(0)); 01689 // fold ((A-(B-C))-C) -> A-B 01690 if (N0.getOpcode() == ISD::SUB && 01691 N0.getOperand(1).getOpcode() == ISD::SUB && 01692 N0.getOperand(1).getOperand(1) == N1) 01693 return DAG.getNode(ISD::SUB, SDLoc(N), VT, 01694 N0.getOperand(0), N0.getOperand(1).getOperand(0)); 01695 01696 // If either operand of a sub is undef, the result is undef 01697 if (N0.getOpcode() == ISD::UNDEF) 01698 return N0; 01699 if (N1.getOpcode() == ISD::UNDEF) 01700 return N1; 01701 01702 // If the relocation model supports it, consider symbol offsets. 01703 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) 01704 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) { 01705 // fold (sub Sym, c) -> Sym-c 01706 if (N1C && GA->getOpcode() == ISD::GlobalAddress) 01707 return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT, 01708 GA->getOffset() - 01709 (uint64_t)N1C->getSExtValue()); 01710 // fold (sub Sym+c1, Sym+c2) -> c1-c2 01711 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1)) 01712 if (GA->getGlobal() == GB->getGlobal()) 01713 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(), 01714 VT); 01715 } 01716 01717 return SDValue(); 01718 } 01719 01720 SDValue DAGCombiner::visitSUBC(SDNode *N) { 01721 SDValue N0 = N->getOperand(0); 01722 SDValue N1 = N->getOperand(1); 01723 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 01724 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 01725 EVT VT = N0.getValueType(); 01726 01727 // If the flag result is dead, turn this into an SUB. 
01728 if (!N->hasAnyUseOfValue(1)) 01729 return CombineTo(N, DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1), 01730 DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), 01731 MVT::Glue)); 01732 01733 // fold (subc x, x) -> 0 + no borrow 01734 if (N0 == N1) 01735 return CombineTo(N, DAG.getConstant(0, VT), 01736 DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), 01737 MVT::Glue)); 01738 01739 // fold (subc x, 0) -> x + no borrow 01740 if (N1C && N1C->isNullValue()) 01741 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), 01742 MVT::Glue)); 01743 01744 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow 01745 if (N0C && N0C->isAllOnesValue()) 01746 return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0), 01747 DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), 01748 MVT::Glue)); 01749 01750 return SDValue(); 01751 } 01752 01753 SDValue DAGCombiner::visitSUBE(SDNode *N) { 01754 SDValue N0 = N->getOperand(0); 01755 SDValue N1 = N->getOperand(1); 01756 SDValue CarryIn = N->getOperand(2); 01757 01758 // fold (sube x, y, false) -> (subc x, y) 01759 if (CarryIn.getOpcode() == ISD::CARRY_FALSE) 01760 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1); 01761 01762 return SDValue(); 01763 } 01764 01765 SDValue DAGCombiner::visitMUL(SDNode *N) { 01766 SDValue N0 = N->getOperand(0); 01767 SDValue N1 = N->getOperand(1); 01768 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 01769 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 01770 EVT VT = N0.getValueType(); 01771 01772 // fold vector ops 01773 if (VT.isVector()) { 01774 SDValue FoldedVOp = SimplifyVBinOp(N); 01775 if (FoldedVOp.getNode()) return FoldedVOp; 01776 } 01777 01778 // fold (mul x, undef) -> 0 01779 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) 01780 return DAG.getConstant(0, VT); 01781 // fold (mul c1, c2) -> c1*c2 01782 if (N0C && N1C) 01783 return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0C, N1C); 01784 // canonicalize constant to RHS 01785 if (N0C && !N1C) 01786 return 
DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); 01787 // fold (mul x, 0) -> 0 01788 if (N1C && N1C->isNullValue()) 01789 return N1; 01790 // fold (mul x, -1) -> 0-x 01791 if (N1C && N1C->isAllOnesValue()) 01792 return DAG.getNode(ISD::SUB, SDLoc(N), VT, 01793 DAG.getConstant(0, VT), N0); 01794 // fold (mul x, (1 << c)) -> x << c 01795 if (N1C && N1C->getAPIntValue().isPowerOf2()) 01796 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, 01797 DAG.getConstant(N1C->getAPIntValue().logBase2(), 01798 getShiftAmountTy(N0.getValueType()))); 01799 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c 01800 if (N1C && (-N1C->getAPIntValue()).isPowerOf2()) { 01801 unsigned Log2Val = (-N1C->getAPIntValue()).logBase2(); 01802 // FIXME: If the input is something that is easily negated (e.g. a 01803 // single-use add), we should put the negate there. 01804 return DAG.getNode(ISD::SUB, SDLoc(N), VT, 01805 DAG.getConstant(0, VT), 01806 DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, 01807 DAG.getConstant(Log2Val, 01808 getShiftAmountTy(N0.getValueType())))); 01809 } 01810 // (mul (shl X, c1), c2) -> (mul X, c2 << c1) 01811 if (N1C && N0.getOpcode() == ISD::SHL && 01812 isa<ConstantSDNode>(N0.getOperand(1))) { 01813 SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, 01814 N1, N0.getOperand(1)); 01815 AddToWorkList(C3.getNode()); 01816 return DAG.getNode(ISD::MUL, SDLoc(N), VT, 01817 N0.getOperand(0), C3); 01818 } 01819 01820 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one 01821 // use. 01822 { 01823 SDValue Sh(0,0), Y(0,0); 01824 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). 
01825 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) && 01826 N0.getNode()->hasOneUse()) { 01827 Sh = N0; Y = N1; 01828 } else if (N1.getOpcode() == ISD::SHL && 01829 isa<ConstantSDNode>(N1.getOperand(1)) && 01830 N1.getNode()->hasOneUse()) { 01831 Sh = N1; Y = N0; 01832 } 01833 01834 if (Sh.getNode()) { 01835 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, 01836 Sh.getOperand(0), Y); 01837 return DAG.getNode(ISD::SHL, SDLoc(N), VT, 01838 Mul, Sh.getOperand(1)); 01839 } 01840 } 01841 01842 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) 01843 if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && 01844 isa<ConstantSDNode>(N0.getOperand(1))) 01845 return DAG.getNode(ISD::ADD, SDLoc(N), VT, 01846 DAG.getNode(ISD::MUL, SDLoc(N0), VT, 01847 N0.getOperand(0), N1), 01848 DAG.getNode(ISD::MUL, SDLoc(N1), VT, 01849 N0.getOperand(1), N1)); 01850 01851 // reassociate mul 01852 SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1); 01853 if (RMUL.getNode() != 0) 01854 return RMUL; 01855 01856 return SDValue(); 01857 } 01858 01859 SDValue DAGCombiner::visitSDIV(SDNode *N) { 01860 SDValue N0 = N->getOperand(0); 01861 SDValue N1 = N->getOperand(1); 01862 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); 01863 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); 01864 EVT VT = N->getValueType(0); 01865 01866 // fold vector ops 01867 if (VT.isVector()) { 01868 SDValue FoldedVOp = SimplifyVBinOp(N); 01869 if (FoldedVOp.getNode()) return FoldedVOp; 01870 } 01871 01872 // fold (sdiv c1, c2) -> c1/c2 01873 if (N0C && N1C && !N1C->isNullValue()) 01874 return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C); 01875 // fold (sdiv X, 1) -> X 01876 if (N1C && N1C->getAPIntValue() == 1LL) 01877 return N0; 01878 // fold (sdiv X, -1) -> 0-X 01879 if (N1C && N1C->isAllOnesValue()) 01880 return DAG.getNode(ISD::SUB, SDLoc(N), VT, 01881 DAG.getConstant(0, VT), N0); 01882 // If we know the sign bits of both operands 
are zero, strength reduce to a 01883 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 01884 if (!VT.isVector()) { 01885 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) 01886 return DAG.getNode(ISD::UDIV, SDLoc(N), N1.getValueType(), 01887 N0, N1); 01888 } 01889 // fold (sdiv X, pow2) -> simple ops after legalize 01890 if (N1C && !N1C->isNullValue() && 01891 (N1C->getAPIntValue().isPowerOf2() || 01892 (-N1C->getAPIntValue()).isPowerOf2())) { 01893 // If dividing by powers of two is cheap, then don't perform the following 01894 // fold. 01895 if (TLI.isPow2DivCheap()) 01896 return SDValue(); 01897 01898 unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); 01899 01900 // Splat the sign bit into the register 01901 SDValue SGN = DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, 01902 DAG.getConstant(VT.getSizeInBits()-1, 01903 getShiftAmountTy(N0.getValueType()))); 01904 AddToWorkList(SGN.getNode()); 01905 01906 // Add (N0 < 0) ? abs2 - 1 : 0; 01907 SDValue SRL = DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN, 01908 DAG.getConstant(VT.getSizeInBits() - lg2, 01909 getShiftAmountTy(SGN.getValueType()))); 01910 SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL); 01911 AddToWorkList(SRL.getNode()); 01912 AddToWorkList(ADD.getNode()); // Divide by pow2 01913 SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), VT, ADD, 01914 DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType()))); 01915 01916 // If we're dividing by a positive value, we're done. Otherwise, we must 01917 // negate the result. 01918 if (N1C->getAPIntValue().isNonNegative()) 01919 return SRA; 01920 01921 AddToWorkList(SRA.getNode()); 01922 return DAG.getNode(ISD::SUB, SDLoc(N), VT, 01923 DAG.getConstant(0, VT), SRA); 01924 } 01925 01926 // if integer divide is expensive and we satisfy the requirements, emit an 01927 // alternate sequence. 
01928 if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) { 01929 SDValue Op = BuildSDIV(N); 01930 if (Op.getNode()) return Op; 01931 } 01932 01933 // undef / X -> 0 01934 if (N0.getOpcode() == ISD::UNDEF) 01935 return DAG.getConstant(0, VT); 01936 // X / undef -> undef 01937 if (N1.getOpcode() == ISD::UNDEF) 01938 return N1; 01939 01940 return SDValue(); 01941 } 01942 01943 SDValue DAGCombiner::visitUDIV(SDNode *N) { 01944 SDValue N0 = N->getOperand(0); 01945 SDValue N1 = N->getOperand(1); 01946 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); 01947 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); 01948 EVT VT = N->getValueType(0); 01949 01950 // fold vector ops 01951 if (VT.isVector()) { 01952 SDValue FoldedVOp = SimplifyVBinOp(N); 01953 if (FoldedVOp.getNode()) return FoldedVOp; 01954 } 01955 01956 // fold (udiv c1, c2) -> c1/c2 01957 if (N0C && N1C && !N1C->isNullValue()) 01958 return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C); 01959 // fold (udiv x, (1 << c)) -> x >>u c 01960 if (N1C && N1C->getAPIntValue().isPowerOf2()) 01961 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, 01962 DAG.getConstant(N1C->getAPIntValue().logBase2(), 01963 getShiftAmountTy(N0.getValueType()))); 01964 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 01965 if (N1.getOpcode() == ISD::SHL) { 01966 if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { 01967 if (SHC->getAPIntValue().isPowerOf2()) { 01968 EVT ADDVT = N1.getOperand(1).getValueType(); 01969 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N), ADDVT, 01970 N1.getOperand(1), 01971 DAG.getConstant(SHC->getAPIntValue() 01972 .logBase2(), 01973 ADDVT)); 01974 AddToWorkList(Add.getNode()); 01975 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, Add); 01976 } 01977 } 01978 } 01979 // fold (udiv x, c) -> alternate 01980 if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) { 01981 SDValue Op = BuildUDIV(N); 01982 if (Op.getNode()) return Op; 01983 } 01984 01985 // 
undef / X -> 0 01986 if (N0.getOpcode() == ISD::UNDEF) 01987 return DAG.getConstant(0, VT); 01988 // X / undef -> undef 01989 if (N1.getOpcode() == ISD::UNDEF) 01990 return N1; 01991 01992 return SDValue(); 01993 } 01994 01995 SDValue DAGCombiner::visitSREM(SDNode *N) { 01996 SDValue N0 = N->getOperand(0); 01997 SDValue N1 = N->getOperand(1); 01998 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 01999 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 02000 EVT VT = N->getValueType(0); 02001 02002 // fold (srem c1, c2) -> c1%c2 02003 if (N0C && N1C && !N1C->isNullValue()) 02004 return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C); 02005 // If we know the sign bits of both operands are zero, strength reduce to a 02006 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 02007 if (!VT.isVector()) { 02008 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) 02009 return DAG.getNode(ISD::UREM, SDLoc(N), VT, N0, N1); 02010 } 02011 02012 // If X/C can be simplified by the division-by-constant logic, lower 02013 // X%C to the equivalent of X-X/C*C. 
02014 if (N1C && !N1C->isNullValue()) { 02015 SDValue Div = DAG.getNode(ISD::SDIV, SDLoc(N), VT, N0, N1); 02016 AddToWorkList(Div.getNode()); 02017 SDValue OptimizedDiv = combine(Div.getNode()); 02018 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { 02019 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, 02020 OptimizedDiv, N1); 02021 SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul); 02022 AddToWorkList(Mul.getNode()); 02023 return Sub; 02024 } 02025 } 02026 02027 // undef % X -> 0 02028 if (N0.getOpcode() == ISD::UNDEF) 02029 return DAG.getConstant(0, VT); 02030 // X % undef -> undef 02031 if (N1.getOpcode() == ISD::UNDEF) 02032 return N1; 02033 02034 return SDValue(); 02035 } 02036 02037 SDValue DAGCombiner::visitUREM(SDNode *N) { 02038 SDValue N0 = N->getOperand(0); 02039 SDValue N1 = N->getOperand(1); 02040 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 02041 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 02042 EVT VT = N->getValueType(0); 02043 02044 // fold (urem c1, c2) -> c1%c2 02045 if (N0C && N1C && !N1C->isNullValue()) 02046 return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C); 02047 // fold (urem x, pow2) -> (and x, pow2-1) 02048 if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2()) 02049 return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, 02050 DAG.getConstant(N1C->getAPIntValue()-1,VT)); 02051 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) 02052 if (N1.getOpcode() == ISD::SHL) { 02053 if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { 02054 if (SHC->getAPIntValue().isPowerOf2()) { 02055 SDValue Add = 02056 DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, 02057 DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), 02058 VT)); 02059 AddToWorkList(Add.getNode()); 02060 return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, Add); 02061 } 02062 } 02063 } 02064 02065 // If X/C can be simplified by the division-by-constant logic, lower 02066 // X%C to the equivalent of 
X-X/C*C. 02067 if (N1C && !N1C->isNullValue()) { 02068 SDValue Div = DAG.getNode(ISD::UDIV, SDLoc(N), VT, N0, N1); 02069 AddToWorkList(Div.getNode()); 02070 SDValue OptimizedDiv = combine(Div.getNode()); 02071 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { 02072 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, 02073 OptimizedDiv, N1); 02074 SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul); 02075 AddToWorkList(Mul.getNode()); 02076 return Sub; 02077 } 02078 } 02079 02080 // undef % X -> 0 02081 if (N0.getOpcode() == ISD::UNDEF) 02082 return DAG.getConstant(0, VT); 02083 // X % undef -> undef 02084 if (N1.getOpcode() == ISD::UNDEF) 02085 return N1; 02086 02087 return SDValue(); 02088 } 02089 02090 SDValue DAGCombiner::visitMULHS(SDNode *N) { 02091 SDValue N0 = N->getOperand(0); 02092 SDValue N1 = N->getOperand(1); 02093 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 02094 EVT VT = N->getValueType(0); 02095 SDLoc DL(N); 02096 02097 // fold (mulhs x, 0) -> 0 02098 if (N1C && N1C->isNullValue()) 02099 return N1; 02100 // fold (mulhs x, 1) -> (sra x, size(x)-1) 02101 if (N1C && N1C->getAPIntValue() == 1) 02102 return DAG.getNode(ISD::SRA, SDLoc(N), N0.getValueType(), N0, 02103 DAG.getConstant(N0.getValueType().getSizeInBits() - 1, 02104 getShiftAmountTy(N0.getValueType()))); 02105 // fold (mulhs x, undef) -> 0 02106 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) 02107 return DAG.getConstant(0, VT); 02108 02109 // If the type twice as wide is legal, transform the mulhs to a wider multiply 02110 // plus a shift. 
02111 if (VT.isSimple() && !VT.isVector()) { 02112 MVT Simple = VT.getSimpleVT(); 02113 unsigned SimpleSize = Simple.getSizeInBits(); 02114 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 02115 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 02116 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0); 02117 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1); 02118 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); 02119 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, 02120 DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType()))); 02121 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); 02122 } 02123 } 02124 02125 return SDValue(); 02126 } 02127 02128 SDValue DAGCombiner::visitMULHU(SDNode *N) { 02129 SDValue N0 = N->getOperand(0); 02130 SDValue N1 = N->getOperand(1); 02131 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 02132 EVT VT = N->getValueType(0); 02133 SDLoc DL(N); 02134 02135 // fold (mulhu x, 0) -> 0 02136 if (N1C && N1C->isNullValue()) 02137 return N1; 02138 // fold (mulhu x, 1) -> 0 02139 if (N1C && N1C->getAPIntValue() == 1) 02140 return DAG.getConstant(0, N0.getValueType()); 02141 // fold (mulhu x, undef) -> 0 02142 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) 02143 return DAG.getConstant(0, VT); 02144 02145 // If the type twice as wide is legal, transform the mulhu to a wider multiply 02146 // plus a shift. 
02147 if (VT.isSimple() && !VT.isVector()) { 02148 MVT Simple = VT.getSimpleVT(); 02149 unsigned SimpleSize = Simple.getSizeInBits(); 02150 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 02151 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 02152 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0); 02153 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1); 02154 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); 02155 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, 02156 DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType()))); 02157 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); 02158 } 02159 } 02160 02161 return SDValue(); 02162 } 02163 02164 /// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that 02165 /// compute two values. LoOp and HiOp give the opcodes for the two computations 02166 /// that are being performed. Return true if a simplification was made. 02167 /// 02168 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, 02169 unsigned HiOp) { 02170 // If the high half is not needed, just compute the low half. 02171 bool HiExists = N->hasAnyUseOfValue(1); 02172 if (!HiExists && 02173 (!LegalOperations || 02174 TLI.isOperationLegal(LoOp, N->getValueType(0)))) { 02175 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), 02176 N->op_begin(), N->getNumOperands()); 02177 return CombineTo(N, Res, Res); 02178 } 02179 02180 // If the low half is not needed, just compute the high half. 02181 bool LoExists = N->hasAnyUseOfValue(0); 02182 if (!LoExists && 02183 (!LegalOperations || 02184 TLI.isOperationLegal(HiOp, N->getValueType(1)))) { 02185 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), 02186 N->op_begin(), N->getNumOperands()); 02187 return CombineTo(N, Res, Res); 02188 } 02189 02190 // If both halves are used, return as it is. 02191 if (LoExists && HiExists) 02192 return SDValue(); 02193 02194 // If the two computed results can be simplified separately, separate them. 
02195 if (LoExists) { 02196 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), 02197 N->op_begin(), N->getNumOperands()); 02198 AddToWorkList(Lo.getNode()); 02199 SDValue LoOpt = combine(Lo.getNode()); 02200 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() && 02201 (!LegalOperations || 02202 TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType()))) 02203 return CombineTo(N, LoOpt, LoOpt); 02204 } 02205 02206 if (HiExists) { 02207 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), 02208 N->op_begin(), N->getNumOperands()); 02209 AddToWorkList(Hi.getNode()); 02210 SDValue HiOpt = combine(Hi.getNode()); 02211 if (HiOpt.getNode() && HiOpt != Hi && 02212 (!LegalOperations || 02213 TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType()))) 02214 return CombineTo(N, HiOpt, HiOpt); 02215 } 02216 02217 return SDValue(); 02218 } 02219 02220 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { 02221 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS); 02222 if (Res.getNode()) return Res; 02223 02224 EVT VT = N->getValueType(0); 02225 SDLoc DL(N); 02226 02227 // If the type twice as wide is legal, transform the mulhu to a wider multiply 02228 // plus a shift. 02229 if (VT.isSimple() && !VT.isVector()) { 02230 MVT Simple = VT.getSimpleVT(); 02231 unsigned SimpleSize = Simple.getSizeInBits(); 02232 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 02233 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 02234 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0)); 02235 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1)); 02236 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); 02237 // Compute the high part as N1. 02238 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, 02239 DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType()))); 02240 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); 02241 // Compute the low part as N0. 
02242 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); 02243 return CombineTo(N, Lo, Hi); 02244 } 02245 } 02246 02247 return SDValue(); 02248 } 02249 02250 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { 02251 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU); 02252 if (Res.getNode()) return Res; 02253 02254 EVT VT = N->getValueType(0); 02255 SDLoc DL(N); 02256 02257 // If the type twice as wide is legal, transform the mulhu to a wider multiply 02258 // plus a shift. 02259 if (VT.isSimple() && !VT.isVector()) { 02260 MVT Simple = VT.getSimpleVT(); 02261 unsigned SimpleSize = Simple.getSizeInBits(); 02262 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 02263 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 02264 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0)); 02265 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1)); 02266 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); 02267 // Compute the high part as N1. 02268 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, 02269 DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType()))); 02270 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); 02271 // Compute the low part as N0. 
02272 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); 02273 return CombineTo(N, Lo, Hi); 02274 } 02275 } 02276 02277 return SDValue(); 02278 } 02279 02280 SDValue DAGCombiner::visitSMULO(SDNode *N) { 02281 // (smulo x, 2) -> (saddo x, x) 02282 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) 02283 if (C2->getAPIntValue() == 2) 02284 return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(), 02285 N->getOperand(0), N->getOperand(0)); 02286 02287 return SDValue(); 02288 } 02289 02290 SDValue DAGCombiner::visitUMULO(SDNode *N) { 02291 // (umulo x, 2) -> (uaddo x, x) 02292 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) 02293 if (C2->getAPIntValue() == 2) 02294 return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(), 02295 N->getOperand(0), N->getOperand(0)); 02296 02297 return SDValue(); 02298 } 02299 02300 SDValue DAGCombiner::visitSDIVREM(SDNode *N) { 02301 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM); 02302 if (Res.getNode()) return Res; 02303 02304 return SDValue(); 02305 } 02306 02307 SDValue DAGCombiner::visitUDIVREM(SDNode *N) { 02308 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM); 02309 if (Res.getNode()) return Res; 02310 02311 return SDValue(); 02312 } 02313 02314 /// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with 02315 /// two operands of the same opcode, try to simplify it. 02316 SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { 02317 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 02318 EVT VT = N0.getValueType(); 02319 assert(N0.getOpcode() == N1.getOpcode() && "Bad input!"); 02320 02321 // Bail early if none of these transforms apply. 
02322 if (N0.getNode()->getNumOperands() == 0) return SDValue(); 02323 02324 // For each of OP in AND/OR/XOR: 02325 // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) 02326 // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) 02327 // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) 02328 // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) 02329 // 02330 // do not sink logical op inside of a vector extend, since it may combine 02331 // into a vsetcc. 02332 EVT Op0VT = N0.getOperand(0).getValueType(); 02333 if ((N0.getOpcode() == ISD::ZERO_EXTEND || 02334 N0.getOpcode() == ISD::SIGN_EXTEND || 02335 // Avoid infinite looping with PromoteIntBinOp. 02336 (N0.getOpcode() == ISD::ANY_EXTEND && 02337 (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) || 02338 (N0.getOpcode() == ISD::TRUNCATE && 02339 (!TLI.isZExtFree(VT, Op0VT) || 02340 !TLI.isTruncateFree(Op0VT, VT)) && 02341 TLI.isTypeLegal(Op0VT))) && 02342 !VT.isVector() && 02343 Op0VT == N1.getOperand(0).getValueType() && 02344 (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) { 02345 SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), 02346 N0.getOperand(0).getValueType(), 02347 N0.getOperand(0), N1.getOperand(0)); 02348 AddToWorkList(ORNode.getNode()); 02349 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode); 02350 } 02351 02352 // For each of OP in SHL/SRL/SRA/AND... 
02353 // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z) 02354 // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z) 02355 // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z) 02356 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL || 02357 N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) && 02358 N0.getOperand(1) == N1.getOperand(1)) { 02359 SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), 02360 N0.getOperand(0).getValueType(), 02361 N0.getOperand(0), N1.getOperand(0)); 02362 AddToWorkList(ORNode.getNode()); 02363 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, 02364 ORNode, N0.getOperand(1)); 02365 } 02366 02367 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) 02368 // Only perform this optimization after type legalization and before 02369 // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by 02370 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and 02371 // we don't want to undo this promotion. 02372 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper 02373 // on scalars. 02374 if ((N0.getOpcode() == ISD::BITCAST || 02375 N0.getOpcode() == ISD::SCALAR_TO_VECTOR) && 02376 Level == AfterLegalizeTypes) { 02377 SDValue In0 = N0.getOperand(0); 02378 SDValue In1 = N1.getOperand(0); 02379 EVT In0Ty = In0.getValueType(); 02380 EVT In1Ty = In1.getValueType(); 02381 SDLoc DL(N); 02382 // If both incoming values are integers, and the original types are the 02383 // same. 02384 if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) { 02385 SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1); 02386 SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op); 02387 AddToWorkList(Op.getNode()); 02388 return BC; 02389 } 02390 } 02391 02392 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value). 
02393 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B)) 02394 // If both shuffles use the same mask, and both shuffle within a single 02395 // vector, then it is worthwhile to move the swizzle after the operation. 02396 // The type-legalizer generates this pattern when loading illegal 02397 // vector types from memory. In many cases this allows additional shuffle 02398 // optimizations. 02399 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && 02400 N0.getOperand(1).getOpcode() == ISD::UNDEF && 02401 N1.getOperand(1).getOpcode() == ISD::UNDEF) { 02402 ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0); 02403 ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1); 02404 02405 assert(N0.getOperand(0).getValueType() == N1.getOperand(1).getValueType() && 02406 "Inputs to shuffles are not the same type"); 02407 02408 unsigned NumElts = VT.getVectorNumElements(); 02409 02410 // Check that both shuffles use the same mask. The masks are known to be of 02411 // the same length because the result vector type is the same. 
02412 bool SameMask = true; 02413 for (unsigned i = 0; i != NumElts; ++i) { 02414 int Idx0 = SVN0->getMaskElt(i); 02415 int Idx1 = SVN1->getMaskElt(i); 02416 if (Idx0 != Idx1) { 02417 SameMask = false; 02418 break; 02419 } 02420 } 02421 02422 if (SameMask) { 02423 SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N), VT, 02424 N0.getOperand(0), N1.getOperand(0)); 02425 AddToWorkList(Op.getNode()); 02426 return DAG.getVectorShuffle(VT, SDLoc(N), Op, 02427 DAG.getUNDEF(VT), &SVN0->getMask()[0]); 02428 } 02429 } 02430 02431 return SDValue(); 02432 } 02433 02434 SDValue DAGCombiner::visitAND(SDNode *N) { 02435 SDValue N0 = N->getOperand(0); 02436 SDValue N1 = N->getOperand(1); 02437 SDValue LL, LR, RL, RR, CC0, CC1; 02438 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 02439 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 02440 EVT VT = N1.getValueType(); 02441 unsigned BitWidth = VT.getScalarType().getSizeInBits(); 02442 02443 // fold vector ops 02444 if (VT.isVector()) { 02445 SDValue FoldedVOp = SimplifyVBinOp(N); 02446 if (FoldedVOp.getNode()) return FoldedVOp; 02447 02448 // fold (and x, 0) -> 0, vector edition 02449 if (ISD::isBuildVectorAllZeros(N0.getNode())) 02450 return N0; 02451 if (ISD::isBuildVectorAllZeros(N1.getNode())) 02452 return N1; 02453 02454 // fold (and x, -1) -> x, vector edition 02455 if (ISD::isBuildVectorAllOnes(N0.getNode())) 02456 return N1; 02457 if (ISD::isBuildVectorAllOnes(N1.getNode())) 02458 return N0; 02459 } 02460 02461 // fold (and x, undef) -> 0 02462 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) 02463 return DAG.getConstant(0, VT); 02464 // fold (and c1, c2) -> c1&c2 02465 if (N0C && N1C) 02466 return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C); 02467 // canonicalize constant to RHS 02468 if (N0C && !N1C) 02469 return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); 02470 // fold (and x, -1) -> x 02471 if (N1C && N1C->isAllOnesValue()) 02472 return N0; 02473 // if (and x, c) is known to be zero, return 
0 02474 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), 02475 APInt::getAllOnesValue(BitWidth))) 02476 return DAG.getConstant(0, VT); 02477 // reassociate and 02478 SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1); 02479 if (RAND.getNode() != 0) 02480 return RAND; 02481 // fold (and (or x, C), D) -> D if (C & D) == D 02482 if (N1C && N0.getOpcode() == ISD::OR) 02483 if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) 02484 if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue()) 02485 return N1; 02486 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. 02487 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { 02488 SDValue N0Op0 = N0.getOperand(0); 02489 APInt Mask = ~N1C->getAPIntValue(); 02490 Mask = Mask.trunc(N0Op0.getValueSizeInBits()); 02491 if (DAG.MaskedValueIsZero(N0Op0, Mask)) { 02492 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), 02493 N0.getValueType(), N0Op0); 02494 02495 // Replace uses of the AND with uses of the Zero extend node. 02496 CombineTo(N, Zext); 02497 02498 // We actually want to replace all uses of the any_extend with the 02499 // zero_extend, to avoid duplicating things. This will later cause this 02500 // AND to be folded. 02501 CombineTo(N0.getNode(), Zext); 02502 return SDValue(N, 0); // Return N so it doesn't get rechecked! 02503 } 02504 } 02505 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> 02506 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must 02507 // already be zero by virtue of the width of the base type of the load. 02508 // 02509 // the 'X' node here can either be nothing or an extract_vector_elt to catch 02510 // more cases. 02511 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 02512 N0.getOperand(0).getOpcode() == ISD::LOAD) || 02513 N0.getOpcode() == ISD::LOAD) { 02514 LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ? 
02515 N0 : N0.getOperand(0) ); 02516 02517 // Get the constant (if applicable) the zero'th operand is being ANDed with. 02518 // This can be a pure constant or a vector splat, in which case we treat the 02519 // vector as a scalar and use the splat value. 02520 APInt Constant = APInt::getNullValue(1); 02521 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { 02522 Constant = C->getAPIntValue(); 02523 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) { 02524 APInt SplatValue, SplatUndef; 02525 unsigned SplatBitSize; 02526 bool HasAnyUndefs; 02527 bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef, 02528 SplatBitSize, HasAnyUndefs); 02529 if (IsSplat) { 02530 // Undef bits can contribute to a possible optimisation if set, so 02531 // set them. 02532 SplatValue |= SplatUndef; 02533 02534 // The splat value may be something like "0x00FFFFFF", which means 0 for 02535 // the first vector value and FF for the rest, repeating. We need a mask 02536 // that will apply equally to all members of the vector, so AND all the 02537 // lanes of the constant together. 02538 EVT VT = Vector->getValueType(0); 02539 unsigned BitWidth = VT.getVectorElementType().getSizeInBits(); 02540 02541 // If the splat value has been compressed to a bitlength lower 02542 // than the size of the vector lane, we need to re-expand it to 02543 // the lane size. 02544 if (BitWidth > SplatBitSize) 02545 for (SplatValue = SplatValue.zextOrTrunc(BitWidth); 02546 SplatBitSize < BitWidth; 02547 SplatBitSize = SplatBitSize * 2) 02548 SplatValue |= SplatValue.shl(SplatBitSize); 02549 02550 Constant = APInt::getAllOnesValue(BitWidth); 02551 for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i) 02552 Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth); 02553 } 02554 } 02555 02556 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is 02557 // actually legal and isn't going to get expanded, else this is a false 02558 // optimisation. 
02559 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD, 02560 Load->getMemoryVT()); 02561 02562 // Resize the constant to the same size as the original memory access before 02563 // extension. If it is still the AllOnesValue then this AND is completely 02564 // unneeded. 02565 Constant = 02566 Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits()); 02567 02568 bool B; 02569 switch (Load->getExtensionType()) { 02570 default: B = false; break; 02571 case ISD::EXTLOAD: B = CanZextLoadProfitably; break; 02572 case ISD::ZEXTLOAD: 02573 case ISD::NON_EXTLOAD: B = true; break; 02574 } 02575 02576 if (B && Constant.isAllOnesValue()) { 02577 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to 02578 // preserve semantics once we get rid of the AND. 02579 SDValue NewLoad(Load, 0); 02580 if (Load->getExtensionType() == ISD::EXTLOAD) { 02581 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD, 02582 Load->getValueType(0), SDLoc(Load), 02583 Load->getChain(), Load->getBasePtr(), 02584 Load->getOffset(), Load->getMemoryVT(), 02585 Load->getMemOperand()); 02586 // Replace uses of the EXTLOAD with the new ZEXTLOAD. 02587 if (Load->getNumValues() == 3) { 02588 // PRE/POST_INC loads have 3 values. 02589 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1), 02590 NewLoad.getValue(2) }; 02591 CombineTo(Load, To, 3, true); 02592 } else { 02593 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1)); 02594 } 02595 } 02596 02597 // Fold the AND away, taking care not to fold to the old load node if we 02598 // replaced it. 02599 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0); 02600 02601 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
02602 } 02603 } 02604 // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) 02605 if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ 02606 ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); 02607 ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); 02608 02609 if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && 02610 LL.getValueType().isInteger()) { 02611 // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) 02612 if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) { 02613 SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), 02614 LR.getValueType(), LL, RL); 02615 AddToWorkList(ORNode.getNode()); 02616 return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); 02617 } 02618 // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) 02619 if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) { 02620 SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), 02621 LR.getValueType(), LL, RL); 02622 AddToWorkList(ANDNode.getNode()); 02623 return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1); 02624 } 02625 // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) 02626 if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) { 02627 SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), 02628 LR.getValueType(), LL, RL); 02629 AddToWorkList(ORNode.getNode()); 02630 return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); 02631 } 02632 } 02633 // canonicalize equivalent to ll == rl 02634 if (LL == RR && LR == RL) { 02635 Op1 = ISD::getSetCCSwappedOperands(Op1); 02636 std::swap(RL, RR); 02637 } 02638 if (LL == RL && LR == RR) { 02639 bool isInteger = LL.getValueType().isInteger(); 02640 ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); 02641 if (Result != ISD::SETCC_INVALID && 02642 (!LegalOperations || 02643 (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && 02644 TLI.isOperationLegal(ISD::SETCC, 02645 getSetCCResultType(N0.getSimpleValueType()))))) 02646 return 
DAG.getSetCC(SDLoc(N), N0.getValueType(), 02647 LL, LR, Result); 02648 } 02649 } 02650 02651 // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) 02652 if (N0.getOpcode() == N1.getOpcode()) { 02653 SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); 02654 if (Tmp.getNode()) return Tmp; 02655 } 02656 02657 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) 02658 // fold (and (sra)) -> (and (srl)) when possible. 02659 if (!VT.isVector() && 02660 SimplifyDemandedBits(SDValue(N, 0))) 02661 return SDValue(N, 0); 02662 02663 // fold (zext_inreg (extload x)) -> (zextload x) 02664 if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { 02665 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 02666 EVT MemVT = LN0->getMemoryVT(); 02667 // If we zero all the possible extended bits, then we can turn this into 02668 // a zextload if we are running before legalize or the operation is legal. 02669 unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); 02670 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, 02671 BitWidth - MemVT.getScalarType().getSizeInBits())) && 02672 ((!LegalOperations && !LN0->isVolatile()) || 02673 TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { 02674 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, 02675 LN0->getChain(), LN0->getBasePtr(), 02676 LN0->getPointerInfo(), MemVT, 02677 LN0->isVolatile(), LN0->isNonTemporal(), 02678 LN0->getAlignment()); 02679 AddToWorkList(N); 02680 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 02681 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
02682 } 02683 } 02684 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use 02685 if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && 02686 N0.hasOneUse()) { 02687 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 02688 EVT MemVT = LN0->getMemoryVT(); 02689 // If we zero all the possible extended bits, then we can turn this into 02690 // a zextload if we are running before legalize or the operation is legal. 02691 unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); 02692 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, 02693 BitWidth - MemVT.getScalarType().getSizeInBits())) && 02694 ((!LegalOperations && !LN0->isVolatile()) || 02695 TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { 02696 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, 02697 LN0->getChain(), 02698 LN0->getBasePtr(), LN0->getPointerInfo(), 02699 MemVT, 02700 LN0->isVolatile(), LN0->isNonTemporal(), 02701 LN0->getAlignment()); 02702 AddToWorkList(N); 02703 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 02704 return SDValue(N, 0); // Return N so it doesn't get rechecked! 02705 } 02706 } 02707 02708 // fold (and (load x), 255) -> (zextload x, i8) 02709 // fold (and (extload x, i16), 255) -> (zextload x, i8) 02710 // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8) 02711 if (N1C && (N0.getOpcode() == ISD::LOAD || 02712 (N0.getOpcode() == ISD::ANY_EXTEND && 02713 N0.getOperand(0).getOpcode() == ISD::LOAD))) { 02714 bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND; 02715 LoadSDNode *LN0 = HasAnyExt 02716 ? 
cast<LoadSDNode>(N0.getOperand(0)) 02717 : cast<LoadSDNode>(N0); 02718 if (LN0->getExtensionType() != ISD::SEXTLOAD && 02719 LN0->isUnindexed() && N0.hasOneUse() && LN0->hasOneUse()) { 02720 uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits(); 02721 if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){ 02722 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); 02723 EVT LoadedVT = LN0->getMemoryVT(); 02724 02725 if (ExtVT == LoadedVT && 02726 (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { 02727 EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; 02728 02729 SDValue NewLoad = 02730 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, 02731 LN0->getChain(), LN0->getBasePtr(), 02732 LN0->getPointerInfo(), 02733 ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), 02734 LN0->getAlignment()); 02735 AddToWorkList(N); 02736 CombineTo(LN0, NewLoad, NewLoad.getValue(1)); 02737 return SDValue(N, 0); // Return N so it doesn't get rechecked! 02738 } 02739 02740 // Do not change the width of a volatile load. 02741 // Do not generate loads of non-round integer types since these can 02742 // be expensive (and would be wrong if the type is not byte sized). 02743 if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() && 02744 (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { 02745 EVT PtrType = LN0->getOperand(1).getValueType(); 02746 02747 unsigned Alignment = LN0->getAlignment(); 02748 SDValue NewPtr = LN0->getBasePtr(); 02749 02750 // For big endian targets, we need to add an offset to the pointer 02751 // to load the correct bytes. For little endian systems, we merely 02752 // need to read fewer bytes from the same pointer. 
02753 if (TLI.isBigEndian()) { 02754 unsigned LVTStoreBytes = LoadedVT.getStoreSize(); 02755 unsigned EVTStoreBytes = ExtVT.getStoreSize(); 02756 unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; 02757 NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), PtrType, 02758 NewPtr, DAG.getConstant(PtrOff, PtrType)); 02759 Alignment = MinAlign(Alignment, PtrOff); 02760 } 02761 02762 AddToWorkList(NewPtr.getNode()); 02763 02764 EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; 02765 SDValue Load = 02766 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, 02767 LN0->getChain(), NewPtr, 02768 LN0->getPointerInfo(), 02769 ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), 02770 Alignment); 02771 AddToWorkList(N); 02772 CombineTo(LN0, Load, Load.getValue(1)); 02773 return SDValue(N, 0); // Return N so it doesn't get rechecked! 02774 } 02775 } 02776 } 02777 } 02778 02779 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && 02780 VT.getSizeInBits() <= 64) { 02781 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { 02782 APInt ADDC = ADDI->getAPIntValue(); 02783 if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) { 02784 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal 02785 // immediate for an add, but it is legal if its top c2 bits are set, 02786 // transform the ADD so the immediate doesn't need to be materialized 02787 // in a register. 02788 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) { 02789 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 02790 SRLI->getZExtValue()); 02791 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) { 02792 ADDC |= Mask; 02793 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) { 02794 SDValue NewAdd = 02795 DAG.getNode(ISD::ADD, SDLoc(N0), VT, 02796 N0.getOperand(0), DAG.getConstant(ADDC, VT)); 02797 CombineTo(N0.getNode(), NewAdd); 02798 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
02799 } 02800 } 02801 } 02802 } 02803 } 02804 } 02805 02806 return SDValue(); 02807 } 02808 02809 /// MatchBSwapHWord - Match (a >> 8) | (a << 8) as (bswap a) >> 16 02810 /// 02811 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, 02812 bool DemandHighBits) { 02813 if (!LegalOperations) 02814 return SDValue(); 02815 02816 EVT VT = N->getValueType(0); 02817 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16) 02818 return SDValue(); 02819 if (!TLI.isOperationLegal(ISD::BSWAP, VT)) 02820 return SDValue(); 02821 02822 // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00) 02823 bool LookPassAnd0 = false; 02824 bool LookPassAnd1 = false; 02825 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL) 02826 std::swap(N0, N1); 02827 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL) 02828 std::swap(N0, N1); 02829 if (N0.getOpcode() == ISD::AND) { 02830 if (!N0.getNode()->hasOneUse()) 02831 return SDValue(); 02832 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 02833 if (!N01C || N01C->getZExtValue() != 0xFF00) 02834 return SDValue(); 02835 N0 = N0.getOperand(0); 02836 LookPassAnd0 = true; 02837 } 02838 02839 if (N1.getOpcode() == ISD::AND) { 02840 if (!N1.getNode()->hasOneUse()) 02841 return SDValue(); 02842 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 02843 if (!N11C || N11C->getZExtValue() != 0xFF) 02844 return SDValue(); 02845 N1 = N1.getOperand(0); 02846 LookPassAnd1 = true; 02847 } 02848 02849 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL) 02850 std::swap(N0, N1); 02851 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL) 02852 return SDValue(); 02853 if (!N0.getNode()->hasOneUse() || 02854 !N1.getNode()->hasOneUse()) 02855 return SDValue(); 02856 02857 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 02858 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 02859 if (!N01C || !N11C) 02860 
return SDValue(); 02861 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8) 02862 return SDValue(); 02863 02864 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8) 02865 SDValue N00 = N0->getOperand(0); 02866 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) { 02867 if (!N00.getNode()->hasOneUse()) 02868 return SDValue(); 02869 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1)); 02870 if (!N001C || N001C->getZExtValue() != 0xFF) 02871 return SDValue(); 02872 N00 = N00.getOperand(0); 02873 LookPassAnd0 = true; 02874 } 02875 02876 SDValue N10 = N1->getOperand(0); 02877 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) { 02878 if (!N10.getNode()->hasOneUse()) 02879 return SDValue(); 02880 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1)); 02881 if (!N101C || N101C->getZExtValue() != 0xFF00) 02882 return SDValue(); 02883 N10 = N10.getOperand(0); 02884 LookPassAnd1 = true; 02885 } 02886 02887 if (N00 != N10) 02888 return SDValue(); 02889 02890 // Make sure everything beyond the low halfword is zero since the SRL 16 02891 // will clear the top bits. 02892 unsigned OpSizeInBits = VT.getSizeInBits(); 02893 if (DemandHighBits && OpSizeInBits > 16 && 02894 (!LookPassAnd0 || !LookPassAnd1) && 02895 !DAG.MaskedValueIsZero(N10, APInt::getHighBitsSet(OpSizeInBits, 16))) 02896 return SDValue(); 02897 02898 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00); 02899 if (OpSizeInBits > 16) 02900 Res = DAG.getNode(ISD::SRL, SDLoc(N), VT, Res, 02901 DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT))); 02902 return Res; 02903 } 02904 02905 /// isBSwapHWordElement - Return true if the specified node is an element 02906 /// that makes up a 32-bit packed halfword byteswap. i.e. 
02907 /// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8) 02908 static bool isBSwapHWordElement(SDValue N, SmallVector<SDNode*,4> &Parts) { 02909 if (!N.getNode()->hasOneUse()) 02910 return false; 02911 02912 unsigned Opc = N.getOpcode(); 02913 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL) 02914 return false; 02915 02916 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 02917 if (!N1C) 02918 return false; 02919 02920 unsigned Num; 02921 switch (N1C->getZExtValue()) { 02922 default: 02923 return false; 02924 case 0xFF: Num = 0; break; 02925 case 0xFF00: Num = 1; break; 02926 case 0xFF0000: Num = 2; break; 02927 case 0xFF000000: Num = 3; break; 02928 } 02929 02930 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00). 02931 SDValue N0 = N.getOperand(0); 02932 if (Opc == ISD::AND) { 02933 if (Num == 0 || Num == 2) { 02934 // (x >> 8) & 0xff 02935 // (x >> 8) & 0xff0000 02936 if (N0.getOpcode() != ISD::SRL) 02937 return false; 02938 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 02939 if (!C || C->getZExtValue() != 8) 02940 return false; 02941 } else { 02942 // (x << 8) & 0xff00 02943 // (x << 8) & 0xff000000 02944 if (N0.getOpcode() != ISD::SHL) 02945 return false; 02946 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 02947 if (!C || C->getZExtValue() != 8) 02948 return false; 02949 } 02950 } else if (Opc == ISD::SHL) { 02951 // (x & 0xff) << 8 02952 // (x & 0xff0000) << 8 02953 if (Num != 0 && Num != 2) 02954 return false; 02955 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 02956 if (!C || C->getZExtValue() != 8) 02957 return false; 02958 } else { // Opc == ISD::SRL 02959 // (x & 0xff00) >> 8 02960 // (x & 0xff000000) >> 8 02961 if (Num != 1 && Num != 3) 02962 return false; 02963 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 02964 if (!C || C->getZExtValue() != 8) 02965 return false; 02966 } 02967 02968 if (Parts[Num]) 02969 return false; 02970 
02971 Parts[Num] = N0.getOperand(0).getNode(); 02972 return true; 02973 } 02974 02975 /// MatchBSwapHWord - Match a 32-bit packed halfword bswap. That is 02976 /// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8) 02977 /// => (rotl (bswap x), 16) 02978 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { 02979 if (!LegalOperations) 02980 return SDValue(); 02981 02982 EVT VT = N->getValueType(0); 02983 if (VT != MVT::i32) 02984 return SDValue(); 02985 if (!TLI.isOperationLegal(ISD::BSWAP, VT)) 02986 return SDValue(); 02987 02988 SmallVector<SDNode*,4> Parts(4, (SDNode*)0); 02989 // Look for either 02990 // (or (or (and), (and)), (or (and), (and))) 02991 // (or (or (or (and), (and)), (and)), (and)) 02992 if (N0.getOpcode() != ISD::OR) 02993 return SDValue(); 02994 SDValue N00 = N0.getOperand(0); 02995 SDValue N01 = N0.getOperand(1); 02996 02997 if (N1.getOpcode() == ISD::OR && 02998 N00.getNumOperands() == 2 && N01.getNumOperands() == 2) { 02999 // (or (or (and), (and)), (or (and), (and))) 03000 SDValue N000 = N00.getOperand(0); 03001 if (!isBSwapHWordElement(N000, Parts)) 03002 return SDValue(); 03003 03004 SDValue N001 = N00.getOperand(1); 03005 if (!isBSwapHWordElement(N001, Parts)) 03006 return SDValue(); 03007 SDValue N010 = N01.getOperand(0); 03008 if (!isBSwapHWordElement(N010, Parts)) 03009 return SDValue(); 03010 SDValue N011 = N01.getOperand(1); 03011 if (!isBSwapHWordElement(N011, Parts)) 03012 return SDValue(); 03013 } else { 03014 // (or (or (or (and), (and)), (and)), (and)) 03015 if (!isBSwapHWordElement(N1, Parts)) 03016 return SDValue(); 03017 if (!isBSwapHWordElement(N01, Parts)) 03018 return SDValue(); 03019 if (N00.getOpcode() != ISD::OR) 03020 return SDValue(); 03021 SDValue N000 = N00.getOperand(0); 03022 if (!isBSwapHWordElement(N000, Parts)) 03023 return SDValue(); 03024 SDValue N001 = N00.getOperand(1); 03025 if (!isBSwapHWordElement(N001, Parts)) 03026 return SDValue(); 03027 } 03028 03029 // Make 
/// visitOR - Try to simplify an ISD::OR node.
///
/// The folds below are attempted strictly in order and the first one that
/// applies wins. Returns the replacement value; SDValue(N, 0) when
/// SimplifyDemandedBits reports that it changed something; or an empty
/// SDValue when no fold applied.
SDValue DAGCombiner::visitOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue LL, LR, RL, RR, CC0, CC1;
  // Non-null only when the corresponding operand is a scalar constant.
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N1.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    // fold (or x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;

    // fold (or x, -1) -> -1, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      return N0;
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      return N1;
  }

  // fold (or x, undef) -> -1
  // Only done before operation legalization. For vectors the all-ones
  // constant is built at element width.
  if (!LegalOperations &&
      (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) {
    EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
    return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
  }
  // fold (or c1, c2) -> c1|c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
  // fold (or x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (or x, -1) -> -1
  if (N1C && N1C->isAllOnesValue())
    return N1;
  // fold (or x, c) -> c iff (x & ~c) == 0
  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
    return N1;

  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + srl)
  SDValue BSwap = MatchBSwapHWord(N, N0, N1);
  if (BSwap.getNode() != 0)
    return BSwap;
  BSwap = MatchBSwapHWordLow(N, N0, N1);
  if (BSwap.getNode() != 0)
    return BSwap;

  // reassociate or
  SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1);
  if (ROR.getNode() != 0)
    return ROR;
  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
  // iff (c1 & c2) != 0.
  // The inner AND must have a single use and a constant mask.
  if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      isa<ConstantSDNode>(N0.getOperand(1))) {
    ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
    if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0)
      return DAG.getNode(ISD::AND, SDLoc(N), VT,
                         DAG.getNode(ISD::OR, SDLoc(N0), VT,
                                     N0.getOperand(0), N1),
                         DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1));
  }
  // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
  // LL/LR and RL/RR receive the compared operands of N0 and N1; CC0/CC1 their
  // condition codes.
  if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
    ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
    ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();

    // Same integer constant on the right of both compares, same predicate.
    if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
        LL.getValueType().isInteger()) {
      // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
      // fold (or (setlt X, 0), (setlt Y, 0)) -> (setlt (or X, Y), 0)
      if (cast<ConstantSDNode>(LR)->isNullValue() &&
          (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
        SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
                                     LR.getValueType(), LL, RL);
        AddToWorkList(ORNode.getNode());
        return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
      }
      // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
      // fold (or (setgt X, -1), (setgt Y, -1)) -> (setgt (and X, Y), -1)
      if (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
          (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
        SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
                                      LR.getValueType(), LL, RL);
        AddToWorkList(ANDNode.getNode());
        return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1);
      }
    }
    // canonicalize equivalent to ll == rl
    // If the second compare has its operands in the opposite order, swap them
    // and adjust its predicate so that both compares read (LL, LR).
    if (LL == RR && LR == RL) {
      Op1 = ISD::getSetCCSwappedOperands(Op1);
      std::swap(RL, RR);
    }
    // Two compares of the same operands: merge the predicates into one
    // compare when a combined predicate exists and, after operation
    // legalization, is legal.
    if (LL == RL && LR == RR) {
      bool isInteger = LL.getValueType().isInteger();
      ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
      if (Result != ISD::SETCC_INVALID &&
          (!LegalOperations ||
           (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
            TLI.isOperationLegal(ISD::SETCC,
              getSetCCResultType(N0.getValueType())))))
        return DAG.getSetCC(SDLoc(N), N0.getValueType(),
                            LL, LR, Result);
    }
  }

  // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
  if (N0.getOpcode() == N1.getOpcode()) {
    SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
    if (Tmp.getNode()) return Tmp;
  }

  // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
  // C3 is C1 | C2.
  if (N0.getOpcode() == ISD::AND &&
      N1.getOpcode() == ISD::AND &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      N1.getOperand(1).getOpcode() == ISD::Constant &&
      // Don't increase # computations.
      (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
    // We can only do this xform if we know that bits from X that are set in C2
    // but not in C1 are already zero. Likewise for Y.
    const APInt &LHSMask =
      cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    const APInt &RHSMask =
      cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue();

    if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
        DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
      SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
                              N0.getOperand(0), N1.getOperand(0));
      return DAG.getNode(ISD::AND, SDLoc(N), VT, X,
                         DAG.getConstant(LHSMask | RHSMask, VT));
    }
  }

  // See if this is some rotate idiom.
  if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
    return SDValue(Rot, 0);

  // Simplify the operands using demanded-bits information.
  // Scalar types only.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
03197 if (!VT.isVector() && 03198 SimplifyDemandedBits(SDValue(N, 0))) 03199 return SDValue(N, 0); 03200 03201 return SDValue(); 03202 } 03203 03204 /// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present. 03205 static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { 03206 if (Op.getOpcode() == ISD::AND) { 03207 if (isa<ConstantSDNode>(Op.getOperand(1))) { 03208 Mask = Op.getOperand(1); 03209 Op = Op.getOperand(0); 03210 } else { 03211 return false; 03212 } 03213 } 03214 03215 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) { 03216 Shift = Op; 03217 return true; 03218 } 03219 03220 return false; 03221 } 03222 03223 // MatchRotate - Handle an 'or' of two operands. If this is one of the many 03224 // idioms for rotate, and if the target supports rotation instructions, generate 03225 // a rot[lr]. 03226 SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { 03227 // Must be a legal type. Expanded 'n promoted things won't work with rotates. 03228 EVT VT = LHS.getValueType(); 03229 if (!TLI.isTypeLegal(VT)) return 0; 03230 03231 // The target must have at least one rotate flavor. 03232 bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT); 03233 bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT); 03234 if (!HasROTL && !HasROTR) return 0; 03235 03236 // Match "(X shl/srl V1) & V2" where V2 may not be present. 03237 SDValue LHSShift; // The shift. 03238 SDValue LHSMask; // AND value if any. 03239 if (!MatchRotateHalf(LHS, LHSShift, LHSMask)) 03240 return 0; // Not part of a rotate. 03241 03242 SDValue RHSShift; // The shift. 03243 SDValue RHSMask; // AND value if any. 03244 if (!MatchRotateHalf(RHS, RHSShift, RHSMask)) 03245 return 0; // Not part of a rotate. 03246 03247 if (LHSShift.getOperand(0) != RHSShift.getOperand(0)) 03248 return 0; // Not shifting the same value. 03249 03250 if (LHSShift.getOpcode() == RHSShift.getOpcode()) 03251 return 0; // Shifts must disagree. 
// MatchRotate - Handle an 'or' of two operands. If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr].
//
// LHS and RHS are the two OR operands and DL is the location given to the
// created nodes. Returns the new node (a ROTL/ROTR, or an AND wrapping one
// when the operands were masked), or null when no rotate idiom was found.
SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
  // Must be a legal type. Expanded 'n promoted things won't work with rotates.
  EVT VT = LHS.getValueType();
  if (!TLI.isTypeLegal(VT)) return 0;

  // The target must have at least one rotate flavor.
  bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
  bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
  if (!HasROTL && !HasROTR) return 0;

  // Match "(X shl/srl V1) & V2" where V2 may not be present.
  SDValue LHSShift;   // The shift.
  SDValue LHSMask;    // AND value if any.
  if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
    return 0; // Not part of a rotate.

  SDValue RHSShift;   // The shift.
  SDValue RHSMask;    // AND value if any.
  if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
    return 0; // Not part of a rotate.

  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
    return 0;   // Not shifting the same value.

  if (LHSShift.getOpcode() == RHSShift.getOpcode())
    return 0;   // Shifts must disagree.

  // Canonicalize shl to left side in a shl/srl pair.
  if (RHSShift.getOpcode() == ISD::SHL) {
    std::swap(LHS, RHS);
    std::swap(LHSShift, RHSShift);
    std::swap(LHSMask , RHSMask );
  }

  // From here on LHSShift is the SHL and RHSShift is the SRL.
  unsigned OpSizeInBits = VT.getSizeInBits();
  SDValue LHSShiftArg = LHSShift.getOperand(0);
  SDValue LHSShiftAmt = LHSShift.getOperand(1);
  SDValue RHSShiftAmt = RHSShift.getOperand(1);

  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
  if (LHSShiftAmt.getOpcode() == ISD::Constant &&
      RHSShiftAmt.getOpcode() == ISD::Constant) {
    uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue();
    uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue();
    // The two amounts must add up to the bit width to form a rotate.
    if ((LShVal + RShVal) != OpSizeInBits)
      return 0;

    // ROTL is preferred when the target has it.
    SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
                              LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);

    // If there is an AND of either shifted operand, apply it to the result.
    if (LHSMask.getNode() || RHSMask.getNode()) {
      APInt Mask = APInt::getAllOnesValue(OpSizeInBits);

      // The shl half's mask must not clear the low LShVal bits, which are
      // supplied by the srl half.
      if (LHSMask.getNode()) {
        APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal);
        Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits;
      }
      // Likewise the srl half's mask must not clear the high RShVal bits,
      // which are supplied by the shl half.
      if (RHSMask.getNode()) {
        APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal);
        Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits;
      }

      Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT));
    }

    return Rot.getNode();
  }

  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the right stuff.
  if (LHSMask.getNode() || RHSMask.getNode())
    return 0;

  // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y)
  // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotr x, (sub 32, y))
  // ("32" stands for the bit width of the type, OpSizeInBits.)
  if (RHSShiftAmt.getOpcode() == ISD::SUB &&
      LHSShiftAmt == RHSShiftAmt.getOperand(1)) {
    if (ConstantSDNode *SUBC =
          dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) {
      if (SUBC->getAPIntValue() == OpSizeInBits) {
        return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
                           HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
      }
    }
  }

  // fold (or (shl x, (sub 32, y)), (srl x, y)) -> (rotr x, y)
  // fold (or (shl x, (sub 32, y)), (srl x, y)) -> (rotl x, (sub 32, y))
  // Here ROTR is preferred when the target has it.
  if (LHSShiftAmt.getOpcode() == ISD::SUB &&
      RHSShiftAmt == LHSShiftAmt.getOperand(1)) {
    if (ConstantSDNode *SUBC =
          dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) {
      if (SUBC->getAPIntValue() == OpSizeInBits) {
        return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg,
                           HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
      }
    }
  }

  // Look for sign/zext/any-extended or truncate cases:
  // Both shift amounts are wrapped in an extend or truncate. Look through
  // the wrappers to find the same y / (sub 32, y) relationship.
  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    SDValue LExtOp0 = LHSShiftAmt.getOperand(0);
    SDValue RExtOp0 = RHSShiftAmt.getOperand(0);
    if (RExtOp0.getOpcode() == ISD::SUB &&
        RExtOp0.getOperand(1) == LExtOp0) {
      // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
      //   (rotl x, y)
      // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
      //   (rotr x, (sub 32, y))
      if (ConstantSDNode *SUBC =
            dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) {
        if (SUBC->getAPIntValue() == OpSizeInBits) {
          return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
                             LHSShiftArg,
                             HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
        }
      }
    } else if (LExtOp0.getOpcode() == ISD::SUB &&
               RExtOp0 == LExtOp0.getOperand(1)) {
      // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
      //   (rotr x, y)
      // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
      //   (rotl x, (sub 32, y))
      if (ConstantSDNode *SUBC =
            dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) {
        if (SUBC->getAPIntValue() == OpSizeInBits) {
          return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT,
                             LHSShiftArg,
                             HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
        }
      }
    }
  }

  return 0;
}
/// visitXOR - Try to simplify an ISD::XOR node.
///
/// The folds below are attempted strictly in order and the first one that
/// applies wins. Returns the replacement value; SDValue(N, 0) when
/// SimplifyDemandedBits reports that it changed something; or an empty
/// SDValue when no fold applied.
SDValue DAGCombiner::visitXOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue LHS, RHS, CC;
  // Non-null only when the corresponding operand is a scalar constant.
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    // fold (xor x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
  if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // fold (xor x, undef) -> undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;
  // fold (xor c1, c2) -> c1^c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C);
  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
  // fold (xor x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // reassociate xor
  SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1);
  if (RXOR.getNode() != 0)
    return RXOR;

  // fold !(x cc y) -> (x !cc y)
  // An xor with 1 of a setcc-equivalent node is rebuilt with the inverted
  // condition code, provided that code is legal once operations have been
  // legalized.
  if (N1C && N1C->getAPIntValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) {
    bool isInt = LHS.getValueType().isInteger();
    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                               isInt);

    if (!LegalOperations ||
        TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
      switch (N0.getOpcode()) {
      default:
        llvm_unreachable("Unhandled SetCC Equivalent!");
      case ISD::SETCC:
        return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC);
      case ISD::SELECT_CC:
        return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2),
                               N0.getOperand(3), NotCC);
      }
    }
  }

  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
  // Only when the zext has a single use. The inner xor is queued so that it
  // can itself be combined.
  if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND &&
      N0.getNode()->hasOneUse() &&
      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
    SDValue V = N0.getOperand(0);
    V = DAG.getNode(ISD::XOR, SDLoc(N0), V.getValueType(), V,
                    DAG.getConstant(1, V.getValueType()));
    AddToWorkList(V.getNode());
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
  }

  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
  // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are setcc
  // i1 only. The local LHS/RHS below shadow the outer ones.
  if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
  // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are constants
  // Any type; here "not" is an xor with an all-ones constant.
  if (N1C && N1C->isAllOnesValue() &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (xor (and x, y), y) -> (and (not x), y)
  // Only when the AND has a single use and y is its second operand.
  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      N0->getOperand(1) == N1) {
    SDValue X = N0->getOperand(0);
    SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
    AddToWorkList(NotX.getNode());
    return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
  }
  // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
  // The inner constant may be on either side of the inner xor.
  if (N1C && N0.getOpcode() == ISD::XOR) {
    ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0));
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (N00C)
      return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(1),
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         N00C->getAPIntValue(), VT));
    if (N01C)
      return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(0),
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         N01C->getAPIntValue(), VT));
  }
  // fold (xor x, x) -> 0
  if (N0 == N1)
    return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations);

  // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
  if (N0.getOpcode() == N1.getOpcode()) {
    SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
    if (Tmp.getNode()) return Tmp;
  }

  // Simplify the expression using non-local knowledge.
  // Scalar types only.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
03510 SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) { 03511 SDNode *LHS = N->getOperand(0).getNode(); 03512 if (!LHS->hasOneUse()) return SDValue(); 03513 03514 // We want to pull some binops through shifts, so that we have (and (shift)) 03515 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of 03516 // thing happens with address calculations, so it's important to canonicalize 03517 // it. 03518 bool HighBitSet = false; // Can we transform this if the high bit is set? 03519 03520 switch (LHS->getOpcode()) { 03521 default: return SDValue(); 03522 case ISD::OR: 03523 case ISD::XOR: 03524 HighBitSet = false; // We can only transform sra if the high bit is clear. 03525 break; 03526 case ISD::AND: 03527 HighBitSet = true; // We can only transform sra if the high bit is set. 03528 break; 03529 case ISD::ADD: 03530 if (N->getOpcode() != ISD::SHL) 03531 return SDValue(); // only shl(add) not sr[al](add). 03532 HighBitSet = false; // We can only transform sra if the high bit is clear. 03533 break; 03534 } 03535 03536 // We require the RHS of the binop to be a constant as well. 03537 ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1)); 03538 if (!BinOpCst) return SDValue(); 03539 03540 // FIXME: disable this unless the input to the binop is a shift by a constant. 03541 // If it is not a shift, it pessimizes some common cases like: 03542 // 03543 // void foo(int *X, int i) { X[i & 1235] = 1; } 03544 // int bar(int *X, int i) { return X[i & 255]; } 03545 SDNode *BinOpLHSVal = LHS->getOperand(0).getNode(); 03546 if ((BinOpLHSVal->getOpcode() != ISD::SHL && 03547 BinOpLHSVal->getOpcode() != ISD::SRA && 03548 BinOpLHSVal->getOpcode() != ISD::SRL) || 03549 !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) 03550 return SDValue(); 03551 03552 EVT VT = N->getValueType(0); 03553 03554 // If this is a signed shift right, and the high bit is modified by the 03555 // logical operation, do not perform the transformation. 
The highBitSet 03556 // boolean indicates the value of the high bit of the constant which would 03557 // cause it to be modified for this operation. 03558 if (N->getOpcode() == ISD::SRA) { 03559 bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative(); 03560 if (BinOpRHSSignSet != HighBitSet) 03561 return SDValue(); 03562 } 03563 03564 // Fold the constants, shifting the binop RHS by the shift amount. 03565 SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)), 03566 N->getValueType(0), 03567 LHS->getOperand(1), N->getOperand(1)); 03568 03569 // Create the new shift. 03570 SDValue NewShift = DAG.getNode(N->getOpcode(), 03571 SDLoc(LHS->getOperand(0)), 03572 VT, LHS->getOperand(0), N->getOperand(1)); 03573 03574 // Create the new binop. 03575 return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS); 03576 } 03577 03578 SDValue DAGCombiner::visitSHL(SDNode *N) { 03579 SDValue N0 = N->getOperand(0); 03580 SDValue N1 = N->getOperand(1); 03581 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 03582 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 03583 EVT VT = N0.getValueType(); 03584 unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); 03585 03586 // fold (shl c1, c2) -> c1<<c2 03587 if (N0C && N1C) 03588 return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C); 03589 // fold (shl 0, x) -> 0 03590 if (N0C && N0C->isNullValue()) 03591 return N0; 03592 // fold (shl x, c >= size(x)) -> undef 03593 if (N1C && N1C->getZExtValue() >= OpSizeInBits) 03594 return DAG.getUNDEF(VT); 03595 // fold (shl x, 0) -> x 03596 if (N1C && N1C->isNullValue()) 03597 return N0; 03598 // fold (shl undef, x) -> 0 03599 if (N0.getOpcode() == ISD::UNDEF) 03600 return DAG.getConstant(0, VT); 03601 // if (shl x, c) is known to be zero, return 0 03602 if (DAG.MaskedValueIsZero(SDValue(N, 0), 03603 APInt::getAllOnesValue(OpSizeInBits))) 03604 return DAG.getConstant(0, VT); 03605 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). 
03606 if (N1.getOpcode() == ISD::TRUNCATE && 03607 N1.getOperand(0).getOpcode() == ISD::AND && 03608 N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { 03609 SDValue N101 = N1.getOperand(0).getOperand(1); 03610 if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) { 03611 EVT TruncVT = N1.getValueType(); 03612 SDValue N100 = N1.getOperand(0).getOperand(0); 03613 APInt TruncC = N101C->getAPIntValue(); 03614 TruncC = TruncC.trunc(TruncVT.getSizeInBits()); 03615 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, 03616 DAG.getNode(ISD::AND, SDLoc(N), TruncVT, 03617 DAG.getNode(ISD::TRUNCATE, 03618 SDLoc(N), 03619 TruncVT, N100), 03620 DAG.getConstant(TruncC, TruncVT))); 03621 } 03622 } 03623 03624 if (N1C && SimplifyDemandedBits(SDValue(N, 0))) 03625 return SDValue(N, 0); 03626 03627 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2)) 03628 if (N1C && N0.getOpcode() == ISD::SHL && 03629 N0.getOperand(1).getOpcode() == ISD::Constant) { 03630 uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); 03631 uint64_t c2 = N1C->getZExtValue(); 03632 if (c1 + c2 >= OpSizeInBits) 03633 return DAG.getConstant(0, VT); 03634 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), 03635 DAG.getConstant(c1 + c2, N1.getValueType())); 03636 } 03637 03638 // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2))) 03639 // For this to be valid, the second form must not preserve any of the bits 03640 // that are shifted out by the inner shift in the first form. This means 03641 // the outer shift size must be >= the number of bits added by the ext. 03642 // As a corollary, we don't care what kind of ext it is. 
03643 if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND || 03644 N0.getOpcode() == ISD::ANY_EXTEND || 03645 N0.getOpcode() == ISD::SIGN_EXTEND) && 03646 N0.getOperand(0).getOpcode() == ISD::SHL && 03647 isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) { 03648 uint64_t c1 = 03649 cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue(); 03650 uint64_t c2 = N1C->getZExtValue(); 03651 EVT InnerShiftVT = N0.getOperand(0).getValueType(); 03652 uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits(); 03653 if (c2 >= OpSizeInBits - InnerShiftSize) { 03654 if (c1 + c2 >= OpSizeInBits) 03655 return DAG.getConstant(0, VT); 03656 return DAG.getNode(ISD::SHL, SDLoc(N0), VT, 03657 DAG.getNode(N0.getOpcode(), SDLoc(N0), VT, 03658 N0.getOperand(0)->getOperand(0)), 03659 DAG.getConstant(c1 + c2, N1.getValueType())); 03660 } 03661 } 03662 03663 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or 03664 // (and (srl x, (sub c1, c2), MASK) 03665 // Only fold this if the inner shift has no other uses -- if it does, folding 03666 // this will increase the total number of instructions. 
03667 if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() && 03668 N0.getOperand(1).getOpcode() == ISD::Constant) { 03669 uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); 03670 if (c1 < VT.getSizeInBits()) { 03671 uint64_t c2 = N1C->getZExtValue(); 03672 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 03673 VT.getSizeInBits() - c1); 03674 SDValue Shift; 03675 if (c2 > c1) { 03676 Mask = Mask.shl(c2-c1); 03677 Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), 03678 DAG.getConstant(c2-c1, N1.getValueType())); 03679 } else { 03680 Mask = Mask.lshr(c1-c2); 03681 Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), 03682 DAG.getConstant(c1-c2, N1.getValueType())); 03683 } 03684 return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift, 03685 DAG.getConstant(Mask, VT)); 03686 } 03687 } 03688 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1)) 03689 if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) { 03690 SDValue HiBitsMask = 03691 DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(), 03692 VT.getSizeInBits() - 03693 N1C->getZExtValue()), 03694 VT); 03695 return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), 03696 HiBitsMask); 03697 } 03698 03699 if (N1C) { 03700 SDValue NewSHL = visitShiftByConstant(N, N1C->getZExtValue()); 03701 if (NewSHL.getNode()) 03702 return NewSHL; 03703 } 03704 03705 return SDValue(); 03706 } 03707 03708 SDValue DAGCombiner::visitSRA(SDNode *N) { 03709 SDValue N0 = N->getOperand(0); 03710 SDValue N1 = N->getOperand(1); 03711 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 03712 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 03713 EVT VT = N0.getValueType(); 03714 unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); 03715 03716 // fold (sra c1, c2) -> (sra c1, c2) 03717 if (N0C && N1C) 03718 return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C); 03719 // fold (sra 0, x) -> 0 03720 if (N0C && N0C->isNullValue()) 03721 return N0; 03722 // fold (sra -1, x) 
-> -1 03723 if (N0C && N0C->isAllOnesValue()) 03724 return N0; 03725 // fold (sra x, (setge c, size(x))) -> undef 03726 if (N1C && N1C->getZExtValue() >= OpSizeInBits) 03727 return DAG.getUNDEF(VT); 03728 // fold (sra x, 0) -> x 03729 if (N1C && N1C->isNullValue()) 03730 return N0; 03731 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports 03732 // sext_inreg. 03733 if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) { 03734 unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue(); 03735 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits); 03736 if (VT.isVector()) 03737 ExtVT = EVT::getVectorVT(*DAG.getContext(), 03738 ExtVT, VT.getVectorNumElements()); 03739 if ((!LegalOperations || 03740 TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT))) 03741 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, 03742 N0.getOperand(0), DAG.getValueType(ExtVT)); 03743 } 03744 03745 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2)) 03746 if (N1C && N0.getOpcode() == ISD::SRA) { 03747 if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { 03748 unsigned Sum = N1C->getZExtValue() + C1->getZExtValue(); 03749 if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1; 03750 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0), 03751 DAG.getConstant(Sum, N1C->getValueType(0))); 03752 } 03753 } 03754 03755 // fold (sra (shl X, m), (sub result_size, n)) 03756 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for 03757 // result_size - n != m. 03758 // If truncate is free for the target sext(shl) is likely to result in better 03759 // code. 03760 if (N0.getOpcode() == ISD::SHL) { 03761 // Get the two constanst of the shifts, CN0 = m, CN = n. 03762 const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 03763 if (N01C && N1C) { 03764 // Determine what the truncate's result bitsize and type would be. 
03765 EVT TruncVT = 03766 EVT::getIntegerVT(*DAG.getContext(), 03767 OpSizeInBits - N1C->getZExtValue()); 03768 // Determine the residual right-shift amount. 03769 signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); 03770 03771 // If the shift is not a no-op (in which case this should be just a sign 03772 // extend already), the truncated to type is legal, sign_extend is legal 03773 // on that type, and the truncate to that type is both legal and free, 03774 // perform the transform. 03775 if ((ShiftAmt > 0) && 03776 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) && 03777 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) && 03778 TLI.isTruncateFree(VT, TruncVT)) { 03779 03780 SDValue Amt = DAG.getConstant(ShiftAmt, 03781 getShiftAmountTy(N0.getOperand(0).getValueType())); 03782 SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), VT, 03783 N0.getOperand(0), Amt); 03784 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), TruncVT, 03785 Shift); 03786 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), 03787 N->getValueType(0), Trunc); 03788 } 03789 } 03790 } 03791 03792 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))). 
03793 if (N1.getOpcode() == ISD::TRUNCATE && 03794 N1.getOperand(0).getOpcode() == ISD::AND && 03795 N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { 03796 SDValue N101 = N1.getOperand(0).getOperand(1); 03797 if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) { 03798 EVT TruncVT = N1.getValueType(); 03799 SDValue N100 = N1.getOperand(0).getOperand(0); 03800 APInt TruncC = N101C->getAPIntValue(); 03801 TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits()); 03802 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, 03803 DAG.getNode(ISD::AND, SDLoc(N), 03804 TruncVT, 03805 DAG.getNode(ISD::TRUNCATE, 03806 SDLoc(N), 03807 TruncVT, N100), 03808 DAG.getConstant(TruncC, TruncVT))); 03809 } 03810 } 03811 03812 // fold (sra (trunc (sr x, c1)), c2) -> (trunc (sra x, c1+c2)) 03813 // if c1 is equal to the number of bits the trunc removes 03814 if (N0.getOpcode() == ISD::TRUNCATE && 03815 (N0.getOperand(0).getOpcode() == ISD::SRL || 03816 N0.getOperand(0).getOpcode() == ISD::SRA) && 03817 N0.getOperand(0).hasOneUse() && 03818 N0.getOperand(0).getOperand(1).hasOneUse() && 03819 N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) { 03820 EVT LargeVT = N0.getOperand(0).getValueType(); 03821 ConstantSDNode *LargeShiftAmt = 03822 cast<ConstantSDNode>(N0.getOperand(0).getOperand(1)); 03823 03824 if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits == 03825 LargeShiftAmt->getZExtValue()) { 03826 SDValue Amt = 03827 DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(), 03828 getShiftAmountTy(N0.getOperand(0).getOperand(0).getValueType())); 03829 SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT, 03830 N0.getOperand(0).getOperand(0), Amt); 03831 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA); 03832 } 03833 } 03834 03835 // Simplify, based on bits shifted out of the LHS. 03836 if (N1C && SimplifyDemandedBits(SDValue(N, 0))) 03837 return SDValue(N, 0); 03838 03839 03840 // If the sign bit is known to be zero, switch this to a SRL. 
03841 if (DAG.SignBitIsZero(N0)) 03842 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1); 03843 03844 if (N1C) { 03845 SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue()); 03846 if (NewSRA.getNode()) 03847 return NewSRA; 03848 } 03849 03850 return SDValue(); 03851 } 03852 03853 SDValue DAGCombiner::visitSRL(SDNode *N) { 03854 SDValue N0 = N->getOperand(0); 03855 SDValue N1 = N->getOperand(1); 03856 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 03857 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 03858 EVT VT = N0.getValueType(); 03859 unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); 03860 03861 // fold (srl c1, c2) -> c1 >>u c2 03862 if (N0C && N1C) 03863 return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C); 03864 // fold (srl 0, x) -> 0 03865 if (N0C && N0C->isNullValue()) 03866 return N0; 03867 // fold (srl x, c >= size(x)) -> undef 03868 if (N1C && N1C->getZExtValue() >= OpSizeInBits) 03869 return DAG.getUNDEF(VT); 03870 // fold (srl x, 0) -> x 03871 if (N1C && N1C->isNullValue()) 03872 return N0; 03873 // if (srl x, c) is known to be zero, return 0 03874 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), 03875 APInt::getAllOnesValue(OpSizeInBits))) 03876 return DAG.getConstant(0, VT); 03877 03878 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2)) 03879 if (N1C && N0.getOpcode() == ISD::SRL && 03880 N0.getOperand(1).getOpcode() == ISD::Constant) { 03881 uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); 03882 uint64_t c2 = N1C->getZExtValue(); 03883 if (c1 + c2 >= OpSizeInBits) 03884 return DAG.getConstant(0, VT); 03885 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), 03886 DAG.getConstant(c1 + c2, N1.getValueType())); 03887 } 03888 03889 // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2))) 03890 if (N1C && N0.getOpcode() == ISD::TRUNCATE && 03891 N0.getOperand(0).getOpcode() == ISD::SRL && 03892 isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) { 03893 uint64_t 
c1 = 03894 cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue(); 03895 uint64_t c2 = N1C->getZExtValue(); 03896 EVT InnerShiftVT = N0.getOperand(0).getValueType(); 03897 EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType(); 03898 uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits(); 03899 // This is only valid if the OpSizeInBits + c1 = size of inner shift. 03900 if (c1 + OpSizeInBits == InnerShiftSize) { 03901 if (c1 + c2 >= InnerShiftSize) 03902 return DAG.getConstant(0, VT); 03903 return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, 03904 DAG.getNode(ISD::SRL, SDLoc(N0), InnerShiftVT, 03905 N0.getOperand(0)->getOperand(0), 03906 DAG.getConstant(c1 + c2, ShiftCountVT))); 03907 } 03908 } 03909 03910 // fold (srl (shl x, c), c) -> (and x, cst2) 03911 if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 && 03912 N0.getValueSizeInBits() <= 64) { 03913 uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits(); 03914 return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), 03915 DAG.getConstant(~0ULL >> ShAmt, VT)); 03916 } 03917 03918 03919 // fold (srl (anyextend x), c) -> (anyextend (srl x, c)) 03920 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { 03921 // Shifting in all undef bits? 03922 EVT SmallVT = N0.getOperand(0).getValueType(); 03923 if (N1C->getZExtValue() >= SmallVT.getSizeInBits()) 03924 return DAG.getUNDEF(VT); 03925 03926 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) { 03927 uint64_t ShiftAmt = N1C->getZExtValue(); 03928 SDValue SmallShift = DAG.getNode(ISD::SRL, SDLoc(N0), SmallVT, 03929 N0.getOperand(0), 03930 DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT))); 03931 AddToWorkList(SmallShift.getNode()); 03932 return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift); 03933 } 03934 } 03935 03936 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign 03937 // bit, which is unmodified by sra. 
03938 if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) { 03939 if (N0.getOpcode() == ISD::SRA) 03940 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1); 03941 } 03942 03943 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit). 03944 if (N1C && N0.getOpcode() == ISD::CTLZ && 03945 N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) { 03946 APInt KnownZero, KnownOne; 03947 DAG.ComputeMaskedBits(N0.getOperand(0), KnownZero, KnownOne); 03948 03949 // If any of the input bits are KnownOne, then the input couldn't be all 03950 // zeros, thus the result of the srl will always be zero. 03951 if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT); 03952 03953 // If all of the bits input the to ctlz node are known to be zero, then 03954 // the result of the ctlz is "32" and the result of the shift is one. 03955 APInt UnknownBits = ~KnownZero; 03956 if (UnknownBits == 0) return DAG.getConstant(1, VT); 03957 03958 // Otherwise, check to see if there is exactly one bit input to the ctlz. 03959 if ((UnknownBits & (UnknownBits - 1)) == 0) { 03960 // Okay, we know that only that the single bit specified by UnknownBits 03961 // could be set on input to the CTLZ node. If this bit is set, the SRL 03962 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair 03963 // to an SRL/XOR pair, which is likely to simplify more. 03964 unsigned ShAmt = UnknownBits.countTrailingZeros(); 03965 SDValue Op = N0.getOperand(0); 03966 03967 if (ShAmt) { 03968 Op = DAG.getNode(ISD::SRL, SDLoc(N0), VT, Op, 03969 DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType()))); 03970 AddToWorkList(Op.getNode()); 03971 } 03972 03973 return DAG.getNode(ISD::XOR, SDLoc(N), VT, 03974 Op, DAG.getConstant(1, VT)); 03975 } 03976 } 03977 03978 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))). 
03979 if (N1.getOpcode() == ISD::TRUNCATE && 03980 N1.getOperand(0).getOpcode() == ISD::AND && 03981 N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { 03982 SDValue N101 = N1.getOperand(0).getOperand(1); 03983 if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) { 03984 EVT TruncVT = N1.getValueType(); 03985 SDValue N100 = N1.getOperand(0).getOperand(0); 03986 APInt TruncC = N101C->getAPIntValue(); 03987 TruncC = TruncC.trunc(TruncVT.getSizeInBits()); 03988 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, 03989 DAG.getNode(ISD::AND, SDLoc(N), 03990 TruncVT, 03991 DAG.getNode(ISD::TRUNCATE, 03992 SDLoc(N), 03993 TruncVT, N100), 03994 DAG.getConstant(TruncC, TruncVT))); 03995 } 03996 } 03997 03998 // fold operands of srl based on knowledge that the low bits are not 03999 // demanded. 04000 if (N1C && SimplifyDemandedBits(SDValue(N, 0))) 04001 return SDValue(N, 0); 04002 04003 if (N1C) { 04004 SDValue NewSRL = visitShiftByConstant(N, N1C->getZExtValue()); 04005 if (NewSRL.getNode()) 04006 return NewSRL; 04007 } 04008 04009 // Attempt to convert a srl of a load into a narrower zero-extending load. 04010 SDValue NarrowLoad = ReduceLoadWidth(N); 04011 if (NarrowLoad.getNode()) 04012 return NarrowLoad; 04013 04014 // Here is a common situation. We want to optimize: 04015 // 04016 // %a = ... 04017 // %b = and i32 %a, 2 04018 // %c = srl i32 %b, 1 04019 // brcond i32 %c ... 04020 // 04021 // into 04022 // 04023 // %a = ... 04024 // %b = and %a, 2 04025 // %c = setcc eq %b, 0 04026 // brcond %c ... 04027 // 04028 // However when after the source operand of SRL is optimized into AND, the SRL 04029 // itself may not be optimized further. Look for it and add the BRCOND into 04030 // the worklist. 04031 if (N->hasOneUse()) { 04032 SDNode *Use = *N->use_begin(); 04033 if (Use->getOpcode() == ISD::BRCOND) 04034 AddToWorkList(Use); 04035 else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) { 04036 // Also look pass the truncate. 
04037 Use = *Use->use_begin(); 04038 if (Use->getOpcode() == ISD::BRCOND) 04039 AddToWorkList(Use); 04040 } 04041 } 04042 04043 return SDValue(); 04044 } 04045 04046 SDValue DAGCombiner::visitCTLZ(SDNode *N) { 04047 SDValue N0 = N->getOperand(0); 04048 EVT VT = N->getValueType(0); 04049 04050 // fold (ctlz c1) -> c2 04051 if (isa<ConstantSDNode>(N0)) 04052 return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0); 04053 return SDValue(); 04054 } 04055 04056 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) { 04057 SDValue N0 = N->getOperand(0); 04058 EVT VT = N->getValueType(0); 04059 04060 // fold (ctlz_zero_undef c1) -> c2 04061 if (isa<ConstantSDNode>(N0)) 04062 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0); 04063 return SDValue(); 04064 } 04065 04066 SDValue DAGCombiner::visitCTTZ(SDNode *N) { 04067 SDValue N0 = N->getOperand(0); 04068 EVT VT = N->getValueType(0); 04069 04070 // fold (cttz c1) -> c2 04071 if (isa<ConstantSDNode>(N0)) 04072 return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0); 04073 return SDValue(); 04074 } 04075 04076 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { 04077 SDValue N0 = N->getOperand(0); 04078 EVT VT = N->getValueType(0); 04079 04080 // fold (cttz_zero_undef c1) -> c2 04081 if (isa<ConstantSDNode>(N0)) 04082 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); 04083 return SDValue(); 04084 } 04085 04086 SDValue DAGCombiner::visitCTPOP(SDNode *N) { 04087 SDValue N0 = N->getOperand(0); 04088 EVT VT = N->getValueType(0); 04089 04090 // fold (ctpop c1) -> c2 04091 if (isa<ConstantSDNode>(N0)) 04092 return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0); 04093 return SDValue(); 04094 } 04095 04096 SDValue DAGCombiner::visitSELECT(SDNode *N) { 04097 SDValue N0 = N->getOperand(0); 04098 SDValue N1 = N->getOperand(1); 04099 SDValue N2 = N->getOperand(2); 04100 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 04101 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 04102 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); 
04103 EVT VT = N->getValueType(0); 04104 EVT VT0 = N0.getValueType(); 04105 04106 // fold (select C, X, X) -> X 04107 if (N1 == N2) 04108 return N1; 04109 // fold (select true, X, Y) -> X 04110 if (N0C && !N0C->isNullValue()) 04111 return N1; 04112 // fold (select false, X, Y) -> Y 04113 if (N0C && N0C->isNullValue()) 04114 return N2; 04115 // fold (select C, 1, X) -> (or C, X) 04116 if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1) 04117 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); 04118 // fold (select C, 0, 1) -> (xor C, 1) 04119 if (VT.isInteger() && 04120 (VT0 == MVT::i1 || 04121 (VT0.isInteger() && 04122 TLI.getBooleanContents(false) == 04123 TargetLowering::ZeroOrOneBooleanContent)) && 04124 N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) { 04125 SDValue XORNode; 04126 if (VT == VT0) 04127 return DAG.getNode(ISD::XOR, SDLoc(N), VT0, 04128 N0, DAG.getConstant(1, VT0)); 04129 XORNode = DAG.getNode(ISD::XOR, SDLoc(N0), VT0, 04130 N0, DAG.getConstant(1, VT0)); 04131 AddToWorkList(XORNode.getNode()); 04132 if (VT.bitsGT(VT0)) 04133 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode); 04134 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode); 04135 } 04136 // fold (select C, 0, X) -> (and (not C), X) 04137 if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) { 04138 SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); 04139 AddToWorkList(NOTNode.getNode()); 04140 return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2); 04141 } 04142 // fold (select C, X, 1) -> (or (not C), X) 04143 if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) { 04144 SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); 04145 AddToWorkList(NOTNode.getNode()); 04146 return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1); 04147 } 04148 // fold (select C, X, 0) -> (and C, X) 04149 if (VT == MVT::i1 && N2C && N2C->isNullValue()) 04150 return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); 04151 // fold (select X, X, Y) -> (or X, Y) 04152 // 
fold (select X, 1, Y) -> (or X, Y) 04153 if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1))) 04154 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); 04155 // fold (select X, Y, X) -> (and X, Y) 04156 // fold (select X, Y, 0) -> (and X, Y) 04157 if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0))) 04158 return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); 04159 04160 // If we can fold this based on the true/false value, do so. 04161 if (SimplifySelectOps(N, N1, N2)) 04162 return SDValue(N, 0); // Don't revisit N. 04163 04164 // fold selects based on a setcc into other things, such as min/max/abs 04165 if (N0.getOpcode() == ISD::SETCC) { 04166 // FIXME: 04167 // Check against MVT::Other for SELECT_CC, which is a workaround for targets 04168 // having to say they don't support SELECT_CC on every type the DAG knows 04169 // about, since there is no way to mark an opcode illegal at all value types 04170 if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) && 04171 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) 04172 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, 04173 N0.getOperand(0), N0.getOperand(1), 04174 N1, N2, N0.getOperand(2)); 04175 return SimplifySelect(SDLoc(N), N0, N1, N2); 04176 } 04177 04178 return SDValue(); 04179 } 04180 04181 SDValue DAGCombiner::visitVSELECT(SDNode *N) { 04182 SDValue N0 = N->getOperand(0); 04183 SDValue N1 = N->getOperand(1); 04184 SDValue N2 = N->getOperand(2); 04185 SDLoc DL(N); 04186 04187 // Canonicalize integer abs. 
04188 // vselect (setg[te] X, 0), X, -X -> 04189 // vselect (setgt X, -1), X, -X -> 04190 // vselect (setl[te] X, 0), -X, X -> 04191 // Y = sra (X, size(X)-1); xor (add (X, Y), Y) 04192 if (N0.getOpcode() == ISD::SETCC) { 04193 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); 04194 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); 04195 bool isAbs = false; 04196 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode()); 04197 04198 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) || 04199 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) && 04200 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1)) 04201 isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode()); 04202 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) && 04203 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1)) 04204 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode()); 04205 04206 if (isAbs) { 04207 EVT VT = LHS.getValueType(); 04208 SDValue Shift = DAG.getNode( 04209 ISD::SRA, DL, VT, LHS, 04210 DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT)); 04211 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift); 04212 AddToWorkList(Shift.getNode()); 04213 AddToWorkList(Add.getNode()); 04214 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift); 04215 } 04216 } 04217 04218 return SDValue(); 04219 } 04220 04221 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { 04222 SDValue N0 = N->getOperand(0); 04223 SDValue N1 = N->getOperand(1); 04224 SDValue N2 = N->getOperand(2); 04225 SDValue N3 = N->getOperand(3); 04226 SDValue N4 = N->getOperand(4); 04227 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get(); 04228 04229 // fold select_cc lhs, rhs, x, x, cc -> x 04230 if (N2 == N3) 04231 return N2; 04232 04233 // Determine if the condition we're dealing with is constant 04234 SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), 04235 N0, N1, CC, SDLoc(N), false); 04236 if 
(SCC.getNode()) AddToWorkList(SCC.getNode()); 04237 04238 if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) { 04239 if (!SCCC->isNullValue()) 04240 return N2; // cond always true -> true val 04241 else 04242 return N3; // cond always false -> false val 04243 } 04244 04245 // Fold to a simpler select_cc 04246 if (SCC.getNode() && SCC.getOpcode() == ISD::SETCC) 04247 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(), 04248 SCC.getOperand(0), SCC.getOperand(1), N2, N3, 04249 SCC.getOperand(2)); 04250 04251 // If we can fold this based on the true/false value, do so. 04252 if (SimplifySelectOps(N, N2, N3)) 04253 return SDValue(N, 0); // Don't revisit N. 04254 04255 // fold select_cc into other things, such as min/max/abs 04256 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC); 04257 } 04258 04259 SDValue DAGCombiner::visitSETCC(SDNode *N) { 04260 return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1), 04261 cast<CondCodeSDNode>(N->getOperand(2))->get(), 04262 SDLoc(N)); 04263 } 04264 04265 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: 04266 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))" 04267 // transformation. Returns true if extension are possible and the above 04268 // mentioned transformation is profitable. 04269 static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, 04270 unsigned ExtOpc, 04271 SmallVector<SDNode*, 4> &ExtendNodes, 04272 const TargetLowering &TLI) { 04273 bool HasCopyToRegUses = false; 04274 bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType()); 04275 for (SDNode::use_iterator UI = N0.getNode()->use_begin(), 04276 UE = N0.getNode()->use_end(); 04277 UI != UE; ++UI) { 04278 SDNode *User = *UI; 04279 if (User == N) 04280 continue; 04281 if (UI.getUse().getResNo() != N0.getResNo()) 04282 continue; 04283 // FIXME: Only extend SETCC N, N and SETCC N, c for now. 
04284 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) { 04285 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get(); 04286 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC)) 04287 // Sign bits will be lost after a zext. 04288 return false; 04289 bool Add = false; 04290 for (unsigned i = 0; i != 2; ++i) { 04291 SDValue UseOp = User->getOperand(i); 04292 if (UseOp == N0) 04293 continue; 04294 if (!isa<ConstantSDNode>(UseOp)) 04295 return false; 04296 Add = true; 04297 } 04298 if (Add) 04299 ExtendNodes.push_back(User); 04300 continue; 04301 } 04302 // If truncates aren't free and there are users we can't 04303 // extend, it isn't worthwhile. 04304 if (!isTruncFree) 04305 return false; 04306 // Remember if this value is live-out. 04307 if (User->getOpcode() == ISD::CopyToReg) 04308 HasCopyToRegUses = true; 04309 } 04310 04311 if (HasCopyToRegUses) { 04312 bool BothLiveOut = false; 04313 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); 04314 UI != UE; ++UI) { 04315 SDUse &Use = UI.getUse(); 04316 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) { 04317 BothLiveOut = true; 04318 break; 04319 } 04320 } 04321 if (BothLiveOut) 04322 // Both unextended and extended values are live out. There had better be 04323 // a good reason for the transformation. 04324 return ExtendNodes.size(); 04325 } 04326 return true; 04327 } 04328 04329 void DAGCombiner::ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs, 04330 SDValue Trunc, SDValue ExtLoad, SDLoc DL, 04331 ISD::NodeType ExtType) { 04332 // Extend SetCC uses if necessary. 
04333 for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { 04334 SDNode *SetCC = SetCCs[i]; 04335 SmallVector<SDValue, 4> Ops; 04336 04337 for (unsigned j = 0; j != 2; ++j) { 04338 SDValue SOp = SetCC->getOperand(j); 04339 if (SOp == Trunc) 04340 Ops.push_back(ExtLoad); 04341 else 04342 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp)); 04343 } 04344 04345 Ops.push_back(SetCC->getOperand(2)); 04346 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), 04347 &Ops[0], Ops.size())); 04348 } 04349 } 04350 04351 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { 04352 SDValue N0 = N->getOperand(0); 04353 EVT VT = N->getValueType(0); 04354 04355 // fold (sext c1) -> c1 04356 if (isa<ConstantSDNode>(N0)) 04357 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N0); 04358 04359 // fold (sext (sext x)) -> (sext x) 04360 // fold (sext (aext x)) -> (sext x) 04361 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) 04362 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, 04363 N0.getOperand(0)); 04364 04365 if (N0.getOpcode() == ISD::TRUNCATE) { 04366 // fold (sext (truncate (load x))) -> (sext (smaller load x)) 04367 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n))) 04368 SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); 04369 if (NarrowLoad.getNode()) { 04370 SDNode* oye = N0.getNode()->getOperand(0).getNode(); 04371 if (NarrowLoad.getNode() != N0.getNode()) { 04372 CombineTo(N0.getNode(), NarrowLoad); 04373 // CombineTo deleted the truncate, if needed, but not what's under it. 04374 AddToWorkList(oye); 04375 } 04376 return SDValue(N, 0); // Return N so it doesn't get rechecked! 04377 } 04378 04379 // See if the value being truncated is already sign extended. If so, just 04380 // eliminate the trunc/sext pair. 
04381 SDValue Op = N0.getOperand(0); 04382 unsigned OpBits = Op.getValueType().getScalarType().getSizeInBits(); 04383 unsigned MidBits = N0.getValueType().getScalarType().getSizeInBits(); 04384 unsigned DestBits = VT.getScalarType().getSizeInBits(); 04385 unsigned NumSignBits = DAG.ComputeNumSignBits(Op); 04386 04387 if (OpBits == DestBits) { 04388 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign 04389 // bits, it is already ready. 04390 if (NumSignBits > DestBits-MidBits) 04391 return Op; 04392 } else if (OpBits < DestBits) { 04393 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign 04394 // bits, just sext from i32. 04395 if (NumSignBits > OpBits-MidBits) 04396 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, Op); 04397 } else { 04398 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign 04399 // bits, just truncate to i32. 04400 if (NumSignBits > OpBits-MidBits) 04401 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); 04402 } 04403 04404 // fold (sext (truncate x)) -> (sextinreg x). 04405 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, 04406 N0.getValueType())) { 04407 if (OpBits < DestBits) 04408 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op); 04409 else if (OpBits > DestBits) 04410 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op); 04411 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, Op, 04412 DAG.getValueType(N0.getValueType())); 04413 } 04414 } 04415 04416 // fold (sext (load x)) -> (sext (truncate (sextload x))) 04417 // None of the supported targets knows how to perform load and sign extend 04418 // on vectors in one instruction. We only perform this transformation on 04419 // scalars. 
04420 if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && 04421 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || 04422 TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) { 04423 bool DoXform = true; 04424 SmallVector<SDNode*, 4> SetCCs; 04425 if (!N0.hasOneUse()) 04426 DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI); 04427 if (DoXform) { 04428 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 04429 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, 04430 LN0->getChain(), 04431 LN0->getBasePtr(), LN0->getPointerInfo(), 04432 N0.getValueType(), 04433 LN0->isVolatile(), LN0->isNonTemporal(), 04434 LN0->getAlignment()); 04435 CombineTo(N, ExtLoad); 04436 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), 04437 N0.getValueType(), ExtLoad); 04438 CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); 04439 ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), 04440 ISD::SIGN_EXTEND); 04441 return SDValue(N, 0); // Return N so it doesn't get rechecked! 04442 } 04443 } 04444 04445 // fold (sext (sextload x)) -> (sext (truncate (sextload x))) 04446 // fold (sext ( extload x)) -> (sext (truncate (sextload x))) 04447 if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && 04448 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { 04449 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 04450 EVT MemVT = LN0->getMemoryVT(); 04451 if ((!LegalOperations && !LN0->isVolatile()) || 04452 TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) { 04453 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, 04454 LN0->getChain(), 04455 LN0->getBasePtr(), LN0->getPointerInfo(), 04456 MemVT, 04457 LN0->isVolatile(), LN0->isNonTemporal(), 04458 LN0->getAlignment()); 04459 CombineTo(N, ExtLoad); 04460 CombineTo(N0.getNode(), 04461 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), 04462 N0.getValueType(), ExtLoad), 04463 ExtLoad.getValue(1)); 04464 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
04465 } 04466 } 04467 04468 // fold (sext (and/or/xor (load x), cst)) -> 04469 // (and/or/xor (sextload x), (sext cst)) 04470 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || 04471 N0.getOpcode() == ISD::XOR) && 04472 isa<LoadSDNode>(N0.getOperand(0)) && 04473 N0.getOperand(1).getOpcode() == ISD::Constant && 04474 TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) && 04475 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { 04476 LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); 04477 if (LN0->getExtensionType() != ISD::ZEXTLOAD) { 04478 bool DoXform = true; 04479 SmallVector<SDNode*, 4> SetCCs; 04480 if (!N0.hasOneUse()) 04481 DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND, 04482 SetCCs, TLI); 04483 if (DoXform) { 04484 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT, 04485 LN0->getChain(), LN0->getBasePtr(), 04486 LN0->getPointerInfo(), 04487 LN0->getMemoryVT(), 04488 LN0->isVolatile(), 04489 LN0->isNonTemporal(), 04490 LN0->getAlignment()); 04491 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); 04492 Mask = Mask.sext(VT.getSizeInBits()); 04493 SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT, 04494 ExtLoad, DAG.getConstant(Mask, VT)); 04495 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, 04496 SDLoc(N0.getOperand(0)), 04497 N0.getOperand(0).getValueType(), ExtLoad); 04498 CombineTo(N, And); 04499 CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); 04500 ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), 04501 ISD::SIGN_EXTEND); 04502 return SDValue(N, 0); // Return N so it doesn't get rechecked! 04503 } 04504 } 04505 } 04506 04507 if (N0.getOpcode() == ISD::SETCC) { 04508 // sext(setcc) -> sext_in_reg(vsetcc) for vectors. 04509 // Only do this before legalize for now. 
04510 if (VT.isVector() && !LegalOperations && 04511 TLI.getBooleanContents(true) == 04512 TargetLowering::ZeroOrNegativeOneBooleanContent) { 04513 EVT N0VT = N0.getOperand(0).getValueType(); 04514 // On some architectures (such as SSE/NEON/etc) the SETCC result type is 04515 // of the same size as the compared operands. Only optimize sext(setcc()) 04516 // if this is the case. 04517 EVT SVT = getSetCCResultType(N0VT); 04518 04519 // We know that the # elements of the results is the same as the 04520 // # elements of the compare (and the # elements of the compare result 04521 // for that matter). Check to see that they are the same size. If so, 04522 // we know that the element size of the sext'd result matches the 04523 // element size of the compare operands. 04524 if (VT.getSizeInBits() == SVT.getSizeInBits()) 04525 return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), 04526 N0.getOperand(1), 04527 cast<CondCodeSDNode>(N0.getOperand(2))->get()); 04528 04529 // If the desired elements are smaller or larger than the source 04530 // elements we can use a matching integer vector type and then 04531 // truncate/sign extend 04532 EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger(); 04533 if (SVT == MatchingVectorType) { 04534 SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType, 04535 N0.getOperand(0), N0.getOperand(1), 04536 cast<CondCodeSDNode>(N0.getOperand(2))->get()); 04537 return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT); 04538 } 04539 } 04540 04541 // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc) 04542 unsigned ElementWidth = VT.getScalarType().getSizeInBits(); 04543 SDValue NegOne = 04544 DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT); 04545 SDValue SCC = 04546 SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1), 04547 NegOne, DAG.getConstant(0, VT), 04548 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); 04549 if (SCC.getNode()) return SCC; 04550 if (!VT.isVector() && (!LegalOperations || 04551 
TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(VT)))) 04552 return DAG.getNode(ISD::SELECT, SDLoc(N), VT, 04553 DAG.getSetCC(SDLoc(N), 04554 getSetCCResultType(VT), 04555 N0.getOperand(0), N0.getOperand(1), 04556 cast<CondCodeSDNode>(N0.getOperand(2))->get()), 04557 NegOne, DAG.getConstant(0, VT)); 04558 } 04559 04560 // fold (sext x) -> (zext x) if the sign bit is known zero. 04561 if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && 04562 DAG.SignBitIsZero(N0)) 04563 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0); 04564 04565 return SDValue(); 04566 } 04567 04568 // isTruncateOf - If N is a truncate of some other value, return true, record 04569 // the value being truncated in Op and which of Op's bits are zero in KnownZero. 04570 // This function computes KnownZero to avoid a duplicated call to 04571 // ComputeMaskedBits in the caller. 04572 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, 04573 APInt &KnownZero) { 04574 APInt KnownOne; 04575 if (N->getOpcode() == ISD::TRUNCATE) { 04576 Op = N->getOperand(0); 04577 DAG.ComputeMaskedBits(Op, KnownZero, KnownOne); 04578 return true; 04579 } 04580 04581 if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 || 04582 cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE) 04583 return false; 04584 04585 SDValue Op0 = N->getOperand(0); 04586 SDValue Op1 = N->getOperand(1); 04587 assert(Op0.getValueType() == Op1.getValueType()); 04588 04589 ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0); 04590 ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1); 04591 if (COp0 && COp0->isNullValue()) 04592 Op = Op1; 04593 else if (COp1 && COp1->isNullValue()) 04594 Op = Op0; 04595 else 04596 return false; 04597 04598 DAG.ComputeMaskedBits(Op, KnownZero, KnownOne); 04599 04600 if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue()) 04601 return false; 04602 04603 return true; 04604 } 04605 04606 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { 
04607 SDValue N0 = N->getOperand(0); 04608 EVT VT = N->getValueType(0); 04609 04610 // fold (zext c1) -> c1 04611 if (isa<ConstantSDNode>(N0)) 04612 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0); 04613 // fold (zext (zext x)) -> (zext x) 04614 // fold (zext (aext x)) -> (zext x) 04615 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) 04616 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, 04617 N0.getOperand(0)); 04618 04619 // fold (zext (truncate x)) -> (zext x) or 04620 // (zext (truncate x)) -> (truncate x) 04621 // This is valid when the truncated bits of x are already zero. 04622 // FIXME: We should extend this to work for vectors too. 04623 SDValue Op; 04624 APInt KnownZero; 04625 if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) { 04626 APInt TruncatedBits = 04627 (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ? 04628 APInt(Op.getValueSizeInBits(), 0) : 04629 APInt::getBitsSet(Op.getValueSizeInBits(), 04630 N0.getValueSizeInBits(), 04631 std::min(Op.getValueSizeInBits(), 04632 VT.getSizeInBits())); 04633 if (TruncatedBits == (KnownZero & TruncatedBits)) { 04634 if (VT.bitsGT(Op.getValueType())) 04635 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op); 04636 if (VT.bitsLT(Op.getValueType())) 04637 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); 04638 04639 return Op; 04640 } 04641 } 04642 04643 // fold (zext (truncate (load x))) -> (zext (smaller load x)) 04644 // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n))) 04645 if (N0.getOpcode() == ISD::TRUNCATE) { 04646 SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); 04647 if (NarrowLoad.getNode()) { 04648 SDNode* oye = N0.getNode()->getOperand(0).getNode(); 04649 if (NarrowLoad.getNode() != N0.getNode()) { 04650 CombineTo(N0.getNode(), NarrowLoad); 04651 // CombineTo deleted the truncate, if needed, but not what's under it. 04652 AddToWorkList(oye); 04653 } 04654 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
04655 } 04656 } 04657 04658 // fold (zext (truncate x)) -> (and x, mask) 04659 if (N0.getOpcode() == ISD::TRUNCATE && 04660 (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) { 04661 04662 // fold (zext (truncate (load x))) -> (zext (smaller load x)) 04663 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n))) 04664 SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); 04665 if (NarrowLoad.getNode()) { 04666 SDNode* oye = N0.getNode()->getOperand(0).getNode(); 04667 if (NarrowLoad.getNode() != N0.getNode()) { 04668 CombineTo(N0.getNode(), NarrowLoad); 04669 // CombineTo deleted the truncate, if needed, but not what's under it. 04670 AddToWorkList(oye); 04671 } 04672 return SDValue(N, 0); // Return N so it doesn't get rechecked! 04673 } 04674 04675 SDValue Op = N0.getOperand(0); 04676 if (Op.getValueType().bitsLT(VT)) { 04677 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op); 04678 AddToWorkList(Op.getNode()); 04679 } else if (Op.getValueType().bitsGT(VT)) { 04680 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); 04681 AddToWorkList(Op.getNode()); 04682 } 04683 return DAG.getZeroExtendInReg(Op, SDLoc(N), 04684 N0.getValueType().getScalarType()); 04685 } 04686 04687 // Fold (zext (and (trunc x), cst)) -> (and x, cst), 04688 // if either of the casts is not free. 
04689 if (N0.getOpcode() == ISD::AND && 04690 N0.getOperand(0).getOpcode() == ISD::TRUNCATE && 04691 N0.getOperand(1).getOpcode() == ISD::Constant && 04692 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), 04693 N0.getValueType()) || 04694 !TLI.isZExtFree(N0.getValueType(), VT))) { 04695 SDValue X = N0.getOperand(0).getOperand(0); 04696 if (X.getValueType().bitsLT(VT)) { 04697 X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(X), VT, X); 04698 } else if (X.getValueType().bitsGT(VT)) { 04699 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); 04700 } 04701 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); 04702 Mask = Mask.zext(VT.getSizeInBits()); 04703 return DAG.getNode(ISD::AND, SDLoc(N), VT, 04704 X, DAG.getConstant(Mask, VT)); 04705 } 04706 04707 // fold (zext (load x)) -> (zext (truncate (zextload x))) 04708 // None of the supported targets knows how to perform load and vector_zext 04709 // on vectors in one instruction. We only perform this transformation on 04710 // scalars. 
04711 if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && 04712 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || 04713 TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) { 04714 bool DoXform = true; 04715 SmallVector<SDNode*, 4> SetCCs; 04716 if (!N0.hasOneUse()) 04717 DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI); 04718 if (DoXform) { 04719 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 04720 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, 04721 LN0->getChain(), 04722 LN0->getBasePtr(), LN0->getPointerInfo(), 04723 N0.getValueType(), 04724 LN0->isVolatile(), LN0->isNonTemporal(), 04725 LN0->getAlignment()); 04726 CombineTo(N, ExtLoad); 04727 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), 04728 N0.getValueType(), ExtLoad); 04729 CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); 04730 04731 ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), 04732 ISD::ZERO_EXTEND); 04733 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
04734 } 04735 } 04736 04737 // fold (zext (and/or/xor (load x), cst)) -> 04738 // (and/or/xor (zextload x), (zext cst)) 04739 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || 04740 N0.getOpcode() == ISD::XOR) && 04741 isa<LoadSDNode>(N0.getOperand(0)) && 04742 N0.getOperand(1).getOpcode() == ISD::Constant && 04743 TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) && 04744 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { 04745 LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); 04746 if (LN0->getExtensionType() != ISD::SEXTLOAD) { 04747 bool DoXform = true; 04748 SmallVector<SDNode*, 4> SetCCs; 04749 if (!N0.hasOneUse()) 04750 DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND, 04751 SetCCs, TLI); 04752 if (DoXform) { 04753 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT, 04754 LN0->getChain(), LN0->getBasePtr(), 04755 LN0->getPointerInfo(), 04756 LN0->getMemoryVT(), 04757 LN0->isVolatile(), 04758 LN0->isNonTemporal(), 04759 LN0->getAlignment()); 04760 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); 04761 Mask = Mask.zext(VT.getSizeInBits()); 04762 SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT, 04763 ExtLoad, DAG.getConstant(Mask, VT)); 04764 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, 04765 SDLoc(N0.getOperand(0)), 04766 N0.getOperand(0).getValueType(), ExtLoad); 04767 CombineTo(N, And); 04768 CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); 04769 ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), 04770 ISD::ZERO_EXTEND); 04771 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
04772 } 04773 } 04774 } 04775 04776 // fold (zext (zextload x)) -> (zext (truncate (zextload x))) 04777 // fold (zext ( extload x)) -> (zext (truncate (zextload x))) 04778 if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && 04779 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { 04780 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 04781 EVT MemVT = LN0->getMemoryVT(); 04782 if ((!LegalOperations && !LN0->isVolatile()) || 04783 TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) { 04784 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, 04785 LN0->getChain(), 04786 LN0->getBasePtr(), LN0->getPointerInfo(), 04787 MemVT, 04788 LN0->isVolatile(), LN0->isNonTemporal(), 04789 LN0->getAlignment()); 04790 CombineTo(N, ExtLoad); 04791 CombineTo(N0.getNode(), 04792 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), 04793 ExtLoad), 04794 ExtLoad.getValue(1)); 04795 return SDValue(N, 0); // Return N so it doesn't get rechecked! 04796 } 04797 } 04798 04799 if (N0.getOpcode() == ISD::SETCC) { 04800 if (!LegalOperations && VT.isVector()) { 04801 // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors. 04802 // Only do this before legalize for now. 04803 EVT N0VT = N0.getOperand(0).getValueType(); 04804 EVT EltVT = VT.getVectorElementType(); 04805 SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(), 04806 DAG.getConstant(1, EltVT)); 04807 if (VT.getSizeInBits() == N0VT.getSizeInBits()) 04808 // We know that the # elements of the results is the same as the 04809 // # elements of the compare (and the # elements of the compare result 04810 // for that matter). Check to see that they are the same size. If so, 04811 // we know that the element size of the sext'd result matches the 04812 // element size of the compare operands. 
04813 return DAG.getNode(ISD::AND, SDLoc(N), VT, 04814 DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), 04815 N0.getOperand(1), 04816 cast<CondCodeSDNode>(N0.getOperand(2))->get()), 04817 DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, 04818 &OneOps[0], OneOps.size())); 04819 04820 // If the desired elements are smaller or larger than the source 04821 // elements we can use a matching integer vector type and then 04822 // truncate/sign extend 04823 EVT MatchingElementType = 04824 EVT::getIntegerVT(*DAG.getContext(), 04825 N0VT.getScalarType().getSizeInBits()); 04826 EVT MatchingVectorType = 04827 EVT::getVectorVT(*DAG.getContext(), MatchingElementType, 04828 N0VT.getVectorNumElements()); 04829 SDValue VsetCC = 04830 DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), 04831 N0.getOperand(1), 04832 cast<CondCodeSDNode>(N0.getOperand(2))->get()); 04833 return DAG.getNode(ISD::AND, SDLoc(N), VT, 04834 DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT), 04835 DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, 04836 &OneOps[0], OneOps.size())); 04837 } 04838 04839 // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc 04840 SDValue SCC = 04841 SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1), 04842 DAG.getConstant(1, VT), DAG.getConstant(0, VT), 04843 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); 04844 if (SCC.getNode()) return SCC; 04845 } 04846 04847 // (zext (shl (zext x), cst)) -> (shl (zext x), cst) 04848 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) && 04849 isa<ConstantSDNode>(N0.getOperand(1)) && 04850 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND && 04851 N0.hasOneUse()) { 04852 SDValue ShAmt = N0.getOperand(1); 04853 unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue(); 04854 if (N0.getOpcode() == ISD::SHL) { 04855 SDValue InnerZExt = N0.getOperand(0); 04856 // If the original shl may be shifting out bits, do not perform this 04857 // transformation. 
04858 unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() - 04859 InnerZExt.getOperand(0).getValueType().getSizeInBits(); 04860 if (ShAmtVal > KnownZeroBits) 04861 return SDValue(); 04862 } 04863 04864 SDLoc DL(N); 04865 04866 // Ensure that the shift amount is wide enough for the shifted value. 04867 if (VT.getSizeInBits() >= 256) 04868 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt); 04869 04870 return DAG.getNode(N0.getOpcode(), DL, VT, 04871 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)), 04872 ShAmt); 04873 } 04874 04875 return SDValue(); 04876 } 04877 04878 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { 04879 SDValue N0 = N->getOperand(0); 04880 EVT VT = N->getValueType(0); 04881 04882 // fold (aext c1) -> c1 04883 if (isa<ConstantSDNode>(N0)) 04884 return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, N0); 04885 // fold (aext (aext x)) -> (aext x) 04886 // fold (aext (zext x)) -> (zext x) 04887 // fold (aext (sext x)) -> (sext x) 04888 if (N0.getOpcode() == ISD::ANY_EXTEND || 04889 N0.getOpcode() == ISD::ZERO_EXTEND || 04890 N0.getOpcode() == ISD::SIGN_EXTEND) 04891 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0)); 04892 04893 // fold (aext (truncate (load x))) -> (aext (smaller load x)) 04894 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n))) 04895 if (N0.getOpcode() == ISD::TRUNCATE) { 04896 SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); 04897 if (NarrowLoad.getNode()) { 04898 SDNode* oye = N0.getNode()->getOperand(0).getNode(); 04899 if (NarrowLoad.getNode() != N0.getNode()) { 04900 CombineTo(N0.getNode(), NarrowLoad); 04901 // CombineTo deleted the truncate, if needed, but not what's under it. 04902 AddToWorkList(oye); 04903 } 04904 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
04905 } 04906 } 04907 04908 // fold (aext (truncate x)) 04909 if (N0.getOpcode() == ISD::TRUNCATE) { 04910 SDValue TruncOp = N0.getOperand(0); 04911 if (TruncOp.getValueType() == VT) 04912 return TruncOp; // x iff x size == zext size. 04913 if (TruncOp.getValueType().bitsGT(VT)) 04914 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, TruncOp); 04915 return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, TruncOp); 04916 } 04917 04918 // Fold (aext (and (trunc x), cst)) -> (and x, cst) 04919 // if the trunc is not free. 04920 if (N0.getOpcode() == ISD::AND && 04921 N0.getOperand(0).getOpcode() == ISD::TRUNCATE && 04922 N0.getOperand(1).getOpcode() == ISD::Constant && 04923 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), 04924 N0.getValueType())) { 04925 SDValue X = N0.getOperand(0).getOperand(0); 04926 if (X.getValueType().bitsLT(VT)) { 04927 X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, X); 04928 } else if (X.getValueType().bitsGT(VT)) { 04929 X = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X); 04930 } 04931 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); 04932 Mask = Mask.zext(VT.getSizeInBits()); 04933 return DAG.getNode(ISD::AND, SDLoc(N), VT, 04934 X, DAG.getConstant(Mask, VT)); 04935 } 04936 04937 // fold (aext (load x)) -> (aext (truncate (extload x))) 04938 // None of the supported targets knows how to perform load and any_ext 04939 // on vectors in one instruction. We only perform this transformation on 04940 // scalars. 
04941 if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && 04942 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || 04943 TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { 04944 bool DoXform = true; 04945 SmallVector<SDNode*, 4> SetCCs; 04946 if (!N0.hasOneUse()) 04947 DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI); 04948 if (DoXform) { 04949 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 04950 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, 04951 LN0->getChain(), 04952 LN0->getBasePtr(), LN0->getPointerInfo(), 04953 N0.getValueType(), 04954 LN0->isVolatile(), LN0->isNonTemporal(), 04955 LN0->getAlignment()); 04956 CombineTo(N, ExtLoad); 04957 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), 04958 N0.getValueType(), ExtLoad); 04959 CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); 04960 ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), 04961 ISD::ANY_EXTEND); 04962 return SDValue(N, 0); // Return N so it doesn't get rechecked! 04963 } 04964 } 04965 04966 // fold (aext (zextload x)) -> (aext (truncate (zextload x))) 04967 // fold (aext (sextload x)) -> (aext (truncate (sextload x))) 04968 // fold (aext ( extload x)) -> (aext (truncate (extload x))) 04969 if (N0.getOpcode() == ISD::LOAD && 04970 !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && 04971 N0.hasOneUse()) { 04972 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 04973 EVT MemVT = LN0->getMemoryVT(); 04974 SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(N), 04975 VT, LN0->getChain(), LN0->getBasePtr(), 04976 LN0->getPointerInfo(), MemVT, 04977 LN0->isVolatile(), LN0->isNonTemporal(), 04978 LN0->getAlignment()); 04979 CombineTo(N, ExtLoad); 04980 CombineTo(N0.getNode(), 04981 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), 04982 N0.getValueType(), ExtLoad), 04983 ExtLoad.getValue(1)); 04984 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
04985 } 04986 04987 if (N0.getOpcode() == ISD::SETCC) { 04988 // aext(setcc) -> sext_in_reg(vsetcc) for vectors. 04989 // Only do this before legalize for now. 04990 if (VT.isVector() && !LegalOperations) { 04991 EVT N0VT = N0.getOperand(0).getValueType(); 04992 // We know that the # elements of the results is the same as the 04993 // # elements of the compare (and the # elements of the compare result 04994 // for that matter). Check to see that they are the same size. If so, 04995 // we know that the element size of the sext'd result matches the 04996 // element size of the compare operands. 04997 if (VT.getSizeInBits() == N0VT.getSizeInBits()) 04998 return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), 04999 N0.getOperand(1), 05000 cast<CondCodeSDNode>(N0.getOperand(2))->get()); 05001 // If the desired elements are smaller or larger than the source 05002 // elements we can use a matching integer vector type and then 05003 // truncate/sign extend 05004 else { 05005 EVT MatchingElementType = 05006 EVT::getIntegerVT(*DAG.getContext(), 05007 N0VT.getScalarType().getSizeInBits()); 05008 EVT MatchingVectorType = 05009 EVT::getVectorVT(*DAG.getContext(), MatchingElementType, 05010 N0VT.getVectorNumElements()); 05011 SDValue VsetCC = 05012 DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), 05013 N0.getOperand(1), 05014 cast<CondCodeSDNode>(N0.getOperand(2))->get()); 05015 return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT); 05016 } 05017 } 05018 05019 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc 05020 SDValue SCC = 05021 SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1), 05022 DAG.getConstant(1, VT), DAG.getConstant(0, VT), 05023 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); 05024 if (SCC.getNode()) 05025 return SCC; 05026 } 05027 05028 return SDValue(); 05029 } 05030 05031 /// GetDemandedBits - See if the specified operand can be simplified with the 05032 /// knowledge that only the bits specified by Mask are used. 
If so, return the 05033 /// simpler operand, otherwise return a null SDValue. 05034 SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { 05035 switch (V.getOpcode()) { 05036 default: break; 05037 case ISD::Constant: { 05038 const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode()); 05039 assert(CV != 0 && "Const value should be ConstSDNode."); 05040 const APInt &CVal = CV->getAPIntValue(); 05041 APInt NewVal = CVal & Mask; 05042 if (NewVal != CVal) { 05043 return DAG.getConstant(NewVal, V.getValueType()); 05044 } 05045 break; 05046 } 05047 case ISD::OR: 05048 case ISD::XOR: 05049 // If the LHS or RHS don't contribute bits to the or, drop them. 05050 if (DAG.MaskedValueIsZero(V.getOperand(0), Mask)) 05051 return V.getOperand(1); 05052 if (DAG.MaskedValueIsZero(V.getOperand(1), Mask)) 05053 return V.getOperand(0); 05054 break; 05055 case ISD::SRL: 05056 // Only look at single-use SRLs. 05057 if (!V.getNode()->hasOneUse()) 05058 break; 05059 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) { 05060 // See if we can recursively simplify the LHS. 05061 unsigned Amt = RHSC->getZExtValue(); 05062 05063 // Watch out for shift count overflow though. 05064 if (Amt >= Mask.getBitWidth()) break; 05065 APInt NewMask = Mask << Amt; 05066 SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask); 05067 if (SimplifyLHS.getNode()) 05068 return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(), 05069 SimplifyLHS, V.getOperand(1)); 05070 } 05071 } 05072 return SDValue(); 05073 } 05074 05075 /// ReduceLoadWidth - If the result of a wider load is shifted to right of N 05076 /// bits and then truncated to a narrower type and where N is a multiple 05077 /// of number of bits of the narrower type, transform it to a narrower load 05078 /// from address + N / num of bits of new type. If the result is to be 05079 /// extended, also fold the extension to form a extending load. 
05080 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { 05081 unsigned Opc = N->getOpcode(); 05082 05083 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD; 05084 SDValue N0 = N->getOperand(0); 05085 EVT VT = N->getValueType(0); 05086 EVT ExtVT = VT; 05087 05088 // This transformation isn't valid for vector loads. 05089 if (VT.isVector()) 05090 return SDValue(); 05091 05092 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then 05093 // extended to VT. 05094 if (Opc == ISD::SIGN_EXTEND_INREG) { 05095 ExtType = ISD::SEXTLOAD; 05096 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT(); 05097 } else if (Opc == ISD::SRL) { 05098 // Another special-case: SRL is basically zero-extending a narrower value. 05099 ExtType = ISD::ZEXTLOAD; 05100 N0 = SDValue(N, 0); 05101 ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 05102 if (!N01) return SDValue(); 05103 ExtVT = EVT::getIntegerVT(*DAG.getContext(), 05104 VT.getSizeInBits() - N01->getZExtValue()); 05105 } 05106 if (LegalOperations && !TLI.isLoadExtLegal(ExtType, ExtVT)) 05107 return SDValue(); 05108 05109 unsigned EVTBits = ExtVT.getSizeInBits(); 05110 05111 // Do not generate loads of non-round integer types since these can 05112 // be expensive (and would be wrong if the type is not byte sized). 05113 if (!ExtVT.isRound()) 05114 return SDValue(); 05115 05116 unsigned ShAmt = 0; 05117 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { 05118 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { 05119 ShAmt = N01->getZExtValue(); 05120 // Is the shift amount a multiple of size of VT? 05121 if ((ShAmt & (EVTBits-1)) == 0) { 05122 N0 = N0.getOperand(0); 05123 // Is the load width a multiple of size of VT? 05124 if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0) 05125 return SDValue(); 05126 } 05127 05128 // At this point, we must have a load or else we can't do the transform. 
05129 if (!isa<LoadSDNode>(N0)) return SDValue(); 05130 05131 // Because a SRL must be assumed to *need* to zero-extend the high bits 05132 // (as opposed to anyext the high bits), we can't combine the zextload 05133 // lowering of SRL and an sextload. 05134 if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD) 05135 return SDValue(); 05136 05137 // If the shift amount is larger than the input type then we're not 05138 // accessing any of the loaded bytes. If the load was a zextload/extload 05139 // then the result of the shift+trunc is zero/undef (handled elsewhere). 05140 if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits()) 05141 return SDValue(); 05142 } 05143 } 05144 05145 // If the load is shifted left (and the result isn't shifted back right), 05146 // we can fold the truncate through the shift. 05147 unsigned ShLeftAmt = 0; 05148 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() && 05149 ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) { 05150 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { 05151 ShLeftAmt = N01->getZExtValue(); 05152 N0 = N0.getOperand(0); 05153 } 05154 } 05155 05156 // If we haven't found a load, we can't narrow it. Don't transform one with 05157 // multiple uses, this would require adding a new load. 05158 if (!isa<LoadSDNode>(N0) || !N0.hasOneUse()) 05159 return SDValue(); 05160 05161 // Don't change the width of a volatile load. 05162 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 05163 if (LN0->isVolatile()) 05164 return SDValue(); 05165 05166 // Verify that we are actually reducing a load width here. 05167 if (LN0->getMemoryVT().getSizeInBits() < EVTBits) 05168 return SDValue(); 05169 05170 // For the transform to be legal, the load must produce only two values 05171 // (the value loaded and the chain). Don't transform a pre-increment 05172 // load, for example, which produces an extra value. 
Otherwise the 05173 // transformation is not equivalent, and the downstream logic to replace 05174 // uses gets things wrong. 05175 if (LN0->getNumValues() > 2) 05176 return SDValue(); 05177 05178 EVT PtrType = N0.getOperand(1).getValueType(); 05179 05180 if (PtrType == MVT::Untyped || PtrType.isExtended()) 05181 // It's not possible to generate a constant of extended or untyped type. 05182 return SDValue(); 05183 05184 // For big endian targets, we need to adjust the offset to the pointer to 05185 // load the correct bytes. 05186 if (TLI.isBigEndian()) { 05187 unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits(); 05188 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits(); 05189 ShAmt = LVTStoreBits - EVTStoreBits - ShAmt; 05190 } 05191 05192 uint64_t PtrOff = ShAmt / 8; 05193 unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); 05194 SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), 05195 PtrType, LN0->getBasePtr(), 05196 DAG.getConstant(PtrOff, PtrType)); 05197 AddToWorkList(NewPtr.getNode()); 05198 05199 SDValue Load; 05200 if (ExtType == ISD::NON_EXTLOAD) 05201 Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr, 05202 LN0->getPointerInfo().getWithOffset(PtrOff), 05203 LN0->isVolatile(), LN0->isNonTemporal(), 05204 LN0->isInvariant(), NewAlign); 05205 else 05206 Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(),NewPtr, 05207 LN0->getPointerInfo().getWithOffset(PtrOff), 05208 ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), 05209 NewAlign); 05210 05211 // Replace the old load's chain with the new load's chain. 05212 WorkListRemover DeadNodes(*this); 05213 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); 05214 05215 // Shift the result left, if we've swallowed a left shift. 
05216 SDValue Result = Load; 05217 if (ShLeftAmt != 0) { 05218 EVT ShImmTy = getShiftAmountTy(Result.getValueType()); 05219 if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt)) 05220 ShImmTy = VT; 05221 // If the shift amount is as large as the result size (but, presumably, 05222 // no larger than the source) then the useful bits of the result are 05223 // zero; we can't simply return the shortened shift, because the result 05224 // of that operation is undefined. 05225 if (ShLeftAmt >= VT.getSizeInBits()) 05226 Result = DAG.getConstant(0, VT); 05227 else 05228 Result = DAG.getNode(ISD::SHL, SDLoc(N0), VT, 05229 Result, DAG.getConstant(ShLeftAmt, ShImmTy)); 05230 } 05231 05232 // Return the new loaded value. 05233 return Result; 05234 } 05235 05236 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { 05237 SDValue N0 = N->getOperand(0); 05238 SDValue N1 = N->getOperand(1); 05239 EVT VT = N->getValueType(0); 05240 EVT EVT = cast<VTSDNode>(N1)->getVT(); 05241 unsigned VTBits = VT.getScalarType().getSizeInBits(); 05242 unsigned EVTBits = EVT.getScalarType().getSizeInBits(); 05243 05244 // fold (sext_in_reg c1) -> c1 05245 if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF) 05246 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1); 05247 05248 // If the input is already sign extended, just drop the extension. 05249 if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1) 05250 return N0; 05251 05252 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2 05253 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && 05254 EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) { 05255 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, 05256 N0.getOperand(0), N1); 05257 } 05258 05259 // fold (sext_in_reg (sext x)) -> (sext x) 05260 // fold (sext_in_reg (aext x)) -> (sext x) 05261 // if x is small enough. 
05262 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { 05263 SDValue N00 = N0.getOperand(0); 05264 if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits && 05265 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) 05266 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1); 05267 } 05268 05269 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero. 05270 if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits))) 05271 return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT); 05272 05273 // fold operands of sext_in_reg based on knowledge that the top bits are not 05274 // demanded. 05275 if (SimplifyDemandedBits(SDValue(N, 0))) 05276 return SDValue(N, 0); 05277 05278 // fold (sext_in_reg (load x)) -> (smaller sextload x) 05279 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits)) 05280 SDValue NarrowLoad = ReduceLoadWidth(N); 05281 if (NarrowLoad.getNode()) 05282 return NarrowLoad; 05283 05284 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24) 05285 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible. 05286 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above. 05287 if (N0.getOpcode() == ISD::SRL) { 05288 if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1))) 05289 if (ShAmt->getZExtValue()+EVTBits <= VTBits) { 05290 // We can turn this into an SRA iff the input to the SRL is already sign 05291 // extended enough. 
05292 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0)); 05293 if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits) 05294 return DAG.getNode(ISD::SRA, SDLoc(N), VT, 05295 N0.getOperand(0), N0.getOperand(1)); 05296 } 05297 } 05298 05299 // fold (sext_inreg (extload x)) -> (sextload x) 05300 if (ISD::isEXTLoad(N0.getNode()) && 05301 ISD::isUNINDEXEDLoad(N0.getNode()) && 05302 EVT == cast<LoadSDNode>(N0)->getMemoryVT() && 05303 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || 05304 TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { 05305 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 05306 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, 05307 LN0->getChain(), 05308 LN0->getBasePtr(), LN0->getPointerInfo(), 05309 EVT, 05310 LN0->isVolatile(), LN0->isNonTemporal(), 05311 LN0->getAlignment()); 05312 CombineTo(N, ExtLoad); 05313 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 05314 AddToWorkList(ExtLoad.getNode()); 05315 return SDValue(N, 0); // Return N so it doesn't get rechecked! 05316 } 05317 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use 05318 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && 05319 N0.hasOneUse() && 05320 EVT == cast<LoadSDNode>(N0)->getMemoryVT() && 05321 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || 05322 TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { 05323 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 05324 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, 05325 LN0->getChain(), 05326 LN0->getBasePtr(), LN0->getPointerInfo(), 05327 EVT, 05328 LN0->isVolatile(), LN0->isNonTemporal(), 05329 LN0->getAlignment()); 05330 CombineTo(N, ExtLoad); 05331 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 05332 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
05333 } 05334 05335 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16)) 05336 if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) { 05337 SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), 05338 N0.getOperand(1), false); 05339 if (BSwap.getNode() != 0) 05340 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, 05341 BSwap, N1); 05342 } 05343 05344 return SDValue(); 05345 } 05346 05347 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { 05348 SDValue N0 = N->getOperand(0); 05349 EVT VT = N->getValueType(0); 05350 bool isLE = TLI.isLittleEndian(); 05351 05352 // noop truncate 05353 if (N0.getValueType() == N->getValueType(0)) 05354 return N0; 05355 // fold (truncate c1) -> c1 05356 if (isa<ConstantSDNode>(N0)) 05357 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0); 05358 // fold (truncate (truncate x)) -> (truncate x) 05359 if (N0.getOpcode() == ISD::TRUNCATE) 05360 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0)); 05361 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x 05362 if (N0.getOpcode() == ISD::ZERO_EXTEND || 05363 N0.getOpcode() == ISD::SIGN_EXTEND || 05364 N0.getOpcode() == ISD::ANY_EXTEND) { 05365 if (N0.getOperand(0).getValueType().bitsLT(VT)) 05366 // if the source is smaller than the dest, we still need an extend 05367 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, 05368 N0.getOperand(0)); 05369 if (N0.getOperand(0).getValueType().bitsGT(VT)) 05370 // if the source is larger than the dest, than we just need the truncate 05371 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0)); 05372 // if the source and dest are the same type, we can drop both the extend 05373 // and the truncate. 05374 return N0.getOperand(0); 05375 } 05376 05377 // Fold extract-and-trunc into a narrow extract. 
For example: 05378 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1) 05379 // i32 y = TRUNCATE(i64 x) 05380 // -- becomes -- 05381 // v16i8 b = BITCAST (v2i64 val) 05382 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8) 05383 // 05384 // Note: We only run this optimization after type legalization (which often 05385 // creates this pattern) and before operation legalization after which 05386 // we need to be more careful about the vector instructions that we generate. 05387 if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 05388 LegalTypes && !LegalOperations && N0->hasOneUse()) { 05389 05390 EVT VecTy = N0.getOperand(0).getValueType(); 05391 EVT ExTy = N0.getValueType(); 05392 EVT TrTy = N->getValueType(0); 05393 05394 unsigned NumElem = VecTy.getVectorNumElements(); 05395 unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits(); 05396 05397 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem); 05398 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size"); 05399 05400 SDValue EltNo = N0->getOperand(1); 05401 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) { 05402 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); 05403 EVT IndexTy = N0->getOperand(1).getValueType(); 05404 int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); 05405 05406 SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N), 05407 NVT, N0.getOperand(0)); 05408 05409 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 05410 SDLoc(N), TrTy, V, 05411 DAG.getConstant(Index, IndexTy)); 05412 } 05413 } 05414 05415 // Fold a series of buildvector, bitcast, and truncate if possible. 05416 // For example fold 05417 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to 05418 // (2xi32 (buildvector x, y)). 
05419 if (Level == AfterLegalizeVectorOps && VT.isVector() && 05420 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() && 05421 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR && 05422 N0.getOperand(0).hasOneUse()) { 05423 05424 SDValue BuildVect = N0.getOperand(0); 05425 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType(); 05426 EVT TruncVecEltTy = VT.getVectorElementType(); 05427 05428 // Check that the element types match. 05429 if (BuildVectEltTy == TruncVecEltTy) { 05430 // Now we only need to compute the offset of the truncated elements. 05431 unsigned BuildVecNumElts = BuildVect.getNumOperands(); 05432 unsigned TruncVecNumElts = VT.getVectorNumElements(); 05433 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts; 05434 05435 assert((BuildVecNumElts % TruncVecNumElts) == 0 && 05436 "Invalid number of elements"); 05437 05438 SmallVector<SDValue, 8> Opnds; 05439 for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset) 05440 Opnds.push_back(BuildVect.getOperand(i)); 05441 05442 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &Opnds[0], 05443 Opnds.size()); 05444 } 05445 } 05446 05447 // See if we can simplify the input to this truncate through knowledge that 05448 // only the low bits are being used. 05449 // For example "trunc (or (shl x, 8), y)" // -> trunc y 05450 // Currently we only perform this optimization on scalars because vectors 05451 // may have different active low bits. 
05452 if (!VT.isVector()) { 05453 SDValue Shorter = 05454 GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(), 05455 VT.getSizeInBits())); 05456 if (Shorter.getNode()) 05457 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter); 05458 } 05459 // fold (truncate (load x)) -> (smaller load x) 05460 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) 05461 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) { 05462 SDValue Reduced = ReduceLoadWidth(N); 05463 if (Reduced.getNode()) 05464 return Reduced; 05465 } 05466 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)), 05467 // where ... are all 'undef'. 05468 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) { 05469 SmallVector<EVT, 8> VTs; 05470 SDValue V; 05471 unsigned Idx = 0; 05472 unsigned NumDefs = 0; 05473 05474 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) { 05475 SDValue X = N0.getOperand(i); 05476 if (X.getOpcode() != ISD::UNDEF) { 05477 V = X; 05478 Idx = i; 05479 NumDefs++; 05480 } 05481 // Stop if more than one members are non-undef. 05482 if (NumDefs > 1) 05483 break; 05484 VTs.push_back(EVT::getVectorVT(*DAG.getContext(), 05485 VT.getVectorElementType(), 05486 X.getValueType().getVectorNumElements())); 05487 } 05488 05489 if (NumDefs == 0) 05490 return DAG.getUNDEF(VT); 05491 05492 if (NumDefs == 1) { 05493 assert(V.getNode() && "The single defined operand is empty!"); 05494 SmallVector<SDValue, 8> Opnds; 05495 for (unsigned i = 0, e = VTs.size(); i != e; ++i) { 05496 if (i != Idx) { 05497 Opnds.push_back(DAG.getUNDEF(VTs[i])); 05498 continue; 05499 } 05500 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V); 05501 AddToWorkList(NV.getNode()); 05502 Opnds.push_back(NV); 05503 } 05504 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, 05505 &Opnds[0], Opnds.size()); 05506 } 05507 } 05508 05509 // Simplify the operands using demanded-bits information. 
05510 if (!VT.isVector() && 05511 SimplifyDemandedBits(SDValue(N, 0))) 05512 return SDValue(N, 0); 05513 05514 return SDValue(); 05515 } 05516 05517 static SDNode *getBuildPairElt(SDNode *N, unsigned i) { 05518 SDValue Elt = N->getOperand(i); 05519 if (Elt.getOpcode() != ISD::MERGE_VALUES) 05520 return Elt.getNode(); 05521 return Elt.getOperand(Elt.getResNo()).getNode(); 05522 } 05523 05524 /// CombineConsecutiveLoads - build_pair (load, load) -> load 05525 /// if load locations are consecutive. 05526 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { 05527 assert(N->getOpcode() == ISD::BUILD_PAIR); 05528 05529 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0)); 05530 LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1)); 05531 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() || 05532 LD1->getPointerInfo().getAddrSpace() != 05533 LD2->getPointerInfo().getAddrSpace()) 05534 return SDValue(); 05535 EVT LD1VT = LD1->getValueType(0); 05536 05537 if (ISD::isNON_EXTLoad(LD2) && 05538 LD2->hasOneUse() && 05539 // If both are volatile this would reduce the number of volatile loads. 05540 // If one is volatile it might be ok, but play conservative and bail out. 
05541 !LD1->isVolatile() && 05542 !LD2->isVolatile() && 05543 DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) { 05544 unsigned Align = LD1->getAlignment(); 05545 unsigned NewAlign = TLI.getDataLayout()-> 05546 getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); 05547 05548 if (NewAlign <= Align && 05549 (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) 05550 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), 05551 LD1->getBasePtr(), LD1->getPointerInfo(), 05552 false, false, false, Align); 05553 } 05554 05555 return SDValue(); 05556 } 05557 05558 SDValue DAGCombiner::visitBITCAST(SDNode *N) { 05559 SDValue N0 = N->getOperand(0); 05560 EVT VT = N->getValueType(0); 05561 05562 // If the input is a BUILD_VECTOR with all constant elements, fold this now. 05563 // Only do this before legalize, since afterward the target may be depending 05564 // on the bitconvert. 05565 // First check to see if this is all constant. 05566 if (!LegalTypes && 05567 N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() && 05568 VT.isVector()) { 05569 bool isSimple = true; 05570 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) 05571 if (N0.getOperand(i).getOpcode() != ISD::UNDEF && 05572 N0.getOperand(i).getOpcode() != ISD::Constant && 05573 N0.getOperand(i).getOpcode() != ISD::ConstantFP) { 05574 isSimple = false; 05575 break; 05576 } 05577 05578 EVT DestEltVT = N->getValueType(0).getVectorElementType(); 05579 assert(!DestEltVT.isVector() && 05580 "Element type of vector ValueType must not be vector!"); 05581 if (isSimple) 05582 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT); 05583 } 05584 05585 // If the input is a constant, let getNode fold it. 
05586 if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) { 05587 SDValue Res = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0); 05588 if (Res.getNode() != N) { 05589 if (!LegalOperations || 05590 TLI.isOperationLegal(Res.getNode()->getOpcode(), VT)) 05591 return Res; 05592 05593 // Folding it resulted in an illegal node, and it's too late to 05594 // do that. Clean up the old node and forego the transformation. 05595 // Ideally this won't happen very often, because instcombine 05596 // and the earlier dagcombine runs (where illegal nodes are 05597 // permitted) should have folded most of them already. 05598 DAG.DeleteNode(Res.getNode()); 05599 } 05600 } 05601 05602 // (conv (conv x, t1), t2) -> (conv x, t2) 05603 if (N0.getOpcode() == ISD::BITCAST) 05604 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, 05605 N0.getOperand(0)); 05606 05607 // fold (conv (load x)) -> (load (conv*)x) 05608 // If the resultant load doesn't need a higher alignment than the original! 05609 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && 05610 // Do not change the width of a volatile load. 
05611 !cast<LoadSDNode>(N0)->isVolatile() && 05612 (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) { 05613 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 05614 unsigned Align = TLI.getDataLayout()-> 05615 getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); 05616 unsigned OrigAlign = LN0->getAlignment(); 05617 05618 if (Align <= OrigAlign) { 05619 SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), 05620 LN0->getBasePtr(), LN0->getPointerInfo(), 05621 LN0->isVolatile(), LN0->isNonTemporal(), 05622 LN0->isInvariant(), OrigAlign); 05623 AddToWorkList(N); 05624 CombineTo(N0.getNode(), 05625 DAG.getNode(ISD::BITCAST, SDLoc(N0), 05626 N0.getValueType(), Load), 05627 Load.getValue(1)); 05628 return Load; 05629 } 05630 } 05631 05632 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 05633 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 05634 // This often reduces constant pool loads. 05635 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(VT)) || 05636 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(VT))) && 05637 N0.getNode()->hasOneUse() && VT.isInteger() && 05638 !VT.isVector() && !N0.getValueType().isVector()) { 05639 SDValue NewConv = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT, 05640 N0.getOperand(0)); 05641 AddToWorkList(NewConv.getNode()); 05642 05643 APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); 05644 if (N0.getOpcode() == ISD::FNEG) 05645 return DAG.getNode(ISD::XOR, SDLoc(N), VT, 05646 NewConv, DAG.getConstant(SignBit, VT)); 05647 assert(N0.getOpcode() == ISD::FABS); 05648 return DAG.getNode(ISD::AND, SDLoc(N), VT, 05649 NewConv, DAG.getConstant(~SignBit, VT)); 05650 } 05651 05652 // fold (bitconvert (fcopysign cst, x)) -> 05653 // (or (and (bitconvert x), sign), (and cst, (not sign))) 05654 // Note that we don't handle (copysign x, cst) because this can always be 05655 // folded to an fneg or fabs. 
05656 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() && 05657 isa<ConstantFPSDNode>(N0.getOperand(0)) && 05658 VT.isInteger() && !VT.isVector()) { 05659 unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); 05660 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); 05661 if (isTypeLegal(IntXVT)) { 05662 SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0), 05663 IntXVT, N0.getOperand(1)); 05664 AddToWorkList(X.getNode()); 05665 05666 // If X has a different width than the result/lhs, sext it or truncate it. 05667 unsigned VTWidth = VT.getSizeInBits(); 05668 if (OrigXWidth < VTWidth) { 05669 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X); 05670 AddToWorkList(X.getNode()); 05671 } else if (OrigXWidth > VTWidth) { 05672 // To get the sign bit in the right place, we have to shift it right 05673 // before truncating. 05674 X = DAG.getNode(ISD::SRL, SDLoc(X), 05675 X.getValueType(), X, 05676 DAG.getConstant(OrigXWidth-VTWidth, X.getValueType())); 05677 AddToWorkList(X.getNode()); 05678 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); 05679 AddToWorkList(X.getNode()); 05680 } 05681 05682 APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); 05683 X = DAG.getNode(ISD::AND, SDLoc(X), VT, 05684 X, DAG.getConstant(SignBit, VT)); 05685 AddToWorkList(X.getNode()); 05686 05687 SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0), 05688 VT, N0.getOperand(0)); 05689 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT, 05690 Cst, DAG.getConstant(~SignBit, VT)); 05691 AddToWorkList(Cst.getNode()); 05692 05693 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst); 05694 } 05695 } 05696 05697 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive. 
05698 if (N0.getOpcode() == ISD::BUILD_PAIR) { 05699 SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT); 05700 if (CombineLD.getNode()) 05701 return CombineLD; 05702 } 05703 05704 return SDValue(); 05705 } 05706 05707 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { 05708 EVT VT = N->getValueType(0); 05709 return CombineConsecutiveLoads(N, VT); 05710 } 05711 05712 /// ConstantFoldBITCASTofBUILD_VECTOR - We know that BV is a build_vector 05713 /// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the 05714 /// destination element value type. 05715 SDValue DAGCombiner:: 05716 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { 05717 EVT SrcEltVT = BV->getValueType(0).getVectorElementType(); 05718 05719 // If this is already the right type, we're done. 05720 if (SrcEltVT == DstEltVT) return SDValue(BV, 0); 05721 05722 unsigned SrcBitSize = SrcEltVT.getSizeInBits(); 05723 unsigned DstBitSize = DstEltVT.getSizeInBits(); 05724 05725 // If this is a conversion of N elements of one type to N elements of another 05726 // type, convert each element. This handles FP<->INT cases. 05727 if (SrcBitSize == DstBitSize) { 05728 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, 05729 BV->getValueType(0).getVectorNumElements()); 05730 05731 // Due to the FP element handling below calling this routine recursively, 05732 // we can end up with a scalar-to-vector node here. 05733 if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR) 05734 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT, 05735 DAG.getNode(ISD::BITCAST, SDLoc(BV), 05736 DstEltVT, BV->getOperand(0))); 05737 05738 SmallVector<SDValue, 8> Ops; 05739 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { 05740 SDValue Op = BV->getOperand(i); 05741 // If the vector element type is not legal, the BUILD_VECTOR operands 05742 // are promoted and implicitly truncated. Make that explicit here. 
05743 if (Op.getValueType() != SrcEltVT) 05744 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op); 05745 Ops.push_back(DAG.getNode(ISD::BITCAST, SDLoc(BV), 05746 DstEltVT, Op)); 05747 AddToWorkList(Ops.back().getNode()); 05748 } 05749 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, 05750 &Ops[0], Ops.size()); 05751 } 05752 05753 // Otherwise, we're growing or shrinking the elements. To avoid having to 05754 // handle annoying details of growing/shrinking FP values, we convert them to 05755 // int first. 05756 if (SrcEltVT.isFloatingPoint()) { 05757 // Convert the input float vector to a int vector where the elements are the 05758 // same sizes. 05759 assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!"); 05760 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits()); 05761 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode(); 05762 SrcEltVT = IntVT; 05763 } 05764 05765 // Now we know the input is an integer vector. If the output is a FP type, 05766 // convert to integer first, then to FP of the right size. 05767 if (DstEltVT.isFloatingPoint()) { 05768 assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!"); 05769 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits()); 05770 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode(); 05771 05772 // Next, convert to FP elements of the same size. 05773 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT); 05774 } 05775 05776 // Okay, we know the src/dst types are both integers of differing types. 05777 // Handling growing first. 
05778 assert(SrcEltVT.isInteger() && DstEltVT.isInteger()); 05779 if (SrcBitSize < DstBitSize) { 05780 unsigned NumInputsPerOutput = DstBitSize/SrcBitSize; 05781 05782 SmallVector<SDValue, 8> Ops; 05783 for (unsigned i = 0, e = BV->getNumOperands(); i != e; 05784 i += NumInputsPerOutput) { 05785 bool isLE = TLI.isLittleEndian(); 05786 APInt NewBits = APInt(DstBitSize, 0); 05787 bool EltIsUndef = true; 05788 for (unsigned j = 0; j != NumInputsPerOutput; ++j) { 05789 // Shift the previously computed bits over. 05790 NewBits <<= SrcBitSize; 05791 SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j)); 05792 if (Op.getOpcode() == ISD::UNDEF) continue; 05793 EltIsUndef = false; 05794 05795 NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue(). 05796 zextOrTrunc(SrcBitSize).zext(DstBitSize); 05797 } 05798 05799 if (EltIsUndef) 05800 Ops.push_back(DAG.getUNDEF(DstEltVT)); 05801 else 05802 Ops.push_back(DAG.getConstant(NewBits, DstEltVT)); 05803 } 05804 05805 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); 05806 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, 05807 &Ops[0], Ops.size()); 05808 } 05809 05810 // Finally, this must be the case where we are shrinking elements: each input 05811 // turns into multiple outputs. 
05812 bool isS2V = ISD::isScalarToVector(BV); 05813 unsigned NumOutputsPerInput = SrcBitSize/DstBitSize; 05814 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, 05815 NumOutputsPerInput*BV->getNumOperands()); 05816 SmallVector<SDValue, 8> Ops; 05817 05818 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { 05819 if (BV->getOperand(i).getOpcode() == ISD::UNDEF) { 05820 for (unsigned j = 0; j != NumOutputsPerInput; ++j) 05821 Ops.push_back(DAG.getUNDEF(DstEltVT)); 05822 continue; 05823 } 05824 05825 APInt OpVal = cast<ConstantSDNode>(BV->getOperand(i))-> 05826 getAPIntValue().zextOrTrunc(SrcBitSize); 05827 05828 for (unsigned j = 0; j != NumOutputsPerInput; ++j) { 05829 APInt ThisVal = OpVal.trunc(DstBitSize); 05830 Ops.push_back(DAG.getConstant(ThisVal, DstEltVT)); 05831 if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal) 05832 // Simply turn this into a SCALAR_TO_VECTOR of the new type. 05833 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT, 05834 Ops[0]); 05835 OpVal = OpVal.lshr(DstBitSize); 05836 } 05837 05838 // For big endian targets, swap the order of the pieces of each element. 
05839 if (TLI.isBigEndian()) 05840 std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); 05841 } 05842 05843 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, 05844 &Ops[0], Ops.size()); 05845 } 05846 05847 SDValue DAGCombiner::visitFADD(SDNode *N) { 05848 SDValue N0 = N->getOperand(0); 05849 SDValue N1 = N->getOperand(1); 05850 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 05851 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 05852 EVT VT = N->getValueType(0); 05853 05854 // fold vector ops 05855 if (VT.isVector()) { 05856 SDValue FoldedVOp = SimplifyVBinOp(N); 05857 if (FoldedVOp.getNode()) return FoldedVOp; 05858 } 05859 05860 // fold (fadd c1, c2) -> c1 + c2 05861 if (N0CFP && N1CFP) 05862 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N1); 05863 // canonicalize constant to RHS 05864 if (N0CFP && !N1CFP) 05865 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N0); 05866 // fold (fadd A, 0) -> A 05867 if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && 05868 N1CFP->getValueAPF().isZero()) 05869 return N0; 05870 // fold (fadd A, (fneg B)) -> (fsub A, B) 05871 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && 05872 isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2) 05873 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, 05874 GetNegatedExpression(N1, DAG, LegalOperations)); 05875 // fold (fadd (fneg A), B) -> (fsub B, A) 05876 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && 05877 isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2) 05878 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N1, 05879 GetNegatedExpression(N0, DAG, LegalOperations)); 05880 05881 // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) 05882 if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && 05883 N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && 05884 isa<ConstantFPSDNode>(N0.getOperand(1))) 05885 return DAG.getNode(ISD::FADD, SDLoc(N), VT, 
N0.getOperand(0), 05886 DAG.getNode(ISD::FADD, SDLoc(N), VT, 05887 N0.getOperand(1), N1)); 05888 05889 // No FP constant should be created after legalization as Instruction 05890 // Selection pass has hard time in dealing with FP constant. 05891 // 05892 // We don't need test this condition for transformation like following, as 05893 // the DAG being transformed implies it is legal to take FP constant as 05894 // operand. 05895 // 05896 // (fadd (fmul c, x), x) -> (fmul c+1, x) 05897 // 05898 bool AllowNewFpConst = (Level < AfterLegalizeDAG); 05899 05900 // If allow, fold (fadd (fneg x), x) -> 0.0 05901 if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath && 05902 N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) { 05903 return DAG.getConstantFP(0.0, VT); 05904 } 05905 05906 // If allow, fold (fadd x, (fneg x)) -> 0.0 05907 if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath && 05908 N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) { 05909 return DAG.getConstantFP(0.0, VT); 05910 } 05911 05912 // In unsafe math mode, we can fold chains of FADD's of the same value 05913 // into multiplications. This transform is not safe in general because 05914 // we are reducing the number of rounding steps. 
05915 if (DAG.getTarget().Options.UnsafeFPMath && 05916 TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && 05917 !N0CFP && !N1CFP) { 05918 if (N0.getOpcode() == ISD::FMUL) { 05919 ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); 05920 ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); 05921 05922 // (fadd (fmul c, x), x) -> (fmul c+1, x) 05923 if (CFP00 && !CFP01 && N0.getOperand(1) == N1) { 05924 SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, 05925 SDValue(CFP00, 0), 05926 DAG.getConstantFP(1.0, VT)); 05927 return DAG.getNode(ISD::FMUL, SDLoc(N), VT, 05928 N1, NewCFP); 05929 } 05930 05931 // (fadd (fmul x, c), x) -> (fmul c+1, x) 05932 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { 05933 SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, 05934 SDValue(CFP01, 0), 05935 DAG.getConstantFP(1.0, VT)); 05936 return DAG.getNode(ISD::FMUL, SDLoc(N), VT, 05937 N1, NewCFP); 05938 } 05939 05940 // (fadd (fmul c, x), (fadd x, x)) -> (fmul c+2, x) 05941 if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD && 05942 N1.getOperand(0) == N1.getOperand(1) && 05943 N0.getOperand(1) == N1.getOperand(0)) { 05944 SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, 05945 SDValue(CFP00, 0), 05946 DAG.getConstantFP(2.0, VT)); 05947 return DAG.getNode(ISD::FMUL, SDLoc(N), VT, 05948 N0.getOperand(1), NewCFP); 05949 } 05950 05951 // (fadd (fmul x, c), (fadd x, x)) -> (fmul c+2, x) 05952 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && 05953 N1.getOperand(0) == N1.getOperand(1) && 05954 N0.getOperand(0) == N1.getOperand(0)) { 05955 SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, 05956 SDValue(CFP01, 0), 05957 DAG.getConstantFP(2.0, VT)); 05958 return DAG.getNode(ISD::FMUL, SDLoc(N), VT, 05959 N0.getOperand(0), NewCFP); 05960 } 05961 } 05962 05963 if (N1.getOpcode() == ISD::FMUL) { 05964 ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); 05965 ConstantFPSDNode *CFP11 = 
dyn_cast<ConstantFPSDNode>(N1.getOperand(1)); 05966 05967 // (fadd x, (fmul c, x)) -> (fmul c+1, x) 05968 if (CFP10 && !CFP11 && N1.getOperand(1) == N0) { 05969 SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, 05970 SDValue(CFP10, 0), 05971 DAG.getConstantFP(1.0, VT)); 05972 return DAG.getNode(ISD::FMUL, SDLoc(N), VT, 05973 N0, NewCFP); 05974 } 05975 05976 // (fadd x, (fmul x, c)) -> (fmul c+1, x) 05977 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { 05978 SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, 05979 SDValue(CFP11, 0), 05980 DAG.getConstantFP(1.0, VT)); 05981 return DAG.getNode(ISD::FMUL, SDLoc(N), VT, 05982 N0, NewCFP); 05983 } 05984 05985 05986 // (fadd (fadd x, x), (fmul c, x)) -> (fmul c+2, x) 05987 if (CFP10 && !CFP11 && N1.getOpcode() == ISD::FADD && 05988 N1.getOperand(0) == N1.getOperand(1) && 05989 N0.getOperand(1) == N1.getOperand(0)) { 05990 SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, 05991 SDValue(CFP10, 0), 05992 DAG.getConstantFP(2.0, VT)); 05993 return DAG.getNode(ISD::FMUL, SDLoc(N), VT, 05994 N0.getOperand(1), NewCFP); 05995 } 05996 05997 // (fadd (fadd x, x), (fmul x, c)) -> (fmul c+2, x) 05998 if (CFP11 && !CFP10 && N1.getOpcode() == ISD::FADD && 05999 N1.getOperand(0) == N1.getOperand(1) && 06000 N0.getOperand(0) == N1.getOperand(0)) { 06001 SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, 06002 SDValue(CFP11, 0), 06003 DAG.getConstantFP(2.0, VT)); 06004 return DAG.getNode(ISD::FMUL, SDLoc(N), VT, 06005 N0.getOperand(0), NewCFP); 06006 } 06007 } 06008 06009 if (N0.getOpcode() == ISD::FADD && AllowNewFpConst) { 06010 ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); 06011 // (fadd (fadd x, x), x) -> (fmul 3.0, x) 06012 if (!CFP && N0.getOperand(0) == N0.getOperand(1) && 06013 (N0.getOperand(0) == N1)) { 06014 return DAG.getNode(ISD::FMUL, SDLoc(N), VT, 06015 N1, DAG.getConstantFP(3.0, VT)); 06016 } 06017 } 06018 06019 if (N1.getOpcode() == ISD::FADD && AllowNewFpConst) { 06020 
ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); 06021 // (fadd x, (fadd x, x)) -> (fmul 3.0, x) 06022 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && 06023 N1.getOperand(0) == N0) { 06024 return DAG.getNode(ISD::FMUL, SDLoc(N), VT, 06025 N0, DAG.getConstantFP(3.0, VT)); 06026 } 06027 } 06028 06029 // (fadd (fadd x, x), (fadd x, x)) -> (fmul 4.0, x) 06030 if (AllowNewFpConst && 06031 N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && 06032 N0.getOperand(0) == N0.getOperand(1) && 06033 N1.getOperand(0) == N1.getOperand(1) && 06034 N0.getOperand(0) == N1.getOperand(0)) { 06035 return DAG.getNode(ISD::FMUL, SDLoc(N), VT, 06036 N0.getOperand(0), 06037 DAG.getConstantFP(4.0, VT)); 06038 } 06039 } 06040 06041 // FADD -> FMA combines: 06042 if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || 06043 DAG.getTarget().Options.UnsafeFPMath) && 06044 DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && 06045 TLI.isOperationLegalOrCustom(ISD::FMA, VT)) { 06046 06047 // fold (fadd (fmul x, y), z) -> (fma x, y, z) 06048 if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { 06049 return DAG.getNode(ISD::FMA, SDLoc(N), VT, 06050 N0.getOperand(0), N0.getOperand(1), N1); 06051 } 06052 06053 // fold (fadd x, (fmul y, z)) -> (fma y, z, x) 06054 // Note: Commutes FADD operands. 
06055 if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { 06056 return DAG.getNode(ISD::FMA, SDLoc(N), VT, 06057 N1.getOperand(0), N1.getOperand(1), N0); 06058 } 06059 } 06060 06061 return SDValue(); 06062 } 06063 06064 SDValue DAGCombiner::visitFSUB(SDNode *N) { 06065 SDValue N0 = N->getOperand(0); 06066 SDValue N1 = N->getOperand(1); 06067 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 06068 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 06069 EVT VT = N->getValueType(0); 06070 SDLoc dl(N); 06071 06072 // fold vector ops 06073 if (VT.isVector()) { 06074 SDValue FoldedVOp = SimplifyVBinOp(N); 06075 if (FoldedVOp.getNode()) return FoldedVOp; 06076 } 06077 06078 // fold (fsub c1, c2) -> c1-c2 06079 if (N0CFP && N1CFP) 06080 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, N1); 06081 // fold (fsub A, 0) -> A 06082 if (DAG.getTarget().Options.UnsafeFPMath && 06083 N1CFP && N1CFP->getValueAPF().isZero()) 06084 return N0; 06085 // fold (fsub 0, B) -> -B 06086 if (DAG.getTarget().Options.UnsafeFPMath && 06087 N0CFP && N0CFP->getValueAPF().isZero()) { 06088 if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) 06089 return GetNegatedExpression(N1, DAG, LegalOperations); 06090 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) 06091 return DAG.getNode(ISD::FNEG, dl, VT, N1); 06092 } 06093 // fold (fsub A, (fneg B)) -> (fadd A, B) 06094 if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) 06095 return DAG.getNode(ISD::FADD, dl, VT, N0, 06096 GetNegatedExpression(N1, DAG, LegalOperations)); 06097 06098 // If 'unsafe math' is enabled, fold 06099 // (fsub x, x) -> 0.0 & 06100 // (fsub x, (fadd x, y)) -> (fneg y) & 06101 // (fsub x, (fadd y, x)) -> (fneg y) 06102 if (DAG.getTarget().Options.UnsafeFPMath) { 06103 if (N0 == N1) 06104 return DAG.getConstantFP(0.0f, VT); 06105 06106 if (N1.getOpcode() == ISD::FADD) { 06107 SDValue N10 = N1->getOperand(0); 06108 SDValue N11 = N1->getOperand(1); 06109 06110 
if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, 06111 &DAG.getTarget().Options)) 06112 return GetNegatedExpression(N11, DAG, LegalOperations); 06113 else if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, 06114 &DAG.getTarget().Options)) 06115 return GetNegatedExpression(N10, DAG, LegalOperations); 06116 } 06117 } 06118 06119 // FSUB -> FMA combines: 06120 if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || 06121 DAG.getTarget().Options.UnsafeFPMath) && 06122 DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && 06123 TLI.isOperationLegalOrCustom(ISD::FMA, VT)) { 06124 06125 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) 06126 if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { 06127 return DAG.getNode(ISD::FMA, dl, VT, 06128 N0.getOperand(0), N0.getOperand(1), 06129 DAG.getNode(ISD::FNEG, dl, VT, N1)); 06130 } 06131 06132 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) 06133 // Note: Commutes FSUB operands. 06134 if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { 06135 return DAG.getNode(ISD::FMA, dl, VT, 06136 DAG.getNode(ISD::FNEG, dl, VT, 06137 N1.getOperand(0)), 06138 N1.getOperand(1), N0); 06139 } 06140 06141 // fold (fsub (-(fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) 06142 if (N0.getOpcode() == ISD::FNEG && 06143 N0.getOperand(0).getOpcode() == ISD::FMUL && 06144 N0->hasOneUse() && N0.getOperand(0).hasOneUse()) { 06145 SDValue N00 = N0.getOperand(0).getOperand(0); 06146 SDValue N01 = N0.getOperand(0).getOperand(1); 06147 return DAG.getNode(ISD::FMA, dl, VT, 06148 DAG.getNode(ISD::FNEG, dl, VT, N00), N01, 06149 DAG.getNode(ISD::FNEG, dl, VT, N1)); 06150 } 06151 } 06152 06153 return SDValue(); 06154 } 06155 06156 SDValue DAGCombiner::visitFMUL(SDNode *N) { 06157 SDValue N0 = N->getOperand(0); 06158 SDValue N1 = N->getOperand(1); 06159 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 06160 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 06161 EVT VT = 
N->getValueType(0); 06162 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 06163 06164 // fold vector ops 06165 if (VT.isVector()) { 06166 SDValue FoldedVOp = SimplifyVBinOp(N); 06167 if (FoldedVOp.getNode()) return FoldedVOp; 06168 } 06169 06170 // fold (fmul c1, c2) -> c1*c2 06171 if (N0CFP && N1CFP) 06172 return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, N1); 06173 // canonicalize constant to RHS 06174 if (N0CFP && !N1CFP) 06175 return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, N0); 06176 // fold (fmul A, 0) -> 0 06177 if (DAG.getTarget().Options.UnsafeFPMath && 06178 N1CFP && N1CFP->getValueAPF().isZero()) 06179 return N1; 06180 // fold (fmul A, 0) -> 0, vector edition. 06181 if (DAG.getTarget().Options.UnsafeFPMath && 06182 ISD::isBuildVectorAllZeros(N1.getNode())) 06183 return N1; 06184 // fold (fmul A, 1.0) -> A 06185 if (N1CFP && N1CFP->isExactlyValue(1.0)) 06186 return N0; 06187 // fold (fmul X, 2.0) -> (fadd X, X) 06188 if (N1CFP && N1CFP->isExactlyValue(+2.0)) 06189 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N0); 06190 // fold (fmul X, -1.0) -> (fneg X) 06191 if (N1CFP && N1CFP->isExactlyValue(-1.0)) 06192 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) 06193 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); 06194 06195 // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) 06196 if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, 06197 &DAG.getTarget().Options)) { 06198 if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, 06199 &DAG.getTarget().Options)) { 06200 // Both can be negated for free, check to see if at least one is cheaper 06201 // negated. 
06202 if (LHSNeg == 2 || RHSNeg == 2) 06203 return DAG.getNode(ISD::FMUL, SDLoc(N), VT, 06204 GetNegatedExpression(N0, DAG, LegalOperations), 06205 GetNegatedExpression(N1, DAG, LegalOperations)); 06206 } 06207 } 06208 06209 // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2)) 06210 if (DAG.getTarget().Options.UnsafeFPMath && 06211 N1CFP && N0.getOpcode() == ISD::FMUL && 06212 N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1))) 06213 return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), 06214 DAG.getNode(ISD::FMUL, SDLoc(N), VT, 06215 N0.getOperand(1), N1)); 06216 06217 return SDValue(); 06218 } 06219 06220 SDValue DAGCombiner::visitFMA(SDNode *N) { 06221 SDValue N0 = N->getOperand(0); 06222 SDValue N1 = N->getOperand(1); 06223 SDValue N2 = N->getOperand(2); 06224 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 06225 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 06226 EVT VT = N->getValueType(0); 06227 SDLoc dl(N); 06228 06229 if (DAG.getTarget().Options.UnsafeFPMath) { 06230 if (N0CFP && N0CFP->isZero()) 06231 return N2; 06232 if (N1CFP && N1CFP->isZero()) 06233 return N2; 06234 } 06235 if (N0CFP && N0CFP->isExactlyValue(1.0)) 06236 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2); 06237 if (N1CFP && N1CFP->isExactlyValue(1.0)) 06238 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2); 06239 06240 // Canonicalize (fma c, x, y) -> (fma x, c, y) 06241 if (N0CFP && !N1CFP) 06242 return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2); 06243 06244 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) 06245 if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && 06246 N2.getOpcode() == ISD::FMUL && 06247 N0 == N2.getOperand(0) && 06248 N2.getOperand(1).getOpcode() == ISD::ConstantFP) { 06249 return DAG.getNode(ISD::FMUL, dl, VT, N0, 06250 DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1))); 06251 } 06252 06253 06254 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) 06255 if 
(DAG.getTarget().Options.UnsafeFPMath && 06256 N0.getOpcode() == ISD::FMUL && N1CFP && 06257 N0.getOperand(1).getOpcode() == ISD::ConstantFP) { 06258 return DAG.getNode(ISD::FMA, dl, VT, 06259 N0.getOperand(0), 06260 DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)), 06261 N2); 06262 } 06263 06264 // (fma x, 1, y) -> (fadd x, y) 06265 // (fma x, -1, y) -> (fadd (fneg x), y) 06266 if (N1CFP) { 06267 if (N1CFP->isExactlyValue(1.0)) 06268 return DAG.getNode(ISD::FADD, dl, VT, N0, N2); 06269 06270 if (N1CFP->isExactlyValue(-1.0) && 06271 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) { 06272 SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0); 06273 AddToWorkList(RHSNeg.getNode()); 06274 return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg); 06275 } 06276 } 06277 06278 // (fma x, c, x) -> (fmul x, (c+1)) 06279 if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2) { 06280 return DAG.getNode(ISD::FMUL, dl, VT, 06281 N0, 06282 DAG.getNode(ISD::FADD, dl, VT, 06283 N1, DAG.getConstantFP(1.0, VT))); 06284 } 06285 06286 // (fma x, c, (fneg x)) -> (fmul x, (c-1)) 06287 if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && 06288 N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) { 06289 return DAG.getNode(ISD::FMUL, dl, VT, 06290 N0, 06291 DAG.getNode(ISD::FADD, dl, VT, 06292 N1, DAG.getConstantFP(-1.0, VT))); 06293 } 06294 06295 06296 return SDValue(); 06297 } 06298 06299 SDValue DAGCombiner::visitFDIV(SDNode *N) { 06300 SDValue