LCOV - code coverage report
Current view: top level - lib/CodeGen/SelectionDAG - DAGCombiner.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 8515 9040 94.2 %
Date: 2017-09-14 15:23:50 Functions: 274 277 98.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
      11             : // both before and after the DAG is legalized.
      12             : //
      13             : // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
      14             : // primarily intended to handle simplification opportunities that are implicit
      15             : // in the LLVM IR and exposed by the various codegen lowering phases.
      16             : //
      17             : //===----------------------------------------------------------------------===//
      18             : 
      19             : #include "llvm/ADT/SetVector.h"
      20             : #include "llvm/ADT/SmallBitVector.h"
      21             : #include "llvm/ADT/SmallPtrSet.h"
      22             : #include "llvm/ADT/SmallSet.h"
      23             : #include "llvm/ADT/Statistic.h"
      24             : #include "llvm/Analysis/AliasAnalysis.h"
      25             : #include "llvm/CodeGen/MachineFrameInfo.h"
      26             : #include "llvm/CodeGen/MachineFunction.h"
      27             : #include "llvm/CodeGen/SelectionDAG.h"
      28             : #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
      29             : #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
      30             : #include "llvm/IR/DataLayout.h"
      31             : #include "llvm/IR/DerivedTypes.h"
      32             : #include "llvm/IR/Function.h"
      33             : #include "llvm/IR/LLVMContext.h"
      34             : #include "llvm/Support/CommandLine.h"
      35             : #include "llvm/Support/Debug.h"
      36             : #include "llvm/Support/ErrorHandling.h"
      37             : #include "llvm/Support/KnownBits.h"
      38             : #include "llvm/Support/MathExtras.h"
      39             : #include "llvm/Support/raw_ostream.h"
      40             : #include "llvm/Target/TargetLowering.h"
      41             : #include "llvm/Target/TargetOptions.h"
      42             : #include "llvm/Target/TargetRegisterInfo.h"
      43             : #include "llvm/Target/TargetSubtargetInfo.h"
      44             : #include <algorithm>
      45             : using namespace llvm;
      46             : 
      47             : #define DEBUG_TYPE "dagcombine"
      48             : 
      49             : STATISTIC(NodesCombined   , "Number of dag nodes combined");
      50             : STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
      51             : STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
      52             : STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
      53             : STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
      54             : STATISTIC(SlicedLoads, "Number of load sliced");
      55             : 
      56             : namespace {
      57             :   static cl::opt<bool>
      58       72306 :     CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
      59      144612 :                cl::desc("Enable DAG combiner's use of IR alias analysis"));
      60             : 
      61             :   static cl::opt<bool>
      62      289224 :     UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
      63      289224 :                cl::desc("Enable DAG combiner's use of TBAA"));
      64             : 
      65             : #ifndef NDEBUG
      66             :   static cl::opt<std::string>
      67             :     CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
      68             :                cl::desc("Only use DAG-combiner alias analysis in this"
      69             :                         " function"));
      70             : #endif
      71             : 
      72             :   /// Hidden option to stress test load slicing, i.e., when this option
      73             :   /// is enabled, load slicing bypasses most of its profitability guards.
      74             :   static cl::opt<bool>
      75       72306 :   StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
      76      216918 :                     cl::desc("Bypass the profitability model of load "
      77             :                              "slicing"),
      78      289224 :                     cl::init(false));
      79             : 
      80             :   static cl::opt<bool>
      81      289224 :     MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
      82      289224 :                       cl::desc("DAG combiner may split indexing from loads"));
      83             : 
      84             : //------------------------------ DAGCombiner ---------------------------------//
      85             : 
      86     2699512 :   class DAGCombiner {
      87             :     SelectionDAG &DAG;
      88             :     const TargetLowering &TLI;
      89             :     CombineLevel Level;
      90             :     CodeGenOpt::Level OptLevel;
      91             :     bool LegalOperations;
      92             :     bool LegalTypes;
      93             :     bool ForCodeSize;
      94             : 
      95             :     /// \brief Worklist of all of the nodes that need to be simplified.
      96             :     ///
      97             :     /// This must behave as a stack -- new nodes to process are pushed onto the
      98             :     /// back and when processing we pop off of the back.
      99             :     ///
     100             :     /// The worklist will not contain duplicates but may contain null entries
     101             :     /// due to nodes being deleted from the underlying DAG.
     102             :     SmallVector<SDNode *, 64> Worklist;
     103             : 
     104             :     /// \brief Mapping from an SDNode to its position on the worklist.
     105             :     ///
     106             :     /// This is used to find and remove nodes from the worklist (by nulling
     107             :     /// them) when they are deleted from the underlying DAG. It relies on
     108             :     /// stable indices of nodes within the worklist.
     109             :     DenseMap<SDNode *, unsigned> WorklistMap;
     110             : 
     111             :     /// \brief Set of nodes which have been combined (at least once).
     112             :     ///
     113             :     /// This is used to allow us to reliably add any operands of a DAG node
     114             :     /// which have not yet been combined to the worklist.
     115             :     SmallPtrSet<SDNode *, 32> CombinedNodes;
     116             : 
     117             :     // AA - Used for DAG load/store alias analysis.
     118             :     AliasAnalysis *AA;
     119             : 
     120             :     /// When an instruction is simplified, add all users of the instruction to
     121             :     /// the work lists because they might get more simplified now.
     122             :     void AddUsersToWorklist(SDNode *N) {
     123    12354059 :       for (SDNode *Node : N->uses())
     124     3467431 :         AddToWorklist(Node);
     125             :     }
     126             : 
     127             :     /// Call the node-specific routine that folds each particular type of node.
     128             :     SDValue visit(SDNode *N);
     129             : 
     130             :   public:
     131             :     /// Add to the worklist making sure its instance is at the back (next to be
     132             :     /// processed.)
     133    76292430 :     void AddToWorklist(SDNode *N) {
     134             :       assert(N->getOpcode() != ISD::DELETED_NODE &&
     135             :              "Deleted Node added to Worklist");
     136             : 
     137             :       // Skip handle nodes as they can't usefully be combined and confuse the
     138             :       // zero-use deletion strategy.
     139    76292430 :       if (N->getOpcode() == ISD::HANDLENODE)
     140             :         return;
     141             : 
     142   381437420 :       if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
     143    27997289 :         Worklist.push_back(N);
     144             :     }
     145             : 
     146             :     /// Remove all instances of N from the worklist.
     147     4086220 :     void removeFromWorklist(SDNode *N) {
     148     8172440 :       CombinedNodes.erase(N);
     149             : 
     150     4086220 :       auto It = WorklistMap.find(N);
     151    12258660 :       if (It == WorklistMap.end())
     152     2489462 :         return; // Not in the worklist.
     153             : 
     154             :       // Null out the entry rather than erasing it to avoid a linear operation.
     155     3193516 :       Worklist[It->second] = nullptr;
     156     3193516 :       WorklistMap.erase(It);
     157             :     }
     158             : 
     159             :     void deleteAndRecombine(SDNode *N);
     160             :     bool recursivelyDeleteUnusedNodes(SDNode *N);
     161             : 
     162             :     /// Replaces all uses of the results of one DAG node with new values.
     163             :     SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
     164             :                       bool AddTo = true);
     165             : 
     166             :     /// Replaces all uses of the results of one DAG node with new values.
     167             :     SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
     168      349471 :       return CombineTo(N, &Res, 1, AddTo);
     169             :     }
     170             : 
     171             :     /// Replaces all uses of the results of one DAG node with new values.
     172             :     SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
     173             :                       bool AddTo = true) {
     174      138199 :       SDValue To[] = { Res0, Res1 };
     175      138199 :       return CombineTo(N, To, 2, AddTo);
     176             :     }
     177             : 
     178             :     void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
     179             : 
     180             :   private:
     181             :     unsigned MaximumLegalStoreInBits;
     182             : 
     183             :     /// Check the specified integer node value to see if it can be simplified or
     184             :     /// if things it uses can be simplified by bit propagation.
     185             :     /// If so, return true.
     186     2425378 :     bool SimplifyDemandedBits(SDValue Op) {
     187     2425378 :       unsigned BitWidth = Op.getScalarValueSizeInBits();
     188     4850756 :       APInt Demanded = APInt::getAllOnesValue(BitWidth);
     189     4850756 :       return SimplifyDemandedBits(Op, Demanded);
     190             :     }
     191             : 
     192             :     bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
     193             : 
     194             :     bool CombineToPreIndexedLoadStore(SDNode *N);
     195             :     bool CombineToPostIndexedLoadStore(SDNode *N);
     196             :     SDValue SplitIndexingFromLoad(LoadSDNode *LD);
     197             :     bool SliceUpLoad(SDNode *N);
     198             : 
     199             :     /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
     200             :     ///   load.
     201             :     ///
     202             :     /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
     203             :     /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
     204             :     /// \param EltNo index of the vector element to load.
     205             :     /// \param OriginalLoad load that EVE came from to be replaced.
     206             :     /// \returns EVE on success SDValue() on failure.
     207             :     SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
     208             :         SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
     209             :     void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
     210             :     SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
     211             :     SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
     212             :     SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
     213             :     SDValue PromoteIntBinOp(SDValue Op);
     214             :     SDValue PromoteIntShiftOp(SDValue Op);
     215             :     SDValue PromoteExtend(SDValue Op);
     216             :     bool PromoteLoad(SDValue Op);
     217             : 
     218             :     void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, SDValue Trunc,
     219             :                          SDValue ExtLoad, const SDLoc &DL,
     220             :                          ISD::NodeType ExtType);
     221             : 
     222             :     /// Call the node-specific routine that knows how to fold each
     223             :     /// particular type of node. If that doesn't do anything, try the
     224             :     /// target-specific DAG combines.
     225             :     SDValue combine(SDNode *N);
     226             : 
     227             :     // Visitation implementation - Implement dag node combining for different
     228             :     // node types.  The semantics are as follows:
     229             :     // Return Value:
     230             :     //   SDValue.getNode() == 0 - No change was made
     231             :     //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
     232             :     //   otherwise              - N should be replaced by the returned Operand.
     233             :     //
     234             :     SDValue visitTokenFactor(SDNode *N);
     235             :     SDValue visitMERGE_VALUES(SDNode *N);
     236             :     SDValue visitADD(SDNode *N);
     237             :     SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
     238             :     SDValue visitSUB(SDNode *N);
     239             :     SDValue visitADDC(SDNode *N);
     240             :     SDValue visitUADDO(SDNode *N);
     241             :     SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
     242             :     SDValue visitSUBC(SDNode *N);
     243             :     SDValue visitUSUBO(SDNode *N);
     244             :     SDValue visitADDE(SDNode *N);
     245             :     SDValue visitADDCARRY(SDNode *N);
     246             :     SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
     247             :     SDValue visitSUBE(SDNode *N);
     248             :     SDValue visitSUBCARRY(SDNode *N);
     249             :     SDValue visitMUL(SDNode *N);
     250             :     SDValue useDivRem(SDNode *N);
     251             :     SDValue visitSDIV(SDNode *N);
     252             :     SDValue visitUDIV(SDNode *N);
     253             :     SDValue visitREM(SDNode *N);
     254             :     SDValue visitMULHU(SDNode *N);
     255             :     SDValue visitMULHS(SDNode *N);
     256             :     SDValue visitSMUL_LOHI(SDNode *N);
     257             :     SDValue visitUMUL_LOHI(SDNode *N);
     258             :     SDValue visitSMULO(SDNode *N);
     259             :     SDValue visitUMULO(SDNode *N);
     260             :     SDValue visitIMINMAX(SDNode *N);
     261             :     SDValue visitAND(SDNode *N);
     262             :     SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
     263             :     SDValue visitOR(SDNode *N);
     264             :     SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
     265             :     SDValue visitXOR(SDNode *N);
     266             :     SDValue SimplifyVBinOp(SDNode *N);
     267             :     SDValue visitSHL(SDNode *N);
     268             :     SDValue visitSRA(SDNode *N);
     269             :     SDValue visitSRL(SDNode *N);
     270             :     SDValue visitRotate(SDNode *N);
     271             :     SDValue visitABS(SDNode *N);
     272             :     SDValue visitBSWAP(SDNode *N);
     273             :     SDValue visitBITREVERSE(SDNode *N);
     274             :     SDValue visitCTLZ(SDNode *N);
     275             :     SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
     276             :     SDValue visitCTTZ(SDNode *N);
     277             :     SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
     278             :     SDValue visitCTPOP(SDNode *N);
     279             :     SDValue visitSELECT(SDNode *N);
     280             :     SDValue visitVSELECT(SDNode *N);
     281             :     SDValue visitSELECT_CC(SDNode *N);
     282             :     SDValue visitSETCC(SDNode *N);
     283             :     SDValue visitSETCCE(SDNode *N);
     284             :     SDValue visitSETCCCARRY(SDNode *N);
     285             :     SDValue visitSIGN_EXTEND(SDNode *N);
     286             :     SDValue visitZERO_EXTEND(SDNode *N);
     287             :     SDValue visitANY_EXTEND(SDNode *N);
     288             :     SDValue visitAssertZext(SDNode *N);
     289             :     SDValue visitSIGN_EXTEND_INREG(SDNode *N);
     290             :     SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
     291             :     SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
     292             :     SDValue visitTRUNCATE(SDNode *N);
     293             :     SDValue visitBITCAST(SDNode *N);
     294             :     SDValue visitBUILD_PAIR(SDNode *N);
     295             :     SDValue visitFADD(SDNode *N);
     296             :     SDValue visitFSUB(SDNode *N);
     297             :     SDValue visitFMUL(SDNode *N);
     298             :     SDValue visitFMA(SDNode *N);
     299             :     SDValue visitFDIV(SDNode *N);
     300             :     SDValue visitFREM(SDNode *N);
     301             :     SDValue visitFSQRT(SDNode *N);
     302             :     SDValue visitFCOPYSIGN(SDNode *N);
     303             :     SDValue visitSINT_TO_FP(SDNode *N);
     304             :     SDValue visitUINT_TO_FP(SDNode *N);
     305             :     SDValue visitFP_TO_SINT(SDNode *N);
     306             :     SDValue visitFP_TO_UINT(SDNode *N);
     307             :     SDValue visitFP_ROUND(SDNode *N);
     308             :     SDValue visitFP_ROUND_INREG(SDNode *N);
     309             :     SDValue visitFP_EXTEND(SDNode *N);
     310             :     SDValue visitFNEG(SDNode *N);
     311             :     SDValue visitFABS(SDNode *N);
     312             :     SDValue visitFCEIL(SDNode *N);
     313             :     SDValue visitFTRUNC(SDNode *N);
     314             :     SDValue visitFFLOOR(SDNode *N);
     315             :     SDValue visitFMINNUM(SDNode *N);
     316             :     SDValue visitFMAXNUM(SDNode *N);
     317             :     SDValue visitBRCOND(SDNode *N);
     318             :     SDValue visitBR_CC(SDNode *N);
     319             :     SDValue visitLOAD(SDNode *N);
     320             : 
     321             :     SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
     322             :     SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
     323             : 
     324             :     SDValue visitSTORE(SDNode *N);
     325             :     SDValue visitINSERT_VECTOR_ELT(SDNode *N);
     326             :     SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
     327             :     SDValue visitBUILD_VECTOR(SDNode *N);
     328             :     SDValue visitCONCAT_VECTORS(SDNode *N);
     329             :     SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
     330             :     SDValue visitVECTOR_SHUFFLE(SDNode *N);
     331             :     SDValue visitSCALAR_TO_VECTOR(SDNode *N);
     332             :     SDValue visitINSERT_SUBVECTOR(SDNode *N);
     333             :     SDValue visitMLOAD(SDNode *N);
     334             :     SDValue visitMSTORE(SDNode *N);
     335             :     SDValue visitMGATHER(SDNode *N);
     336             :     SDValue visitMSCATTER(SDNode *N);
     337             :     SDValue visitFP_TO_FP16(SDNode *N);
     338             :     SDValue visitFP16_TO_FP(SDNode *N);
     339             : 
     340             :     SDValue visitFADDForFMACombine(SDNode *N);
     341             :     SDValue visitFSUBForFMACombine(SDNode *N);
     342             :     SDValue visitFMULForFMADistributiveCombine(SDNode *N);
     343             : 
     344             :     SDValue XformToShuffleWithZero(SDNode *N);
     345             :     SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS,
     346             :                            SDValue RHS);
     347             : 
     348             :     SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
     349             : 
     350             :     SDValue foldSelectOfConstants(SDNode *N);
     351             :     SDValue foldVSelectOfConstants(SDNode *N);
     352             :     SDValue foldBinOpIntoSelect(SDNode *BO);
     353             :     bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
     354             :     SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
     355             :     SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
     356             :     SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
     357             :                              SDValue N2, SDValue N3, ISD::CondCode CC,
     358             :                              bool NotExtCompare = false);
     359             :     SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
     360             :                                    SDValue N2, SDValue N3, ISD::CondCode CC);
     361             :     SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
     362             :                               const SDLoc &DL);
     363             :     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
     364             :                           const SDLoc &DL, bool foldBooleans = true);
     365             : 
     366             :     bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
     367             :                            SDValue &CC) const;
     368             :     bool isOneUseSetCC(SDValue N) const;
     369             : 
     370             :     SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
     371             :                                          unsigned HiOp);
     372             :     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
     373             :     SDValue CombineExtLoad(SDNode *N);
     374             :     SDValue combineRepeatedFPDivisors(SDNode *N);
     375             :     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
     376             :     SDValue BuildSDIV(SDNode *N);
     377             :     SDValue BuildSDIVPow2(SDNode *N);
     378             :     SDValue BuildUDIV(SDNode *N);
     379             :     SDValue BuildLogBase2(SDValue Op, const SDLoc &DL);
     380             :     SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
     381             :     SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
     382             :     SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
     383             :     SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
     384             :     SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
     385             :                                 SDNodeFlags Flags, bool Reciprocal);
     386             :     SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
     387             :                                 SDNodeFlags Flags, bool Reciprocal);
     388             :     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
     389             :                                bool DemandHighBits = true);
     390             :     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
     391             :     SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
     392             :                               SDValue InnerPos, SDValue InnerNeg,
     393             :                               unsigned PosOpcode, unsigned NegOpcode,
     394             :                               const SDLoc &DL);
     395             :     SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
     396             :     SDValue MatchLoadCombine(SDNode *N);
     397             :     SDValue ReduceLoadWidth(SDNode *N);
     398             :     SDValue ReduceLoadOpStoreWidth(SDNode *N);
     399             :     SDValue splitMergedValStore(StoreSDNode *ST);
     400             :     SDValue TransformFPLoadStorePair(SDNode *N);
     401             :     SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
     402             :     SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
     403             :     SDValue reduceBuildVecToShuffle(SDNode *N);
     404             :     SDValue reduceBuildVecToTrunc(SDNode *N);
     405             :     SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
     406             :                                   ArrayRef<int> VectorMask, SDValue VecIn1,
     407             :                                   SDValue VecIn2, unsigned LeftIdx);
     408             :     SDValue matchVSelectOpSizesWithSetCC(SDNode *N);
     409             : 
     410             :     /// Walk up chain skipping non-aliasing memory nodes,
     411             :     /// looking for aliasing nodes and adding them to the Aliases vector.
     412             :     void GatherAllAliases(SDNode *N, SDValue OriginalChain,
     413             :                           SmallVectorImpl<SDValue> &Aliases);
     414             : 
     415             :     /// Return true if there is any possibility that the two addresses overlap.
     416             :     bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
     417             : 
     418             :     /// Walk up chain skipping non-aliasing memory nodes, looking for a better
     419             :     /// chain (aliasing node.)
     420             :     SDValue FindBetterChain(SDNode *N, SDValue Chain);
     421             : 
     422             :     /// Try to replace a store and any possibly adjacent stores on
     423             :     /// consecutive chains with better chains. Return true only if St is
     424             :     /// replaced.
     425             :     ///
     426             :     /// Notice that other chains may still be replaced even if the function
     427             :     /// returns false.
     428             :     bool findBetterNeighborChains(StoreSDNode *St);
     429             : 
     430             :     /// Match "(X shl/srl V1) & V2" where V2 may not be present.
     431             :     bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask);
     432             : 
     433             :     /// Holds a pointer to an LSBaseSDNode as well as information on where it
     434             :     /// is located in a sequence of memory operations connected by a chain.
     435             :     struct MemOpLink {
     436             :       MemOpLink(LSBaseSDNode *N, int64_t Offset)
     437      509957 :           : MemNode(N), OffsetFromBase(Offset) {}
     438             :       // Ptr to the mem node.
     439             :       LSBaseSDNode *MemNode;
     440             :       // Offset from the base ptr.
     441             :       int64_t OffsetFromBase;
     442             :     };
     443             : 
     444             :     /// This is a helper function for visitMUL to check the profitability
     445             :     /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
     446             :     /// MulNode is the original multiply, AddNode is (add x, c1),
     447             :     /// and ConstNode is c2.
     448             :     bool isMulAddWithConstProfitable(SDNode *MulNode,
     449             :                                      SDValue &AddNode,
     450             :                                      SDValue &ConstNode);
     451             : 
     452             : 
     453             :     /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
     454             :     /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
     455             :     /// the type of the loaded value to be extended.  LoadedVT returns the type
     456             :     /// of the original loaded value.  NarrowLoad returns whether the load would
     457             :     /// need to be narrowed in order to match.
     458             :     bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
     459             :                           EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
     460             :                           bool &NarrowLoad);
     461             : 
     462             :     /// Helper function for MergeConsecutiveStores which merges the
     463             :     /// component store chains.
     464             :     SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
     465             :                                 unsigned NumStores);
     466             : 
     467             :     /// This is a helper function for MergeConsecutiveStores. When the
     468             :     /// source elements of the consecutive stores are all constants or
     469             :     /// all extracted vector elements, try to merge them into one
     470             :     /// larger store introducing bitcasts if necessary.  \return True
     471             :     /// if a merged store was created.
     472             :     bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
     473             :                                          EVT MemVT, unsigned NumStores,
     474             :                                          bool IsConstantSrc, bool UseVector,
     475             :                                          bool UseTrunc);
     476             : 
     477             :     /// This is a helper function for MergeConsecutiveStores. Stores
     478             :     /// that potentially may be merged with St are placed in
     479             :     /// StoreNodes.
     480             :     void getStoreMergeCandidates(StoreSDNode *St,
     481             :                                  SmallVectorImpl<MemOpLink> &StoreNodes);
     482             : 
     483             :     /// Helper function for MergeConsecutiveStores. Checks if
     484             :     /// candidate stores have indirect dependency through their
     485             :     /// operands. \return True if safe to merge.
     486             :     bool checkMergeStoreCandidatesForDependencies(
     487             :         SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores);
     488             : 
     489             :     /// Merge consecutive store operations into a wide store.
     490             :     /// This optimization uses wide integers or vectors when possible.
     491             :     /// \return true if any stores were merged into a wider store.
     492             :     /// NOTE(review): the declared result is bool and only \p N is taken, so the old "number of stores" / StoreNodes wording was stale; confirm against the definition.
     493             :     bool MergeConsecutiveStores(StoreSDNode *N);
     494             : 
     495             :     /// \brief Try to transform a truncation where C is a constant:
     496             :     ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
     497             :     ///
     498             :     /// \p N needs to be a truncation and its first operand an AND. Other
     499             :     /// requirements are checked by the function (e.g. that trunc is
     500             :     /// single-use) and if missed an empty SDValue is returned.
     501             :     SDValue distributeTruncateThroughAnd(SDNode *N);
     502             : 
     503             :   public:
     504      674878 :     DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
     505     1349756 :         : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
     506     3374390 :           OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(AA) { // Starts at BeforeLegalizeTypes with both legality flags cleared.
     507      674878 :       ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize(); // Cache the function's optimize-for-size setting.
     508             : 
     509      674878 :       MaximumLegalStoreInBits = 0; // Raised by the scan below to the bit width of the widest legal value type.
     510    75586336 :       for (MVT VT : MVT::all_valuetypes())
     511    74911458 :         if (EVT(VT).isSimple() && VT != MVT::Other && // NOTE(review): VT is already an MVT, so the isSimple() test looks redundant - confirm.
     512   167965002 :             TLI.isTypeLegal(EVT(VT)) &&
     513     9745921 :             VT.getSizeInBits() >= MaximumLegalStoreInBits)
     514     7446918 :           MaximumLegalStoreInBits = VT.getSizeInBits();
     515      674878 :     }
     516             : 
     517             :     /// Runs the dag combiner on all nodes in the work list
     518             :     void Run(CombineLevel AtLevel);
     519             : 
     520             :     SelectionDAG &getDAG() const { return DAG; }
     521             : 
     522             :     /// Returns a type large enough to hold any valid shift amount - before type
     523             :     /// legalization these can be huge. Vector types are returned unchanged; for scalars the target's scalar shift-amount type is used once LegalTypes is set, and the pointer type otherwise.
     524       26947 :     EVT getShiftAmountTy(EVT LHSTy) {
     525             :       assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
     526       26947 :       if (LHSTy.isVector())
     527         267 :         return LHSTy;
     528       53360 :       auto &DL = DAG.getDataLayout();
     529       28212 :       return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy)
     530       54892 :                         : TLI.getPointerTy(DL);
     531             :     }
     532             : 
     533             :     /// Returns true if we are running before type legalization (LegalTypes is
     534             :     /// not set) or if the target reports the specified VT as legal.
     535             :     bool isTypeLegal(const EVT &VT) {
     536      488353 :       if (!LegalTypes) return true;
     537      299058 :       return TLI.isTypeLegal(VT);
     538             :     }
     539             : 
     540             :     /// Convenience wrapper around TargetLowering::getSetCCResultType that supplies the DAG's data layout and context.
     541      128798 :     EVT getSetCCResultType(EVT VT) const {
     542      257596 :       return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
     543             :     }
     544             :   };
     545             : }
     546             : 
     547             : 
     548             : namespace {
     549             : /// This class is a DAGUpdateListener that removes any deleted
     550             : /// nodes from the combiner's worklist. It registers itself against the combiner's DAG on construction.
     551    53016636 : class WorklistRemover : public SelectionDAG::DAGUpdateListener {
     552             :   DAGCombiner &DC;
     553             : public:
     554             :   explicit WorklistRemover(DAGCombiner &dc)
     555    53016636 :     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
     556             : 
     557       22901 :   void NodeDeleted(SDNode *N, SDNode *E) override { // Only N is used; E is ignored.
     558       22901 :     DC.removeFromWorklist(N);
     559       22901 :   }
     560             : };
     561             : }
     562             : 
     563             : //===----------------------------------------------------------------------===//
     564             : //  TargetLowering::DAGCombinerInfo implementation
     565             : //===----------------------------------------------------------------------===//
     566             : 
     567       17864 : void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { // Forwards to the DAGCombiner that DC is cast to.
     568       17864 :   ((DAGCombiner*)DC)->AddToWorklist(N);
     569       17864 : }
     570             : 
     571         799 : SDValue TargetLowering::DAGCombinerInfo::
     572             : CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) { // Forwards the list as pointer + length to DAGCombiner::CombineTo.
     573         799 :   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); // NOTE(review): &To[0] needs a non-empty To - confirm callers never pass an empty list.
     574             : }
     575             : 
     576        6043 : SDValue TargetLowering::DAGCombinerInfo::
     577             : CombineTo(SDNode *N, SDValue Res, bool AddTo) { // Single-result overload; forwards to the DAGCombiner that DC is cast to.
     578       12086 :   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
     579             : }
     580             : 
     581             : 
     582        2477 : SDValue TargetLowering::DAGCombinerInfo::
     583             : CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) { // Two-result overload; forwards to the DAGCombiner that DC is cast to.
     584        4954 :   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
     585             : }
     586             : 
     587         323 : void TargetLowering::DAGCombinerInfo::
     588             : CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { // Forwards TLO to the DAGCombiner that DC is cast to.
     589         323 :   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO); // Returning a void call expression; nothing is produced.
     590             : }
     591             : 
     592             : //===----------------------------------------------------------------------===//
     593             : // Helper Functions
     594             : //===----------------------------------------------------------------------===//
     595             : 
     596      639299 : void DAGCombiner::deleteAndRecombine(SDNode *N) { // Deletes N from the DAG after re-queueing operands that may now be dead or simplifiable.
     597      639299 :   removeFromWorklist(N); // N must not stay on the worklist once it is deleted below.
     598             : 
     599             :   // If the operands of this node are only used by the node, they will now be
     600             :   // dead. Make sure to re-visit them and recursively delete dead nodes.
     601     2589353 :   for (const SDValue &Op : N->ops())
     602             :     // For an operand generating multiple values, one of the values may
     603             :     // become dead allowing further simplification (e.g. split index
     604             :     // arithmetic from an indexed load).
     605     3798468 :     if (Op->hasOneUse() || Op->getNumValues() > 1)
     606      415371 :       AddToWorklist(Op.getNode());
     607             : 
     608      639299 :   DAG.DeleteNode(N);
     609      639299 : }
     610             : 
     611             : /// Return 1 if we can compute the negated form of the specified expression for
     612             : /// the same cost as the expression itself, or 2 if we can compute the negated
     613             : /// form more cheaply than the expression itself. Returns 0 when the negation is not free (e.g. multiple uses, recursion depth above 6, or an unhandled opcode).
     614      132317 : static char isNegatibleForFree(SDValue Op, bool LegalOperations,
     615             :                                const TargetLowering &TLI,
     616             :                                const TargetOptions *Options,
     617             :                                unsigned Depth = 0) {
     618             :   // fneg is removable even if it has multiple uses.
     619      264634 :   if (Op.getOpcode() == ISD::FNEG) return 2;
     620             : 
     621             :   // Don't allow anything with multiple uses.
     622      263124 :   if (!Op.hasOneUse()) return 0;
     623             : 
     624             :   // Don't recurse exponentially.
     625       90158 :   if (Depth > 6) return 0;
     626             : 
     627      176846 :   switch (Op.getOpcode()) {
     628             :   default: return false; // Converts to 0: this opcode is not negatible for free.
     629        3072 :   case ISD::ConstantFP: {
     630        3072 :     if (!LegalOperations)
     631             :       return 1;
     632             : 
     633             :     // Don't invert constant FP values after legalization unless the target says
     634             :     // the negated constant is legal.
     635        1344 :     EVT VT = Op.getValueType();
     636         802 :     return TLI.isOperationLegal(ISD::ConstantFP, VT) || // The bool result converts to 1 or 0.
     637        2666 :       TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
     638             :   }
     639        5718 :   case ISD::FADD:
     640             :     // FIXME: determine better conditions for this xform.
     641        5718 :     if (!Options->UnsafeFPMath) return 0;
     642             : 
     643             :     // After operation legalization, it might not be legal to create new FSUBs.
     644        1083 :     if (LegalOperations &&
     645         854 :         !TLI.isOperationLegalOrCustom(ISD::FSUB,  Op.getValueType()))
     646             :       return 0;
     647             : 
     648             :     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
     649        3249 :     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
     650        1083 :                                     Options, Depth + 1))
     651        1064 :       return V;
     652             :     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
     653        2128 :     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
     654        1064 :                               Depth + 1);
     655        2019 :   case ISD::FSUB:
     656             :     // We can't turn -(A-B) into B-A when we honor signed zeros.
     657        4006 :     if (!Options->NoSignedZerosFPMath &&
     658        3974 :         !Op.getNode()->getFlags().hasNoSignedZeros())
     659             :       return 0;
     660             : 
     661             :     // fold (fneg (fsub A, B)) -> (fsub B, A)
     662             :     return 1;
     663             : 
     664       24005 :   case ISD::FMUL:
     665             :   case ISD::FDIV:
     666       24005 :     if (Options->HonorSignDependentRoundingFPMath()) return 0;
     667             : 
     668             :     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
     669       72015 :     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
     670       24005 :                                     Options, Depth + 1))
     671       23820 :       return V;
     672             : 
     673       47640 :     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
     674       23820 :                               Depth + 1);
     675             : 
     676        1563 :   case ISD::FP_EXTEND:
     677             :   case ISD::FP_ROUND:
     678             :   case ISD::FSIN:
     679        3126 :     return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, // Unary cases: the answer is that of the single operand.
     680        1563 :                               Depth + 1);
     681             :   }
     682             : }
     683             : 
     684             : /// If isNegatibleForFree returns nonzero for Op, build and return the newly negated expression. The cases handled here must mirror those in isNegatibleForFree.
     685         760 : static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
     686             :                                     bool LegalOperations, unsigned Depth = 0) {
     687         760 :   const TargetOptions &Options = DAG.getTarget().Options;
     688             :   // fneg is removable even if it has multiple uses.
     689        1914 :   if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
     690             : 
     691             :   // Don't allow anything with multiple uses.
     692             :   assert(Op.hasOneUse() && "Unknown reuse!");
     693             : 
     694             :   assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
     695             : 
     696         732 :   const SDNodeFlags Flags = Op.getNode()->getFlags(); // Forwarded to the rebuilt binary nodes below.
     697             : 
     698         732 :   switch (Op.getOpcode()) {
     699           0 :   default: llvm_unreachable("Unknown code");
     700         125 :   case ISD::ConstantFP: {
     701         500 :     APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
     702         125 :     V.changeSign();
     703         500 :     return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
     704             :   }
     705          18 :   case ISD::FADD:
     706             :     // FIXME: determine better conditions for this xform.
     707             :     assert(Options.UnsafeFPMath);
     708             : 
     709             :     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
     710          36 :     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
     711             :                            DAG.getTargetLoweringInfo(), &Options, Depth+1))
     712          36 :       return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
     713          36 :                          GetNegatedExpression(Op.getOperand(0), DAG,
     714             :                                               LegalOperations, Depth+1),
     715          90 :                          Op.getOperand(1), Flags);
     716             :     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
     717           0 :     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
     718           0 :                        GetNegatedExpression(Op.getOperand(1), DAG,
     719             :                                             LegalOperations, Depth+1),
     720           0 :                        Op.getOperand(0), Flags);
     721          19 :   case ISD::FSUB:
     722             :     // fold (fneg (fsub 0, B)) -> B
     723          43 :     if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
     724           5 :       if (N0CFP->isZero())
     725          10 :         return Op.getOperand(1);
     726             : 
     727             :     // fold (fneg (fsub A, B)) -> (fsub B, A)
     728          28 :     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
     729          70 :                        Op.getOperand(1), Op.getOperand(0), Flags);
     730             : 
     731         184 :   case ISD::FMUL:
     732             :   case ISD::FDIV:
     733             :     assert(!Options.HonorSignDependentRoundingFPMath());
     734             : 
     735             :     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
     736         368 :     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
     737             :                            DAG.getTargetLoweringInfo(), &Options, Depth+1))
     738         176 :       return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
     739         176 :                          GetNegatedExpression(Op.getOperand(0), DAG,
     740             :                                               LegalOperations, Depth+1),
     741         528 :                          Op.getOperand(1), Flags);
     742             : 
     743             :     // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
     744         192 :     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
     745         192 :                        Op.getOperand(0),
     746         192 :                        GetNegatedExpression(Op.getOperand(1), DAG,
     747         480 :                                             LegalOperations, Depth+1), Flags);
     748             : 
     749           8 :   case ISD::FP_EXTEND:
     750             :   case ISD::FSIN:
     751          16 :     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
     752          16 :                        GetNegatedExpression(Op.getOperand(0), DAG,
     753          48 :                                             LegalOperations, Depth+1)); // NOTE(review): Flags are not forwarded here, unlike the FADD/FSUB/FMUL/FDIV cases - confirm intended.
     754          12 :   case ISD::FP_ROUND:
     755          24 :       return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
     756          24 :                          GetNegatedExpression(Op.getOperand(0), DAG,
     757             :                                               LegalOperations, Depth+1),
     758          72 :                          Op.getOperand(1)); // The original second operand of the FP_ROUND is passed through unchanged.
     759             :   }
     760             : }
     761             : 
     762             : // APInts must be the same size for most operations, this helper
     763             : // function zero extends the shorter of the pair so that they match.
     764             : // We provide an Offset so that we can create bitwidths that won't overflow. Both values end up Offset + max(LHS width, RHS width) bits wide.
     765        9020 : static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
     766       18040 :   unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
     767       27060 :   LHS = LHS.zextOrSelf(Bits);
     768       27060 :   RHS = RHS.zextOrSelf(Bits);
     769        9020 : }
     770             : 
     771             : // Return true if this node is a setcc, or is a select_cc
     772             : // that selects between the target values used for true and false, making it
     773             : // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
     774             : // the appropriate nodes based on the type of node we are checking. This
     775             : // simplifies life a bit for the callers. LHS, RHS and CC are left untouched when false is returned.
     776      359978 : bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
     777             :                                     SDValue &CC) const {
     778      719956 :   if (N.getOpcode() == ISD::SETCC) {
     779      154384 :     LHS = N.getOperand(0);
     780      154384 :     RHS = N.getOperand(1);
     781      154384 :     CC  = N.getOperand(2);
     782             :     return true;
     783             :   }
     784             : 
     785      283221 :   if (N.getOpcode() != ISD::SELECT_CC || // Otherwise only a SELECT_CC whose operands 2 and 3 are the target's true/false constants qualifies.
     786      283510 :       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
     787         578 :       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
     788             :     return false;
     789             : 
     790         578 :   if (TLI.getBooleanContents(N.getValueType()) == // Reject types whose boolean contents are undefined for the target.
     791             :       TargetLowering::UndefinedBooleanContent)
     792             :     return false;
     793             : 
     794         578 :   LHS = N.getOperand(0);
     795         578 :   RHS = N.getOperand(1);
     796         578 :   CC  = N.getOperand(4); // For SELECT_CC the condition code is read from operand 4.
     797             :   return true;
     798             : }
     799             : 
     800             : /// Return true if this is a SetCC-equivalent operation (per isSetCCEquivalent) with only one use.
     801             : /// If this is true, it allows the users to invert the operation for free when
     802             : /// it is profitable to do so.
     803          72 : bool DAGCombiner::isOneUseSetCC(SDValue N) const {
     804          72 :   SDValue N0, N1, N2; // Outputs required by isSetCCEquivalent; not used afterwards.
     805          72 :   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
     806             :     return true;
     807             :   return false;
     808             : }
     809             : 
     810             : // \brief Returns the SDNode if it is a constant float BuildVector
     811             : // or constant float, and nullptr otherwise.
     812             : static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
     813      901437 :   if (isa<ConstantFPSDNode>(N))
     814             :     return N.getNode();
     815      901437 :   if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
     816        1075 :     return N.getNode();
     817             :   return nullptr;
     818             : }
     819             : 
     820             : // Determines if it is a constant integer or a build vector of constant
     821             : // integers (and undefs).
     822             : // Do not permit build vector implicit truncation. When NoOpaques is set, opaque constants are rejected as well.
     823     5024009 : static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
     824     3417410 :   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
     825     3417410 :     return !(Const->isOpaque() && NoOpaques);
     826     3213198 :   if (N.getOpcode() != ISD::BUILD_VECTOR)
     827             :     return false;
     828      351522 :   unsigned BitWidth = N.getScalarValueSizeInBits();
     829     2716913 :   for (const SDValue &Op : N->op_values()) {
     830      833154 :     if (Op.isUndef())
     831         850 :       continue;
     832      829346 :     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
     833     1657689 :     if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth || // A width mismatch means the element would be implicitly truncated.
     834           0 :         (Const->isOpaque() && NoOpaques))
     835             :       return false;
     836             :   }
     837             :   return true;
     838             : }
     839             : 
     840             : // Determines if it is a constant null integer or a splatted vector of a
     841             : // constant null integer (with no undefs).
     842             : // Build vector implicit truncation is not an issue for null values, so no bit-width check is made here.
     843      673367 : static bool isNullConstantOrNullSplatConstant(SDValue N) {
     844      673367 :   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
     845        9877 :     return Splat->isNullValue();
     846             :   return false;
     847             : }
     848             : 
     849             : // Determines if it is a constant integer of one or a splatted vector of a
     850             : // constant integer of one (with no undefs).
     851             : // Do not permit build vector implicit truncation: the constant's bit width must equal N's scalar size.
     852         236 : static bool isOneConstantOrOneSplatConstant(SDValue N) {
     853         236 :   unsigned BitWidth = N.getScalarValueSizeInBits();
     854         236 :   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
     855         518 :     return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
     856             :   return false;
     857             : }
     858             : 
     859             : // Determines if it is a constant integer of all ones or a splatted vector of a
     860             : // constant integer of all ones (with no undefs).
     861             : // Do not permit build vector implicit truncation: the constant's bit width must equal N's scalar size.
     862      374519 : static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
     863      374519 :   unsigned BitWidth = N.getScalarValueSizeInBits();
     864      374519 :   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
     865       10684 :     return Splat->isAllOnesValue() &&
     866          50 :            Splat->getAPIntValue().getBitWidth() == BitWidth;
     867             :   return false;
     868             : }
     869             : 
     870             : // Determines if a BUILD_VECTOR is composed of all-constants (either integer
     871             : // or floating-point) possibly mixed with undefs.
     872       17962 : static bool isAnyConstantBuildVector(const SDNode *N) {
     873       33954 :   return ISD::isBuildVectorOfConstantSDNodes(N) ||
     874       33954 :          ISD::isBuildVectorOfConstantFPSDNodes(N);
     875             : }
     876             : 
     877             : // Attempt to match a unary predicate against a scalar/splat constant or
     878             : // every element of a constant BUILD_VECTOR. Any non-constant element, or one whose type differs from the vector's scalar type, makes the match fail.
     879      316947 : static bool matchUnaryPredicate(SDValue Op,
     880             :                                 std::function<bool(ConstantSDNode *)> Match) {
     881      295431 :   if (auto *Cst = dyn_cast<ConstantSDNode>(Op))
     882      295431 :     return Match(Cst);
     883             : 
     884       43032 :   if (ISD::BUILD_VECTOR != Op.getOpcode())
     885             :     return false;
     886             : 
     887        9222 :   EVT SVT = Op.getValueType().getScalarType();
     888        9362 :   for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
     889       14040 :     auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i));
     890       13618 :     if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst))
     891             :       return false;
     892             :   }
     893             :   return true;
     894             : }
     895             : 
     896             : // Attempt to match a binary predicate against a pair of scalar/splat constants
     897             : // or every element of a pair of constant BUILD_VECTORs. Fails immediately if LHS and RHS have different value types.
     898       10379 : static bool matchBinaryPredicate(
     899             :     SDValue LHS, SDValue RHS,
     900             :     std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match) {
     901       31143 :   if (LHS.getValueType() != RHS.getValueType())
     902             :     return false;
     903             : 
     904        9436 :   if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS))
     905        9212 :     if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS))
     906        9212 :       return Match(LHSCst, RHSCst);
     907             : 
     908        2256 :   if (ISD::BUILD_VECTOR != LHS.getOpcode() ||
     909         158 :       ISD::BUILD_VECTOR != RHS.getOpcode())
     910             :     return false;
     911             : 
     912         316 :   EVT SVT = LHS.getValueType().getScalarType();
     913        1414 :   for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
     914        3351 :     auto *LHSCst = dyn_cast<ConstantSDNode>(LHS.getOperand(i));
     915        3351 :     auto *RHSCst = dyn_cast<ConstantSDNode>(RHS.getOperand(i));
     916        1117 :     if (!LHSCst || !RHSCst)
     917             :       return false;
     918        2234 :     if (LHSCst->getValueType(0) != SVT || // Both elements must have exactly the vector's scalar type.
     919        2234 :         LHSCst->getValueType(0) != RHSCst->getValueType(0))
     920             :       return false;
     921        1117 :     if (!Match(LHSCst, RHSCst))
     922             :       return false;
     923             :   }
     924             :   return true;
     925             : }
     926             : 
     927     2268255 : SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
     928             :                                     SDValue N1) { // Tries to regroup nested Opc nodes so constants end up together; returns an empty SDValue when nothing applies.
     929     4536510 :   EVT VT = N0.getValueType();
     930     4536510 :   if (N0.getOpcode() == Opc) {
     931      841996 :     if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
     932      153909 :       if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
     933             :         // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
     934      151908 :         if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
     935      455661 :           return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
     936          21 :         return SDValue();
     937             :       }
     938        4002 :       if (N0.hasOneUse()) {
     939             :         // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
     940             :         // use
     941        6200 :         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
     942        1240 :         if (!OpNode.getNode())
     943           0 :           return SDValue();
     944        1240 :         AddToWorklist(OpNode.getNode());
     945        3720 :         return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
     946             :       }
     947             :     }
     948             :   }
     949             : 
     950     4230214 :   if (N1.getOpcode() == Opc) {
     951       57078 :     if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
     952       24906 :       if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
     953             :         // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
     954           0 :         if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
     955           0 :           return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
     956           0 :         return SDValue();
     957             :       }
     958       49812 :       if (N1.hasOneUse()) {
     959             :         // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff y+c1 (i.e. N1) has one
     960             :         // use
     961      122110 :         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
     962       24422 :         if (!OpNode.getNode())
     963           0 :           return SDValue();
     964       24422 :         AddToWorklist(OpNode.getNode());
     965       73266 :         return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
     966             :       }
     967             :     }
     968             :   }
     969             : 
     970     2090685 :   return SDValue();
     971             : }
     972             : 
     973      488469 : SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
     974             :                                bool AddTo) { // Replaces all uses of N's results with To[0..NumTo), optionally queues the replacements and their users, and deletes N if it became dead.
     975             :   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
     976      488469 :   ++NodesCombined;
     977             :   DEBUG(dbgs() << "\nReplacing.1 ";
     978             :         N->dump(&DAG);
     979             :         dbgs() << "\nWith: ";
     980             :         To[0].getNode()->dump(&DAG);
     981             :         dbgs() << " and " << NumTo-1 << " other values\n");
     982      488469 :   for (unsigned i = 0, e = NumTo; i != e; ++i)
     983             :     assert((!To[i].getNode() ||
     984             :             N->getValueType(i) == To[i].getValueType()) &&
     985             :            "Cannot combine value to value of different type!");
     986             : 
     987      976938 :   WorklistRemover DeadNodes(*this); // Nodes deleted during the replacement are dropped from the worklist.
     988      488469 :   DAG.ReplaceAllUsesWith(N, To);
     989      488469 :   if (AddTo) {
     990             :     // Push the new nodes and any users onto the worklist
     991      904138 :     for (unsigned i = 0, e = NumTo; i != e; ++i) {
     992      347821 :       if (To[i].getNode()) { // Null replacement entries are skipped.
     993      347619 :         AddToWorklist(To[i].getNode());
     994      347619 :         AddUsersToWorklist(To[i].getNode());
     995             :       }
     996             :     }
     997             :   }
     998             : 
     999             :   // Finally, if the node is now dead, remove it from the graph.  The node
    1000             :   // may not be dead if the replacement process recursively simplified to
    1001             :   // something else needing this node.
    1002      488469 :   if (N->use_empty())
    1003      488427 :     deleteAndRecombine(N);
    1004      976938 :   return SDValue(N, 0);
    1005             : }
    1006             : 
    1007       82637 : void DAGCombiner::
    1008             : CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { // Applies the TLO.Old -> TLO.New replacement to the DAG and updates the worklist.
    1009             :   // Replace all uses.  If any nodes become isomorphic to other nodes and
    1010             :   // are deleted, make sure to remove them from our worklist.
    1011      165274 :   WorklistRemover DeadNodes(*this);
    1012       82637 :   DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
    1013             : 
    1014             :   // Push the new node and any (possibly new) users onto the worklist.
    1015       82637 :   AddToWorklist(TLO.New.getNode());
    1016      165274 :   AddUsersToWorklist(TLO.New.getNode());
    1017             : 
    1018             :   // Finally, if the old node is now dead, remove it from the graph.  The node
    1019             :   // may not be dead if the replacement process recursively simplified to
    1020             :   // something else needing this node.
    1021       82637 :   if (TLO.Old.getNode()->use_empty())
    1022       82345 :     deleteAndRecombine(TLO.Old.getNode());
    1023       82637 : }
    1024             : 
    1025             : /// Check the specified integer node value to see if it can be simplified or if
    1026             : /// things it uses can be simplified by bit propagation. If so, return true.
    1027     2470786 : bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
    1028     4941572 :   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
    1029     4941572 :   KnownBits Known;
    1030     2470786 :   if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
    1031             :     return false;
    1032             : 
    1033             :   // Revisit the node.
    1034       82314 :   AddToWorklist(Op.getNode());
    1035             : 
    1036             :   // Replace the old value with the new one.
    1037       82314 :   ++NodesCombined;
    1038             :   DEBUG(dbgs() << "\nReplacing.2 ";
    1039             :         TLO.Old.getNode()->dump(&DAG);
    1040             :         dbgs() << "\nWith: ";
    1041             :         TLO.New.getNode()->dump(&DAG);
    1042             :         dbgs() << '\n');
    1043             : 
    1044       82314 :   CommitTargetLoweringOpt(TLO);
    1045       82314 :   return true;
    1046             : }
    1047             : 
    1048          56 : void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
    1049         112 :   SDLoc DL(Load);
    1050         112 :   EVT VT = Load->getValueType(0);
    1051         168 :   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
    1052             : 
    1053             :   DEBUG(dbgs() << "\nReplacing.9 ";
    1054             :         Load->dump(&DAG);
    1055             :         dbgs() << "\nWith: ";
    1056             :         Trunc.getNode()->dump(&DAG);
    1057             :         dbgs() << '\n');
    1058         112 :   WorklistRemover DeadNodes(*this);
    1059         112 :   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
    1060         168 :   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
    1061          56 :   deleteAndRecombine(Load);
    1062          56 :   AddToWorklist(Trunc.getNode());
    1063          56 : }
    1064             : 
    1065        2611 : SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
    1066        2611 :   Replace = false;
    1067        5222 :   SDLoc DL(Op);
    1068        2710 :   if (ISD::isUNINDEXEDLoad(Op.getNode())) {
    1069          99 :     LoadSDNode *LD = cast<LoadSDNode>(Op);
    1070          99 :     EVT MemVT = LD->getMemoryVT();
    1071          99 :     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
    1072         168 :       ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
    1073             :                                                        : ISD::EXTLOAD)
    1074          99 :       : LD->getExtensionType();
    1075          99 :     Replace = true;
    1076          99 :     return DAG.getExtLoad(ExtType, DL, PVT,
    1077         198 :                           LD->getChain(), LD->getBasePtr(),
    1078         198 :                           MemVT, LD->getMemOperand());
    1079             :   }
    1080             : 
    1081        5024 :   unsigned Opc = Op.getOpcode();
    1082        2512 :   switch (Opc) {
    1083             :   default: break;
    1084           9 :   case ISD::AssertSext:
    1085          18 :     if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
    1086          27 :       return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
    1087           0 :     break;
    1088          33 :   case ISD::AssertZext:
    1089          66 :     if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
    1090          99 :       return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
    1091           0 :     break;
    1092         675 :   case ISD::Constant: {
    1093             :     unsigned ExtOpc =
    1094        2025 :       Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    1095        1350 :     return DAG.getNode(ExtOpc, DL, PVT, Op);
    1096             :   }
    1097             :   }
    1098             : 
    1099        3590 :   if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    1100           0 :     return SDValue();
    1101        3590 :   return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
    1102             : }
    1103             : 
    1104           9 : SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
    1105          18 :   if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
    1106           0 :     return SDValue();
    1107          18 :   EVT OldVT = Op.getValueType();
    1108           9 :   SDLoc DL(Op);
    1109           9 :   bool Replace = false;
    1110           9 :   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
    1111           9 :   if (!NewOp.getNode())
    1112           0 :     return SDValue();
    1113           9 :   AddToWorklist(NewOp.getNode());
    1114             : 
    1115           9 :   if (Replace)
    1116           0 :     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
    1117           9 :   return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
    1118          18 :                      DAG.getValueType(OldVT));
    1119             : }
    1120             : 
    1121         588 : SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
    1122        1176 :   EVT OldVT = Op.getValueType();
    1123        1176 :   SDLoc DL(Op);
    1124         588 :   bool Replace = false;
    1125         588 :   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
    1126         588 :   if (!NewOp.getNode())
    1127           0 :     return SDValue();
    1128         588 :   AddToWorklist(NewOp.getNode());
    1129             : 
    1130         588 :   if (Replace)
    1131           5 :     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
    1132         588 :   return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
    1133             : }
    1134             : 
    1135             : /// Promote the specified integer binary operation if the target indicates it is
    1136             : /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
    1137             : /// i32 since i16 instructions are longer.
    1138     2243065 : SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
    1139     2243065 :   if (!LegalOperations)
    1140     1265318 :     return SDValue();
    1141             : 
    1142     1955494 :   EVT VT = Op.getValueType();
    1143      977747 :   if (VT.isVector() || !VT.isInteger())
    1144      123994 :     return SDValue();
    1145             : 
    1146             :   // If operation type is 'undesirable', e.g. i16 on x86, consider
    1147             :   // promoting it.
    1148     1707506 :   unsigned Opc = Op.getOpcode();
    1149      853753 :   if (TLI.isTypeDesirableForOp(Opc, VT))
    1150      852575 :     return SDValue();
    1151             : 
    1152        1178 :   EVT PVT = VT;
    1153             :   // Consult target whether it is a good idea to promote this operation and
    1154             :   // what's the right type to promote it to.
    1155        1178 :   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    1156             :     assert(PVT != VT && "Don't know what type to promote to!");
    1157             : 
    1158             :     DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
    1159             : 
    1160         962 :     bool Replace0 = false;
    1161        1924 :     SDValue N0 = Op.getOperand(0);
    1162         962 :     SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
    1163             : 
    1164         962 :     bool Replace1 = false;
    1165        1924 :     SDValue N1 = Op.getOperand(1);
    1166         962 :     SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
    1167        1924 :     SDLoc DL(Op);
    1168             : 
    1169             :     SDValue RV =
    1170        2886 :         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
    1171             : 
    1172             :     // We are always replacing N0/N1's use in N and only need
    1173             :     // additional replacements if there are additional uses.
    1174        1924 :     Replace0 &= !N0->hasOneUse();
    1175        1919 :     Replace1 &= (N0 != N1) && !N1->hasOneUse();
    1176             : 
    1177             :     // Combine Op here so it is preserved past replacements.
    1178        1924 :     CombineTo(Op.getNode(), RV);
    1179             : 
    1180             :     // If operands have a use ordering, make sure we deal with
    1181             :     // the predecessor first.
    1182         962 :     if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
    1183             :       std::swap(N0, N1);
    1184             :       std::swap(NN0, NN1);
    1185             :     }
    1186             : 
    1187         962 :     if (Replace0) {
    1188          23 :       AddToWorklist(NN0.getNode());
    1189          23 :       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    1190             :     }
    1191         962 :     if (Replace1) {
    1192          11 :       AddToWorklist(NN1.getNode());
    1193          11 :       ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
    1194             :     }
    1195         962 :     return Op;
    1196             :   }
    1197         216 :   return SDValue();
    1198             : }
    1199             : 
    1200             : /// Promote the specified integer shift operation if the target indicates it is
    1201             : /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
    1202             : /// i32 since i16 instructions are longer.
    1203      251694 : SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
    1204      251694 :   if (!LegalOperations)
    1205      117217 :     return SDValue();
    1206             : 
    1207      268954 :   EVT VT = Op.getValueType();
    1208      134477 :   if (VT.isVector() || !VT.isInteger())
    1209        1656 :     return SDValue();
    1210             : 
    1211             :   // If operation type is 'undesirable', e.g. i16 on x86, consider
    1212             :   // promoting it.
    1213      265642 :   unsigned Opc = Op.getOpcode();
    1214      132821 :   if (TLI.isTypeDesirableForOp(Opc, VT))
    1215      130125 :     return SDValue();
    1216             : 
    1217        2696 :   EVT PVT = VT;
    1218             :   // Consult target whether it is a good idea to promote this operation and
    1219             :   // what's the right type to promote it to.
    1220        2696 :   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    1221             :     assert(PVT != VT && "Don't know what type to promote to!");
    1222             : 
    1223             :     DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
    1224             : 
    1225         645 :     bool Replace = false;
    1226        1290 :     SDValue N0 = Op.getOperand(0);
    1227        1290 :     SDValue N1 = Op.getOperand(1);
    1228         645 :     if (Opc == ISD::SRA)
    1229           0 :       N0 = SExtPromoteOperand(N0, PVT);
    1230         645 :     else if (Opc == ISD::SRL)
    1231         555 :       N0 = ZExtPromoteOperand(N0, PVT);
    1232             :     else
    1233          90 :       N0 = PromoteOperand(N0, PVT, Replace);
    1234             : 
    1235         645 :     if (!N0.getNode())
    1236           0 :       return SDValue();
    1237             : 
    1238         646 :     SDLoc DL(Op);
    1239             :     SDValue RV =
    1240        1935 :         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
    1241             : 
    1242         645 :     AddToWorklist(N0.getNode());
    1243         645 :     if (Replace)
    1244          34 :       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
    1245             : 
    1246             :     // Deal with Op being deleted.
    1247        1290 :     if (Op && Op.getOpcode() != ISD::DELETED_NODE)
    1248         644 :       return RV;
    1249             :   }
    1250        2052 :   return SDValue();
    1251             : }
    1252             : 
    1253      157650 : SDValue DAGCombiner::PromoteExtend(SDValue Op) {
    1254      157650 :   if (!LegalOperations)
    1255      107326 :     return SDValue();
    1256             : 
    1257      100648 :   EVT VT = Op.getValueType();
    1258       50324 :   if (VT.isVector() || !VT.isInteger())
    1259         677 :     return SDValue();
    1260             : 
    1261             :   // If operation type is 'undesirable', e.g. i16 on x86, consider
    1262             :   // promoting it.
    1263       99294 :   unsigned Opc = Op.getOpcode();
    1264       49647 :   if (TLI.isTypeDesirableForOp(Opc, VT))
    1265       49539 :     return SDValue();
    1266             : 
    1267         108 :   EVT PVT = VT;
    1268             :   // Consult target whether it is a good idea to promote this operation and
    1269             :   // what's the right type to promote it to.
    1270         108 :   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    1271             :     assert(PVT != VT && "Don't know what type to promote to!");
    1272             :     // fold (aext (aext x)) -> (aext x)
    1273             :     // fold (aext (zext x)) -> (zext x)
    1274             :     // fold (aext (sext x)) -> (sext x)
    1275             :     DEBUG(dbgs() << "\nPromoting ";
    1276             :           Op.getNode()->dump(&DAG));
    1277         624 :     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
    1278             :   }
    1279           4 :   return SDValue();
    1280             : }
    1281             : 
    1282     1493069 : bool DAGCombiner::PromoteLoad(SDValue Op) {
    1283     1493069 :   if (!LegalOperations)
    1284             :     return false;
    1285             : 
    1286     1105396 :   if (!ISD::isUNINDEXEDLoad(Op.getNode()))
    1287             :     return false;
    1288             : 
    1289     1105230 :   EVT VT = Op.getValueType();
    1290      552615 :   if (VT.isVector() || !VT.isInteger())
    1291             :     return false;
    1292             : 
    1293             :   // If operation type is 'undesirable', e.g. i16 on x86, consider
    1294             :   // promoting it.
    1295      749494 :   unsigned Opc = Op.getOpcode();
    1296      374747 :   if (TLI.isTypeDesirableForOp(Opc, VT))
    1297             :     return false;
    1298             : 
    1299        1166 :   EVT PVT = VT;
    1300             :   // Consult target whether it is a good idea to promote this operation and
    1301             :   // what's the right type to promote it to.
    1302        1166 :   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    1303             :     assert(PVT != VT && "Don't know what type to promote to!");
    1304             : 
    1305           0 :     SDLoc DL(Op);
    1306           0 :     SDNode *N = Op.getNode();
    1307           0 :     LoadSDNode *LD = cast<LoadSDNode>(N);
    1308           0 :     EVT MemVT = LD->getMemoryVT();
    1309           0 :     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
    1310           0 :       ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
    1311             :                                                        : ISD::EXTLOAD)
    1312           0 :       : LD->getExtensionType();
    1313           0 :     SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
    1314           0 :                                    LD->getChain(), LD->getBasePtr(),
    1315           0 :                                    MemVT, LD->getMemOperand());
    1316           0 :     SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
    1317             : 
    1318             :     DEBUG(dbgs() << "\nPromoting ";
    1319             :           N->dump(&DAG);
    1320             :           dbgs() << "\nTo: ";
    1321             :           Result.getNode()->dump(&DAG);
    1322             :           dbgs() << '\n');
    1323           0 :     WorklistRemover DeadNodes(*this);
    1324           0 :     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    1325           0 :     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    1326           0 :     deleteAndRecombine(N);
    1327           0 :     AddToWorklist(Result.getNode());
    1328           0 :     return true;
    1329             :   }
    1330             :   return false;
    1331             : }
    1332             : 
    1333             : /// \brief Recursively delete a node which has no uses and any operands for
    1334             : /// which it is the only use.
    1335             : ///
    1336             : /// Note that this both deletes the nodes and removes them from the worklist.
    1337             : /// It also adds any nodes that have had a user deleted to the worklist, as they
    1338             : /// may now have only one use and be subject to other combines.
    1339    27698126 : bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
    1340    27698126 :   if (!N->use_empty())
    1341             :     return false;
    1342             : 
    1343     1829738 :   SmallSetVector<SDNode *, 16> Nodes;
    1344     1829738 :   Nodes.insert(N);
    1345             :   do {
    1346     7753576 :     N = Nodes.pop_back_val();
    1347     7753576 :     if (!N)
    1348           0 :       continue;
    1349             : 
    1350     7753576 :     if (N->use_empty()) {
    1351    23072180 :       for (const SDValue &ChildN : N->op_values())
    1352     6400060 :         Nodes.insert(ChildN.getNode());
    1353             : 
    1354     3424020 :       removeFromWorklist(N);
    1355     3424020 :       DAG.DeleteNode(N);
    1356             :     } else {
    1357     4329556 :       AddToWorklist(N);
    1358             :     }
    1359     7753576 :   } while (!Nodes.empty());
    1360     1829738 :   return true;
    1361             : }
    1362             : 
    1363             : //===----------------------------------------------------------------------===//
    1364             : //  Main DAG Combiner implementation
    1365             : //===----------------------------------------------------------------------===//
    1366             : 
    1367      674878 : void DAGCombiner::Run(CombineLevel AtLevel) {
    1368             :   // Set the instance variables so that the various visit routines may use them.
    1369      674878 :   Level = AtLevel;
    1370      674878 :   LegalOperations = Level >= AfterLegalizeVectorOps;
    1371      674878 :   LegalTypes = Level >= AfterLegalizeTypes;
    1372             : 
    1373             :   // Add all the dag nodes to the worklist.
    1374    23039202 :   for (SDNode &Node : DAG.allnodes())
    1375    21689446 :     AddToWorklist(&Node);
    1376             : 
    1377             :   // Create a dummy node (which is not added to allnodes), that adds a reference
    1378             :   // to the root node, preventing it from being deleted, and tracking any
    1379             :   // changes of the root.
    1380     2024634 :   HandleSDNode Dummy(DAG.getRoot());
    1381             : 
    1382             :   // While the worklist isn't empty, find a node and try to combine it.
    1383    54150818 :   while (!WorklistMap.empty()) {
    1384             :     SDNode *N;
    1385             :     // The Worklist holds the SDNodes in order, but it may contain null entries.
    1386             :     do {
    1387    55994578 :       N = Worklist.pop_back_val();
    1388    27997289 :     } while (!N);
    1389             : 
    1390    26400531 :     bool GoodWorklistEntry = WorklistMap.erase(N);
    1391             :     (void)GoodWorklistEntry;
    1392             :     assert(GoodWorklistEntry &&
    1393             :            "Found a worklist entry without a corresponding map entry!");
    1394             : 
    1395             :     // If N has no uses, it is dead.  Make sure to revisit all N's operands once
    1396             :     // N is deleted from the DAG, since they too may now be dead or may have a
    1397             :     // reduced number of uses, allowing other xforms.
    1398    26400531 :     if (recursivelyDeleteUnusedNodes(N))
    1399    25635424 :       continue;
    1400             : 
    1401    27165638 :     WorklistRemover DeadNodes(*this);
    1402             : 
    1403             :     // If this combine is running after legalizing the DAG, re-legalize any
    1404             :     // nodes pulled off the worklist.
    1405    25868297 :     if (Level == AfterLegalizeDAG) {
    1406    20735372 :       SmallSetVector<SDNode *, 16> UpdatedNodes;
    1407    10371258 :       bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
    1408             : 
    1409    31129770 :       for (SDNode *LN : UpdatedNodes) {
    1410       15996 :         AddToWorklist(LN);
    1411       15996 :         AddUsersToWorklist(LN);
    1412             :       }
    1413    10371258 :       if (!NIsValid)
    1414        7144 :         continue;
    1415             :     }
    1416             : 
    1417             :     DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
    1418             : 
    1419             :     // Add any operands of the new node which have not yet been combined to the
    1420             :     // worklist as well. Because the worklist uniques things already, this
    1421             :     // won't repeatedly process the same operand.
    1422    25861153 :     CombinedNodes.insert(N);
    1423   177762397 :     for (const SDValue &ChildN : N->op_values())
    1424    50089469 :       if (!CombinedNodes.count(ChildN.getNode()))
    1425    43600099 :         AddToWorklist(ChildN.getNode());
    1426             : 
    1427    25861153 :     SDValue RV = combine(N);
    1428             : 
    1429    25861153 :     if (!RV.getNode())
    1430    23976253 :       continue;
    1431             : 
    1432     1884900 :     ++NodesCombined;
    1433             : 
    1434             :     // If we get back the same node we passed in, rather than a new node or
    1435             :     // zero, we know that the node must have defined multiple values and
    1436             :     // CombineTo was used.  Since CombineTo takes care of the worklist
    1437             :     // mechanics for us, we have no work to do in this case.
    1438     1884900 :     if (RV.getNode() == N)
    1439      587559 :       continue;
    1440             : 
    1441             :     assert(N->getOpcode() != ISD::DELETED_NODE &&
    1442             :            RV.getOpcode() != ISD::DELETED_NODE &&
    1443             :            "Node was deleted but visit returned new node!");
    1444             : 
    1445             :     DEBUG(dbgs() << " ... into: ";
    1446             :           RV.getNode()->dump(&DAG));
    1447             : 
    1448     1297341 :     if (N->getNumValues() == RV.getNode()->getNumValues())
    1449     1183352 :       DAG.ReplaceAllUsesWith(N, RV.getNode());
    1450             :     else {
    1451             :       assert(N->getValueType(0) == RV.getValueType() &&
    1452             :              N->getNumValues() == 1 && "Type mismatch");
    1453      113989 :       DAG.ReplaceAllUsesWith(N, &RV);
    1454             :     }
    1455             : 
    1456             :     // Push the new node and any users onto the worklist
    1457     1297341 :     AddToWorklist(RV.getNode());
    1458     2594682 :     AddUsersToWorklist(RV.getNode());
    1459             : 
    1460             :     // Finally, if the node is now dead, remove it from the graph.  The node
    1461             :     // may not be dead if the replacement process recursively simplified to
    1462             :     // something else needing this node. This will also take care of adding any
    1463             :     // operands which have lost a user to the worklist.
    1464     1297341 :     recursivelyDeleteUnusedNodes(N);
    1465             :   }
    1466             : 
    1467             :   // If the root changed (e.g. it was a dead load), update the root.
    1468     1349756 :   DAG.setRoot(Dummy.getValue());
    1469      674878 :   DAG.RemoveDeadNodes();
    1470      674878 : }
    1471             : 
    1472    25862697 : SDValue DAGCombiner::visit(SDNode *N) {
    1473    51725394 :   switch (N->getOpcode()) {
    1474             :   default: break;
    1475     1596529 :   case ISD::TokenFactor:        return visitTokenFactor(N);
    1476       57226 :   case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
    1477     1882327 :   case ISD::ADD:                return visitADD(N);
    1478      375507 :   case ISD::SUB:                return visitSUB(N);
    1479         696 :   case ISD::ADDC:               return visitADDC(N);
    1480       72700 :   case ISD::UADDO:              return visitUADDO(N);
    1481         242 :   case ISD::SUBC:               return visitSUBC(N);
    1482         969 :   case ISD::USUBO:              return visitUSUBO(N);
    1483         751 :   case ISD::ADDE:               return visitADDE(N);
    1484       87352 :   case ISD::ADDCARRY:           return visitADDCARRY(N);
    1485         148 :   case ISD::SUBE:               return visitSUBE(N);
    1486         214 :   case ISD::SUBCARRY:           return visitSUBCARRY(N);
    1487       42452 :   case ISD::MUL:                return visitMUL(N);
    1488        2316 :   case ISD::SDIV:               return visitSDIV(N);
    1489        2172 :   case ISD::UDIV:               return visitUDIV(N);
    1490        2777 :   case ISD::SREM:
    1491        2777 :   case ISD::UREM:               return visitREM(N);
    1492        2242 :   case ISD::MULHU:              return visitMULHU(N);
    1493         931 :   case ISD::MULHS:              return visitMULHS(N);
    1494         713 :   case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
    1495        1723 :   case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
    1496          78 :   case ISD::SMULO:              return visitSMULO(N);
    1497         117 :   case ISD::UMULO:              return visitUMULO(N);
    1498       10976 :   case ISD::SMIN:
    1499             :   case ISD::SMAX:
    1500             :   case ISD::UMIN:
    1501       10976 :   case ISD::UMAX:               return visitIMINMAX(N);
    1502      201432 :   case ISD::AND:                return visitAND(N);
    1503       99942 :   case ISD::OR:                 return visitOR(N);
    1504       77008 :   case ISD::XOR:                return visitXOR(N);
    1505      172037 :   case ISD::SHL:                return visitSHL(N);
    1506       23648 :   case ISD::SRA:                return visitSRA(N);
    1507      128022 :   case ISD::SRL:                return visitSRL(N);
    1508        1600 :   case ISD::ROTR:
    1509        1600 :   case ISD::ROTL:               return visitRotate(N);
    1510         763 :   case ISD::ABS:                return visitABS(N);
    1511        1254 :   case ISD::BSWAP:              return visitBSWAP(N);
    1512         462 :   case ISD::BITREVERSE:         return visitBITREVERSE(N);
    1513         920 :   case ISD::CTLZ:               return visitCTLZ(N);
    1514         459 :   case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
    1515         354 :   case ISD::CTTZ:               return visitCTTZ(N);
    1516         349 :   case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
    1517        1202 :   case ISD::CTPOP:              return visitCTPOP(N);
    1518       50696 :   case ISD::SELECT:             return visitSELECT(N);
    1519       26306 :   case ISD::VSELECT:            return visitVSELECT(N);
    1520       15094 :   case ISD::SELECT_CC:          return visitSELECT_CC(N);
    1521      152726 :   case ISD::SETCC:              return visitSETCC(N);
    1522          89 :   case ISD::SETCCE:             return visitSETCCE(N);
    1523         155 :   case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
    1524       22642 :   case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
    1525      130342 :   case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
    1526       35147 :   case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
    1527       46858 :   case ISD::AssertZext:         return visitAssertZext(N);
    1528       40913 :   case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
    1529        2926 :   case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
    1530        3592 :   case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
    1531      154503 :   case ISD::TRUNCATE:           return visitTRUNCATE(N);
    1532      465355 :   case ISD::BITCAST:            return visitBITCAST(N);
    1533       16196 :   case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
    1534       27222 :   case ISD::FADD:               return visitFADD(N);
    1535        7472 :   case ISD::FSUB:               return visitFSUB(N);
    1536       16688 :   case ISD::FMUL:               return visitFMUL(N);
    1537        6325 :   case ISD::FMA:                return visitFMA(N);
    1538        3486 :   case ISD::FDIV:               return visitFDIV(N);
    1539         264 :   case ISD::FREM:               return visitFREM(N);
    1540        1224 :   case ISD::FSQRT:              return visitFSQRT(N);
    1541         857 :   case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
    1542        8419 :   case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
    1543        3976 :   case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
    1544        3746 :   case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
    1545        2518 :   case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
    1546        3536 :   case ISD::FP_ROUND:           return visitFP_ROUND(N);
    1547           0 :   case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
    1548        6069 :   case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
    1549        4381 :   case ISD::FNEG:               return visitFNEG(N);
    1550        2280 :   case ISD::FABS:               return visitFABS(N);
    1551        1013 :   case ISD::FFLOOR:             return visitFFLOOR(N);
    1552        2516 :   case ISD::FMINNUM:            return visitFMINNUM(N);
    1553        2189 :   case ISD::FMAXNUM:            return visitFMAXNUM(N);
    1554         921 :   case ISD::FCEIL:              return visitFCEIL(N);
    1555        1114 :   case ISD::FTRUNC:             return visitFTRUNC(N);
    1556      127972 :   case ISD::BRCOND:             return visitBRCOND(N);
    1557        5632 :   case ISD::BR_CC:              return visitBR_CC(N);
    1558     1630910 :   case ISD::LOAD:               return visitLOAD(N);
    1559     2715150 :   case ISD::STORE:              return visitSTORE(N);
    1560       33706 :   case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
    1561      218032 :   case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
    1562      297116 :   case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
    1563       21926 :   case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
    1564       42269 :   case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
    1565       40596 :   case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
    1566       13583 :   case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
    1567       12870 :   case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
    1568         642 :   case ISD::MGATHER:            return visitMGATHER(N);
    1569         725 :   case ISD::MLOAD:              return visitMLOAD(N);
    1570         282 :   case ISD::MSCATTER:           return visitMSCATTER(N);
    1571         369 :   case ISD::MSTORE:             return visitMSTORE(N);
    1572        2951 :   case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
    1573        3688 :   case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
    1574             :   }
    1575    14573912 :   return SDValue();
    1576             : }
    1577             : 
                       /// Try to simplify \p N. The generic visit() combines run first; if they
                       /// produce nothing, the following fallbacks are tried in order: the
                       /// target-specific combine, the Promote* helpers for a fixed set of
                       /// opcodes, and finally reuse of an already existing commuted copy of a
                       /// commutative binary node. Returns a null SDValue if no step produced a
                       /// result.
    1578    25862697 : SDValue DAGCombiner::combine(SDNode *N) {
    1579    25862697 :   SDValue RV = visit(N);
    1580             : 
    1581             :   // If nothing happened, try a target-specific DAG combine.
    1582    25862697 :   if (!RV.getNode()) {
    1583             :     assert(N->getOpcode() != ISD::DELETED_NODE &&
    1584             :            "Node was deleted but visit returned NULL!");
    1585             : 
                           // Opcodes at or above BUILTIN_OP_END always go to the target hook
                           // (presumably these are target-defined nodes -- TODO confirm); other
                           // opcodes only do so when the target has asked for them.
    1586    70485276 :     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
    1587    44281552 :         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
    1588             : 
    1589             :       // Expose the DAG combiner to the target combiner impls.
    1590             :       TargetLowering::DAGCombinerInfo
    1591    18733060 :         DagCombineInfo(DAG, Level, false, this);
    1592             : 
    1593     9366530 :       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
    1594             :     }
    1595             :   }
    1596             : 
    1597             :   // If nothing happened still, try promoting the operation.
    1598    25862697 :   if (!RV.getNode()) {
    1599    47957216 :     switch (N->getOpcode()) {
    1600             :     default: break;
    1601     2243065 :     case ISD::ADD:
    1602             :     case ISD::SUB:
    1603             :     case ISD::MUL:
    1604             :     case ISD::AND:
    1605             :     case ISD::OR:
    1606             :     case ISD::XOR:
    1607     2243065 :       RV = PromoteIntBinOp(SDValue(N, 0));
    1608     2243065 :       break;
    1609      251694 :     case ISD::SHL:
    1610             :     case ISD::SRA:
    1611             :     case ISD::SRL:
    1612      251694 :       RV = PromoteIntShiftOp(SDValue(N, 0));
    1613      251694 :       break;
    1614      157650 :     case ISD::SIGN_EXTEND:
    1615             :     case ISD::ZERO_EXTEND:
    1616             :     case ISD::ANY_EXTEND:
    1617      157650 :       RV = PromoteExtend(SDValue(N, 0));
    1618      157650 :       break;
    1619     1493069 :     case ISD::LOAD:
                             // Unlike the other helpers, PromoteLoad only reports success; on
                             // success N itself is used as the result.
    1620     1493069 :       if (PromoteLoad(SDValue(N, 0)))
    1621           0 :         RV = SDValue(N, 0);
    1622             :       break;
    1623             :     }
    1624             :   }
    1625             : 
    1626             :   // If N is a commutative binary node, try to eliminate it if the commuted
    1627             :   // version is already present in the DAG.
    1628    51841866 :   if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
    1629     2002271 :       N->getNumValues() == 1) {
    1630     3853924 :     SDValue N0 = N->getOperand(0);
    1631     3853924 :     SDValue N1 = N->getOperand(1);
    1632             : 
    1633             :     // Constant operands are canonicalized to RHS.
                           // So skip the lookup when only N1 is a constant: the commuted form
                           // would have its constant on the LHS. Identical operands are skipped
                           // too, since commuting them yields the same node.
    1634     3835888 :     if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
    1635      568999 :       SDValue Ops[] = {N1, N0};
    1636     2844995 :       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
    1637     1137998 :                                             N->getFlags());
    1638      568999 :       if (CSENode)
    1639          17 :         return SDValue(CSENode, 0);
    1640             :     }
    1641             :   }
    1642             : 
    1643    25862680 :   return RV;
    1644             : }
    1645             : 
    1646             : /// Given a node, return its input chain (an operand of type MVT::Other) if it
    1647             : /// has one; otherwise return a null SDValue.
    1648     2061304 : static SDValue getInputChainForNode(SDNode *N) {
    1649     4122608 :   if (unsigned NumOps = N->getNumOperands()) {
                           // Check the first operand, then the last, before scanning the ones in
                           // between.
    1650     8240708 :     if (N->getOperand(0).getValueType() == MVT::Other)
    1651     4071870 :       return N->getOperand(0);
    1652       72726 :     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
    1653       48484 :       return N->getOperand(NumOps-1);
    1654           0 :     for (unsigned i = 1; i < NumOps-1; ++i)
    1655           0 :       if (N->getOperand(i).getValueType() == MVT::Other)
    1656           0 :         return N->getOperand(i);
    1657             :   }
                         // No operands at all, or none of type MVT::Other.
    1658        1127 :   return SDValue();
    1659             : }
    1660             : 
                       /// Simplify the TokenFactor node \p N.
                       ///
                       /// A two-operand node in which one operand's input chain is the other
                       /// operand is replaced by the first of those operands. Otherwise, nested
                       /// single-use TokenFactor operands are flattened into one operand list,
                       /// EntryToken and duplicate operands are dropped, and operands that are
                       /// reached while walking up the chain of another operand are pruned.
                       ///
                       /// Returns the replacement value if anything changed (the DAG entry node
                       /// when no operands remain), otherwise a null SDValue.
    1661     1596529 : SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
    1662             :   // If N has two operands, where one has an input chain equal to the other,
    1663             :   // the 'other' chain is redundant.
    1664     1596529 :   if (N->getNumOperands() == 2) {
    1665     4164708 :     if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
    1666       42100 :       return N->getOperand(0);
    1667     4018490 :     if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
    1668      124036 :       return N->getOperand(1);
    1669             :   }
    1670             : 
    1671     1513461 :   SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
    1672     3026922 :   SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
    1673     3026922 :   SmallPtrSet<SDNode*, 16> SeenOps; // Nodes already present in Ops.
    1674     1513461 :   bool Changed = false;             // If we should replace this token factor.
    1675             : 
    1676             :   // Start out with this token factor.
    1677     1513461 :   TFs.push_back(N);
    1678             : 
    1679             :   // Iterate through token factors.  The TFs list grows when new token factors
    1680             :   // are encountered, so the size is re-read on every iteration.
    1681     7082494 :   for (unsigned i = 0; i < TFs.size(); ++i) {
    1682     4055572 :     SDNode *TF = TFs[i];
    1683             : 
    1684             :     // Check each of the operands.
    1685    16822968 :     for (const SDValue &Op : TF->op_values()) {
    1686             : 
    1687     6383698 :       switch (Op.getOpcode()) {
    1688        5723 :       case ISD::EntryToken:
    1689             :         // Entry tokens don't need to be added to the list. They are
    1690             :         // redundant.
    1691        5723 :         Changed = true;
    1692        5723 :         break;
    1693             : 
    1694      930217 :       case ISD::TokenFactor:
                               // Only a token factor with a single use that is not already
                               // queued is flattened; any other one falls through and is kept as
                               // an ordinary operand.
    1695     1444542 :         if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
    1696             :           // Queue up for processing.
    1697      514325 :           TFs.push_back(Op.getNode());
    1698             :           // Clean up in case the token factor is removed.
    1699      514325 :           AddToWorklist(Op.getNode());
    1700      514325 :           Changed = true;
    1701      514325 :           break;
    1702             :         }
    1703             :         LLVM_FALLTHROUGH;
    1704             : 
    1705             :       default:
    1706             :         // Only add if it isn't already in the list.
    1707     5863650 :         if (SeenOps.insert(Op.getNode()).second)
    1708     5784303 :           Ops.push_back(Op);
    1709             :         else
    1710       79347 :           Changed = true;
    1711             :         break;
    1712             :       }
    1713             :     }
    1714             :   }
    1715             : 
    1716             :   // Remove Nodes that are chained to another node in the list. Do so
    1717             :   // by walking up chains breadth-first stopping when we've seen
    1718             :   // another operand. In general we must climb to the EntryNode, but we can exit
    1719             :   // early if we find all remaining work is associated with just one operand as
    1720             :   // no further pruning is possible.
    1721             : 
    1722             :   // List of nodes to search through and original Ops from which they originate.
    1723     3026922 :   SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
    1724     3026922 :   SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
    1725     3026922 :   SmallPtrSet<SDNode *, 16> SeenChains; // Nodes reached via a chain walk.
    1726     1513461 :   bool DidPruneOps = false;
    1727             : 
                         // Seed the search: each Op starts with one pending worklist entry, tagged
                         // with the Op's index in Ops.
    1728     1513461 :   unsigned NumLeftToConsider = 0;
    1729    10324686 :   for (const SDValue &Op : Ops) {
    1730    11568606 :     Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
    1731     5784303 :     OpWorkCount.push_back(1);
    1732             :   }
    1733             : 
                         // NOTE(review): this local lambda has the same name as the
                         // AddToWorklist(...) called in the flattening loop above; from here on
                         // the name refers to the lambda.
                         // CurIdx is the worklist position being processed, Op the chain node
                         // that was reached, and OpNumber the index of the Op whose search
                         // reached it.
    1734    12721433 :   auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    1735             :     // If this is an Op, we can remove the op from the list. Re-mark any
    1736             :     // search associated with it as from the current OpNumber.
    1737    12721433 :     if (SeenOps.count(Op) != 0) {
    1738      680020 :       Changed = true;
    1739      680020 :       DidPruneOps = true;
                             // Linear search for the index of Op within Ops.
    1740      680020 :       unsigned OrigOpNumber = 0;
    1741   102864536 :       while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
    1742    25206119 :         OrigOpNumber++;
    1743             :       assert((OrigOpNumber != Ops.size()) &&
    1744             :              "expected to find TokenFactor Operand");
    1745             :       // Re-mark worklist from OrigOpNumber to OpNumber
    1746    49840311 :       for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
    1747    41429788 :         if (Worklist[i].second == OrigOpNumber) {
    1748      793138 :           Worklist[i].second = OpNumber;
    1749             :         }
    1750             :       }
                             // Move the pruned Op's outstanding work over to OpNumber.
    1751     9090543 :       OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
    1752     1360040 :       OpWorkCount[OrigOpNumber] = 0;
    1753      680020 :       NumLeftToConsider--;
    1754             :     }
    1755             :     // Add if it's a new chain
    1756    12721433 :     if (SeenChains.insert(Op).second) {
    1757    21151449 :       OpWorkCount[OpNumber]++;
    1758    14100966 :       Worklist.push_back(std::make_pair(Op, OpNumber));
    1759             :     }
    1760    14234894 :   };
    1761             : 
                         // The walk examines at most 1024 worklist entries.
    1762    24272478 :   for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
    1763             :     // We need to be considering at least 2 Ops to prune.
    1764    11853882 :     if (NumLeftToConsider <= 1)
    1765             :       break;
    1766    21245556 :     auto CurNode = Worklist[i].first;
    1767    21245556 :     auto CurOpNumber = Worklist[i].second;
    1768             :     assert((OpWorkCount[CurOpNumber] > 0) &&
    1769             :            "Node should not appear in worklist");
    1770    21245556 :     switch (CurNode->getOpcode()) {
    1771      296057 :     case ISD::EntryToken:
    1772             :       // Hitting EntryToken is the only way for the search to terminate without
    1773             :       // hitting another operand's search. Increment the count here to offset
    1774             :       // the decrement that may happen at the bottom of the loop, so that this
    1775             :       // operand is not marked as considered.
    1776      296057 :       NumLeftToConsider++;
    1777      296057 :       break;
    1778     1518754 :     case ISD::TokenFactor:
    1779    12210252 :       for (const SDValue &Op : CurNode->op_values())
    1780     4586372 :         AddToWorklist(i, Op.getNode(), CurOpNumber);
    1781             :       break;
    1782      432509 :     case ISD::CopyFromReg:
    1783             :     case ISD::CopyToReg:
                             // For these nodes the walk continues through operand 0.
    1784      865018 :       AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
    1785      432509 :       break;
    1786     8375458 :     default:
                             // Memory nodes are followed through getChain(); for any other node
                             // the walk stops here.
    1787     7702552 :       if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
    1788     7702552 :         AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
    1789             :       break;
    1790             :     }
                           // This entry is done; an Op with no remaining work no longer needs to be
                           // considered.
    1791    21245556 :     OpWorkCount[CurOpNumber]--;
    1792    21245556 :     if (OpWorkCount[CurOpNumber] == 0)
    1793     4055642 :       NumLeftToConsider--;
    1794             :   }
    1795             : 
    1796             :   // If we've changed things around then replace token factor.
    1797     1513461 :   if (Changed) {
    1798      411262 :     SDValue Result;
    1799      411262 :     if (Ops.empty()) {
    1800             :       // The entry token is the only possible outcome.
    1801         420 :       Result = DAG.getEntryNode();
    1802             :     } else {
    1803      411052 :       if (DidPruneOps) {
    1804      530762 :         SmallVector<SDValue, 8> PrunedOps;
    1805             :         // Keep only the Ops that were not reached during the chain walk.
    1806     1688758 :         for (const SDValue &Op : Ops) {
    1807     1423377 :           if (SeenChains.count(Op.getNode()) == 0)
    1808      886646 :             PrunedOps.push_back(Op);
    1809             :         }
    1810     1592286 :         Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
    1811             :       } else {
    1812      874026 :         Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
    1813             :       }
    1814             :     }
    1815      411262 :     return Result;
    1816             :   }
    1817     1102199 :   return SDValue();
    1818             : }
    1819             : 
    1820             : /// MERGE_VALUES can always be eliminated: every use of result i of \p N is
                       /// replaced with operand i of \p N, and \p N is then deleted.
    1821       57226 : SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
                         // NOTE(review): DeadNodes is not referenced again in this function;
                         // presumably it works through its constructor/destructor -- confirm
                         // against the WorklistRemover definition.
    1822      114452 :   WorklistRemover DeadNodes(*this);
    1823             :   // Replacing results may cause a different MERGE_VALUES to suddenly
    1824             :   // be CSE'd with N, and carry its uses with it. Iterate until no
    1825             :   // uses remain, to ensure that the node can be safely deleted.
    1826             :   // First add the users of this node to the work list so that they
    1827             :   // can be tried again once they have new operands.
    1828             :   AddUsersToWorklist(N);
    1829             :   do {
    1830      232719 :     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
    1831      354801 :       DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
    1832       57226 :   } while (!N->use_empty());
    1833       57226 :   deleteAndRecombine(N);
    1834      114452 :   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    1835             : }
    1836             : 
    1837             : /// If \p N is a ConstantSDNode for which isOpaque() is false, return it cast
    1838             : /// to a ConstantSDNode pointer; otherwise return nullptr.
    1839             : static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
    1840       82125 :   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
    1841       82125 :   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
    1842             : }
    1843             : 
                       /// Fold a binary operator with a constant second operand into a select of
                       /// constants that feeds its first operand:
                       ///   binop (select Cond, CT, CF), C1 --> select Cond, (CT binop C1),
                       ///                                                   (CF binop C1)
                       ///
                       /// \p BO must be one of the integer or floating-point binary opcodes
                       /// listed in the assert below. The fold is only done when operand 1 of
                       /// \p BO and both select arms are constants (or constant vectors) and the
                       /// select has a single use. Returns the new select, or a null SDValue if
                       /// the pattern does not match.
    1844     3046100 : SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
    1845     6092200 :   auto BinOpcode = BO->getOpcode();
    1846             :   assert((BinOpcode == ISD::ADD || BinOpcode == ISD::SUB ||
    1847             :           BinOpcode == ISD::MUL || BinOpcode == ISD::SDIV ||
    1848             :           BinOpcode == ISD::UDIV || BinOpcode == ISD::SREM ||
    1849             :           BinOpcode == ISD::UREM || BinOpcode == ISD::AND ||
    1850             :           BinOpcode == ISD::OR || BinOpcode == ISD::XOR ||
    1851             :           BinOpcode == ISD::SHL || BinOpcode == ISD::SRL ||
    1852             :           BinOpcode == ISD::SRA || BinOpcode == ISD::FADD ||
    1853             :           BinOpcode == ISD::FSUB || BinOpcode == ISD::FMUL ||
    1854             :           BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
    1855             :          "Unexpected binary operator");
    1856             : 
    1857             :   // Bail out if any constants are opaque because we can't constant fold those.
    1858     6092200 :   SDValue C1 = BO->getOperand(1);
    1859     3046100 :   if (!isConstantOrConstantVector(C1, true) &&
    1860     1598710 :       !isConstantFPBuildVectorOrConstantFP(C1))
    1861      795931 :     return SDValue();
    1862             : 
    1863             :   // Don't do this unless the old select is going away. We want to eliminate the
    1864             :   // binary operator, not replace a binop with a select.
    1865             :   // TODO: Handle ISD::SELECT_CC.
    1866     4500338 :   SDValue Sel = BO->getOperand(0);
    1867     4524762 :   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
    1868     2225996 :     return SDValue();
    1869             : 
                         // Both select arms must pass the same constant check as C1.
    1870       48346 :   SDValue CT = Sel.getOperand(1);
    1871       24173 :   if (!isConstantOrConstantVector(CT, true) &&
    1872         690 :       !isConstantFPBuildVectorOrConstantFP(CT))
    1873         337 :     return SDValue();
    1874             : 
    1875       47672 :   SDValue CF = Sel.getOperand(2);
    1876       23836 :   if (!isConstantOrConstantVector(CF, true) &&
    1877         122 :       !isConstantFPBuildVectorOrConstantFP(CF))
    1878          54 :     return SDValue();
    1879             : 
    1880             :   // We have a select-of-constants followed by a binary operator with a
    1881             :   // constant. Eliminate the binop by pulling the constant math into the select.
    1882             :   // Example: add (select Cond, CT, CF), C1 --> select Cond, CT + C1, CF + C1
    1883       47564 :   EVT VT = Sel.getValueType();
    1884       23782 :   SDLoc DL(Sel);
                         // The asserts below expect getNode() to fold each constant pair to a
                         // constant or undef.
    1885       47564 :   SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1);
    1886             :   assert((NewCT.isUndef() || isConstantOrConstantVector(NewCT) ||
    1887             :           isConstantFPBuildVectorOrConstantFP(NewCT)) &&
    1888             :          "Failed to constant fold a binop with constant operands");
    1889             : 
    1890       47564 :   SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1);
    1891             :   assert((NewCF.isUndef() || isConstantOrConstantVector(NewCF) ||
    1892             :           isConstantFPBuildVectorOrConstantFP(NewCF)) &&
    1893             :          "Failed to constant fold a binop with constant operands");
    1894             : 
    1895       47564 :   return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
    1896             : }
    1897             : 
                       /// Combine an ISD::ADD node \p N.
                       ///
                       /// The folds are tried in this order: vector simplifications, undef
                       /// operands, constant canonicalization and folding, patterns with a
                       /// constant RHS, folding into a select of constants, reassociation,
                       /// algebraic add/sub identities, demanded-bits simplification, conversion
                       /// to OR when the operands share no set bits, and finally visitADDLike
                       /// with the operands in both orders.
                       ///
                       /// Returns the replacement value, or a null SDValue if no fold applied.
    1898     1882327 : SDValue DAGCombiner::visitADD(SDNode *N) {
    1899     3764654 :   SDValue N0 = N->getOperand(0);
    1900     3764654 :   SDValue N1 = N->getOperand(1);
    1901     3764654 :   EVT VT = N0.getValueType();
    1902     3764654 :   SDLoc DL(N);
    1903             : 
    1904             :   // fold vector ops
    1905     1882327 :   if (VT.isVector()) {
    1906       98538 :     if (SDValue FoldedVOp = SimplifyVBinOp(N))
    1907          18 :       return FoldedVOp;
    1908             : 
    1909             :     // fold (add x, 0) -> x, vector edition
    1910       98520 :     if (ISD::isBuildVectorAllZeros(N1.getNode()))
    1911          76 :       return N0;
    1912       98444 :     if (ISD::isBuildVectorAllZeros(N0.getNode()))
    1913          59 :       return N1;
    1914             :   }
    1915             : 
    1916             :   // fold (add x, undef) -> undef
    1917     3764348 :   if (N0.isUndef())
    1918           3 :     return N0;
    1919             : 
    1920     3764342 :   if (N1.isUndef())
    1921           4 :     return N1;
    1922             : 
    1923     1882167 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    1924             :     // canonicalize constant to RHS
    1925        1593 :     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
    1926        2088 :       return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
    1927             :     // fold (add c1, c2) -> c1+c2
    1928         549 :     return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
    1929         549 :                                       N1.getNode());
    1930             :   }
    1931             : 
    1932             :   // fold (add x, 0) -> x
    1933     1880574 :   if (isNullConstant(N1))
    1934          20 :     return N0;
    1935             : 
                         // Folds that need a non-opaque constant (or constant vector) on the RHS.
    1936     1880554 :   if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
    1937             :     // fold ((c1-A)+c2) -> (c1+c2)-A
    1938     2871992 :     if (N0.getOpcode() == ISD::SUB &&
    1939         692 :         isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
    1940             :       // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
    1941          24 :       return DAG.getNode(ISD::SUB, DL, VT,
    1942          72 :                          DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
    1943          96 :                          N0.getOperand(1));
    1944             :     }
    1945             : 
    1946             :     // add (sext i1 X), 1 -> zext (not i1 X)
    1947             :     // We don't transform this pattern:
    1948             :     //   add (zext i1 X), -1 -> sext (not i1 X)
    1949             :     // because most (?) targets generate better code for the zext form.
    1950     2871976 :     if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
    1951         180 :         isOneConstantOrOneSplatConstant(N1)) {
    1952          46 :       SDValue X = N0.getOperand(0);
                             // When LegalOperations is set, the XOR and ZERO_EXTEND created here
                             // must themselves be legal.
    1953          23 :       if ((!LegalOperations ||
    1954           2 :            (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
    1955          45 :             TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
    1956          22 :           X.getScalarValueSizeInBits() == 1) {
    1957          34 :         SDValue Not = DAG.getNOT(DL, X, X.getValueType());
    1958          34 :         return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
    1959             :       }
    1960             :     }
    1961             : 
    1962             :     // Undo the add -> or combine to merge constant offsets from a frame index.
                           // i.e. (add (or FI, c1), c2) -> (add FI, (add c2, c1)) when FI and c1
                           // have no common bits set.
    1963     1435782 :     if (N0.getOpcode() == ISD::OR &&
    1964        6154 :         isa<FrameIndexSDNode>(N0.getOperand(0)) &&
    1965     1444305 :         isa<ConstantSDNode>(N0.getOperand(1)) &&
    1966        8523 :         DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
    1967        8523 :       SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
    1968        8523 :       return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
    1969             :     }
    1970             :   }
    1971             : 
    1972     1877672 :   if (SDValue NewSel = foldBinOpIntoSelect(N))
    1973           8 :     return NewSel;
    1974             : 
    1975             :   // reassociate add
    1976     1877664 :   if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1))
    1977      175547 :     return RADD;
    1978             : 
    1979             :   // fold ((0-A) + B) -> B-A
    1980     3406505 :   if (N0.getOpcode() == ISD::SUB &&
    1981        4542 :       isNullConstantOrNullSplatConstant(N0.getOperand(0)))
    1982          30 :     return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
    1983             : 
    1984             :   // fold (A + (0-B)) -> A-B
    1985     3404745 :   if (N1.getOpcode() == ISD::SUB &&
    1986        1062 :       isNullConstantOrNullSplatConstant(N1.getOperand(0)))
    1987         228 :     return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
    1988             : 
    1989             :   // fold (A+(B-A)) -> B
    1990     3404975 :   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    1991           6 :     return N1.getOperand(0);
    1992             : 
    1993             :   // fold ((B-A)+A) -> B
    1994     3408574 :   if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    1995           4 :     return N0.getOperand(0);
    1996             : 
    1997             :   // fold (A+(B-(A+C))) to (B-C)
    1998     3405408 :   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
    1999          17 :       N0 == N1.getOperand(1).getOperand(0))
    2000           6 :     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
    2001           8 :                        N1.getOperand(1).getOperand(1));
    2002             : 
    2003             :   // fold (A+(B-(C+A))) to (B-C)
    2004     3405398 :   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
    2005          12 :       N0 == N1.getOperand(1).getOperand(1))
    2006           9 :     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
    2007          12 :                        N1.getOperand(1).getOperand(0));
    2008             : 
    2009             :   // fold (A+((B-A)+or-C)) to (B+or-C)
    2010     5108230 :   if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
    2011     1708143 :       N1.getOperand(0).getOpcode() == ISD::SUB &&
    2012          41 :       N0 == N1.getOperand(0).getOperand(1))
    2013          20 :     return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
    2014          20 :                        N1.getOperand(1));
    2015             : 
    2016             :   // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
    2017     3408544 :   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    2018          24 :     SDValue N00 = N0.getOperand(0);
    2019          24 :     SDValue N01 = N0.getOperand(1);
    2020          24 :     SDValue N10 = N1.getOperand(0);
    2021          24 :     SDValue N11 = N1.getOperand(1);
    2022             : 
    2023          12 :     if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
    2024           5 :       return DAG.getNode(ISD::SUB, DL, VT,
    2025          15 :                          DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
    2026          35 :                          DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
    2027             :   }
    2028             : 
                         // When SimplifyDemandedBits reports a change, N itself is returned as
                         // the result.
    2029     1702011 :   if (SimplifyDemandedBits(SDValue(N, 0)))
    2030        2153 :     return SDValue(N, 0);
    2031             : 
    2032             :   // fold (a+b) -> (a|b) iff a and b share no bits.
    2033     3386738 :   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
    2034     1686880 :       DAG.haveNoCommonBitsSet(N0, N1))
    2035       52256 :     return DAG.getNode(ISD::OR, DL, VT, N0, N1);
    2036             : 
                         // visitADDLike matches patterns on one specific operand position, so it
                         // is tried with the operands in both orders.
    2037     1673730 :   if (SDValue Combined = visitADDLike(N0, N1, N))
    2038         775 :     return Combined;
    2039             : 
    2040     1672955 :   if (SDValue Combined = visitADDLike(N1, N0, N))
    2041          42 :     return Combined;
    2042             : 
    2043     1672913 :   return SDValue();
    2044             : }
    2045             : 
                       /// Look through TRUNCATE, ZERO_EXTEND and (AND x, 1) nodes wrapped around
                       /// \p V and return the underlying value if it can be used as a carry.
                       ///
                       /// The underlying value qualifies when it is result number 1 of an
                       /// ADDCARRY, SUBCARRY, UADDO or USUBO node and is either masked with 1 on
                       /// the way or has a type for which \p TLI reports
                       /// ZeroOrOneBooleanContent. Returns a null SDValue otherwise.
    2046     2953974 : static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
    2047     2953974 :   bool Masked = false;
    2048             : 
    2049             :   // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
    2050             :   while (true) {
    2051     8876356 :     if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
    2052        8416 :       V = V.getOperand(0);
    2053        4208 :       continue;
    2054             :     }
    2055             : 
                           // Only an AND with the constant 1 is peeled; remember that the value
                           // was masked.
    2056     5909912 :     if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
    2057         249 :       Masked = true;
    2058         498 :       V = V.getOperand(0);
    2059         249 :       continue;
    2060             :     }
    2061             : 
    2062             :     break;
    2063             :   }
    2064             : 
    2065             :   // If this is not a carry, return.
    2066     2953974 :   if (V.getResNo() != 1)
    2067     2951821 :     return SDValue();
    2068             : 
    2069        7808 :   if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
    2070        7608 :       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
    2071        1785 :     return SDValue();
    2072             : 
    2073             :   // If the result is masked, then no matter what kind of bool it is we can
    2074             :   // return. If it isn't, then we need to make sure the bool type is either 0 or
    2075             :   // 1 and not other values.
    2076         555 :   if (Masked ||
    2077         374 :       TLI.getBooleanContents(V.getValueType()) ==
    2078             :           TargetLoweringBase::ZeroOrOneBooleanContent)
    2079         368 :     return V;
    2080             : 
    2081           0 :   return SDValue();
    2082             : }
    2083             : 
    2084     3346685 : SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
    2085     6693370 :   EVT VT = N0.getValueType();
    2086     6693370 :   SDLoc DL(LocReference);
    2087             : 
    2088             :   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
    2089     6957366 :   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
    2090        1614 :       isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
    2091          30 :     return DAG.getNode(ISD::SUB, DL, VT, N0,
    2092          30 :                        DAG.getNode(ISD::SHL, DL, VT,
    2093          90 :                                    N1.getOperand(0).getOperand(1),
    2094         120 :                                    N1.getOperand(1)));
    2095             : 
    2096     6693310 :   if (N1.getOpcode() == ISD::AND) {
    2097        3788 :     SDValue AndOp0 = N1.getOperand(0);
    2098        1894 :     unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
    2099        1894 :     unsigned DestBits = VT.getScalarSizeInBits();
    2100             : 
    2101             :     // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
    2102             :     // and similar xforms where the inner op is either ~0 or 0.
    2103        1950 :     if (NumSignBits == DestBits &&
    2104         112 :         isOneConstantOrOneSplatConstant(N1->getOperand(1)))
    2105          98 :       return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
    2106             :   }
    2107             : 
    2108             :   // add (sext i1), X -> sub X, (zext i1)
    2109     3346606 :   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
    2110     3350536 :       N0.getOperand(0).getValueType() == MVT::i1 &&
    2111         256 :       !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
    2112         294 :     SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    2113         196 :     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
    2114             :   }
    2115             : 
    2116             :   // add X, (sextinreg Y i1) -> sub X, (and Y 1)
    2117     6693016 :   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    2118        2901 :     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    2119        1592 :     if (TN->getVT() == MVT::i1) {
    2120        1875 :       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
    2121        1250 :                                  DAG.getConstant(1, DL, VT));
    2122        1250 :       return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
    2123             :     }
    2124             :   }
    2125             : 
    2126             :   // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
    2127     6691779 :   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)))
    2128          11 :     return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
    2129          44 :                        N0, N1.getOperand(0), N1.getOperand(2));
    2130             : 
    2131             :   // (add X, Carry) -> (addcarry X, 0, Carry)
    2132     3345872 :   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
    2133     2636841 :     if (SDValue Carry = getAsCarry(TLI, N1))
    2134           4 :       return DAG.getNode(ISD::ADDCARRY, DL,
    2135           4 :                          DAG.getVTList(VT, Carry.getValueType()), N0,
    2136          12 :                          DAG.getConstant(0, DL, VT), Carry);
    2137             : 
    2138     3345868 :   return SDValue();
    2139             : }
    2140             : 
    2141         696 : SDValue DAGCombiner::visitADDC(SDNode *N) {
    2142        1392 :   SDValue N0 = N->getOperand(0);
    2143        1392 :   SDValue N1 = N->getOperand(1);
    2144        1392 :   EVT VT = N0.getValueType();
    2145        1392 :   SDLoc DL(N);
    2146             : 
    2147             :   // If the flag result is dead, turn this into an ADD.
    2148         696 :   if (!N->hasAnyUseOfValue(1))
    2149          17 :     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
    2150          68 :                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
    2151             : 
    2152             :   // canonicalize constant to RHS.
    2153         679 :   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
    2154         679 :   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
    2155         679 :   if (N0C && !N1C)
    2156           0 :     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
    2157             : 
    2158             :   // fold (addc x, 0) -> x + no carry out
    2159         679 :   if (isNullConstant(N1))
    2160           0 :     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
    2161           0 :                                         DL, MVT::Glue));
    2162             : 
    2163             :   // If it cannot overflow, transform into an add.
    2164         679 :   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
    2165           0 :     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
    2166           0 :                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
    2167             : 
    2168         679 :   return SDValue();
    2169             : }
    2170             : 
    2171       72700 : SDValue DAGCombiner::visitUADDO(SDNode *N) {
    2172      145400 :   SDValue N0 = N->getOperand(0);
    2173      145400 :   SDValue N1 = N->getOperand(1);
    2174      145400 :   EVT VT = N0.getValueType();
    2175       72700 :   if (VT.isVector())
    2176           0 :     return SDValue();
    2177             : 
    2178      145400 :   EVT CarryVT = N->getValueType(1);
    2179       72700 :   SDLoc DL(N);
    2180             : 
    2181             :   // If the flag result is dead, turn this into an ADD.
    2182       72700 :   if (!N->hasAnyUseOfValue(1))
    2183         331 :     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
    2184         993 :                      DAG.getUNDEF(CarryVT));
    2185             : 
    2186             :   // canonicalize constant to RHS.
    2187       72369 :   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
    2188       72369 :   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
    2189       72369 :   if (N0C && !N1C)
    2190           4 :     return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
    2191             : 
    2192             :   // fold (uaddo x, 0) -> x + no carry out
    2193       72367 :   if (isNullConstant(N1))
    2194         256 :     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
    2195             : 
    2196             :   // If it cannot overflow, transform into an add.
    2197       72111 :   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
    2198          15 :     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
    2199          45 :                      DAG.getConstant(0, DL, CarryVT));
    2200             : 
    2201       72096 :   if (SDValue Combined = visitUADDOLike(N0, N1, N))
    2202          41 :     return Combined;
    2203             : 
    2204       72055 :   if (SDValue Combined = visitUADDOLike(N1, N0, N))
    2205           7 :     return Combined;
    2206             : 
    2207       72048 :   return SDValue();
    2208             : }
    2209             : 
    2210      144151 : SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
    2211      288302 :   auto VT = N0.getValueType();
    2212             : 
    2213             :   // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
    2214             :   // If Y + 1 cannot overflow.
    2215      288504 :   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
    2216         318 :     SDValue Y = N1.getOperand(0);
    2217         636 :     SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
    2218         159 :     if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
    2219         144 :       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
    2220         192 :                          N1.getOperand(2));
    2221             :   }
    2222             : 
    2223             :   // (uaddo X, Carry) -> (addcarry X, 0, Carry)
    2224      144103 :   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
    2225      143821 :     if (SDValue Carry = getAsCarry(TLI, N1))
    2226           0 :       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
    2227           0 :                          DAG.getConstant(0, SDLoc(N), VT), Carry);
    2228             : 
    2229      144103 :   return SDValue();
    2230             : }
    2231             : 
    2232         751 : SDValue DAGCombiner::visitADDE(SDNode *N) {
    2233        1502 :   SDValue N0 = N->getOperand(0);
    2234        1502 :   SDValue N1 = N->getOperand(1);
    2235        1502 :   SDValue CarryIn = N->getOperand(2);
    2236             : 
    2237             :   // canonicalize constant to RHS
    2238         751 :   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
    2239         751 :   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
    2240         751 :   if (N0C && !N1C)
    2241          18 :     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
    2242          18 :                        N1, N0, CarryIn);
    2243             : 
    2244             :   // fold (adde x, y, false) -> (addc x, y)
    2245        1490 :   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    2246           0 :     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
    2247             : 
    2248         745 :   return SDValue();
    2249             : }
    2250             : 
    2251       87352 : SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
    2252      174704 :   SDValue N0 = N->getOperand(0);
    2253      174704 :   SDValue N1 = N->getOperand(1);
    2254      174704 :   SDValue CarryIn = N->getOperand(2);
    2255      174704 :   SDLoc DL(N);
    2256             : 
    2257             :   // canonicalize constant to RHS
    2258       87352 :   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
    2259       87352 :   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
    2260       87352 :   if (N0C && !N1C)
    2261         346 :     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
    2262             : 
    2263             :   // fold (addcarry x, y, false) -> (uaddo x, y)
    2264       87179 :   if (isNullConstant(CarryIn))
    2265         508 :     return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
    2266             : 
    2267             :   // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
    2268       86925 :   if (isNullConstant(N0) && isNullConstant(N1)) {
    2269         438 :     EVT VT = N0.getValueType();
    2270         438 :     EVT CarryVT = CarryIn.getValueType();
    2271         219 :     SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
    2272         219 :     AddToWorklist(CarryExt.getNode());
    2273         219 :     return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
    2274             :                                     DAG.getConstant(1, DL, VT)),
    2275         657 :                      DAG.getConstant(0, DL, CarryVT));
    2276             :   }
    2277             : 
    2278       86706 :   if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
    2279          51 :     return Combined;
    2280             : 
    2281       86655 :   if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
    2282           0 :     return Combined;
    2283             : 
    2284       86655 :   return SDValue();
    2285             : }
    2286             : 
    2287      173361 : SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
    2288             :                                        SDNode *N) {
    2289             :   // Iff the flag result is dead:
    2290             :   // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
    2291      346481 :   if ((N0.getOpcode() == ISD::ADD ||
    2292      176507 :        (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
    2293      176312 :       isNullConstant(N1) && !N->hasAnyUseOfValue(1))
    2294         147 :     return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
    2295         245 :                        N0.getOperand(0), N0.getOperand(1), CarryIn);
    2296             : 
    2297             :   /**
    2298             :    * When one of the addcarry argument is itself a carry, we may be facing
    2299             :    * a diamond carry propagation. In which case we try to transform the DAG
    2300             :    * to ensure linear carry propagation if that is possible.
    2301             :    *
    2302             :    * We are trying to get:
    2303             :    *   (addcarry X, 0, (addcarry A, B, Z):Carry)
    2304             :    */
    2305      173312 :   if (auto Y = getAsCarry(TLI, N1)) {
    2306             :     /**
    2307             :      *            (uaddo A, B)
    2308             :      *             /       \
    2309             :      *          Carry      Sum
    2310             :      *            |          \
    2311             :      *            | (addcarry *, 0, Z)
    2312             :      *            |       /
    2313             :      *             \   Carry
    2314             :      *              |   /
    2315             :      * (addcarry X, *, *)
    2316             :      */
    2317         461 :     if (Y.getOpcode() == ISD::UADDO &&
    2318         194 :         CarryIn.getResNo() == 1 &&
    2319         101 :         CarryIn.getOpcode() == ISD::ADDCARRY &&
    2320         372 :         isNullConstant(CarryIn.getOperand(1)) &&
    2321           8 :         CarryIn.getOperand(0) == Y.getValue(0)) {
    2322           6 :       auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
    2323           6 :                               Y.getOperand(0), Y.getOperand(1),
    2324          10 :                               CarryIn.getOperand(2));
    2325           2 :       AddToWorklist(NewY.getNode());
    2326           6 :       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
    2327           4 :                          DAG.getConstant(0, SDLoc(N), N0.getValueType()),
    2328          12 :                          NewY.getValue(1));
    2329             :     }
    2330             :   }
    2331             : 
    2332      173310 :   return SDValue();
    2333             : }
    2334             : 
    2335             : // Since it may not be valid to emit a fold to zero for vector initializers
    2336             : // check if we can before folding.
    2337          23 : static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
    2338             :                              SelectionDAG &DAG, bool LegalOperations,
    2339             :                              bool LegalTypes) {
    2340          23 :   if (!VT.isVector())
    2341          13 :     return DAG.getConstant(0, DL, VT);
    2342          10 :   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
    2343          10 :     return DAG.getConstant(0, DL, VT);
    2344           0 :   return SDValue();
    2345             : }
    2346             : 
    2347      375507 : SDValue DAGCombiner::visitSUB(SDNode *N) {
    2348      751014 :   SDValue N0 = N->getOperand(0);
    2349      751014 :   SDValue N1 = N->getOperand(1);
    2350      751014 :   EVT VT = N0.getValueType();
    2351      751014 :   SDLoc DL(N);
    2352             : 
    2353             :   // fold vector ops
    2354      375507 :   if (VT.isVector()) {
    2355      352408 :     if (SDValue FoldedVOp = SimplifyVBinOp(N))
    2356           3 :       return FoldedVOp;
    2357             : 
    2358             :     // fold (sub x, 0) -> x, vector edition
    2359      352405 :     if (ISD::isBuildVectorAllZeros(N1.getNode()))
    2360           8 :       return N0;
    2361             :   }
    2362             : 
    2363             :   // fold (sub x, x) -> 0
    2364             :   // FIXME: Refactor this and xor and other similar operations together.
    2365      375496 :   if (N0 == N1)
    2366          10 :     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes);
    2367      380694 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
    2368        5208 :       DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
    2369             :     // fold (sub c1, c2) -> c1-c2
    2370           4 :     return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
    2371           4 :                                       N1.getNode());
    2372             :   }
    2373             : 
    2374      375482 :   if (SDValue NewSel = foldBinOpIntoSelect(N))
    2375           2 :     return NewSel;
    2376             : 
    2377      376646 :   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
    2378             : 
    2379             :   // fold (sub x, c) -> (add x, -c)
    2380             :   if (N1C) {
    2381        1166 :     return DAG.getNode(ISD::ADD, DL, VT, N0,
    2382        6996 :                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
    2383             :   }
    2384             : 
    2385      374314 :   if (isNullConstantOrNullSplatConstant(N0)) {
    2386        2795 :     unsigned BitWidth = VT.getScalarSizeInBits();
    2387             :     // Right-shifting everything out but the sign bit followed by negation is
    2388             :     // the same as flipping arithmetic/logical shift type without the negation:
    2389             :     // -(X >>u 31) -> (X >>s 31)
    2390             :     // -(X >>s 31) -> (X >>u 31)
    2391        5590 :     if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
    2392         158 :       ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
    2393         158 :       if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
    2394          12 :         auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
    2395          12 :         if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
    2396          48 :           return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
    2397             :       }
    2398             :     }
    2399             : 
    2400             :     // 0 - X --> 0 if the sub is NUW.
    2401        5566 :     if (N->getFlags().hasNoUnsignedWrap())
    2402           2 :       return N0;
    2403             : 
    2404       13905 :     if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
    2405             :       // N1 is either 0 or the minimum signed value. If the sub is NSW, then
    2406             :       // N1 must be 0 because negating the minimum signed value is undefined.
    2407          12 :       if (N->getFlags().hasNoSignedWrap())
    2408           2 :         return N0;
    2409             : 
    2410             :       // 0 - X --> X if X is 0 or the minimum signed value.
    2411           4 :       return N1;
    2412             :     }
    2413             :   }
    2414             : 
    2415             :   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
    2416      374294 :   if (isAllOnesConstantOrAllOnesSplatConstant(N0))
    2417          48 :     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
    2418             : 
    2419             :   // fold A-(A-B) -> B
    2420      748828 :   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
    2421          28 :     return N1.getOperand(1);
    2422             : 
    2423             :   // fold (A+B)-A -> B
    2424      750869 :   if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
    2425          10 :     return N0.getOperand(1);
    2426             : 
    2427             :   // fold (A+B)-B -> A
    2428      750847 :   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
    2429           6 :     return N0.getOperand(0);
    2430             : 
    2431             :   // fold C2-(A+C1) -> (C2-C1)-A
    2432      748496 :   if (N1.getOpcode() == ISD::ADD) {
    2433         588 :     SDValue N11 = N1.getOperand(1);
    2434         444 :     if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
    2435         150 :         isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
    2436         270 :       SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
    2437         405 :       return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
    2438             :     }
    2439             :   }
    2440             : 
    2441             :   // fold ((A+(B+or-C))-B) -> A+or-C
    2442      375281 :   if (N0.getOpcode() == ISD::ADD &&
    2443        3485 :       (N0.getOperand(1).getOpcode() == ISD::SUB ||
    2444      376411 :        N0.getOperand(1).getOpcode() == ISD::ADD) &&
    2445          93 :       N0.getOperand(1).getOperand(0) == N1)
    2446          24 :     return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
    2447          24 :                        N0.getOperand(1).getOperand(1));
    2448             : 
    2449             :   // fold ((A+(C+B))-B) -> A+C
    2450      751700 :   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
    2451          22 :       N0.getOperand(1).getOperand(1) == N1)
    2452           3 :     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
    2453           4 :                        N0.getOperand(1).getOperand(0));
    2454             : 
    2455             :   // fold ((A-(B-C))-C) -> A-B
    2456      753870 :   if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
    2457          12 :       N0.getOperand(1).getOperand(1) == N1)
    2458           9 :     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
    2459          12 :                        N0.getOperand(1).getOperand(0));
    2460             : 
    2461             :   // If either operand of a sub is undef, the result is undef
    2462      748206 :   if (N0.isUndef())
    2463           0 :     return N0;
    2464      748206 :   if (N1.isUndef())
    2465           0 :     return N1;
    2466             : 
    2467             :   // If the relocation model supports it, consider symbol offsets.
    2468          10 :   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    2469          10 :     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
    2470             :       // fold (sub Sym, c) -> Sym-c
    2471             :       if (N1C && GA->getOpcode() == ISD::GlobalAddress)
    2472             :         return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
    2473             :                                     GA->getOffset() -
    2474             :                                         (uint64_t)N1C->getSExtValue());
    2475             :       // fold (sub Sym+c1, Sym+c2) -> c1-c2
    2476           0 :       if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
    2477           0 :         if (GA->getGlobal() == GB->getGlobal())
    2478           0 :           return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
    2479           0 :                                  DL, VT);
    2480             :     }
    2481             : 
    2482             :   // sub X, (sextinreg Y i1) -> add X, (and Y 1)
    2483      748206 :   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    2484         264 :     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    2485         142 :     if (TN->getVT() == MVT::i1) {
    2486         162 :       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
    2487         108 :                                  DAG.getConstant(1, DL, VT));
    2488         108 :       return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
    2489             :     }
    2490             :   }
    2491             : 
    2492      374049 :   return SDValue();
    2493             : }
    2494             : 
    2495         242 : SDValue DAGCombiner::visitSUBC(SDNode *N) {
    2496         484 :   SDValue N0 = N->getOperand(0);
    2497         484 :   SDValue N1 = N->getOperand(1);
    2498         484 :   EVT VT = N0.getValueType();
    2499         484 :   SDLoc DL(N);
    2500             : 
    2501             :   // If the flag result is dead, turn this into an SUB.
    2502         242 :   if (!N->hasAnyUseOfValue(1))
    2503           7 :     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
    2504          28 :                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
    2505             : 
    2506             :   // fold (subc x, x) -> 0 + no borrow
    2507         235 :   if (N0 == N1)
    2508           0 :     return CombineTo(N, DAG.getConstant(0, DL, VT),
    2509           0 :                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
    2510             : 
    2511             :   // fold (subc x, 0) -> x + no borrow
    2512         235 :   if (isNullConstant(N1))
    2513           0 :     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
    2514             : 
    2515             :   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
    2516         235 :   if (isAllOnesConstant(N0))
    2517           0 :     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
    2518           0 :                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
    2519             : 
    2520         235 :   return SDValue();
    2521             : }
    2522             : 
    2523         969 : SDValue DAGCombiner::visitUSUBO(SDNode *N) {
    2524        1938 :   SDValue N0 = N->getOperand(0);
    2525        1938 :   SDValue N1 = N->getOperand(1);
    2526        1938 :   EVT VT = N0.getValueType();
    2527         969 :   if (VT.isVector())
    2528           0 :     return SDValue();
    2529             : 
    2530        1938 :   EVT CarryVT = N->getValueType(1);
    2531         969 :   SDLoc DL(N);
    2532             : 
    2533             :   // If the flag result is dead, turn this into an SUB.
    2534         969 :   if (!N->hasAnyUseOfValue(1))
    2535          13 :     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
    2536          39 :                      DAG.getUNDEF(CarryVT));
    2537             : 
    2538             :   // fold (usubo x, x) -> 0 + no borrow
    2539         956 :   if (N0 == N1)
    2540           7 :     return CombineTo(N, DAG.getConstant(0, DL, VT),
    2541          14 :                      DAG.getConstant(0, DL, CarryVT));
    2542             : 
    2543             :   // fold (usubo x, 0) -> x + no borrow
    2544         949 :   if (isNullConstant(N1))
    2545          13 :     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
    2546             : 
    2547             :   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
    2548         936 :   if (isAllOnesConstant(N0))
    2549           3 :     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
    2550           9 :                      DAG.getConstant(0, DL, CarryVT));
    2551             : 
    2552         933 :   return SDValue();
    2553             : }
    2554             : 
    2555         148 : SDValue DAGCombiner::visitSUBE(SDNode *N) {
    2556         296 :   SDValue N0 = N->getOperand(0);
    2557         296 :   SDValue N1 = N->getOperand(1);
    2558         296 :   SDValue CarryIn = N->getOperand(2);
    2559             : 
    2560             :   // fold (sube x, y, false) -> (subc x, y)
    2561         296 :   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    2562           0 :     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
    2563             : 
    2564         148 :   return SDValue();
    2565             : }
    2566             : 
    2567         214 : SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
    2568         428 :   SDValue N0 = N->getOperand(0);
    2569         428 :   SDValue N1 = N->getOperand(1);
    2570         428 :   SDValue CarryIn = N->getOperand(2);
    2571             : 
    2572             :   // fold (subcarry x, y, false) -> (usubo x, y)
    2573         214 :   if (isNullConstant(CarryIn))
    2574          40 :     return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
    2575             : 
    2576         204 :   return SDValue();
    2577             : }
    2578             : 
    2579       42452 : SDValue DAGCombiner::visitMUL(SDNode *N) {
    2580       84904 :   SDValue N0 = N->getOperand(0);
    2581       84904 :   SDValue N1 = N->getOperand(1);
    2582       84904 :   EVT VT = N0.getValueType();
    2583             : 
    2584             :   // fold (mul x, undef) -> 0
    2585      127355 :   if (N0.isUndef() || N1.isUndef())
    2586           3 :     return DAG.getConstant(0, SDLoc(N), VT);
    2587             : 
    2588       42451 :   bool N0IsConst = false;
    2589       42451 :   bool N1IsConst = false;
    2590       42451 :   bool N1IsOpaqueConst = false;
    2591       42451 :   bool N0IsOpaqueConst = false;
    2592      127353 :   APInt ConstValue0, ConstValue1;
    2593             :   // fold vector ops
    2594       42451 :   if (VT.isVector()) {
    2595        3718 :     if (SDValue FoldedVOp = SimplifyVBinOp(N))
    2596           3 :       return FoldedVOp;
    2597             : 
    2598        3715 :     N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
    2599        3715 :     N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
    2600             :     assert((!N0IsConst ||
    2601             :             ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
    2602             :            "Splat APInt should be element width");
    2603             :     assert((!N1IsConst ||
    2604             :             ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
    2605             :            "Splat APInt should be element width");
    2606             :   } else {
    2607             :     N0IsConst = isa<ConstantSDNode>(N0);
    2608             :     if (N0IsConst) {
    2609          12 :       ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
    2610          12 :       N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
    2611             :     }
    2612             :     N1IsConst = isa<ConstantSDNode>(N1);
    2613             :     if (N1IsConst) {
    2614       66896 :       ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
    2615       66896 :       N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
    2616             :     }
    2617             :   }
    2618             : 
    2619             :   // fold (mul c1, c2) -> c1*c2
    2620       42448 :   if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
    2621          18 :     return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
    2622          12 :                                       N0.getNode(), N1.getNode());
    2623             : 
    2624             :   // canonicalize constant to RHS (vector doesn't have to splat)
    2625       42449 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
    2626           7 :      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    2627          28 :     return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
    2628             :   // fold (mul x, 0) -> 0
    2629       76342 :   if (N1IsConst && ConstValue1.isNullValue())
    2630          22 :     return N1;
    2631             :   // fold (mul x, 1) -> x
    2632       76298 :   if (N1IsConst && ConstValue1.isOneValue())
    2633         372 :     return N0;
    2634             : 
    2635       42041 :   if (SDValue NewSel = foldBinOpIntoSelect(N))
    2636           3 :     return NewSel;
    2637             : 
    2638             :   // fold (mul x, -1) -> 0-x
    2639       75548 :   if (N1IsConst && ConstValue1.isAllOnesValue()) {
    2640          38 :     SDLoc DL(N);
    2641          19 :     return DAG.getNode(ISD::SUB, DL, VT,
    2642          19 :                        DAG.getConstant(0, DL, VT), N0);
    2643             :   }
    2644             :   // fold (mul x, (1 << c)) -> x << c
    2645       75811 :   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
    2646       33792 :       DAG.isKnownToBeAPowerOfTwo(N1)) {
    2647       50536 :     SDLoc DL(N);
    2648       25268 :     SDValue LogBase2 = BuildLogBase2(N1, DL);
    2649       25268 :     AddToWorklist(LogBase2.getNode());
    2650             : 
    2651       50536 :     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    2652       25268 :     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    2653       25268 :     AddToWorklist(Trunc.getNode());
    2654       50536 :     return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
    2655             :   }
    2656             :   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
    2657       66853 :   if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
    2658         295 :     unsigned Log2Val = (-ConstValue1).logBase2();
    2659         118 :     SDLoc DL(N);
    2660             :     // FIXME: If the input is something that is easily negated (e.g. a
    2661             :     // single-use add), we should put the negate there.
    2662          59 :     return DAG.getNode(ISD::SUB, DL, VT,
    2663          59 :                        DAG.getConstant(0, DL, VT),
    2664          59 :                        DAG.getNode(ISD::SHL, DL, VT, N0,
    2665             :                             DAG.getConstant(Log2Val, DL,
    2666         236 :                                       getShiftAmountTy(N0.getValueType()))));
    2667             :   }
    2668             : 
    2669             :   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
    2670       16711 :   if (N0.getOpcode() == ISD::SHL &&
    2671       16698 :       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
    2672          12 :       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
    2673          30 :     SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
    2674           6 :     if (isConstantOrConstantVector(C3))
    2675          30 :       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
    2676             :   }
    2677             : 
    2678             :   // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
    2679             :   // use.
    2680             :   {
    2681       33372 :     SDValue Sh(nullptr, 0), Y(nullptr, 0);
    2682             : 
    2683             :     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
    2684       16699 :     if (N0.getOpcode() == ISD::SHL &&
    2685       16699 :         isConstantOrConstantVector(N0.getOperand(1)) &&
    2686          19 :         N0.getNode()->hasOneUse()) {
    2687           8 :       Sh = N0; Y = N1;
    2688       16687 :     } else if (N1.getOpcode() == ISD::SHL &&
    2689       16696 :                isConstantOrConstantVector(N1.getOperand(1)) &&
    2690           9 :                N1.getNode()->hasOneUse()) {
    2691           3 :       Sh = N1; Y = N0;
    2692             :     }
    2693             : 
    2694       16686 :     if (Sh.getNode()) {
    2695          55 :       SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
    2696          55 :       return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
    2697             :     }
    2698             :   }
    2699             : 
    2700             :   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
    2701       25174 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
    2702        9079 :       N0.getOpcode() == ISD::ADD &&
    2703       18012 :       DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
    2704         177 :       isMulAddWithConstProfitable(N, N0, N1))
    2705         246 :       return DAG.getNode(ISD::ADD, SDLoc(N), VT,
    2706         246 :                          DAG.getNode(ISD::MUL, SDLoc(N0), VT,
    2707         164 :                                      N0.getOperand(0), N1),
    2708         246 :                          DAG.getNode(ISD::MUL, SDLoc(N1), VT,
    2709         738 :                                      N0.getOperand(1), N1));
    2710             : 
    2711             :   // reassociate mul
    2712       33186 :   if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
    2713          27 :     return RMUL;
    2714             : 
    2715       16566 :   return SDValue();
    2716             : }
    2717             : 
    2718             : /// Return true if a combined divide/remainder (divmod) libcall is available for the result type of \p Node: selects the signed or unsigned DIVREM libcall by integer width and asks \p TLI whether it has a name for it.
    2719        1352 : static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
    2720             :                                      const TargetLowering &TLI) {
    2721             :   RTLIB::Libcall LC;
    2722        2704 :   EVT NodeType = Node->getValueType(0);
    2723        1352 :   if (!NodeType.isSimple()) // Non-simple (extended) value types are rejected outright.
    2724             :     return false;
    2725        1352 :   switch (NodeType.getSimpleVT().SimpleTy) {
    2726             :   default: return false; // No libcall for anything other than i8..i128 (e.g. vector types).
    2727           0 :   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
    2728           4 :   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
    2729        1159 :   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
    2730         186 :   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
    2731           3 :   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
    2732             :   }
    2733             : 
    2734        2704 :   return TLI.getLibcallName(LC) != nullptr; // Available iff the target registered a name for LC.
    2735             : }
    2736             : 
    2737             : /// Issue divrem if both quotient and remainder are needed: when another user of the same two operands computes the complementary div/rem (or already is a DIVREM), rewrite those users to share one DIVREM node and return it (result 0 replaces divides, result 1 replaces remainders). Returns a null SDValue when nothing was combined.
    2738        5046 : SDValue DAGCombiner::useDivRem(SDNode *Node) {
    2739        5046 :   if (Node->use_empty())
    2740           0 :     return SDValue(); // This is a dead node, leave it alone.
    2741             : 
    2742       10092 :   unsigned Opcode = Node->getOpcode();
    2743        5046 :   bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
    2744        5046 :   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
    2745             : 
    2746             :   // Only scalar integer types are considered. DivMod lib calls can still work on non-legal types if using lib-calls.
    2747       10092 :   EVT VT = Node->getValueType(0);
    2748        5046 :   if (VT.isVector() || !VT.isInteger())
    2749         479 :     return SDValue();
    2750             : 
    2751        6013 :   if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT)) // Illegal type is only acceptable when the target custom-lowers DIVREM for it.
    2752         711 :     return SDValue();
    2753             : 
    2754             :   // If DIVREM is going to get expanded into a libcall,
    2755             :   // but there is no libcall available, then don't combine.
    2756        5208 :   if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
    2757        1352 :       !isDivRemLibcallAvailable(Node, isSigned, TLI))
    2758        1240 :     return SDValue();
    2759             : 
    2760             :   // If div is legal, it's better to do the normal expansion. OtherOpcode is the complementary op (REM for a DIV node, DIV for a REM node).
    2761        2616 :   unsigned OtherOpcode = 0;
    2762        2616 :   if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
    2763        1364 :     OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
    2764        1364 :     if (TLI.isOperationLegalOrCustom(Opcode, VT)) // Node is a divide and the divide itself is legal/custom.
    2765         208 :       return SDValue();
    2766             :   } else {
    2767        1252 :     OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    2768        1252 :     if (TLI.isOperationLegalOrCustom(OtherOpcode, VT)) // Node is a remainder and the matching divide is legal/custom.
    2769         168 :       return SDValue();
    2770             :   }
    2771             : 
    2772        4480 :   SDValue Op0 = Node->getOperand(0);
    2773        4480 :   SDValue Op1 = Node->getOperand(1);
    2774        2240 :   SDValue combined; // Stays null until a partner user is found.
    2775        2240 :   for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), // Walk every user of the dividend looking for div/rem/divrem on the same operand pair.
    2776       11898 :          UE = Op0.getNode()->use_end(); UI != UE;) {
    2777        8814 :     SDNode *User = *UI++; // Iterator is advanced before User is touched; presumably so CombineTo below cannot invalidate it.
    2778        5198 :     if (User == Node || User->use_empty()) // Node itself is never rewritten here; the caller uses the returned value for it.
    2779        2260 :       continue;
    2780             :     // Convert the other matching node(s), too;
    2781             :     // otherwise, the DIVREM may get target-legalized into something
    2782             :     // target-specific that we won't be able to recognize.
    2783         678 :     unsigned UserOpc = User->getOpcode();
    2784        1066 :     if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
    2785        1468 :         User->getOperand(0) == Op0 &&
    2786         498 :         User->getOperand(1) == Op1) {
    2787         166 :       if (!combined) {
    2788         166 :         if (UserOpc == OtherOpcode) { // Found the complementary op: create the shared two-result DIVREM node.
    2789         166 :           SDVTList VTs = DAG.getVTList(VT, VT);
    2790         498 :           combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
    2791           0 :         } else if (UserOpc == DivRemOpc) { // An equivalent DIVREM already exists: reuse it.
    2792             :           combined = SDValue(User, 0);
    2793             :         } else {
    2794             :           assert(UserOpc == Opcode);
    2795           0 :           continue; // Same opcode as Node and no partner seen yet: nothing to combine with so far.
    2796             :         }
    2797             :       }
    2798         166 :       if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
    2799             :         CombineTo(User, combined); // Divides take result 0 of the DIVREM.
    2800          31 :       else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
    2801          62 :         CombineTo(User, combined.getValue(1)); // Remainders take result 1 of the DIVREM.
    2802             :     }
    2803             :   }
    2804        2240 :   return combined; // Null if no partner user was found.
    2805             : }
    2806             : 
    2807        7251 : static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) { // Undef-related folds shared by visitSDIV, visitUDIV and visitREM; returns a null SDValue if none applies.
    2808       14502 :   SDValue N0 = N->getOperand(0);
    2809       14502 :   SDValue N1 = N->getOperand(1);
    2810       14502 :   EVT VT = N->getValueType(0);
    2811       14502 :   SDLoc DL(N);
    2812             : 
    2813       21753 :   if (DAG.isUndef(N->getOpcode(), {N0, N1})) // DAG.isUndef decides whether this opcode on these operands is undefined as a whole.
    2814           1 :     return DAG.getUNDEF(VT);
    2815             : 
    2816             :   // undef / X -> 0
    2817             :   // undef % X -> 0
    2818       14500 :   if (N0.isUndef())
    2819           0 :     return DAG.getConstant(0, DL, VT);
    2820             : 
    2821        7250 :   return SDValue();
    2822             : }
    2823             : 
    2824        2316 : SDValue DAGCombiner::visitSDIV(SDNode *N) { // Combines for a signed-divide node; returns the replacement value or a null SDValue if nothing changed.
    2825        4632 :   SDValue N0 = N->getOperand(0);
    2826        4632 :   SDValue N1 = N->getOperand(1);
    2827        4632 :   EVT VT = N->getValueType(0);
    2828             : 
    2829             :   // fold vector ops
    2830        2316 :   if (VT.isVector())
    2831         250 :     if (SDValue FoldedVOp = SimplifyVBinOp(N))
    2832           1 :       return FoldedVOp;
    2833             : 
    2834        2315 :   SDLoc DL(N);
    2835             : 
    2836             :   // fold (sdiv c1, c2) -> c1/c2 (only when neither constant is opaque)
    2837        2315 :   ConstantSDNode *N0C = isConstOrConstSplat(N0);
    2838        2315 :   ConstantSDNode *N1C = isConstOrConstSplat(N1);
    2839        2315 :   if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
    2840           0 :     return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
    2841             :   // fold (sdiv X, 1) -> X
    2842        3239 :   if (N1C && N1C->isOne())
    2843           6 :     return N0;
    2844             :   // fold (sdiv X, -1) -> 0-X
    2845        4145 :   if (N1C && N1C->isAllOnesValue())
    2846           6 :     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
    2847             : 
    2848        2306 :   if (SDValue V = simplifyDivRem(N, DAG)) // Undef-operand folds shared with udiv/rem.
    2849           0 :     return V;
    2850             : 
    2851        2306 :   if (SDValue NewSel = foldBinOpIntoSelect(N))
    2852           2 :     return NewSel;
    2853             : 
    2854             :   // If we know the sign bits of both operands are zero, strength reduce to a
    2855             :   // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
    2856        2304 :   if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
    2857          48 :     return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
    2858             : 
    2859             :   // fold (sdiv X, pow2) -> simple ops after legalize
    2860             :   // FIXME: We check for the exact bit here because the generic lowering gives
    2861             :   // better results in that case. The target-specific lowering should learn how
    2862             :   // to handle exact sdivs efficiently.
    2863        5896 :   if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
    2864        6263 :       !N->getFlags().hasExact() && (N1C->getAPIntValue().isPowerOf2() || // divisor is +2^k ...
    2865        9380 :                                     (-N1C->getAPIntValue()).isPowerOf2())) { // ... or -2^k
    2866             :     // Target-specific implementation of sdiv x, pow2.
    2867         155 :     if (SDValue Res = BuildSDIVPow2(N))
    2868          27 :       return Res;
    2869             : 
    2870         256 :     unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); // k such that |divisor| == 1 << k; the trailing-zero count is the same for 2^k and -2^k.
    2871             : 
    2872             :     // Splat the sign bit into the register (arithmetic shift right by bitwidth-1: all ones if N0 < 0, else zero)
    2873             :     SDValue SGN =
    2874         128 :         DAG.getNode(ISD::SRA, DL, VT, N0,
    2875         128 :                     DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
    2876         384 :                                     getShiftAmountTy(N0.getValueType())));
    2877         128 :     AddToWorklist(SGN.getNode());
    2878             : 
    2879             :     // Add (N0 < 0) ? abs2 - 1 : 0, where abs2 is |divisor| = 1 << lg2: the logical shift keeps only the low lg2 bits of the sign splat.
    2880             :     SDValue SRL =
    2881         128 :         DAG.getNode(ISD::SRL, DL, VT, SGN,
    2882         128 :                     DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL,
    2883         384 :                                     getShiftAmountTy(SGN.getValueType())));
    2884         256 :     SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL);
    2885         128 :     AddToWorklist(SRL.getNode());
    2886         128 :     AddToWorklist(ADD.getNode());    // Next: divide the biased value by pow2 with an arithmetic shift.
    2887         128 :     SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD,
    2888             :                   DAG.getConstant(lg2, DL,
    2889         256 :                                   getShiftAmountTy(ADD.getValueType())));
    2890             : 
    2891             :     // If we're dividing by a positive value, we're done.  Otherwise, we must
    2892             :     // negate the result.
    2893         384 :     if (N1C->getAPIntValue().isNonNegative())
    2894         122 :       return SRA;
    2895             : 
    2896           6 :     AddToWorklist(SRA.getNode());
    2897          12 :     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA); // 0 - (X / 2^k) for a divisor of -2^k
    2898             :   }
    2899             : 
    2900             :   // If integer divide is expensive and we satisfy the requirements, emit an
    2901             :   // alternate sequence.  Targets may check function attributes for size/speed
    2902             :   // trade-offs.
    2903        2133 :   AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
    2904        2880 :   if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
    2905         708 :     if (SDValue Op = BuildSDIV(N))
    2906         598 :       return Op;
    2907             : 
    2908             :   // sdiv, srem -> sdivrem
    2909             :   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
    2910             :   // true.  Otherwise, we break the simplification logic in visitREM().
    2911        1684 :   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    2912        1425 :     if (SDValue DivRem = useDivRem(N))
    2913          14 :         return DivRem; // Used as-is, i.e. result 0 (the quotient) of the DIVREM node.
    2914             : 
    2915        1521 :   return SDValue();
    2916             : }
    2917             : 
    2918        2172 : SDValue DAGCombiner::visitUDIV(SDNode *N) { // Combines for an unsigned-divide node; returns the replacement value or a null SDValue if nothing changed.
    2919        4344 :   SDValue N0 = N->getOperand(0);
    2920        4344 :   SDValue N1 = N->getOperand(1);
    2921        4344 :   EVT VT = N->getValueType(0);
    2922             : 
    2923             :   // fold vector ops
    2924        2172 :   if (VT.isVector())
    2925         209 :     if (SDValue FoldedVOp = SimplifyVBinOp(N))
    2926           0 :       return FoldedVOp;
    2927             : 
    2928        2172 :   SDLoc DL(N);
    2929             : 
    2930             :   // fold (udiv c1, c2) -> c1/c2; falls through to the other combines if the fold yields no value
    2931        2172 :   ConstantSDNode *N0C = isConstOrConstSplat(N0);
    2932        2172 :   ConstantSDNode *N1C = isConstOrConstSplat(N1);
    2933        2172 :   if (N0C && N1C)
    2934           0 :     if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
    2935           0 :                                                     N0C, N1C))
    2936           0 :       return Folded;
    2937             : 
    2938        2172 :   if (SDValue V = simplifyDivRem(N, DAG)) // Undef-operand folds shared with sdiv/rem.
    2939           0 :     return V;
    2940             : 
    2941        2172 :   if (SDValue NewSel = foldBinOpIntoSelect(N))
    2942           2 :     return NewSel;
    2943             : 
    2944             :   // fold (udiv x, (1 << c)) -> x >>u c
    2945        2934 :   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
    2946         764 :       DAG.isKnownToBeAPowerOfTwo(N1)) {
    2947          39 :     SDValue LogBase2 = BuildLogBase2(N1, DL);
    2948          39 :     AddToWorklist(LogBase2.getNode());
    2949             : 
    2950          78 :     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    2951          39 :     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT); // Convert the log2 value to the shift-amount type.
    2952          39 :     AddToWorklist(Trunc.getNode());
    2953          78 :     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
    2954             :   }
    2955             : 
    2956             :   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
    2957        4262 :   if (N1.getOpcode() == ISD::SHL) {
    2958          18 :     SDValue N10 = N1.getOperand(0);
    2959          18 :     if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
    2960           9 :         DAG.isKnownToBeAPowerOfTwo(N10)) {
    2961           9 :       SDValue LogBase2 = BuildLogBase2(N10, DL);
    2962           9 :       AddToWorklist(LogBase2.getNode());
    2963             : 
    2964          27 :       EVT ADDVT = N1.getOperand(1).getValueType(); // The add is done in the type of the existing shift amount y.
    2965           9 :       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
    2966           9 :       AddToWorklist(Trunc.getNode());
    2967          27 :       SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
    2968           9 :       AddToWorklist(Add.getNode());
    2969          18 :       return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
    2970             :     }
    2971             :   }
    2972             : 
    2973             :   // fold (udiv x, c) -> alternate sequence, only when the target reports integer division as not cheap
    2974        2122 :   AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
    2975        2847 :   if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
    2976         686 :     if (SDValue Op = BuildUDIV(N))
    2977         582 :       return Op;
    2978             : 
    2979             :   // udiv, urem -> udivrem
    2980             :   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
    2981             :   // true.  Otherwise, we break the simplification logic in visitREM().
    2982        1683 :   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    2983        1436 :     if (SDValue DivRem = useDivRem(N))
    2984          17 :         return DivRem; // Used as-is, i.e. result 0 (the quotient) of the DIVREM node.
    2985             : 
    2986        1523 :   return SDValue();
    2987             : }
    2988             : 
    2989             : // handles ISD::SREM and ISD::UREM; returns the replacement value or a null SDValue if nothing changed
    2990        2777 : SDValue DAGCombiner::visitREM(SDNode *N) {
    2991        5554 :   unsigned Opcode = N->getOpcode();
    2992        5554 :   SDValue N0 = N->getOperand(0);
    2993        5554 :   SDValue N1 = N->getOperand(1);
    2994        5554 :   EVT VT = N->getValueType(0);
    2995        2777 :   bool isSigned = (Opcode == ISD::SREM);
    2996        5554 :   SDLoc DL(N);
    2997             : 
    2998             :   // fold (rem c1, c2) -> c1%c2; falls through to the other combines if the fold yields no value
    2999        2777 :   ConstantSDNode *N0C = isConstOrConstSplat(N0);
    3000        2777 :   ConstantSDNode *N1C = isConstOrConstSplat(N1);
    3001        2777 :   if (N0C && N1C)
    3002           4 :     if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
    3003           4 :       return Folded;
    3004             : 
    3005        2773 :   if (SDValue V = simplifyDivRem(N, DAG)) // Undef-operand folds shared with sdiv/udiv.
    3006           1 :     return V;
    3007             : 
    3008        2772 :   if (SDValue NewSel = foldBinOpIntoSelect(N))
    3009           4 :     return NewSel;
    3010             : 
    3011        2768 :   if (isSigned) {
    3012             :     // If we know the sign bits of both operands are zero, strength reduce to a
    3013             :     // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
    3014        1415 :     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
    3015          18 :       return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
    3016             :   } else {
    3017        1353 :     SDValue NegOne = DAG.getAllOnesConstant(DL, VT); // All-ones is -1, so (add N1, NegOne) below computes N1 - 1.
    3018        1353 :     if (DAG.isKnownToBeAPowerOfTwo(N1)) {
    3019             :       // fold (urem x, pow2) -> (and x, pow2-1)
    3020          72 :       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
    3021          36 :       AddToWorklist(Add.getNode());
    3022          72 :       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    3023             :     }
    3024        2640 :     if (N1.getOpcode() == ISD::SHL &&
    3025          12 :         DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
    3026             :       // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
    3027          12 :       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
    3028           6 :       AddToWorklist(Add.getNode());
    3029          12 :       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    3030             :     }
    3031             :   }
    3032             : 
    3033        2717 :   AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
    3034             : 
    3035             :   // If X/C can be simplified by the division-by-constant logic, lower
    3036             :   // X%C to the equivalent of X-X/C*C.
    3037             :   // To avoid mangling nodes, this simplification requires that the combine()
    3038             :   // call for the speculative DIV must not cause a DIVREM conversion.  We guard
    3039             :   // against this by skipping the simplification if isIntDivCheap().  When
    3040             :   // div is not cheap, combine will not return a DIVREM.  Regardless,
    3041             :   // checking cheapness here makes sense since the simplification results in
    3042             :   // fatter code.
    3043        3411 :   if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
    3044         634 :     unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    3045        1268 :     SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1); // Speculative X/C, kept only if combine() improves it.
    3046         634 :     AddToWorklist(Div.getNode());
    3047         634 :     SDValue OptimizedDiv = combine(Div.getNode());
    3048         634 :     if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { // combine() produced a different node for the divide.
    3049             :       assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) &&
    3050             :              (OptimizedDiv.getOpcode() != ISD::SDIVREM));
    3051        1064 :       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
    3052        1064 :       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul); // X - (X/C)*C
    3053         532 :       AddToWorklist(Mul.getNode());
    3054         532 :       return Sub;
    3055             :     }
    3056             :   }
    3057             : 
    3058             :   // sdiv, srem -> sdivrem (and likewise udiv, urem -> udivrem; useDivRem picks the opcode from N)
    3059        2185 :   if (SDValue DivRem = useDivRem(N))
    3060         270 :     return DivRem.getValue(1); // Result 1 of the DIVREM node is the remainder.
    3061             : 
    3062        2050 :   return SDValue();
    3063             : }
    3064             : 
    3065         931 : SDValue DAGCombiner::visitMULHS(SDNode *N) { // Combines for a signed multiply-high node; returns the replacement value or a null SDValue if nothing changed.
    3066        1862 :   SDValue N0 = N->getOperand(0);
    3067        1862 :   SDValue N1 = N->getOperand(1);
    3068        1862 :   EVT VT = N->getValueType(0);
    3069        1862 :   SDLoc DL(N);
    3070             : 
    3071             :   // fold (mulhs x, 0) -> 0
    3072         931 :   if (isNullConstant(N1))
    3073           0 :     return N1;
    3074             :   // fold (mulhs x, 1) -> (sra x, size(x)-1), i.e. the sign bit of x replicated across the whole value
    3075         931 :   if (isOneConstant(N1)) {
    3076           0 :     SDLoc DL(N); // NOTE(review): shadows the function-scope DL declared above; both are built from N.
    3077           0 :     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
    3078           0 :                        DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
    3079           0 :                                        getShiftAmountTy(N0.getValueType())));
    3080             :   }
    3081             :   // fold (mulhs x, undef) -> 0 (either operand being undef triggers this)
    3082        2793 :   if (N0.isUndef() || N1.isUndef())
    3083           0 :     return DAG.getConstant(0, SDLoc(N), VT);
    3084             : 
    3085             :   // If the type twice as wide is legal, transform the mulhs to a wider multiply
    3086             :   // plus a shift. Scalar simple types only; N0/N1 are reused as temporaries below.
    3087        1862 :   if (VT.isSimple() && !VT.isVector()) {
    3088         697 :     MVT Simple = VT.getSimpleVT();
    3089         697 :     unsigned SimpleSize = Simple.getSizeInBits();
    3090         697 :     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    3091         947 :     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
    3092         500 :       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
    3093         500 :       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
    3094         500 :       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); // Full product in the double-width type.
    3095         500 :       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, // Move the high half down into the low SimpleSize bits.
    3096             :             DAG.getConstant(SimpleSize, DL,
    3097         750 :                             getShiftAmountTy(N1.getValueType())));
    3098         500 :       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    3099             :     }
    3100             :   }
    3101             : 
    3102         681 :   return SDValue();
    3103             : }
    3104             : 
    3105        2242 : SDValue DAGCombiner::visitMULHU(SDNode *N) { // Combines for an unsigned multiply-high node; returns the replacement value or a null SDValue if nothing changed.
    3106        4484 :   SDValue N0 = N->getOperand(0);
    3107        4484 :   SDValue N1 = N->getOperand(1);
    3108        4484 :   EVT VT = N->getValueType(0);
    3109        4484 :   SDLoc DL(N);
    3110             : 
    3111             :   // fold (mulhu x, 0) -> 0
    3112        2242 :   if (isNullConstant(N1))
    3113           0 :     return N1;
    3114             :   // fold (mulhu x, 1) -> 0
    3115        2242 :   if (isOneConstant(N1))
    3116           0 :     return DAG.getConstant(0, DL, N0.getValueType());
    3117             :   // fold (mulhu x, undef) -> 0 (either operand being undef triggers this)
    3118        6726 :   if (N0.isUndef() || N1.isUndef())
    3119           0 :     return DAG.getConstant(0, DL, VT);
    3120             : 
    3121             :   // If the type twice as wide is legal, transform the mulhu to a wider multiply
    3122             :   // plus a shift. Scalar simple types only; N0/N1 are reused as temporaries below.
    3123        4484 :   if (VT.isSimple() && !VT.isVector()) {
    3124        2054 :     MVT Simple = VT.getSimpleVT();
    3125        2054 :     unsigned SimpleSize = Simple.getSizeInBits();
    3126        2054 :     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    3127        2330 :     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
    3128         552 :       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
    3129         552 :       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
    3130         552 :       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); // Full product in the double-width type.
    3131         552 :       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, // Move the high half down into the low SimpleSize bits.
    3132             :             DAG.getConstant(SimpleSize, DL,
    3133         828 :                             getShiftAmountTy(N1.getValueType())));
    3134         552 :       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    3135             :     }
    3136             :   }
    3137             : 
    3138        1966 :   return SDValue();
    3139             : }
    3140             : 
    3141             : /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
    3142             : /// give the opcodes for the two computations that are being performed. Return
    3143             : /// the result of CombineTo if a simplification was made, or a null SDValue otherwise.
    3144        2436 : SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
    3145             :                                                 unsigned HiOp) {
    3146             :   // If the high half is not needed, just compute the low half.
    3147        2436 :   bool HiExists = N->hasAnyUseOfValue(1); // Result 1 is the high half.
    3148        2458 :   if (!HiExists &&
    3149          29 :       (!LegalOperations ||
    3150          14 :        TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
    3151          75 :     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    3152          15 :     return CombineTo(N, Res, Res); // Both results are replaced by Res; the high one has no uses.
    3153             :   }
    3154             : 
    3155             :   // If the low half is not needed, just compute the high half.
    3156        2421 :   bool LoExists = N->hasAnyUseOfValue(0); // Result 0 is the low half.
    3157        3722 :   if (!LoExists &&
    3158        1301 :       (!LegalOperations ||
    3159        1806 :        TLI.isOperationLegal(HiOp, N->getValueType(1)))) { // NOTE(review): the low-half check above also accepts Custom, this one only Legal - confirm this is intended.
    3160        1990 :     SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    3161         398 :     return CombineTo(N, Res, Res); // Both results are replaced by Res; the low one has no uses.
    3162             :   }
    3163             : 
    3164             :   // If both halves are used, return as it is.
    3165        2023 :   if (LoExists && HiExists)
    3166        1113 :     return SDValue();
    3167             : 
    3168             :   // If the two computed results can be simplified separately, separate them. At most one half is used from here on.
    3169         910 :   if (LoExists) {
    3170          35 :     SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); // Speculative single-result node, kept only if combine() changes it.
    3171           7 :     AddToWorklist(Lo.getNode());
    3172           7 :     SDValue LoOpt = combine(Lo.getNode());
    3173           7 :     if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
    3174           0 :         (!LegalOperations ||
    3175           0 :          TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
    3176           0 :       return CombineTo(N, LoOpt, LoOpt);
    3177             :   }
    3178             : 
    3179         910 :   if (HiExists) {
    3180        4515 :     SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); // Speculative single-result node, kept only if combine() changes it.
    3181         903 :     AddToWorklist(Hi.getNode());
    3182         903 :     SDValue HiOpt = combine(Hi.getNode());
    3183        1671 :     if (HiOpt.getNode() && HiOpt != Hi && // Compares whole SDValues, whereas the Lo path above compares node pointers.
    3184         384 :         (!LegalOperations ||
    3185        1152 :          TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
    3186         384 :       return CombineTo(N, HiOpt, HiOpt);
    3187             :   }
    3188             : 
    3189         526 :   return SDValue();
    3190             : }
    3191             : /// Combine SMUL_LOHI: try SimplifyNodeWithTwoResults (MUL/MULHS), else use a legal double-width MUL.
    3192         713 : SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
    3193         713 :   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
    3194         323 :     return Res;
    3195             : 
    3196         780 :   EVT VT = N->getValueType(0);
    3197         390 :   SDLoc DL(N);
    3198             : 
    3199             :   // If a multiply of twice the width is legal, transform the smul_lohi into
    3200             :   // a wider multiply of the sign-extended operands plus a shift.
    3201         780 :   if (VT.isSimple() && !VT.isVector()) {
    3202         390 :     MVT Simple = VT.getSimpleVT();
    3203         390 :     unsigned SimpleSize = Simple.getSizeInBits();
    3204         390 :     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    3205         390 :     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
    3206           0 :       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
    3207           0 :       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
    3208           0 :       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
    3209             :       // High part (second result): shift the upper half down; SRL suffices since the truncate keeps only SimpleSize bits.
    3210           0 :       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
    3211             :             DAG.getConstant(SimpleSize, DL,
    3212           0 :                             getShiftAmountTy(Lo.getValueType())));
    3213           0 :       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
    3214             :       // Low part (first result): truncate the wide product back to VT.
    3215           0 :       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
    3216           0 :       return CombineTo(N, Lo, Hi);
    3217             :     }
    3218             :   }
    3219             : 
    3220         390 :   return SDValue();
    3221             : }
    3222             : /// Combine UMUL_LOHI: try SimplifyNodeWithTwoResults (MUL/MULHU), else use a legal double-width MUL.
    3223        1723 : SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
    3224        1723 :   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
    3225         474 :     return Res;
    3226             : 
    3227        2498 :   EVT VT = N->getValueType(0);
    3228        1249 :   SDLoc DL(N);
    3229             : 
    3230             :   // If a multiply of twice the width is legal, transform the umul_lohi into
    3231             :   // a wider multiply of the zero-extended operands plus a shift.
    3232        2498 :   if (VT.isSimple() && !VT.isVector()) {
    3233        1249 :     MVT Simple = VT.getSimpleVT();
    3234        1249 :     unsigned SimpleSize = Simple.getSizeInBits();
    3235        1249 :     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    3236        1249 :     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
    3237           0 :       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
    3238           0 :       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
    3239           0 :       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
    3240             :       // High part (second result): shift the upper half down, then truncate.
    3241           0 :       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
    3242             :             DAG.getConstant(SimpleSize, DL,
    3243           0 :                             getShiftAmountTy(Lo.getValueType())));
    3244           0 :       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
    3245             :       // Low part (first result): truncate the wide product back to VT.
    3246           0 :       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
    3247           0 :       return CombineTo(N, Lo, Hi);
    3248             :     }
    3249             :   }
    3250             : 
    3251        1249 :   return SDValue();
    3252             : }
    3253             : /// Combine SMULO; the only fold is multiply-by-2 into SADDO, otherwise an empty SDValue is returned.
    3254          78 : SDValue DAGCombiner::visitSMULO(SDNode *N) {
    3255             :   // (smulo x, 2) -> (saddo x, x); only a constant in operand 1 is matched.
    3256         163 :   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
    3257           7 :     if (C2->getAPIntValue() == 2)
    3258           9 :       return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
    3259          15 :                          N->getOperand(0), N->getOperand(0));
    3260             : 
    3261          75 :   return SDValue();
    3262             : }
    3263             : /// Combine UMULO; the only fold is multiply-by-2 into UADDO, otherwise an empty SDValue is returned.
    3264         117 : SDValue DAGCombiner::visitUMULO(SDNode *N) {
    3265             :   // (umulo x, 2) -> (uaddo x, x); only a constant in operand 1 is matched.
    3266         286 :   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
    3267          52 :     if (C2->getAPIntValue() == 2)
    3268          12 :       return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
    3269          20 :                          N->getOperand(0), N->getOperand(0));
    3270             : 
    3271         113 :   return SDValue();
    3272             : }
    3273             : /// Combine the two-operand node N: vector simplification, constant folding, then constant-to-RHS canonicalization.
    3274       10976 : SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
    3275       21952 :   SDValue N0 = N->getOperand(0);
    3276       21952 :   SDValue N1 = N->getOperand(1);
    3277       21952 :   EVT VT = N0.getValueType();
    3278             : 
    3279             :   // fold vector ops
    3280       10976 :   if (VT.isVector())
    3281        6825 :     if (SDValue FoldedVOp = SimplifyVBinOp(N))
    3282         252 :       return FoldedVOp;
    3283             : 
    3284             :   // fold operation with constant operands (opaque constants are not folded).
    3285       21448 :   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
    3286       21448 :   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
    3287       10724 :   if (N0C && N1C)
    3288           0 :     return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
    3289             : 
    3290             :   // canonicalize constant to RHS by rebuilding the node with swapped operands
    3291       10734 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
    3292          10 :      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    3293          50 :     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
    3294             : 
    3295       10714 :   return SDValue();
    3296             : }
    3297             : 
    3298             : /// If this is a binary operator with two operands of the same opcode, try to
    3299             : /// simplify it. Returns the replacement value, or an empty SDValue if no fold applies.
    3300       33552 : SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
    3301      100656 :   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
    3302       67104 :   EVT VT = N0.getValueType();
    3303             :   assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
    3304             : 
    3305             :   // Bail early if N0 has no operands: every transform below inspects N0.getOperand(0).
    3306       67104 :   if (N0.getNumOperands() == 0) return SDValue();
    3307             : 
    3308             :   // For each of OP in AND/OR/XOR:
    3309             :   // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
    3310             :   // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
    3311             :   // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
    3312             :   // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
    3313             :   // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
    3314             :   //
    3315             :   // do not sink logical op inside of a vector extend, since it may combine
    3316             :   // into a vsetcc.
    3317      100623 :   EVT Op0VT = N0.getOperand(0).getValueType();
    3318       67011 :   if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
    3319       66907 :        N0.getOpcode() == ISD::SIGN_EXTEND ||
    3320       66874 :        N0.getOpcode() == ISD::BSWAP ||
    3321             :        // Avoid infinite looping with PromoteIntBinOp.
    3322       33516 :        (N0.getOpcode() == ISD::ANY_EXTEND &&
    3323       33664 :         (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
    3324       34073 :        (N0.getOpcode() == ISD::TRUNCATE &&
    3325         809 :         (!TLI.isZExtFree(VT, Op0VT) ||
    3326         163 :          !TLI.isTruncateFree(Op0VT, VT)) &&
    3327        1066 :         TLI.isTypeLegal(Op0VT))) &&
    3328         583 :       !VT.isVector() &&
    3329       35544 :       Op0VT == N1.getOperand(0).getValueType() &&
    3330         665 :       (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
    3331        1473 :     SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
    3332         982 :                                  N0.getOperand(0).getValueType(),
    3333        3437 :                                  N0.getOperand(0), N1.getOperand(0));
    3334         491 :     AddToWorklist(ORNode.getNode());
    3335        2455 :     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
    3336             :   }
    3337             : 
    3338             :   // For each of OP in SHL/SRL/SRA/AND...
    3339             :   //   fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
    3340             :   //   fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
    3341             :   //   fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
    3342      131524 :   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
    3343      131300 :        N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
    3344       14112 :       N0.getOperand(1) == N1.getOperand(1)) {
    3345         297 :     SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
    3346         198 :                                  N0.getOperand(0).getValueType(),
    3347         693 :                                  N0.getOperand(0), N1.getOperand(0));
    3348          99 :     AddToWorklist(ORNode.getNode());
    3349         297 :     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
    3350         495 :                        ORNode, N0.getOperand(1));
    3351             :   }
    3352             : 
    3353             :   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
    3354             :   // Only perform this optimization up until type legalization, before
    3355             :   // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
    3356             :   // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
    3357             :   // we don't want to undo this promotion.
    3358             :   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
    3359             :   // on scalars.
    3360       49868 :   if ((N0.getOpcode() == ISD::BITCAST ||
    3361       65906 :        N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
    3362       16038 :        Level <= AfterLegalizeTypes) {
    3363         938 :     SDValue In0 = N0.getOperand(0);
    3364         938 :     SDValue In1 = N1.getOperand(0);
    3365         938 :     EVT In0Ty = In0.getValueType();
    3366         938 :     EVT In1Ty = In1.getValueType();
    3367         826 :     SDLoc DL(N);
    3368             :     // If both incoming values are integers, and the original types are the
    3369             :     // same.
    3370         499 :     if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
    3371         336 :       SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
    3372         336 :       SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
    3373         112 :       AddToWorklist(Op.getNode());
    3374         112 :       return BC;
    3375             :     }
    3376             :   }
    3377             : 
    3378             :   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
    3379             :   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
    3380             :   // If both shuffles use the same mask, and both shuffle within a single
    3381             :   // vector, then it is worthwhile to move the swizzle after the operation.
    3382             :   // The type-legalizer generates this pattern when loading illegal
    3383             :   // vector types from memory. In many cases this allows additional shuffle
    3384             :   // optimizations.
    3385             :   // There are other cases where moving the shuffle after the xor/and/or
    3386             :   // is profitable even if shuffles don't perform a swizzle.
    3387             :   // If both shuffles use the same mask, and both shuffles have the same first
    3388             :   // or second operand, then it might still be profitable to move the shuffle
    3389             :   // after the xor/and/or operation.
    3390       65678 :   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
    3391         172 :     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
    3392         172 :     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
    3393             : 
    3394             :     assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
    3395             :            "Inputs to shuffles are not the same type");
    3396             : 
    3397             :     // Check that both shuffles use the same mask. The masks are known to be of
    3398             :     // the same length because the result vector type is the same.
    3399             :     // Check also that shuffles have only one use to avoid introducing extra
    3400             :     // instructions.
    3401         592 :     if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
    3402         314 :         SVN0->getMask().equals(SVN1->getMask())) {
    3403         212 :       SDValue ShOp = N0->getOperand(1);
    3404             :       // For XOR the shared operand cancels (C ^ C == 0), so a non-undef ShOp is replaced by a zero constant.
    3405             :       // Don't try to fold this node if it requires introducing a
    3406             :       // build vector of all zeros that might be illegal at this stage.
    3407         138 :       if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
    3408          22 :         if (!LegalTypes)
    3409          60 :           ShOp = DAG.getConstant(0, SDLoc(N), VT);
    3410             :         else
    3411           2 :           ShOp = SDValue();
    3412             :       }
    3413             : 
    3414             :       // (AND (shuf A, C), (shuf B, C)) -> (shuf (AND A, B), C)
    3415             :       // (OR  (shuf A, C), (shuf B, C)) -> (shuf (OR  A, B), C)
    3416             :       // (XOR (shuf A, C), (shuf B, C)) -> (shuf (XOR A, B), V_0)
    3417         380 :       if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
    3418         186 :         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
    3419         372 :                                       N0->getOperand(0), N1->getOperand(0));
    3420          62 :         AddToWorklist(NewNode.getNode());
    3421         186 :         return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
    3422         124 :                                     SVN0->getMask());
    3423             :       }
    3424             : 
    3425             :       // Same check for a shared first operand: don't try to fold this node if
    3426             :       // it requires a build vector of all zeros that might be illegal at this stage.
    3427          88 :       ShOp = N0->getOperand(0);
    3428          68 :       if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
    3429          12 :         if (!LegalTypes)
    3430          30 :           ShOp = DAG.getConstant(0, SDLoc(N), VT);
    3431             :         else
    3432           2 :           ShOp = SDValue();
    3433             :       }
    3434             : 
    3435             :       // (AND (shuf C, A), (shuf C, B)) -> (shuf C, (AND A, B))
    3436             :       // (OR  (shuf C, A), (shuf C, B)) -> (shuf C, (OR  A, B))
    3437             :       // (XOR (shuf C, A), (shuf C, B)) -> (shuf V_0, (XOR A, B))
    3438         162 :       if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
    3439          90 :         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
    3440         180 :                                       N0->getOperand(1), N1->getOperand(1));
    3441          30 :         AddToWorklist(NewNode.getNode());
    3442          90 :         return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
    3443          60 :                                     SVN0->getMask());
    3444             :       }
    3445             :     }
    3446             :   }
    3447             : 
    3448       32747 :   return SDValue();
    3449             : }
    3450             : 
    3451             : /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient. IsAnd selects the 'and' folds (else 'or'); returns an empty SDValue if nothing applies.
    3452      267106 : SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
    3453             :                                        const SDLoc &DL) {
    3454      267106 :   SDValue LL, LR, RL, RR, N0CC, N1CC;
    3455      305532 :   if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
    3456       38426 :       !isSetCCEquivalent(N1, RL, RR, N1CC))
    3457      265141 :     return SDValue();
    3458             : 
    3459             :   assert(N0.getValueType() == N1.getValueType() &&
    3460             :          "Unexpected operand types for bitwise logic op");
    3461             :   assert(LL.getValueType() == LR.getValueType() &&
    3462             :          RL.getValueType() == RR.getValueType() &&
    3463             :          "Unexpected operand types for setcc");
    3464             : 
    3465             :   // If we're here post-legalization or the logic op type is not i1, the logic
    3466             :   // op type must match a setcc result type. Also, all folds require new
    3467             :   // operations on the left and right operands, so those types must match.
    3468        3930 :   EVT VT = N0.getValueType();
    3469        3930 :   EVT OpVT = LL.getValueType();
    3470        3507 :   if (LegalOperations || VT != MVT::i1)
    3471        1388 :     if (VT != getSetCCResultType(OpVT))
    3472         340 :       return SDValue();
    3473        3250 :   if (OpVT != RL.getValueType())
    3474          69 :     return SDValue();
    3475             : 
    3476        1556 :   ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
    3477        1556 :   ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
    3478        1556 :   bool IsInteger = OpVT.isInteger();
    3479         416 :   if (LR == RR && CC0 == CC1 && IsInteger) {
    3480         225 :     bool IsZero = isNullConstantOrNullSplatConstant(LR);
    3481         225 :     bool IsNeg1 = isAllOnesConstantOrAllOnesSplatConstant(LR);
    3482             : 
    3483             :     // All bits clear?
    3484         225 :     bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
    3485             :     // All sign bits clear?
    3486         225 :     bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
    3487             :     // Any bits set?
    3488         225 :     bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
    3489             :     // Any sign bits set?
    3490         225 :     bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
    3491             : 
    3492             :     // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
    3493             :     // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
    3494             :     // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
    3495             :     // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
    3496         225 :     if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
    3497         220 :       SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
    3498          55 :       AddToWorklist(Or.getNode());
    3499          55 :       return DAG.getSetCC(DL, VT, Or, LR, CC1);
    3500             :     }
    3501             : 
    3502             :     // All bits set?
    3503         170 :     bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
    3504             :     // All sign bits set?
    3505         170 :     bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
    3506             :     // Any bits clear?
    3507         170 :     bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
    3508             :     // Any sign bits clear?
    3509         170 :     bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
    3510             : 
    3511             :     // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
    3512             :     // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
    3513             :     // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
    3514             :     // (or  (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
    3515         170 :     if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
    3516          88 :       SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
    3517          22 :       AddToWorklist(And.getNode());
    3518          22 :       return DAG.getSetCC(DL, VT, And, LR, CC1);
    3519             :     }
    3520             :   }
    3521             : 
    3522             :   // TODO: What is the 'or' equivalent of this fold?
    3523             :   // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
    3524        1699 :   if (IsAnd && LL == RL && CC0 == CC1 && IsInteger && CC0 == ISD::SETNE &&
    3525          52 :       ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
    3526          29 :        (isAllOnesConstant(LR) && isNullConstant(RR)))) {
    3527           3 :     SDValue One = DAG.getConstant(1, DL, OpVT);
    3528           3 :     SDValue Two = DAG.getConstant(2, DL, OpVT);
    3529          12 :     SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
    3530           3 :     AddToWorklist(Add.getNode());
    3531           3 :     return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
    3532             :   }
    3533             : 
    3534             :   // Try more general transforms if the predicates match and the only user of
    3535             :   // the compares is the 'and' or 'or'.
    3536        1185 :   if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
    3537        2220 :       N0.hasOneUse() && N1.hasOneUse()) {
    3538             :     // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
    3539             :     // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
    3540         248 :     if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
    3541         308 :       SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
    3542         308 :       SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
    3543         154 :       SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
    3544          77 :       SDValue Zero = DAG.getConstant(0, DL, OpVT);
    3545          77 :       return DAG.getSetCC(DL, VT, Or, Zero, CC1);
    3546             :     }
    3547             :   }
    3548             : 
    3549             :   // Canonicalize equivalent operands to LL == RL by swapping the right-hand compare's operands and predicate.
    3550          21 :   if (LL == RR && LR == RL) {
    3551           0 :     CC1 = ISD::getSetCCSwappedOperands(CC1);
    3552             :     std::swap(RL, RR);
    3553             :   }
    3554             : 
    3555             :   // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
    3556             :   // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
    3557         419 :   if (LL == RL && LR == RR) {
    3558         107 :     ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
    3559         107 :                                 : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
    3560         135 :     if (NewCC != ISD::SETCC_INVALID &&
    3561          65 :         (!LegalOperations ||
    3562          66 :          (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
    3563           0 :           TLI.isOperationLegal(ISD::SETCC, OpVT))))
    3564          32 :       return DAG.getSetCC(DL, VT, LL, LR, NewCC);
    3565             :   }
    3566             : 
    3567        1367 :   return SDValue();
    3568             : }
    3569             : 
    3570             : /// This contains all DAGCombine rules which reduce two values combined by
    3571             : /// an And operation to a single value. This makes them reusable in the context
    3572             : /// of visitSELECT(). Rules involving constants are not included as
    3573             : /// visitSELECT() already handles those cases. Returns an empty SDValue if no rule applies.
    3574      168387 : SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
    3575      336774 :   EVT VT = N1.getValueType();
    3576      336774 :   SDLoc DL(N);
    3577             : 
    3578             :   // fold (and x, undef) -> 0
    3579      505160 :   if (N0.isUndef() || N1.isUndef())
    3580           1 :     return DAG.getConstant(0, DL, VT);
    3581             : 
    3582      168386 :   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
    3583          71 :     return V;
    3584             :   // NOTE(review): the 64-bit cap presumably exists because ADDC.getSExtValue() is used below - confirm.
    3585      340649 :   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
    3586           5 :       VT.getSizeInBits() <= 64) {
    3587          15 :     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
    3588          13 :       APInt ADDC = ADDI->getAPIntValue();
    3589          10 :       if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
    3590             :         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
    3591             :         // immediate for an add, but it is legal if its top c2 bits are set,
    3592             :         // transform the ADD so the immediate doesn't need to be materialized
    3593             :         // in a register.
    3594           6 :         if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
    3595             :           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
    3596           2 :                                              SRLI->getZExtValue());
    3597           4 :           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
    3598           2 :             ADDC |= Mask;
    3599           4 :             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
    3600           4 :               SDLoc DL0(N0);
    3601             :               SDValue NewAdd =
    3602           2 :                 DAG.getNode(ISD::ADD, DL0, VT,
    3603           4 :                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
    3604           4 :               CombineTo(N0.getNode(), NewAdd);
    3605             :               // Return N so it doesn't get rechecked!
    3606           2 :               return SDValue(N, 0);
    3607             :             }
    3608             :           }
    3609             :         }
    3610             :       }
    3611             :     }
    3612             :   }
    3613             : 
    3614             :   // Reduce bit extract of low half of an integer to the narrower type.
    3615             :   // (and (srl i64:x, K), KMask) ->
    3616             :   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K), KMask))
    3617      362367 :   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    3618       20463 :     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
    3619       58432 :       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
    3620       17506 :         unsigned Size = VT.getSizeInBits();
    3621       17506 :         const APInt &AndMask = CAnd->getAPIntValue();
    3622       17506 :         unsigned ShiftBits = CShift->getZExtValue();
    3623             : 
    3624             :         // Bail out, this node will probably disappear anyway.
    3625       17506 :         if (ShiftBits == 0)
    3626           2 :           return SDValue();
    3627             : 
    3628       17504 :         unsigned MaskBits = AndMask.countTrailingOnes();
    3629       17504 :         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
    3630             : 
    3631       33784 :         if (AndMask.isMask() &&
    3632             :             // Required bits must not span the two halves of the integer and
    3633             :             // must fit in the half size type.
    3634       26856 :             (ShiftBits + MaskBits <= Size / 2) &&
    3635       10696 :             TLI.isNarrowingProfitable(VT, HalfVT) &&
    3636         212 :             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
    3637         184 :             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
    3638       17688 :             TLI.isTruncateFree(VT, HalfVT) &&
    3639          92 :             TLI.isZExtFree(HalfVT, VT)) {
    3640             :           // The isNarrowingProfitable is to avoid regressions on PPC and
    3641             :           // AArch64 which match a few 64-bit bit insert / bit extract patterns
    3642             :           // on downstream users of this. Those patterns could probably be
    3643             :           // extended to handle extensions mixed in.
    3644             : 
    3645          86 :           SDValue SL(N0); // NOTE(review): used only as the location argument below; SDLoc(N0) may be the intent - confirm.
    3646             :           assert(MaskBits <= Size);
    3647             : 
    3648             :           // Build the narrowed form: truncate, shift and mask in HalfVT, then zero-extend back to VT.
    3649         172 :           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
    3650          86 :           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
    3651         344 :                                       N0.getOperand(0));
    3652             : 
    3653         344 :           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
    3654         258 :           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
    3655         344 :           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
    3656         344 :           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
    3657         344 :           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
    3658             :         }
    3659             :       }
    3660             :     }
    3661             :   }
    3662             : 
    3663      168225 :   return SDValue();
    3664             : }
    3665             : /// Returns true if masking LoadN's result with AndC can be done by a ZEXTLOAD of type ExtVT; NarrowLoad reports whether the loaded width must change.
    3666       11778 : bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
    3667             :                                    EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
    3668             :                                    bool &NarrowLoad) {
    3669       35334 :   uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits();
    3670             :   // Reject a zero constant and any constant that fails isMask(ActiveBits).
    3671       23556 :   if (ActiveBits == 0 || !AndC->getAPIntValue().isMask(ActiveBits))
    3672             :     return false;
    3673             :   // Note: ExtVT and LoadedVT are written here even if a later check returns false; NarrowLoad is set only on success.
    3674        7017 :   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
    3675        7017 :   LoadedVT = LoadN->getMemoryVT();
    3676             : 
    3677        8135 :   if (ExtVT == LoadedVT &&
    3678        1812 :       (!LegalOperations ||
    3679        1652 :        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
    3680             :     // ZEXTLOAD will match without needing to change the size of the value being
    3681             :     // loaded.
    3682         160 :     NarrowLoad = false;
    3683             :     return true;
    3684             :   }
    3685             : 
    3686             :   // Do not change the width of a volatile load.
    3687       13714 :   if (LoadN->isVolatile())
    3688             :     return false;
    3689             : 
    3690             :   // Do not generate loads of non-round integer types since these can
    3691             :   // be expensive (and would be wrong if the type is not byte sized).
    3692       10638 :   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
    3693             :     return false;
    3694             : 
    3695        7598 :   if (LegalOperations &&
    3696        7434 :       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
    3697             :     return false;
    3698             : 
    3699         255 :   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
    3700             :     return false;
    3701             : 
    3702          26 :   NarrowLoad = true;
    3703             :   return true;
    3704             : }
    3705             : 
    3706      201432 : SDValue DAGCombiner::visitAND(SDNode *N) {
                         // Try the AND folds below in order and return as soon as one applies.
                         // The return value is one of:
                         //   - a value (existing or newly built) that replaces N,
                         //   - SDValue(N, 0) when the replacement has already been carried out
                         //     through CombineTo / SimplifyDemandedBits,
                         //   - an empty SDValue() when no fold matched.
    3707      402864 :   SDValue N0 = N->getOperand(0);
    3708      402864 :   SDValue N1 = N->getOperand(1);
    3709      402864 :   EVT VT = N1.getValueType();
    3710             : 
    3711             :   // x & x --> x
    3712      201432 :   if (N0 == N1)
    3713          12 :     return N0;
    3714             : 
    3715             :   // fold vector ops
    3716      201420 :   if (VT.isVector()) {
    3717       30882 :     if (SDValue FoldedVOp = SimplifyVBinOp(N))
    3718         954 :       return FoldedVOp;
    3719             : 
    3720             :     // fold (and x, 0) -> 0, vector edition
    3721       29928 :     if (ISD::isBuildVectorAllZeros(N0.getNode()))
    3722             :       // do not return N0, because undef node may exist in N0
    3723         153 :       return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
    3724         255 :                              SDLoc(N), N0.getValueType());
    3725       29877 :     if (ISD::isBuildVectorAllZeros(N1.getNode()))
    3726             :       // do not return N1, because undef node may exist in N1
    3727           3 :       return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
    3728           5 :                              SDLoc(N), N1.getValueType());
    3729             : 
    3730             :     // fold (and x, -1) -> x, vector edition
    3731       29876 :     if (ISD::isBuildVectorAllOnes(N0.getNode()))
    3732          30 :       return N1;
    3733       29846 :     if (ISD::isBuildVectorAllOnes(N1.getNode()))
    3734           1 :       return N0;
    3735             :   }
    3736             : 
    3737             :   // fold (and c1, c2) -> c1&c2
    3738      400766 :   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
    3739      200383 :   ConstantSDNode *N1C = isConstOrConstSplat(N1);
    3740      200397 :   if (N0C && N1C && !N1C->isOpaque())
    3741          42 :     return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
    3742             :   // canonicalize constant to RHS
    3743      201278 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
    3744         909 :      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    3745        3620 :     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
    3746             :   // fold (and x, -1) -> x
    3747      199464 :   if (isAllOnesConstant(N1))
    3748           8 :     return N0;
    3749             :   // if (and x, c) is known to be zero, return 0
                         // The query uses an all-ones mask, so it asks whether every bit of the AND
                         // result itself is known to be zero.
    3750      199456 :   unsigned BitWidth = VT.getScalarSizeInBits();
    3751      728416 :   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
    3752      563639 :                                    APInt::getAllOnesValue(BitWidth)))
    3753          75 :     return DAG.getConstant(0, SDLoc(N), VT);
    3754             : 
    3755      199431 :   if (SDValue NewSel = foldBinOpIntoSelect(N))
    3756          27 :     return NewSel;
    3757             : 
    3758             :   // reassociate and
    3759      398808 :   if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
    3760        1841 :     return RAND;
    3761             :   // fold (and (or x, C), D) -> D if (C & D) == D
    3762      360453 :   if (N1C && N0.getOpcode() == ISD::OR)
    3763       17358 :     if (ConstantSDNode *ORI = isConstOrConstSplat(N0.getOperand(1)))
    3764       22700 :       if (N1C->getAPIntValue().isSubsetOf(ORI->getAPIntValue()))
    3765        2252 :         return N1;
    3766             :   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
    3767      355949 :   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    3768       53630 :     SDValue N0Op0 = N0.getOperand(0);
                           // Mask holds the bits the AND clears, truncated to the width of the
                           // value that was extended; the fold needs all of them to be known zero.
    3769      135967 :     APInt Mask = ~N1C->getAPIntValue();
    3770       80445 :     Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
    3771       26815 :     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
    3772       74769 :       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
    3773       99692 :                                  N0.getValueType(), N0Op0);
    3774             : 
    3775             :       // Replace uses of the AND with uses of the Zero extend node.
    3776       24923 :       CombineTo(N, Zext);
    3777             : 
    3778             :       // We actually want to replace all uses of the any_extend with the
    3779             :       // zero_extend, to avoid duplicating things.  This will later cause this
    3780             :       // AND to be folded.
    3781       49846 :       CombineTo(N0.getNode(), Zext);
    3782       24923 :       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    3783             :     }
    3784             :   }
    3785             :   // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
    3786             :   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
    3787             :   // already be zero by virtue of the width of the base type of the load.
    3788             :   //
    3789             :   // the 'X' node here can either be nothing or an extract_vector_elt to catch
    3790             :   // more cases.
    3791      178164 :   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
    3792       22466 :        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
    3793       16712 :        N0.getOperand(0).getOpcode() == ISD::LOAD &&
    3794      343660 :        N0.getOperand(0).getResNo() == 0) ||
    3795      189798 :       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
    3796       53238 :     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
    3797       25177 :                                          N0 : N0.getOperand(0) );
    3798             : 
    3799             :     // Get the constant (if applicable) the zero'th operand is being ANDed with.
    3800             :     // This can be a pure constant or a vector splat, in which case we treat the
    3801             :     // vector as a scalar and use the splat value.
                           // Constant starts out as a 1-bit zero. If N1 turns out to be neither a
                           // ConstantSDNode nor a usable constant splat it keeps that value, and the
                           // all-ones test further down can then never succeed.
    3802       48485 :     APInt Constant = APInt::getNullValue(1);
    3803       21053 :     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
    3804       21053 :       Constant = C->getAPIntValue();
    3805        1121 :     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
    3806        4484 :       APInt SplatValue, SplatUndef;
    3807             :       unsigned SplatBitSize;
    3808             :       bool HasAnyUndefs;
    3809             :       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
    3810        1121 :                                              SplatBitSize, HasAnyUndefs);
    3811        1121 :       if (IsSplat) {
    3812             :         // Undef bits can contribute to a possible optimisation if set, so
    3813             :         // set them.
    3814        1121 :         SplatValue |= SplatUndef;
    3815             : 
    3816             :         // The splat value may be something like "0x00FFFFFF", which means 0 for
    3817             :         // the first vector value and FF for the rest, repeating. We need a mask
    3818             :         // that will apply equally to all members of the vector, so AND all the
    3819             :         // lanes of the constant together.
                               // NOTE: this VT and BitWidth shadow the function-level variables of
                               // the same names; inside this scope they describe the build_vector.
    3820        2242 :         EVT VT = Vector->getValueType(0);
    3821        1121 :         unsigned BitWidth = VT.getScalarSizeInBits();
    3822             : 
    3823             :         // If the splat value has been compressed to a bitlength lower
    3824             :         // than the size of the vector lane, we need to re-expand it to
    3825             :         // the lane size.
    3826        1121 :         if (BitWidth > SplatBitSize)
    3827           4 :           for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
    3828           2 :                SplatBitSize < BitWidth;
    3829           1 :                SplatBitSize = SplatBitSize * 2)
    3830           3 :             SplatValue |= SplatValue.shl(SplatBitSize);
    3831             : 
    3832             :         // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
    3833             :         // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
    3834        1121 :         if (SplatBitSize % BitWidth == 0) {
    3835        3360 :           Constant = APInt::getAllOnesValue(BitWidth);
    3836        2292 :           for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
    3837        4688 :             Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
    3838             :         }
    3839             :       }
    3840             :     }
    3841             : 
    3842             :     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
    3843             :     // actually legal and isn't going to get expanded, else this is a false
    3844             :     // optimisation.
    3845       50354 :     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
    3846             :                                                     Load->getValueType(0),
    3847       50354 :                                                     Load->getMemoryVT());
    3848             : 
    3849             :     // Resize the constant to the same size as the original memory access before
    3850             :     // extension. If it is still the AllOnesValue then this AND is completely
    3851             :     // unneeded.
    3852      100708 :     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
    3853             : 
                           // B records whether the load's extension kind allows the AND to be
                           // dropped: always for ZEXTLOAD and NON_EXTLOAD, only when a ZEXTLOAD is
                           // legal for EXTLOAD, and never for any other kind.
    3854             :     bool B;
    3855       25177 :     switch (Load->getExtensionType()) {
    3856             :     default: B = false; break;
    3857        5049 :     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
    3858             :     case ISD::ZEXTLOAD:
    3859             :     case ISD::NON_EXTLOAD: B = true; break;
    3860             :     }
    3861             : 
    3862       26583 :     if (B && Constant.isAllOnesValue()) {
    3863             :       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
    3864             :       // preserve semantics once we get rid of the AND.
    3865        1869 :       SDValue NewLoad(Load, 0);
    3866             : 
    3867             :       // Fold the AND away. NewLoad may get replaced immediately.
                             // When N0 is not the load itself (the extract_vector_elt form), N is
                             // replaced by N0 rather than by the load.
    3868        3738 :       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
    3869             : 
    3870        1869 :       if (Load->getExtensionType() == ISD::EXTLOAD) {
    3871        2384 :         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
    3872        2384 :                               Load->getValueType(0), SDLoc(Load),
    3873        2384 :                               Load->getChain(), Load->getBasePtr(),
    3874        1192 :                               Load->getOffset(), Load->getMemoryVT(),
    3875        4768 :                               Load->getMemOperand());
    3876             :         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
    3877        1192 :         if (Load->getNumValues() == 3) {
    3878             :           // PRE/POST_INC loads have 3 values.
    3879             :           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
    3880           0 :                            NewLoad.getValue(2) };
    3881           0 :           CombineTo(Load, To, 3, true);
    3882             :         } else {
    3883        3576 :           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
    3884             :         }
    3885             :       }
    3886             : 
    3887        1869 :       return SDValue(N, 0); // Return N so it doesn't get rechecked!
    3888             :     }
    3889             :   }
    3890             : 
    3891             :   // fold (and (load x), 255) -> (zextload x, i8)
    3892             :   // fold (and (extload x, i16), 255) -> (zextload x, i8)
    3893             :   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
    3894      416647 :   if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
    3895      117516 :                                 (N0.getOpcode() == ISD::ANY_EXTEND &&
    3896        3732 :                                  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
    3897       33848 :     bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
    3898             :     LoadSDNode *LN0 = HasAnyExt
    3899       17116 :       ? cast<LoadSDNode>(N0.getOperand(0))
    3900       16924 :       : cast<LoadSDNode>(N0);
                           // Only unindexed, non-sign-extending loads are considered, and both N0
                           // and the load's value result must have a single use; LN0 is replaced
                           // by the new load below.
    3901       33591 :     if (LN0->getExtensionType() != ISD::SEXTLOAD &&
    3902       78759 :         LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
    3903       11774 :       auto NarrowLoad = false;
    3904       11804 :       EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
    3905       11774 :       EVT ExtVT, LoadedVT;
    3906       11774 :       if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT,
    3907             :                            NarrowLoad)) {
    3908         185 :         if (!NarrowLoad) {
                                 // Same memory width: rebuild the load as a ZEXTLOAD of ExtVT.
    3909             :           SDValue NewLoad =
    3910         480 :             DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
    3911         320 :                            LN0->getChain(), LN0->getBasePtr(), ExtVT,
    3912         320 :                            LN0->getMemOperand());
    3913         160 :           AddToWorklist(N);
    3914         480 :           CombineTo(LN0, NewLoad, NewLoad.getValue(1));
    3915         160 :           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    3916             :         } else {
                                 // Narrower memory width: build a ZEXTLOAD of ExtVT from a possibly
                                 // adjusted pointer and alignment.
    3917          75 :           EVT PtrType = LN0->getOperand(1).getValueType();
    3918             : 
    3919          50 :           unsigned Alignment = LN0->getAlignment();
    3920          25 :           SDValue NewPtr = LN0->getBasePtr();
    3921             : 
    3922             :           // For big endian targets, we need to add an offset to the pointer
    3923             :           // to load the correct bytes.  For little endian systems, we merely
    3924             :           // need to read fewer bytes from the same pointer.
    3925          50 :           if (DAG.getDataLayout().isBigEndian()) {
    3926           4 :             unsigned LVTStoreBytes = LoadedVT.getStoreSize();
    3927           4 :             unsigned EVTStoreBytes = ExtVT.getStoreSize();
    3928           4 :             unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
    3929           8 :             SDLoc DL(LN0);
    3930           8 :             NewPtr = DAG.getNode(ISD::ADD, DL, PtrType,
    3931           8 :                                  NewPtr, DAG.getConstant(PtrOff, DL, PtrType));
    3932           8 :             Alignment = MinAlign(Alignment, PtrOff);
    3933             :           }
    3934             : 
    3935          25 :           AddToWorklist(NewPtr.getNode());
    3936             : 
    3937          25 :           SDValue Load = DAG.getExtLoad(
    3938         100 :               ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), NewPtr,
    3939          25 :               LN0->getPointerInfo(), ExtVT, Alignment,
    3940         100 :               LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
    3941          25 :           AddToWorklist(N);
    3942          75 :           CombineTo(LN0, Load, Load.getValue(1));
    3943          25 :           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    3944             :         }
    3945             :       }
    3946             :     }
    3947             :   }
    3948             : 
    3949      168334 :   if (SDValue Combined = visitANDLike(N0, N1, N))
    3950         158 :     return Combined;
    3951             : 
    3952             :   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
    3953      504528 :   if (N0.getOpcode() == N1.getOpcode())
    3954       14458 :     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
    3955         206 :       return Tmp;
    3956             : 
    3957             :   // Masking the negated extension of a boolean is just the zero-extended
    3958             :   // boolean:
    3959             :   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
    3960             :   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
    3961             :   //
    3962             :   // Note: the SimplifyDemandedBits fold below can make an information-losing
    3963             :   // transform, and then we have no way to find this better fold.
    3964      367074 :   if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
    3965          56 :     if (isNullConstantOrNullSplatConstant(N0.getOperand(0))) {
    3966          12 :       SDValue SubRHS = N0.getOperand(1);
    3967          15 :       if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
    3968           6 :           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
    3969           3 :         return SubRHS;
    3970           9 :       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
    3971           6 :           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
    3972          15 :         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
    3973             :     }
    3974             :   }
    3975             : 
    3976             :   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
    3977             :   // fold (and (sra)) -> (and (srl)) when possible.
    3978      167964 :   if (SimplifyDemandedBits(SDValue(N, 0)))
    3979       51177 :     return SDValue(N, 0);
    3980             : 
    3981             :   // fold (zext_inreg (extload x)) -> (zextload x)
    3982      122483 :   if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
    3983        2848 :     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    3984        2848 :     EVT MemVT = LN0->getMemoryVT();
    3985             :     // If we zero all the possible extended bits, then we can turn this into
    3986             :     // a zextload if we are running before legalize or the operation is legal.
                           // NOTE: this BitWidth shadows the function-level one and is derived from
                           // N1 instead of VT.
    3987        2848 :     unsigned BitWidth = N1.getScalarValueSizeInBits();
    3988        8544 :     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
    3989        8491 :                            BitWidth - MemVT.getScalarSizeInBits())) &&
    3990        6260 :         ((!LegalOperations && !LN0->isVolatile()) ||
    3991        4250 :          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
    3992        2799 :       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
    3993        1866 :                                        LN0->getChain(), LN0->getBasePtr(),
    3994        1866 :                                        MemVT, LN0->getMemOperand());
    3995         933 :       AddToWorklist(N);
    3996        2799 :       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    3997         933 :       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    3998             :     }
    3999             :   }
    4000             :   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
    4001      116652 :   if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
    4002         266 :       N0.hasOneUse()) {
    4003         249 :     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    4004         249 :     EVT MemVT = LN0->getMemoryVT();
    4005             :     // If we zero all the possible extended bits, then we can turn this into
    4006             :     // a zextload if we are running before legalize or the operation is legal.
                           // NOTE: this BitWidth shadows the function-level one and is derived from
                           // N1 instead of VT.
    4007         249 :     unsigned BitWidth = N1.getScalarValueSizeInBits();
    4008         747 :     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
    4009         732 :                            BitWidth - MemVT.getScalarSizeInBits())) &&
    4010         476 :         ((!LegalOperations && !LN0->isVolatile()) ||
    4011         452 :          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
    4012         654 :       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
    4013         436 :                                        LN0->getChain(), LN0->getBasePtr(),
    4014         436 :                                        MemVT, LN0->getMemOperand());
    4015         218 :       AddToWorklist(N);
    4016         654 :       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    4017         218 :       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    4018             :     }
    4019             :   }
    4020             :   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
                         // DemandHighBits is passed as false here, presumably because the 0xffff
                         // mask already discards everything above the low halfword -- TODO confirm.
    4021      215318 :   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
    4022         675 :     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
    4023         675 :                                            N0.getOperand(1), false))
    4024           1 :       return BSwap;
    4025             :   }
    4026             : 
    4027      115635 :   return SDValue();
    4028             : }
    4029             : 
    4030             : /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
                       ///
                       /// N supplies the value type and the debug location of the nodes that are
                       /// built. N0 and N1 are the two sides of the pattern; the code reorders them
                       /// itself, so either may be the SHL or the SRL side. When DemandHighBits is
                       /// set and the type is wider than 16 bits, the match additionally requires
                       /// the SHL side to be masked and the SRL side to be either masked or known
                       /// to have no bits set above the low halfword.
                       ///
                       /// Returns the replacement value, or an empty SDValue when LegalOperations
                       /// is not set, the type is not i16/i32/i64, BSWAP is neither legal nor custom
                       /// for the type, or the operands do not form the pattern.
    4031       98760 : SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
    4032             :                                         bool DemandHighBits) {
                         // This fold is only attempted once LegalOperations is set.
    4033       98760 :   if (!LegalOperations)
    4034       44601 :     return SDValue();
    4035             : 
    4036      108318 :   EVT VT = N->getValueType(0);
    4037      108011 :   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
    4038        8898 :     return SDValue();
    4039       45261 :   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    4040       12861 :     return SDValue();
    4041             : 
    4042             :   // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
                         // LookPassAnd0 / LookPassAnd1 remember whether a mask was found on the SHL
                         // side / SRL side respectively. The two swaps below move an AND-of-SRL out
                         // of N0 and an AND-of-SHL out of N1, so that where those forms are present
                         // the SHL side ends up in N0 and the SRL side in N1.
    4043       32400 :   bool LookPassAnd0 = false;
    4044       32400 :   bool LookPassAnd1 = false;
    4045       69000 :   if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
    4046             :       std::swap(N0, N1);
    4047       68322 :   if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
    4048             :       std::swap(N0, N1);
                         // An outer mask on N0 must have a single use and be exactly 0xFF00; it is
                         // then stripped.
    4049       64800 :   if (N0.getOpcode() == ISD::AND) {
    4050        3648 :     if (!N0.getNode()->hasOneUse())
    4051          86 :       return SDValue();
    4052        5309 :     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    4053        1747 :     if (!N01C || N01C->getZExtValue() != 0xFF00)
    4054        1775 :       return SDValue();
    4055          12 :     N0 = N0.getOperand(0);
    4056           6 :     LookPassAnd0 = true;
    4057             :   }
    4058             : 
                         // Likewise an outer mask on N1 must have a single use and be exactly 0xFF.
    4059       61078 :   if (N1.getOpcode() == ISD::AND) {
    4060        2851 :     if (!N1.getNode()->hasOneUse())
    4061          19 :       return SDValue();
    4062        4247 :     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    4063        1415 :     if (!N11C || N11C->getZExtValue() != 0xFF)
    4064        1383 :       return SDValue();
    4065          66 :     N1 = N1.getOperand(0);
    4066          33 :     LookPassAnd1 = true;
    4067             :   }
    4068             : 
                         // With any outer masks removed, order the shifts as N0 = SHL, N1 = SRL and
                         // require each of them to have a single use.
    4069       58678 :   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
    4070             :     std::swap(N0, N1);
    4071       60928 :   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
    4072       28579 :     return SDValue();
    4073        1667 :   if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
    4074           7 :     return SDValue();
    4075             : 
                         // Both shift amounts must be the constant 8.
    4076        1653 :   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    4077        1653 :   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    4078         551 :   if (!N01C || !N11C)
    4079         204 :     return SDValue();
    4080         380 :   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
    4081         327 :     return SDValue();
    4082             : 
    4083             :   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
                         // An inner mask is only looked for on a side that had no outer mask.
    4084          40 :   SDValue N00 = N0->getOperand(0);
    4085          54 :   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
    4086          12 :     if (!N00.getNode()->hasOneUse())
    4087           0 :       return SDValue();
    4088          18 :     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
    4089           6 :     if (!N001C || N001C->getZExtValue() != 0xFF)
    4090           2 :       return SDValue();
    4091           8 :     N00 = N00.getOperand(0);
    4092           4 :     LookPassAnd0 = true;
    4093             :   }
    4094             : 
    4095          36 :   SDValue N10 = N1->getOperand(0);
    4096          30 :   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
    4097          10 :     if (!N10.getNode()->hasOneUse())
    4098           0 :       return SDValue();
    4099          15 :     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
    4100           5 :     if (!N101C || N101C->getZExtValue() != 0xFF00)
    4101           0 :       return SDValue();
    4102          10 :     N10 = N10.getOperand(0);
    4103           5 :     LookPassAnd1 = true;
    4104             :   }
    4105             : 
                         // Both shifts must operate on the same value.
    4106          18 :   if (N00 != N10)
    4107           0 :     return SDValue();
    4108             : 
    4109             :   // Make sure everything beyond the low halfword gets set to zero since the SRL
    4110             :   // 16 will clear the top bits.
    4111          18 :   unsigned OpSizeInBits = VT.getSizeInBits();
    4112          18 :   if (DemandHighBits && OpSizeInBits > 16) {
    4113             :     // If the left-shift isn't masked out then the only way this is a bswap is
    4114             :     // if all bits beyond the low 8 are 0. In that case the entire pattern
    4115             :     // reduces to a left shift anyway: leave it for other parts of the combiner.
    4116           9 :     if (!LookPassAnd0)
    4117           2 :       return SDValue();
    4118             : 
    4119             :     // However, if the right shift isn't masked out then it might be because
    4120             :     // it's not needed. See if we can spot that too.
    4121          16 :     if (!LookPassAnd1 &&
    4122           4 :         !DAG.MaskedValueIsZero(
    4123          16 :             N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
    4124           0 :       return SDValue();
    4125             :   }
    4126             : 
                         // Build the BSWAP and, for types wider than 16 bits, shift the result
                         // right by OpSizeInBits - 16. For i16 no shift is emitted.
    4127          64 :   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
    4128          16 :   if (OpSizeInBits > 16) {
    4129          32 :     SDLoc DL(N);
    4130          32 :     Res = DAG.getNode(ISD::SRL, DL, VT, Res,
    4131          16 :                       DAG.getConstant(OpSizeInBits - 16, DL,
    4132          48 :                                       getShiftAmountTy(VT)));
    4133             :   }
    4134          16 :   return Res;
    4135             : }
    4136             : 
    4137             : /// Return true if the specified node is an element that makes up a 32-bit
    4138             : /// packed halfword byteswap.
    4139             : /// ((x & 0x000000ff) << 8) |
    4140             : /// ((x & 0x0000ff00) >> 8) |
    4141             : /// ((x & 0x00ff0000) << 8) |
    4142             : /// ((x & 0xff000000) >> 8)
                       ///
                       /// N must have a single use and be an AND, SHL or SRL whose first operand is
                       /// again an AND, SHL or SRL. On a match, the node feeding that inner
                       /// operation is stored in Parts[k], where k (0-3) is the byte index of the
                       /// 0xFF mask that was found, and true is returned. Returns false when the
                       /// shape, mask or shift amount does not fit, or when Parts[k] is already
                       /// occupied.
    4143         475 : static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
    4144         879 :   if (!N.getNode()->hasOneUse())
    4145             :     return false;
    4146             : 
    4147         808 :   unsigned Opc = N.getOpcode();
    4148         404 :   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
    4149             :     return false;
    4150             : 
    4151         284 :   SDValue N0 = N.getOperand(0);
    4152         284 :   unsigned Opc0 = N0.getOpcode();
    4153         142 :   if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
    4154             :     return false;
    4155             : 
                         // Locate the mask constant: on N itself when N is the AND, otherwise on
                         // the inner node when that one is the AND. If neither yields a
                         // ConstantSDNode there is nothing to match.
    4156          38 :   ConstantSDNode *N1C = nullptr;
    4157             :   // SHL or SRL: look upstream for AND mask operand
    4158          38 :   if (Opc == ISD::AND)
    4159          48 :     N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    4160          14 :   else if (Opc0 == ISD::AND)
    4161          28 :     N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    4162          38 :   if (!N1C)
    4163             :     return false;
    4164             : 
                         // Translate the mask into the index of the byte it selects; any other
                         // constant is rejected.
    4165             :   unsigned MaskByteOffset;
    4166          76 :   switch (N1C->getZExtValue()) {
    4167             :   default:
    4168             :     return false;
    4169             :   case 0xFF:       MaskByteOffset = 0; break;
    4170          10 :   case 0xFF00:     MaskByteOffset = 1; break;
    4171          10 :   case 0xFF0000:   MaskByteOffset = 2; break;
    4172           8 :   case 0xFF000000: MaskByteOffset = 3; break;
    4173             :   }
    4174             : 
    4175             :   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
                         // In every accepted form the shift amount must be the constant 8, and the
                         // shift direction must agree with the byte the mask selects.
    4176          38 :   if (Opc == ISD::AND) {
    4177          24 :     if (MaskByteOffset == 0 || MaskByteOffset == 2) {
    4178             :       // (x >> 8) & 0xff
    4179             :       // (x >> 8) & 0xff0000
    4180          12 :       if (Opc0 != ISD::SRL)
    4181             :         return false;
    4182          36 :       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    4183          12 :       if (!C || C->getZExtValue() != 8)
    4184             :         return false;
    4185             :     } else {
    4186             :       // (x << 8) & 0xff00
    4187             :       // (x << 8) & 0xff000000
    4188          12 :       if (Opc0 != ISD::SHL)
    4189             :         return false;
    4190          36 :       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    4191          12 :       if (!C || C->getZExtValue() != 8)
    4192             :         return false;
    4193             :     }
    4194          14 :   } else if (Opc == ISD::SHL) {
    4195             :     // (x & 0xff) << 8
    4196             :     // (x & 0xff0000) << 8
    4197           8 :     if (MaskByteOffset != 0 && MaskByteOffset != 2)
    4198             :       return false;
    4199          24 :     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    4200           8 :     if (!C || C->getZExtValue() != 8)
    4201             :       return false;
    4202             :   } else { // Opc == ISD::SRL
    4203             :     // (x & 0xff00) >> 8
    4204             :     // (x & 0xff000000) >> 8
    4205           6 :     if (MaskByteOffset != 1 && MaskByteOffset != 3)
    4206             :       return false;
    4207          18 :     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    4208           6 :     if (!C || C->getZExtValue() != 8)
    4209             :       return false;
    4210             :   }
    4211             : 
                         // Each byte position may be claimed only once.
    4212          76 :   if (Parts[MaskByteOffset])
    4213             :     return false;
    4214             : 
                         // Record the operand of the inner node as the source for this byte.
    4215         114 :   Parts[MaskByteOffset] = N0.getOperand(0).getNode();
    4216             :   return true;
    4217             : }
    4218             : 
    4219             : /// Match a 32-bit packed halfword bswap. That is
    4220             : /// ((x & 0x000000ff) << 8) |
    4221             : /// ((x & 0x0000ff00) >> 8) |
    4222             : /// ((x & 0x00ff0000) << 8) |
    4223             : /// ((x & 0xff000000) >> 8)
    4224             : /// => (rotl (bswap x), 16)
                       ///
                       /// \p N supplies the result type and the SDLoc for the new nodes; \p N0 and
                       /// \p N1 are the two values whose OR is being matched. Returns an empty
                       /// SDValue when LegalOperations is not set, when the type is not i32, when
                       /// BSWAP is neither legal nor custom for i32, or when the pattern is absent.
    4225       98489 : SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
    4226       98489 :   if (!LegalOperations)
    4227       44463 :     return SDValue();
    4228             : 
    4229      108052 :   EVT VT = N->getValueType(0);
    4230       54026 :   if (VT != MVT::i32)
    4231       19185 :     return SDValue();
    4232       34841 :   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    4233        9551 :     return SDValue();
    4234             : 
    4235             :   // Look for either
    4236             :   // (or (or (and), (and)), (or (and), (and)))
    4237             :   // (or (or (or (and), (and)), (and)), (and))
    4238       50580 :   if (N0.getOpcode() != ISD::OR)
    4239       24845 :     return SDValue();
    4240         890 :   SDValue N00 = N0.getOperand(0);
    4241         890 :   SDValue N01 = N0.getOperand(1);
                         // Parts[i] receives the source node of the element whose mask constant
                         // selects byte i. isBSwapHWordElement refuses to fill a slot twice, so
                         // four successful calls mean all four bytes were seen exactly once.
    4242         445 :   SDNode *Parts[4] = {};
    4243             : 
    4244         478 :   if (N1.getOpcode() == ISD::OR &&
    4245         509 :       N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
    4246             :     // (or (or (and), (and)), (or (and), (and)))
    4247          32 :     if (!isBSwapHWordElement(N00, Parts))
    4248          26 :       return SDValue();
    4249             : 
    4250           6 :     if (!isBSwapHWordElement(N01, Parts))
    4251           0 :       return SDValue();
    4252          12 :     SDValue N10 = N1.getOperand(0);
    4253           6 :     if (!isBSwapHWordElement(N10, Parts))
    4254           0 :       return SDValue();
    4255          12 :     SDValue N11 = N1.getOperand(1);
    4256           6 :     if (!isBSwapHWordElement(N11, Parts))
    4257           2 :       return SDValue();
    4258             :   } else {
    4259             :     // (or (or (or (and), (and)), (and)), (and))
    4260         413 :     if (!isBSwapHWordElement(N1, Parts))
    4261         409 :       return SDValue();
    4262           4 :     if (!isBSwapHWordElement(N01, Parts))
    4263           0 :       return SDValue();
    4264           8 :     if (N00.getOpcode() != ISD::OR)
    4265           0 :       return SDValue();
    4266           8 :     SDValue N000 = N00.getOperand(0);
    4267           4 :     if (!isBSwapHWordElement(N000, Parts))
    4268           0 :       return SDValue();
    4269           8 :     SDValue N001 = N00.getOperand(1);
    4270           4 :     if (!isBSwapHWordElement(N001, Parts))
    4271           0 :       return SDValue();
    4272             :   }
    4273             : 
    4274             :   // Make sure the parts are all coming from the same node.
    4275           8 :   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
    4276           0 :     return SDValue();
    4277             : 
    4278           8 :   SDLoc DL(N);
    4279           8 :   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
    4280          16 :                               SDValue(Parts[0], 0));
    4281             : 
    4282             :   // Result of the bswap should be rotated by 16. If it's not legal, then
    4283             :   // do  (x << 16) | (x >> 16).
    4284           8 :   SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
    4285           8 :   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
    4286           8 :     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
                         // VT is i32 here, so a rotate right by 16 produces the same value as a
                         // rotate left by 16 and the same ShAmt can be reused.
    4287           4 :   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
    4288           8 :     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
    4289           0 :   return DAG.getNode(ISD::OR, DL, VT,
    4290           0 :                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
    4291           0 :                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
    4292             : }
    4293             : 
    4294             : /// This contains all DAGCombine rules which reduce two values combined by
    4295             : /// an Or operation to a single value \see visitANDLike().
                       ///
                       /// \p N0 and \p N1 are the two values being ORed; \p N is used here only to
                       /// build the SDLoc for new nodes. Returns the replacement value, or an empty
                       /// SDValue if none of the rules below applies.
    4296       98731 : SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
    4297      197462 :   EVT VT = N1.getValueType();
    4298      197462 :   SDLoc DL(N);
    4299             : 
    4300             :   // fold (or x, undef) -> -1 (only while LegalOperations is not set)
    4301      188126 :   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
    4302          11 :     return DAG.getAllOnesConstant(DL, VT);
    4303             : 
    4304       98720 :   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
    4305         118 :     return V;
    4306             : 
    4307             :   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C1|C2) if possible.
    4308      210647 :   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
    4309             :       // Don't increase # computations.
    4310        4413 :       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
    4311             :     // We can only do this xform if we know that bits from X that are set in C2
    4312             :     // but not in C1 are already zero.  Likewise for Y.
    4313             :     if (const ConstantSDNode *N0O1C =
    4314       11268 :         getAsNonOpaqueConstant(N0.getOperand(1))) {
    4315             :       if (const ConstantSDNode *N1O1C =
    4316        7454 :           getAsNonOpaqueConstant(N1.getOperand(1))) {
    4317             :         // LHSMask is C1 and RHSMask is C2 from the pattern above. The two
    4318             :         // MaskedValueIsZero queries below check X and then Y.
    4319        2466 :         const APInt &LHSMask = N0O1C->getAPIntValue();
    4320        2466 :         const APInt &RHSMask = N1O1C->getAPIntValue();
    4321             : 
    4322       19955 :         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
    4323       10999 :             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
    4324          66 :           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
    4325         110 :                                   N0.getOperand(0), N1.getOperand(0));
    4326          22 :           return DAG.getNode(ISD::AND, DL, VT, X,
    4327         110 :                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
    4328             :         }
    4329             :       }
    4330             :     }
    4331             :   }
    4332             : 
    4333             :   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
                         // Unlike the rule above, M and N need not be constants here; only the
                         // first operands of the two ANDs must be the same value.
    4334      112001 :   if (N0.getOpcode() == ISD::AND &&
    4335       13421 :       N1.getOpcode() == ISD::AND &&
    4336      111773 :       N0.getOperand(0) == N1.getOperand(0) &&
    4337             :       // Don't increase # computations.
    4338          64 :       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
    4339         180 :     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
    4340         300 :                             N0.getOperand(1), N1.getOperand(1));
    4341         180 :     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
    4342             :   }
    4343             : 
    4344       98520 :   return SDValue();
    4345             : }
    4346             : 
    4347       99942 : SDValue DAGCombiner::visitOR(SDNode *N) {
                         // Tries the OR folds below in the order written and returns the first
                         // result produced. An empty SDValue is returned when none applies.
    4348      199884 :   SDValue N0 = N->getOperand(0);
    4349      199884 :   SDValue N1 = N->getOperand(1);
    4350      199884 :   EVT VT = N1.getValueType();
    4351             : 
    4352             :   // x | x --> x
    4353       99942 :   if (N0 == N1)
    4354           9 :     return N0;
    4355             : 
    4356             :   // fold vector ops
    4357       99933 :   if (VT.isVector()) {
    4358       11030 :     if (SDValue FoldedVOp = SimplifyVBinOp(N))
    4359           7 :       return FoldedVOp;
    4360             : 
    4361             :     // fold (or x, 0) -> x, vector edition
    4362       11023 :     if (ISD::isBuildVectorAllZeros(N0.getNode()))
    4363          26 :       return N1;
    4364       10997 :     if (ISD::isBuildVectorAllZeros(N1.getNode()))
    4365          66 :       return N0;
    4366             : 
    4367             :     // fold (or x, -1) -> -1, vector edition
    4368       10931 :     if (ISD::isBuildVectorAllOnes(N0.getNode()))
    4369             :       // do not return N0, because undef node may exist in N0
    4370           4 :       return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
    4371       10930 :     if (ISD::isBuildVectorAllOnes(N1.getNode()))
    4372             :       // do not return N1, because undef node may exist in N1
    4373           0 :       return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
    4374             : 
    4375             :     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
    4376             :     // Do this only if the resulting shuffle is legal.
    4377       11053 :     if (isa<ShuffleVectorSDNode>(N0) &&
    4378       11053 :         isa<ShuffleVectorSDNode>(N1) &&
    4379             :         // Avoid folding a node with illegal type.
    4380         225 :         TLI.isTypeLegal(VT)) {
    4381         224 :       bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
    4382         224 :       bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
    4383         224 :       bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
    4384         224 :       bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
    4385             :       // Ensure each shuffle has exactly one all-zeros input.
    4386         112 :       if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
    4387             :         assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
    4388             :         assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
    4389          61 :         const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
    4390          61 :         const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
    4391          61 :         bool CanFold = true;
    4392          61 :         int NumElts = VT.getVectorNumElements();
    4393         127 :         SmallVector<int, 4> Mask(NumElts);
    4394             : 
    4395         275 :         for (int i = 0; i != NumElts; ++i) {
    4396         438 :           int M0 = SV0->getMaskElt(i);
    4397         438 :           int M1 = SV1->getMaskElt(i);
    4398             : 
    4399             :           // Determine if either index is pointing to a zero vector.
                                 // A negative (undef) index is also counted as zero here.
    4400         219 :           bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
    4401         219 :           bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
    4402             : 
    4403             :           // If one element is zero and the other side is undef, keep undef.
    4404             :           // This also handles the case that both are undef.
    4405         220 :           if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
    4406           2 :             Mask[i] = -1;
    4407           1 :             continue;
    4408             :           }
    4409             : 
    4410             :           // Make sure only one of the elements is zero.
    4411         218 :           if (M0Zero == M1Zero) {
    4412             :             CanFold = false;
    4413             :             break;
    4414             :           }
    4415             : 
    4416             :           assert((M0 >= 0 || M1 >= 0) && "Undef index!");
    4417             : 
    4418             :           // We have a zero and non-zero element. If the non-zero came from
    4419             :           // SV0 make the index a LHS index. If it came from SV1, make it
    4420             :           // a RHS index. We need to mod by NumElts because we don't care
    4421             :           // which operand it came from in the original shuffles.
    4422         426 :           Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
    4423             :         }
    4424             : 
    4425          61 :         if (CanFold) {
    4426         112 :           SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
    4427         112 :           SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
    4428             : 
                                 // If the mask is not legal as built, retry once with the operands
                                 // swapped and the mask commuted to match.
    4429         112 :           bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
    4430          56 :           if (!LegalMask) {
    4431           0 :             std::swap(NewLHS, NewRHS);
    4432           0 :             ShuffleVectorSDNode::commuteMask(Mask);
    4433           0 :             LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
    4434             :           }
    4435             : 
    4436          56 :           if (LegalMask)
    4437         224 :             return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
    4438             :         }
    4439             :       }
    4440             :     }
    4441             :   }
    4442             : 
    4443             :   // fold (or c1, c2) -> c1|c2
                         // N1C comes from a plain dyn_cast, so its opacity is checked explicitly.
    4444      199554 :   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
    4445       99777 :   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
    4446       99901 :   if (N0C && N1C && !N1C->isOpaque())
    4447         372 :     return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
    4448             :   // canonicalize constant to RHS
    4449      100432 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
    4450         779 :      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    4451        3100 :     return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
    4452             :   // fold (or x, 0) -> x
    4453       98878 :   if (isNullConstant(N1))
    4454          61 :     return N0;
    4455             :   // fold (or x, -1) -> -1
    4456       98817 :   if (isAllOnesConstant(N1))
    4457          80 :     return N1;
    4458             : 
    4459       98737 :   if (SDValue NewSel = foldBinOpIntoSelect(N))
    4460          41 :     return NewSel;
    4461             : 
    4462             :   // fold (or x, c) -> c iff (x & ~c) == 0
    4463      463707 :   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
    4464           2 :     return N1;
    4465             : 
    4466       98694 :   if (SDValue Combined = visitORLike(N0, N1, N))
    4467         205 :     return Combined;
    4468             : 
    4469             :   // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
    4470       98489 :   if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
    4471           8 :     return BSwap;
    4472       98481 :   if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
    4473           7 :     return BSwap;
    4474             : 
    4475             :   // reassociate or
    4476      196948 :   if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
    4477          46 :     return ROR;
    4478             : 
    4479             :   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
    4480             :   // iff (c1 & c2) != 0.
    4481      154939 :   if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse()) {
    4482        1983 :     if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
    4483        1322 :       if (C1->getAPIntValue().intersects(N1C->getAPIntValue())) {
    4484           9 :         if (SDValue COR =
    4485          27 :                 DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT, N1C, C1))
    4486           9 :           return DAG.getNode(
    4487          18 :               ISD::AND, SDLoc(N), VT,
    4488          54 :               DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR);
                               // Folding c1|c2 produced nothing: return without trying the
                               // remaining folds below.
    4489           0 :         return SDValue();
    4490             :       }
    4491             :     }
    4492             :   }
    4493             : 
    4494             :   // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
    4495      295257 :   if (N0.getOpcode() == N1.getOpcode())
    4496       11850 :     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
    4497         329 :       return Tmp;
    4498             : 
    4499             :   // See if this is some rotate idiom.
    4500      196180 :   if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
    4501         602 :     return SDValue(Rot, 0);
    4502             : 
    4503       97488 :   if (SDValue Load = MatchLoadCombine(N))
    4504         199 :     return Load;
    4505             : 
    4506             :   // Simplify the operands using demanded-bits information.
    4507       97289 :   if (SimplifyDemandedBits(SDValue(N, 0)))
    4508        2304 :     return SDValue(N, 0);
    4509             : 
    4510       94985 :   return SDValue();
    4511             : }
    4512             : 
    4513             : /// Match "(X shl/srl V1) & V2" where V2 may not be present.
                       ///
                       /// Returns true and sets \p Shift to the SHL/SRL node on a match. When \p Op
                       /// is an AND, its second operand must pass
                       /// DAG.isConstantIntBuildVectorOrConstantInt and is stored in \p Mask; this
                       /// happens before the shift check, so \p Mask may be written even when the
                       /// function returns false. \p Op is taken by value, so the caller's value
                       /// is not modified.
    4514       89196 : bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
    4515      178392 :   if (Op.getOpcode() == ISD::AND) {
    4516       19584 :     if (DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
    4517       16876 :       Mask = Op.getOperand(1);
                             // Continue matching on the value underneath the AND.
    4518       16876 :       Op = Op.getOperand(0);
    4519             :     } else {
    4520             :       return false;
    4521             :     }
    4522             :   }
    4523             : 
    4524      258401 :   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
    4525       13360 :     Shift = Op;
    4526             :     return true;
    4527             :   }
    4528             : 
    4529             :   return false;
    4530             : }
    4531             : 
    4532             : // Return true if we can prove that, whenever Neg and Pos are both in the
    4533             : // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
    4534             : // for two opposing shifts shift1 and shift2 and a value X with EltSize bits:
    4535             : //
    4536             : //     (or (shift1 X, Neg), (shift2 X, Pos))
    4537             : //
    4538             : // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
    4539             : // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
    4540             : // to consider shift amounts with defined behavior.
    4541         315 : static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
    4542             :   // If EltSize is a power of 2 then:
    4543             :   //
    4544             :   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
    4545             :   //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
    4546             :   //
    4547             :   // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
    4548             :   // for the stronger condition:
    4549             :   //
    4550             :   //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
    4551             :   //
    4552             :   // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
    4553             :   // we can just replace Neg with Neg' for the rest of the function.
    4554             :   //
    4555             :   // In other cases we check for the even stronger condition:
    4556             :   //
    4557             :   //     Neg == EltSize - Pos                                    [B]
    4558             :   //
    4559             :   // for all Neg and Pos.  Note that the (or ...) then invokes undefined
    4560             :   // behavior if Pos == 0 (and consequently Neg == EltSize).
    4561             :   //
    4562             :   // We could actually use [A] whenever EltSize is a power of 2, but the
    4563             :   // only extra cases that it would match are those uninteresting ones
    4564             :   // where Neg and Pos are never in range at the same time.  E.g. for
    4565             :   // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
    4566             :   // as well as (sub 32, Pos), but:
    4567             :   //
    4568             :   //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
    4569             :   //
    4570             :   // always invokes undefined behavior for 32-bit X.
    4571             :   //
    4572             :   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
                         // MaskLoBits stays 0 unless [A] is in use, in which case it is
                         // log2(EltSize), i.e. the number of low bits that Mask keeps.
    4573         315 :   unsigned MaskLoBits = 0;
    4574         664 :   if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
    4575          68 :     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
    4576          68 :       if (NegC->getAPIntValue() == EltSize - 1) {
    4577          64 :         Neg = Neg.getOperand(0);
    4578          64 :         MaskLoBits = Log2_64(EltSize);
    4579             :       }
    4580             :     }
    4581             :   }
    4582             : 
    4583             :   // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
    4584         630 :   if (Neg.getOpcode() != ISD::SUB)
    4585             :     return false;
    4586         410 :   ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
    4587         205 :   if (!NegC)
    4588             :     return false;
    4589         410 :   SDValue NegOp1 = Neg.getOperand(1);
    4590             : 
    4591             :   // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
    4592             :   // Pos'.  The truncation is redundant for the purpose of the equality.
    4593         227 :   if (MaskLoBits && Pos.getOpcode() == ISD::AND)
    4594          32 :     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
    4595          32 :       if (PosC->getAPIntValue() == EltSize - 1)
    4596          32 :         Pos = Pos.getOperand(0);
    4597             : 
    4598             :   // The condition we need is now:
    4599             :   //
    4600             :   //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
    4601             :   //
    4602             :   // If NegOp1 == Pos then we need:
    4603             :   //
    4604             :   //              EltSize & Mask == NegC & Mask
    4605             :   //
    4606             :   // (because "x & Mask" is a truncation and distributes through subtraction).
                         // Width holds the constant that must equal EltSize (modulo Mask); every
                         // path that does not assign it returns false before it is read.
    4607         205 :   APInt Width;
    4608         205 :   if (Pos == NegOp1)
    4609         396 :     Width = NegC->getAPIntValue();
    4610             : 
    4611             :   // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
    4612             :   // Then the condition we want to prove becomes:
    4613             :   //
    4614             :   //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
    4615             :   //
    4616             :   // which, again because "x & Mask" is a truncation, becomes:
    4617             :   //
    4618             :   //                NegC & Mask == (EltSize - PosC) & Mask
    4619             :   //             EltSize & Mask == (NegC + PosC) & Mask
    4620          23 :   else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
    4621           6 :     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
    4622          24 :       Width = PosC->getAPIntValue() + NegC->getAPIntValue();
    4623             :     else
    4624             :       return false;
    4625             :   } else
    4626             :     return false;
    4627             : 
    4628             :   // Now we just need to check that EltSize & Mask == Width & Mask.
    4629         201 :   if (MaskLoBits)
    4630             :     // EltSize & Mask is 0 since Mask is EltSize - 1.
    4631          44 :     return Width.getLoBits(MaskLoBits) == 0;
    4632         179 :   return Width == EltSize;
    4633             : }
    4634             : 
    4635             : // A subroutine of MatchRotate used once we have found an OR of two opposite
    4636             : // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
    4637             : // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
    4638             : // former being preferred if supported.  InnerPos and InnerNeg are Pos and
    4639             : // Neg with outer conversions stripped away.
                       //
                       // Returns the newly built rotate node, or nullptr when matchRotateSub cannot
                       // prove the required relation between InnerPos and InnerNeg.
    4640         315 : SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
    4641             :                                        SDValue Neg, SDValue InnerPos,
    4642             :                                        SDValue InnerNeg, unsigned PosOpcode,
    4643             :                                        unsigned NegOpcode, const SDLoc &DL) {
    4644             :   // fold (or (shl x, (*ext y)),
    4645             :   //          (srl x, (*ext (sub 32, y)))) ->
    4646             :   //   (rotl x, y) or (rotr x, (sub 32, y))
    4647             :   //
    4648             :   // fold (or (shl x, (*ext (sub 32, y))),
    4649             :   //          (srl x, (*ext y))) ->
    4650             :   //   (rotr x, y) or (rotl x, (sub 32, y))
    4651         630 :   EVT VT = Shifted.getValueType();
    4652         315 :   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) {
                           // Use PosOpcode with Pos when the target has it as legal or custom;
                           // otherwise fall back to NegOpcode with Neg. The un-stripped Pos/Neg
                           // are used as the rotate amount, not InnerPos/InnerNeg.
    4653         197 :     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
    4654         591 :     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
    4655         591 :                        HasPos ? Pos : Neg).getNode();
    4656             :   }
    4657             : 
    4658             :   return nullptr;
    4659             : }
    4660             : 
    4661             : // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
    4662             : // idioms for rotate, and if the target supports rotation instructions, generate
    4663             : // a rot[lr].
    4664       98090 : SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
    4665             :   // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
    4666      196180 :   EVT VT = LHS.getValueType();
    4667      194440 :   if (!TLI.isTypeLegal(VT)) return nullptr;
    4668             : 
    4669             :   // The target must have at least one rotate flavor.
    4670      192700 :   bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
    4671      192700 :   bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
    4672       96350 :   if (!HasROTL && !HasROTR) return nullptr;
    4673             : 
    4674             :   // Match "(X shl/srl V1) & V2" where V2 may not be present.
    4675       80381 :   SDValue LHSShift;   // The shift.
    4676       80381 :   SDValue LHSMask;    // AND value if any.
    4677       80381 :   if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
    4678             :     return nullptr; // Not part of a rotate.
    4679             : 
    4680        8815 :   SDValue RHSShift;   // The shift.
    4681        8815 :   SDValue RHSMask;    // AND value if any.
    4682        8815 :   if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
    4683             :     return nullptr; // Not part of a rotate.
    4684             : 
    4685       14477 :   if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
    4686             :     return nullptr;   // Not shifting the same value.
    4687             : 
    4688        2526 :   if (LHSShift.getOpcode() == RHSShift.getOpcode())
    4689             :     return nullptr;   // Shifts must disagree.
    4690             : 
    4691             :   // Canonicalize shl to left side in a shl/srl pair.
    4692        1308 :   if (RHSShift.getOpcode() == ISD::SHL) {
    4693         288 :     std::swap(LHS, RHS);
    4694         288 :     std::swap(LHSShift, RHSShift);
    4695             :     std::swap(LHSMask, RHSMask);
    4696             :   }
    4697             : 
    4698         654 :   unsigned EltSizeInBits = VT.getScalarSizeInBits();
    4699        1308 :   SDValue LHSShiftArg = LHSShift.getOperand(0);
    4700        1308 :   SDValue LHSShiftAmt = LHSShift.getOperand(1);
    4701        1308 :   SDValue RHSShiftArg = RHSShift.getOperand(0);
    4702        1308 :   SDValue RHSShiftAmt = RHSShift.getOperand(1);
    4703             : 
    4704             :   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
    4705             :   // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
    4706             :   auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
    4707        1335 :                                         ConstantSDNode *RHS) {
    4708        9345 :     return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
    4709        3324 :   };
    4710        1308 :   if (matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
    4711         405 :     SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
    4712         405 :                               LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
    4713             : 
    4714             :     // If there is an AND of either shifted operand, apply it to the result.
    4715         405 :     if (LHSMask.getNode() || RHSMask.getNode()) {
    4716          47 :       SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
    4717          47 :       SDValue Mask = AllOnes;
    4718             : 
    4719          47 :       if (LHSMask.getNode()) {
    4720          94 :         SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
    4721          94 :         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
    4722         141 :                            DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
    4723             :       }
    4724          47 :       if (RHSMask.getNode()) {
    4725          68 :         SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
    4726          68 :         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
    4727         102 :                            DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
    4728             :       }
    4729             : 
    4730          94 :       Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
    4731             :     }
    4732             : 
    4733         405 :     return Rot.getNode();
    4734             :   }
    4735             : 
    4736             :   // If there is a mask here, and we have a variable shift, we can't be sure
    4737             :   // that we're masking out the right stuff.
    4738         249 :   if (LHSMask.getNode() || RHSMask.getNode())
    4739             :     return nullptr;
    4740             : 
    4741             :   // If the shift amount is sign/zext/any-extended just peel it off.
    4742         220 :   SDValue LExtOp0 = LHSShiftAmt;
    4743         220 :   SDValue RExtOp0 = RHSShiftAmt;
    4744         440 :   if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
    4745         429 :        LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
    4746         418 :        LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
    4747         495 :        LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
    4748         132 :       (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
    4749         121 :        RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
    4750         110 :        RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
    4751          55 :        RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    4752         116 :     LExtOp0 = LHSShiftAmt.getOperand(0);
    4753         116 :     RExtOp0 = RHSShiftAmt.getOperand(0);
    4754             :   }
    4755             : 
    4756             :   SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
    4757         220 :                                    LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
    4758         220 :   if (TryL)
    4759             :     return TryL;
    4760             : 
    4761             :   SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
    4762          95 :                                    RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
    4763          95 :   if (TryR)
    4764             :     return TryR;
    4765             : 
    4766          23 :   return nullptr;
    4767             : }
    4768             : 
    4769             : namespace {
    4770             : /// Represents known origin of an individual byte in load combine pattern. The
    4771             : /// value of the byte is either constant zero or comes from memory.
    4772             : struct ByteProvider {
    4773             :   // For constant zero providers Load is set to nullptr. For memory providers
    4774             :   // Load represents the node which loads the byte from memory.
    4775             :   // ByteOffset is the offset of the byte in the value produced by the load.
    4776             :   LoadSDNode *Load;
    4777             :   unsigned ByteOffset;
    4778             :   // A default-constructed provider has the same state as getConstantZero().
    4779             :   ByteProvider() : Load(nullptr), ByteOffset(0) {}
    4780             :   // Named factories; the two-argument constructor they use is private.
    4781             :   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
    4782       15575 :     return ByteProvider(Load, ByteOffset);
    4783             :   }
    4784       20818 :   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
    4785             :   // The kind of provider is encoded solely by whether Load is null.
    4786             :   bool isConstantZero() const { return !Load; }
    4787             :   bool isMemory() const { return Load; }
    4788             :   // Providers compare equal when both the load node and the byte offset match.
    4789             :   bool operator==(const ByteProvider &Other) const {
    4790             :     return Other.Load == Load && Other.ByteOffset == ByteOffset;
    4791             :   }
    4792             : 
    4793             : private:
    4794             :   ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
    4795             :       : Load(Load), ByteOffset(ByteOffset) {}
    4796             : };
    4797             : 
    4798             : /// Recursively traverses the expression calculating the origin of the requested
    4799             : /// byte of the given value. Returns None if the provider can't be calculated.
    4800             : ///
    4801             : /// For all the values except the root of the expression verifies that the value
    4802             : /// has exactly one use and if it's not true return None. This way if the origin
    4803             : /// of the byte is returned it's guaranteed that the values which contribute to
    4804             : /// the byte are not used outside of this expression.
    4805             : ///
    4806             : /// Because the parts of the expression are not allowed to have more than one
    4807             : /// use this function iterates over trees, not DAGs. So it never visits the same
    4808             : /// node more than once.
    4809      212105 : const Optional<ByteProvider> calculateByteProvider(SDValue Op, unsigned Index, // Index: which byte of Op is queried
    4810             :                                                    unsigned Depth, // recursion depth so far; callers pass Depth + 1
    4811             :                                                    bool Root = false) { // Root: exempts Op from the one-use check
    4812             :   // Typical i64 by i8 pattern requires recursion up to 8 calls depth
    4813      212105 :   if (Depth == 10)
    4814         272 :     return None;
    4815             :   // Every value except the root of the expression must have exactly one use.
    4816      339970 :   if (!Root && !Op.hasOneUse())
    4817       22315 :     return None;
    4818             :   // Only values made of a whole number of bytes can be analyzed byte-wise.
    4819             :   assert(Op.getValueType().isScalarInteger() && "can't handle other types");
    4820      189518 :   unsigned BitWidth = Op.getValueSizeInBits();
    4821      189518 :   if (BitWidth % 8 != 0)
    4822           0 :     return None;
    4823      189518 :   unsigned ByteWidth = BitWidth / 8;
    4824             :   assert(Index < ByteWidth && "invalid index requested");
    4825             :   (void) ByteWidth;
    4826             :   // Dispatch on the opcode; opcodes not listed fall out of the switch to None.
    4827      379036 :   switch (Op.getOpcode()) {
    4828       92134 :   case ISD::OR: {
    4829      276402 :     auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
    4830       92134 :     if (!LHS)
    4831       70818 :       return None;
    4832       42632 :     auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
    4833       21316 :     if (!RHS)
    4834        2156 :       return None;
    4835             :     // The byte is known only when at least one side contributes constant zero.
    4836       19160 :     if (LHS->isConstantZero())
    4837             :       return RHS;
    4838       10630 :     if (RHS->isConstantZero())
    4839             :       return LHS;
    4840         147 :     return None;
    4841             :   }
    4842       23720 :   case ISD::SHL: {
    4843       70848 :     auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
    4844             :     if (!ShiftOp)
    4845         312 :       return None;
    4846             :     // Only constant shifts by a whole number of bytes are handled.
    4847       23408 :     uint64_t BitShift = ShiftOp->getZExtValue();
    4848       23408 :     if (BitShift % 8 != 0)
    4849        2972 :       return None;
    4850       20436 :     uint64_t ByteShift = BitShift / 8;
    4851             :     // Bytes below the shift amount are zero; the others come from the operand.
    4852       20436 :     return Index < ByteShift
    4853       12559 :                ? ByteProvider::getConstantZero()
    4854       15754 :                : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
    4855       36190 :                                        Depth + 1);
    4856             :   }
    4857        9520 :   case ISD::ANY_EXTEND:
    4858             :   case ISD::SIGN_EXTEND:
    4859             :   case ISD::ZERO_EXTEND: {
    4860       19040 :     SDValue NarrowOp = Op->getOperand(0);
    4861        9520 :     unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
    4862        9520 :     if (NarrowBitWidth % 8 != 0)
    4863         159 :       return None;
    4864        9361 :     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
    4865             :     // Bytes past the narrow operand are known (zero) only for ZERO_EXTEND.
    4866        9361 :     if (Index >= NarrowByteWidth)
    4867        2321 :       return Op.getOpcode() == ISD::ZERO_EXTEND
    4868        2321 :                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
    4869        2321 :                  : None;
    4870        7040 :     return calculateByteProvider(NarrowOp, Index, Depth + 1);
    4871             :   }
    4872          42 :   case ISD::BSWAP: // byte Index of the result is byte ByteWidth - Index - 1 of the operand
    4873         126 :     return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
    4874         126 :                                  Depth + 1);
    4875       21877 :   case ISD::LOAD: {
    4876       43754 :     auto L = cast<LoadSDNode>(Op.getNode());
    4877       65588 :     if (L->isVolatile() || L->isIndexed())
    4878          54 :       return None;
    4879             :     // Work with the width of the load's memory type, which must be whole bytes.
    4880       21823 :     unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
    4881       21823 :     if (NarrowBitWidth % 8 != 0)
    4882         120 :       return None;
    4883       21703 :     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
    4884             :     // Bytes past the memory width are known (zero) only for ZEXTLOAD loads.
    4885       21703 :     if (Index >= NarrowByteWidth)
    4886        6128 :       return L->getExtensionType() == ISD::ZEXTLOAD
    4887        5938 :                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
    4888        6318 :                  : None;
    4889       31150 :     return ByteProvider::getMemory(L, Index);
    4890             :   }
    4891             :   }
    4892             :   // Any other opcode: the origin of the byte cannot be determined.
    4893       42225 :   return None;
    4894             : }
    4895             : } // namespace
    4896             : 
    4897             : /// Match a pattern where a wide type scalar value is loaded by several narrow
    4898             : /// loads and combined by shifts and ors. Fold it into a single load or a load
    4899             : /// and a BSWAP if the target supports it.
    4900             : ///
    4901             : /// Assuming little endian target:
    4902             : ///  i8 *a = ...
    4903             : ///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
    4904             : /// =>
    4905             : ///  i32 val = *((i32)a)
    4906             : ///
    4907             : ///  i8 *a = ...
    4908             : ///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
    4909             : /// =>
    4910             : ///  i32 val = BSWAP(*((i32)a))
    4911             : ///
    4912             : /// TODO: This rule matches complex patterns with OR node roots and doesn't
    4913             : /// interact well with the worklist mechanism. When a part of the pattern is
    4914             : /// updated (e.g. one of the loads) its direct users are put into the worklist,
    4915             : /// but the root node of the pattern which triggers the load combine is not
    4916             : /// necessarily a direct user of the changed node. For example, once the address
    4917             : /// of t28 load is reassociated load combine won't be triggered:
    4918             : ///             t25: i32 = add t4, Constant:i32<2>
    4919             : ///           t26: i64 = sign_extend t25
    4920             : ///        t27: i64 = add t2, t26
    4921             : ///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
    4922             : ///     t29: i32 = zero_extend t28
    4923             : ///   t32: i32 = shl t29, Constant:i8<8>
    4924             : /// t33: i32 = or t23, t32
    4925             : /// As a possible fix visitLoad can check if the load can be a part of a load
    4926             : /// combine pattern and add corresponding OR roots to the worklist.
    4927       97488 : SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
    4928             :   assert(N->getOpcode() == ISD::OR &&
    4929             :          "Can only match load combining against OR nodes");
    4930             : 
    4931             :   // Handles only the simple scalar types i16, i32 and i64.
    4932      194976 :   EVT VT = N->getValueType(0);
    4933      221849 :   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
    4934       12574 :     return SDValue();
    4935       84914 :   unsigned ByteWidth = VT.getSizeInBits() / 8;
    4936             : 
    4937       84914 :   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    4938             :   // Before legalize we can introduce too wide illegal loads which will be later
    4939             :   // split into legal sized loads. This enables us to combine i64 load by i8
    4940             :   // patterns to a couple of i32 loads on 32 bit targets.
    4941      129841 :   if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
    4942       12602 :     return SDValue();
    4943             :   // Memory offset of value byte i (0 = least significant) in a BW-byte value, per endianness. NOTE(review): both lambdas capture nothing; std::function looks unnecessary here - confirm.
    4944             :   std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = [](
    4945      144624 :     unsigned BW, unsigned i) { return i; };
    4946             :   std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = [](
    4947      222814 :     unsigned BW, unsigned i) { return BW - i - 1; };
    4948             :   // MemoryByteOffset: where the provided byte sits in memory relative to its load's address, using the target's endianness.
    4949      144624 :   bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
    4950       13808 :   auto MemoryByteOffset = [&] (ByteProvider P) {
    4951             :     assert(P.isMemory() && "Must be a memory byte provider");
    4952       13808 :     unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
    4953             :     assert(LoadBitWidth % 8 == 0 &&
    4954             :            "can only analyze providers for individual bytes not bit");
    4955       13808 :     unsigned LoadByteWidth = LoadBitWidth / 8;
    4956       13808 :     return IsBigEndianTarget
    4957       27616 :             ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
    4958       26992 :             : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
    4959       72312 :   };
    4960             :   // State accumulated by the per-byte scan below: common base and chain, the set of loads, and the provider with the lowest offset.
    4961      144624 :   Optional<BaseIndexOffset> Base;
    4962       72312 :   SDValue Chain;
    4963             : 
    4964      144624 :   SmallSet<LoadSDNode *, 8> Loads;
    4965      144624 :   Optional<ByteProvider> FirstByteProvider;
    4966       72312 :   int64_t FirstOffset = INT64_MAX;
    4967             : 
    4968             :   // Check if all the bytes of the OR we are looking at are loaded from the same
    4969             :   // base address. Collect byte offsets from the Base address in ByteOffsets.
    4970      216936 :   SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
    4971      169834 :   for (unsigned i = 0; i < ByteWidth; i++) {
    4972       96301 :     auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
    4973       98626 :     if (!P || !P->isMemory()) // All the bytes must be loaded from memory
    4974       69452 :       return SDValue();
    4975             : 
    4976       14244 :     LoadSDNode *L = P->Load;
    4977             :     assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
    4978             :            "Must be enforced by calculateByteProvider");
    4979             :     assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
    4980             : 
    4981             :     // All loads must share the same chain
    4982       28488 :     SDValue LChain = L->getChain();
    4983       14244 :     if (!Chain)
    4984             :       Chain = LChain;
    4985        1514 :     else if (Chain != LChain)
    4986        1514 :       return SDValue();
    4987             :     // The first load seen defines Base (offset 0). NOTE(review): equalBaseIndex presumably writes this load's distance from Base into ByteOffsetFromBase - confirm.
    4988             :     // Loads must share the same base address
    4989       25460 :     BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr(), DAG);
    4990       12730 :     int64_t ByteOffsetFromBase = 0;
    4991       12730 :     if (!Base)
    4992             :       Base = Ptr;
    4993        9043 :     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
    4994         125 :       return SDValue();
    4995             : 
    4996             :     // Calculate the offset of the current byte from the base address
    4997       12605 :     ByteOffsetFromBase += MemoryByteOffset(*P);
    4998       25210 :     ByteOffsets[i] = ByteOffsetFromBase;
    4999             : 
    5000             :     // Remember the first byte load
    5001       12605 :     if (ByteOffsetFromBase < FirstOffset) {
    5002             :       FirstByteProvider = P;
    5003             :       FirstOffset = ByteOffsetFromBase;
    5004             :     }
    5005             : 
    5006       12605 :     Loads.insert(L);
    5007             :   }
    5008             :   assert(Loads.size() > 0 && "All the bytes of the value must be loaded from "
    5009             :          "memory, so there must be at least one load which produces the value");
    5010             :   assert(Base && "Base address of the accessed memory location must be set");
    5011             :   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
    5012             : 
    5013             :   // Check if the bytes of the OR we are looking at match with either big or
    5014             :   // little endian value load
    5015             :   bool BigEndian = true, LittleEndian = true;
    5016       11693 :   for (unsigned i = 0; i < ByteWidth; i++) {
    5017       10508 :     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset; // offset relative to the lowest byte address seen
    5018        5254 :     LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i);
    5019        5254 :     BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i);
    5020        5254 :     if (!BigEndian && !LittleEndian)
    5021          18 :       return SDValue();
    5022             :   }
    5023             :   assert((BigEndian != LittleEndian) && "should be either or");
    5024             :   assert(FirstByteProvider && "must be set");
    5025             : 
    5026             :   // Ensure that the first byte is loaded from zero offset of the first load.
    5027             :   // So the combined value can be loaded from the first load address.
    5028        1203 :   if (MemoryByteOffset(*FirstByteProvider) != 0)
    5029           6 :     return SDValue();
    5030        1197 :   LoadSDNode *FirstLoad = FirstByteProvider->Load;
    5031             : 
    5032             :   // The node we are looking at matches with the pattern, check if we can
    5033             :   // replace it with a single load and bswap if needed.
    5034             : 
    5035             :   // If the load needs byte swap check if the target supports it
    5036        1197 :   bool NeedsBswap = IsBigEndianTarget != BigEndian; // the pattern's byte order differs from the target's
    5037             : 
    5038             :   // Before legalize we can introduce illegal bswaps which will be later
    5039             :   // converted to an explicit bswap sequence. This way we end up with a single
    5040             :   // load and byte shuffling instead of several loads and byte shuffling.
    5041        1197 :   if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
    5042           0 :     return SDValue();
    5043             : 
    5044             :   // Check that a load of the wide type is both allowed and fast on the target
    5045        1197 :   bool Fast = false;
    5046        4788 :   bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
    5047             :                                         VT, FirstLoad->getAddressSpace(),
    5048        1197 :                                         FirstLoad->getAlignment(), &Fast);
    5049        1512 :   if (!Allowed || !Fast)
    5050         998 :     return SDValue();
    5051             :   // Build the wide load on the shared chain, reusing the first load's base pointer, pointer info and alignment.
    5052             :   SDValue NewLoad =
    5053         796 :       DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
    5054         995 :                   FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
    5055             : 
    5056             :   // Transfer chain users from old loads to the new load.
    5057         930 :   for (LoadSDNode *L : Loads)
    5058        2193 :     DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
    5059             :   // The replacement is the wide load itself, wrapped in a BSWAP when the byte orders differ.
    5060         660 :   return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
    5061             : }
    5062             : 
    5063       77012 : SDValue DAGCombiner::visitXOR(SDNode *N) { // Combine an XOR node: the folds below are tried in order and the first match returns
    5064      154024 :   SDValue N0 = N->getOperand(0);
    5065      154024 :   SDValue N1 = N->getOperand(1);
    5066      154024 :   EVT VT = N0.getValueType();
    5067             : 
    5068             :   // fold vector ops
    5069       77012 :   if (VT.isVector()) {
    5070       12024 :     if (SDValue FoldedVOp = SimplifyVBinOp(N))
    5071           1 :       return FoldedVOp;
    5072             : 
    5073             :     // fold (xor x, 0) -> x, vector edition
    5074       12023 :     if (ISD::isBuildVectorAllZeros(N0.getNode()))
    5075           2 :       return N1;
    5076       12021 :     if (ISD::isBuildVectorAllZeros(N1.getNode()))
    5077           7 :       return N0;
    5078             :   }
    5079             : 
    5080             :   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
    5081      154004 :   if (N0.isUndef() && N1.isUndef())
    5082           0 :     return DAG.getConstant(0, SDLoc(N), VT);
    5083             :   // fold (xor x, undef) -> undef
    5084      154004 :   if (N0.isUndef())
    5085           0 :     return N0;
    5086      154004 :   if (N1.isUndef())
    5087           0 :     return N1;
    5088             :   // fold (xor c1, c2) -> c1^c2
    5089      154004 :   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
    5090      154004 :   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
    5091       77002 :   if (N0C && N1C)
    5092        2604 :     return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
    5093             :   // canonicalize constant to RHS
    5094       76144 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
    5095          10 :      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    5096          36 :     return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
    5097             :   // fold (xor x, 0) -> x
    5098       76125 :   if (isNullConstant(N1))
    5099           0 :     return N0;
    5100             : 
    5101       76125 :   if (SDValue NewSel = foldBinOpIntoSelect(N))
    5102           5 :     return NewSel;
    5103             : 
    5104             :   // reassociate xor
    5105      152240 :   if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
    5106          88 :     return RXOR;
    5107             : 
    5108             :   // fold !(x cc y) -> (x !cc y)
    5109       76032 :   SDValue LHS, RHS, CC;
    5110       76032 :   if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
    5111       74102 :     bool isInt = LHS.getValueType().isInteger();
    5112       74102 :     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
    5113       37051 :                                                isInt);
    5114             :     // When LegalOperations is set, only proceed if the inverted condition code is legal for the operand type.
    5115       37051 :     if (!LegalOperations ||
    5116          14 :         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
    5117       74090 :       switch (N0.getOpcode()) {
    5118           0 :       default:
    5119           0 :         llvm_unreachable("Unhandled SetCC Equivalent!");
    5120       37045 :       case ISD::SETCC:
    5121      111135 :         return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
    5122           0 :       case ISD::SELECT_CC:
    5123           0 :         return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
    5124           0 :                                N0.getOperand(3), NotCC);
    5125             :       }
    5126             :     }
    5127             :   }
    5128             : 
    5129             :   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
    5130       67859 :   if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
    5131       39009 :       N0.getNode()->hasOneUse() &&
    5132          14 :       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
    5133           6 :     SDValue V = N0.getOperand(0);
    5134           6 :     SDLoc DL(N0);
    5135           6 :     V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
    5136           9 :                     DAG.getConstant(1, DL, V.getValueType()));
    5137           3 :     AddToWorklist(V.getNode());
    5138          12 :     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
    5139             :   }
    5140             : 
    5141             :   // fold (not (or x, y)) -> (and (not x), (not y)) and (not (and x, y)) -> (or (not x), (not y)) iff x or y are one-use setcc (i1 only)
    5142       57334 :   if (isOneConstant(N1) && VT == MVT::i1 &&
    5143       11695 :       (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    5144         156 :     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    5145          52 :     if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
    5146          72 :       unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
    5147         144 :       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
    5148         144 :       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
    5149          36 :       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
    5150         144 :       return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    5151             :     }
    5152             :   }
    5153             :   // fold (not (or x, y)) -> (and (not x), (not y)) and (not (and x, y)) -> (or (not x), (not y)) iff x or y are constants
    5154       46815 :   if (isAllOnesConstant(N1) &&
    5155       22751 :       (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    5156        1800 :     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    5157        1096 :     if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
    5158         104 :       unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
    5159         208 :       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
    5160         208 :       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
    5161          52 :       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
    5162         208 :       return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    5163             :     }
    5164             :   }
    5165             :   // fold (xor (and x, y), y) -> (and (not x), y)
    5166       78411 :   if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
    5167        1288 :       N0->getOperand(1) == N1) {
    5168         100 :     SDValue X = N0->getOperand(0);
    5169         150 :     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
    5170          50 :     AddToWorklist(NotX.getNode());
    5171         200 :     return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
    5172             :   }
    5173             :   // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2)); c1 may be either operand of the inner xor
    5174       79622 :   if (N1C && N0.getOpcode() == ISD::XOR) {
    5175         192 :     if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) {
    5176           0 :       SDLoc DL(N);
    5177           0 :       return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
    5178           0 :                          DAG.getConstant(N1C->getAPIntValue() ^
    5179           0 :                                          N00C->getAPIntValue(), DL, VT));
    5180             :     }
    5181         192 :     if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) {
    5182           0 :       SDLoc DL(N);
    5183           0 :       return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
    5184           0 :                          DAG.getConstant(N1C->getAPIntValue() ^
    5185           0 :                                          N01C->getAPIntValue(), DL, VT));
    5186             :     }
    5187             :   }
    5188             : 
    5189             :   // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X), only when ABS is legal or custom for VT
    5190       38846 :   unsigned OpSizeInBits = VT.getScalarSizeInBits();
    5191       42720 :   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
    5192       42014 :       N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0) &&
    5193         544 :       TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
    5194         142 :     if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
    5195         138 :       if (C->getAPIntValue() == (OpSizeInBits - 1))
    5196         345 :         return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0.getOperand(0));
    5197             :   }
    5198             : 
    5199             :   // fold (xor x, x) -> 0
    5200       38777 :   if (N0 == N1)
    5201          39 :     return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
    5202             : 
    5203             :   // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
    5204             :   // Here is a concrete example of this equivalence:
    5205             :   // i16   x ==  14
    5206             :   // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
    5207             :   // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
    5208             :   //
    5209             :   // =>
    5210             :   //
    5211             :   // i16     ~1      == 0b1111111111111110
    5212             :   // i16 rol(~1, 14) == 0b1011111111111111
    5213             :   //
    5214             :   // Some additional tips to help conceptualize this transform:
    5215             :   // - Try to see the operation as placing a single zero in a value of all ones.
    5216             :   // - There exists no value for x which would allow the result to contain zero.
    5217             :   // - Values of x larger than the bitwidth are undefined and do not require a
    5218             :   //   consistent result.
    5219             :   // - Pushing the zero left requires shifting one bits in from the right.
    5220             :   // A rotate left of ~1 is a nice way of achieving the desired result.
    5221       78058 :   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
    5222         185 :       && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
    5223         118 :     SDLoc DL(N);
    5224          59 :     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT), // NOTE(review): ~1 is the int -2; presumably only forms the intended constant for VT up to 64 bits - confirm against getConstant
    5225         118 :                        N0.getOperand(1));
    5226             :   }
    5227             : 
    5228             :   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
    5229      116115 :   if (N0.getOpcode() == N1.getOpcode())
    5230        7244 :     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
    5231         259 :       return Tmp;
    5232             : 
    5233             :   // Simplify the expression using non-local knowledge.
    5234       38446 :   if (SimplifyDemandedBits(SDValue(N, 0)))
    5235         110 :     return SDValue(N, 0);
    5236             :   // No fold applied: return an empty SDValue.
    5237       38336 :   return SDValue();
    5238             : }
    5239             : 
    5240             : /// Handle transforms common to the three shifts, when the shift amount is a
    5241             : /// constant.
                       ///
                       /// Rewrites (shift (binop X, C1), C2) into
                       /// (binop (shift X, C2), (shift C1, C2)) when the binop is OR, XOR or AND
                       /// (or ADD, but only under SHL), C1 is a non-opaque constant and the binop
                       /// has a single use. Returns an empty SDValue whenever the pattern does not
                       /// match or the transform is declined.
                       ///
                       /// NOTE(review): \p Amt is not referenced in the body; the shift amount is
                       /// re-read from N->getOperand(1).
    5242      245786 : SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
    5243      491572 :   SDNode *LHS = N->getOperand(0).getNode();
    5244      245786 :   if (!LHS->hasOneUse()) return SDValue();
    5245             : 
    5246             :   // We want to pull some binops through shifts, so that we have (and (shift))
    5247             :   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
    5248             :   // thing happens with address calculations, so it's important to canonicalize
    5249             :   // it.
    5250      137499 :   bool HighBitSet = false;  // Can we transform this if the high bit is set?
    5251             : 
    5252      137499 :   switch (LHS->getOpcode()) {
    5253      124358 :   default: return SDValue();
    5254             :   case ISD::OR:
    5255             :   case ISD::XOR:
    5256             :     HighBitSet = false; // We can only transform sra if the high bit is clear.
    5257             :     break;
    5258        5554 :   case ISD::AND:
    5259        5554 :     HighBitSet = true;  // We can only transform sra if the high bit is set.
    5260             :     break;
    5261        6572 :   case ISD::ADD:
    5262        6572 :     if (N->getOpcode() != ISD::SHL)
    5263        5515 :       return SDValue(); // only shl(add) not sr[al](add).
    5264             :     HighBitSet = false; // We can only transform sra if the high bit is clear.
    5265             :     break;
    5266             :   }
    5267             : 
    5268             :   // We require the RHS of the binop to be a constant and not opaque as well.
    5269       20718 :   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
    5270        2160 :   if (!BinOpCst) return SDValue();
    5271             : 
    5272             :   // FIXME: disable this unless the input to the binop is a shift by a constant
    5273             :   // or is copy/select. Enable this in other cases once we figure out exactly when it is profitable.
    5274       10932 :   SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
    5275        5371 :   bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
    5276       10836 :                  BinOpLHSVal->getOpcode() == ISD::SRA ||
    5277        5466 :                  BinOpLHSVal->getOpcode() == ISD::SRL;
    5278        5466 :   bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
    5279        5466 :                         BinOpLHSVal->getOpcode() == ISD::SELECT;
    5280             : 
                         // Bail out unless the binop's LHS is a shift by a constant, or a
                         // CopyFromReg/SELECT node.
    5281       10932 :   if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
    5282             :       !isCopyOrSelect)
    5283        4270 :     return SDValue();
    5284             : 
                         // In the copy/select case, also decline when the shift has exactly one use.
    5285        1684 :   if (isCopyOrSelect && N->hasOneUse())
    5286         488 :     return SDValue();
    5287             : 
    5288        1416 :   EVT VT = N->getValueType(0);
    5289             : 
    5290             :   // If this is a signed shift right, and the high bit is modified by the
    5291             :   // logical operation, do not perform the transformation. The HighBitSet
    5292             :   // boolean indicates the value of the high bit of the constant which would
    5293             :   // cause it to be modified for this operation.
    5294         708 :   if (N->getOpcode() == ISD::SRA) {
    5295           4 :     bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
    5296           4 :     if (BinOpRHSSignSet != HighBitSet)
    5297           4 :       return SDValue();
    5298             :   }
    5299             : 
                         // Give the target a chance to veto commuting the binop with the shift.
    5300         704 :   if (!TLI.isDesirableToCommuteWithShift(LHS))
    5301           2 :     return SDValue();
    5302             : 
    5303             :   // Fold the constants, shifting the binop RHS by the shift amount.
    5304        3510 :   SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
    5305             :                                N->getValueType(0),
    5306        4914 :                                LHS->getOperand(1), N->getOperand(1));
    5307             :   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
    5308             : 
    5309             :   // Create the new shift.
    5310         702 :   SDValue NewShift = DAG.getNode(N->getOpcode(),
    5311        2808 :                                  SDLoc(LHS->getOperand(0)),
    5312        4212 :                                  VT, LHS->getOperand(0), N->getOperand(1));
    5313             : 
    5314             :   // Create the new binop.
    5315        3510 :   return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
    5316             : }
    5317             : 
                       /// Try to push a TRUNCATE through an AND whose second operand is constant.
                       ///
                       /// \p N must be a TRUNCATE whose operand is an AND (both are asserted). When
                       /// the truncate and the AND each have a single use and the AND's second
                       /// operand is a non-opaque constant or constant vector, this returns
                       /// (and (truncate N00), (truncate N01)) and adds both new truncates to the
                       /// worklist. Otherwise it returns an empty SDValue.
    5318         393 : SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
    5319             :   assert(N->getOpcode() == ISD::TRUNCATE);
    5320             :   assert(N->getOperand(0).getOpcode() == ISD::AND);
    5321             : 
    5322             :   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
    5323        1134 :   if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
    5324         435 :     SDValue N01 = N->getOperand(0).getOperand(1);
    5325         145 :     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
    5326         284 :       SDLoc DL(N);
    5327         284 :       EVT TruncVT = N->getValueType(0);
    5328         426 :       SDValue N00 = N->getOperand(0).getOperand(0);
    5329         284 :       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
    5330         284 :       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
    5331         142 :       AddToWorklist(Trunc00.getNode());
    5332         142 :       AddToWorklist(Trunc01.getNode());
    5333         284 :       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
    5334             :     }
    5335             :   }
    5336             : 
    5337         251 :   return SDValue();
    5338             : }
    5339             : 
                       /// Try the rotate folds below, in order, on \p N.
                       ///
                       /// Every rewritten node reuses N->getOpcode(), so the rotate direction of
                       /// \p N is preserved. Returns the replacement value, or an empty SDValue
                       /// when no fold applies.
    5340        1600 : SDValue DAGCombiner::visitRotate(SDNode *N) {
    5341        3200 :   SDLoc dl(N);
    5342        3200 :   SDValue N0 = N->getOperand(0);
    5343        3200 :   SDValue N1 = N->getOperand(1);
    5344        3200 :   EVT VT = N->getValueType(0);
    5345        1600 :   unsigned Bitsize = VT.getScalarSizeInBits();
    5346             : 
    5347             :   // fold (rot x, 0) -> x
    5348        1600 :   if (isNullConstantOrNullSplatConstant(N1))
    5349           2 :     return N0;
    5350             : 
    5351             :   // fold (rot x, c) -> (rot x, c % BitSize)
    5352        1598 :   if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
    5353        1959 :     if (Cst->getAPIntValue().uge(Bitsize)) {
    5354           4 :       uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
    5355           2 :       return DAG.getNode(N->getOpcode(), dl, VT, N0,
    5356           6 :                          DAG.getConstant(RotAmt, dl, N1.getValueType()));
    5357             :     }
    5358             :   }
    5359             : 
    5360             :   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
    5361        3387 :   if (N1.getOpcode() == ISD::TRUNCATE &&
    5362         390 :       N1.getOperand(0).getOpcode() == ISD::AND) {
    5363          30 :     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
    5364          90 :       return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
    5365             :   }
    5366             : 
    5367        3132 :   unsigned NextOp = N0.getOpcode();
    5368             :   // fold (rot* (rot* x, c2), c1) -> (rot* x, (c1 +- c2) % bitsize)
                         // The amounts are added when both rotates have the same opcode and
                         // subtracted (c1 - c2) otherwise.
                         // NOTE(review): the normalization uses a signed remainder (SREM), so in
                         // the SUB case with c1 < c2 the amount can presumably be negative; the
                         // result of the second FoldConstantArithmetic call is also used without
                         // a null check. Confirm both are intended.
    5369        1566 :   if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
    5370          11 :     SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
    5371          22 :     SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
    5372          33 :     if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
    5373          11 :       EVT ShiftVT = C1->getValueType(0);
    5374          22 :       bool SameSide = (N->getOpcode() == NextOp);
    5375          11 :       unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
    5376          11 :       if (SDValue CombinedShift =
    5377          11 :               DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
    5378          11 :         SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
    5379          11 :         SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
    5380             :             ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
    5381          11 :             BitsizeC.getNode());
    5382          33 :         return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
    5383          33 :                            CombinedShiftNorm);
    5384             :       }
    5385             :     }
    5386             :   }
    5387        1555 :   return SDValue();
    5388             : }
    5389             : 
                       /// Try the shift-left folds below, in order, on \p N.
                       ///
                       /// N0 is the shifted value and N1 the shift amount; N1C is non-null only
                       /// when the amount is a constant or constant splat. Returns the replacement
                       /// value, SDValue(N, 0) when SimplifyDemandedBits returns true, or an empty
                       /// SDValue when no fold applies.
    5390      172037 : SDValue DAGCombiner::visitSHL(SDNode *N) {
    5391      344074 :   SDValue N0 = N->getOperand(0);
    5392      344074 :   SDValue N1 = N->getOperand(1);
    5393      344074 :   EVT VT = N0.getValueType();
    5394      172037 :   unsigned OpSizeInBits = VT.getScalarSizeInBits();
    5395             : 
    5396             :   // fold vector ops
    5397      172037 :   if (VT.isVector()) {
    5398        2682 :     if (SDValue FoldedVOp = SimplifyVBinOp(N))
    5399           4 :       return FoldedVOp;
    5400             : 
    5401        1455 :     BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
    5402             :     // If setcc produces all-one true value then:
    5403             :     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
    5404        1455 :     if (N1CV && N1CV->isConstant()) {
    5405        2836 :       if (N0.getOpcode() == ISD::AND) {
    5406          50 :         SDValue N00 = N0->getOperand(0);
    5407          50 :         SDValue N01 = N0->getOperand(1);
    5408          25 :         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
    5409             : 
    5410          54 :         if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
    5411          12 :             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
    5412             :                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
    5413          16 :           if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
    5414           8 :                                                      N01CV, N1CV))
    5415          16 :             return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
    5416             :         }
    5417             :       }
    5418             :     }
    5419             :   }
    5420             : 
    5421      172029 :   ConstantSDNode *N1C = isConstOrConstSplat(N1);
    5422             : 
    5423             :   // fold (shl c1, c2) -> c1<<c2
    5424      344058 :   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
    5425      175703 :   if (N0C && N1C && !N1C->isOpaque())
    5426       11022 :     return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
    5427             :   // fold (shl 0, x) -> 0
    5428      168355 :   if (isNullConstantOrNullSplatConstant(N0))
    5429          74 :     return N0;
    5430             :   // fold (shl x, c >= size(x)) -> undef
    5431             :   // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
    5432      160388 :   auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
    5433      320776 :     return Val->getAPIntValue().uge(OpSizeInBits);
    5434      328669 :   };
    5435      336562 :   if (matchUnaryPredicate(N1, MatchShiftTooBig))
    5436          43 :     return DAG.getUNDEF(VT);
    5437             :   // fold (shl x, 0) -> x
    5438      328150 :   if (N1C && N1C->isNullValue())
    5439         414 :     return N0;
    5440             :   // fold (shl undef, x) -> 0
    5441      335648 :   if (N0.isUndef())
    5442          48 :     return DAG.getConstant(0, SDLoc(N), VT);
    5443             : 
    5444      167808 :   if (SDValue NewSel = foldBinOpIntoSelect(N))
    5445       23669 :     return NewSel;
    5446             : 
    5447             :   // if (shl x, c) is known to be zero, return 0
    5448      432417 :   if (DAG.MaskedValueIsZero(SDValue(N, 0),
    5449      288278 :                             APInt::getAllOnesValue(OpSizeInBits)))
    5450        6258 :     return DAG.getConstant(0, SDLoc(N), VT);
    5451             :   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
    5452      286013 :   if (N1.getOpcode() == ISD::TRUNCATE &&
    5453        3814 :       N1.getOperand(0).getOpcode() == ISD::AND) {
    5454         315 :     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
    5455         316 :       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
    5456             :   }
    5457             : 
    5458      280741 :   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    5459        5040 :     return SDValue(N, 0);
    5460             : 
    5461             :   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
    5462      273868 :   if (N0.getOpcode() == ISD::SHL) {
    5463             :     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
    5464         132 :                                           ConstantSDNode *RHS) {
    5465         264 :       APInt c1 = LHS->getAPIntValue();
    5466         264 :       APInt c2 = RHS->getAPIntValue();
    5467          66 :       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
    5468         462 :       return (c1 + c2).uge(OpSizeInBits);
    5469         292 :     };
    5470         876 :     if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
    5471          12 :       return DAG.getConstant(0, SDLoc(N), VT);
    5472             : 
    5473             :     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
    5474         138 :                                        ConstantSDNode *RHS) {
    5475         276 :       APInt c1 = LHS->getAPIntValue();
    5476         276 :       APInt c2 = RHS->getAPIntValue();
    5477          69 :       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
    5478         414 :       return (c1 + c2).ult(OpSizeInBits);
    5479         288 :     };
    5480         864 :     if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
    5481         108 :       SDLoc DL(N);
    5482         108 :       EVT ShiftVT = N1.getValueType();
    5483         162 :       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
    5484         162 :       return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
    5485             :     }
    5486             :   }
    5487             : 
    5488             :   // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
    5489             :   // For this to be valid, the second form must not preserve any of the bits
    5490             :   // that are shifted out by the inner shift in the first form.  This means
    5491             :   // the outer shift size must be >= the number of bits added by the ext.
    5492             :   // As a corollary, we don't care what kind of ext it is.
    5493      308319 :   if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
    5494       98247 :               N0.getOpcode() == ISD::ANY_EXTEND ||
    5495      267326 :               N0.getOpcode() == ISD::SIGN_EXTEND) &&
    5496      166512 :       N0.getOperand(0).getOpcode() == ISD::SHL) {
    5497          80 :     SDValue N0Op0 = N0.getOperand(0);
    5498          80 :     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
    5499          93 :       APInt c1 = N0Op0C1->getAPIntValue();
    5500          93 :       APInt c2 = N1C->getAPIntValue();
    5501          26 :       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
    5502             : 
    5503          52 :       EVT InnerShiftVT = N0Op0.getValueType();
    5504          26 :       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
    5505          52 :       if (c2.uge(OpSizeInBits - InnerShiftSize)) {
    5506          22 :         SDLoc DL(N0);
    5507          44 :         APInt Sum = c1 + c2;
    5508          22 :         if (Sum.uge(OpSizeInBits))
    5509           4 :           return DAG.getConstant(0, DL, VT);
    5510             : 
    5511           7 :         return DAG.getNode(
    5512             :             ISD::SHL, DL, VT,
    5513          21 :             DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
    5514          42 :             DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
    5515             :       }
    5516             :     }
    5517             :   }
    5518             : 
    5519             :   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
    5520             :   // Only fold this if the inner zext has no other uses to avoid increasing
    5521             :   // the total number of instructions.
    5522      419931 :   if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
    5523      153734 :       N0.getOperand(0).getOpcode() == ISD::SRL) {
    5524         132 :     SDValue N0Op0 = N0.getOperand(0);
    5525         132 :     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
    5526         198 :       if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
    5527         128 :         uint64_t c1 = N0Op0C1->getZExtValue();
    5528         128 :         uint64_t c2 = N1C->getZExtValue();
    5529          64 :         if (c1 == c2) {
    5530          24 :           SDValue NewOp0 = N0.getOperand(0);
    5531          36 :           EVT CountVT = NewOp0.getOperand(1).getValueType();
    5532          24 :           SDLoc DL(N);
    5533          12 :           SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
    5534             :                                        NewOp0,
    5535          24 :                                        DAG.getConstant(c2, DL, CountVT));
    5536          12 :           AddToWorklist(NewSHL.getNode());
    5537          48 :           return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
    5538             :         }
    5539             :       }
    5540             :     }
    5541             :   }
    5542             : 
    5543             :   // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
    5544             :   // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
    5545      394887 :   if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
    5546        3714 :       N0->getFlags().hasExact()) {
    5547         140 :     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
    5548         140 :       uint64_t C1 = N0C1->getZExtValue();
    5549         140 :       uint64_t C2 = N1C->getZExtValue();
    5550         140 :       SDLoc DL(N);
    5551          70 :       if (C1 <= C2)
    5552         135 :         return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
    5553         135 :                            DAG.getConstant(C2 - C1, DL, N1.getValueType()));
    5554          75 :       return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
    5555         100 :                          DAG.getConstant(C1 - C2, DL, N1.getValueType()));
    5556             :     }
    5557             :   }
    5558             : 
    5559             :   // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
    5560             :   //                               (and (srl x, (sub c1, c2)), MASK)
    5561             :   // Only fold this if the inner shift has no other uses -- if it does, folding
    5562             :   // this will increase the total number of instructions.
    5563      266314 :   if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    5564        1250 :     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
    5565        1220 :       uint64_t c1 = N0C1->getZExtValue();
    5566         610 :       if (c1 < OpSizeInBits) {
    5567        1220 :         uint64_t c2 = N1C->getZExtValue();
    5568        1220 :         APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
    5569         610 :         SDValue Shift;
    5570         610 :         if (c2 > c1) {
    5571         104 :           Mask <<= c2 - c1;
    5572         208 :           SDLoc DL(N);
    5573         312 :           Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
    5574         312 :                               DAG.getConstant(c2 - c1, DL, N1.getValueType()));
    5575             :         } else {
    5576        1012 :           Mask.lshrInPlace(c1 - c2);
    5577        1012 :           SDLoc DL(N);
    5578        1518 :           Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
    5579        1518 :                               DAG.getConstant(c1 - c2, DL, N1.getValueType()));
    5580             :         }
    5581        1220 :         SDLoc DL(N0);
    5582         610 :         return DAG.getNode(ISD::AND, DL, VT, Shift,
    5583         610 :                            DAG.getConstant(Mask, DL, VT));
    5584             :       }
    5585             :     }
    5586             :   }
    5587             : 
    5588             :   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
    5589      273190 :   if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
    5590          20 :       isConstantOrConstantVector(N1, /* No Opaques */ true)) {
    5591          40 :     SDLoc DL(N);
    5592          20 :     SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
    5593          40 :     SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
    5594          60 :     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
    5595             :   }
    5596             : 
    5597             :   // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
    5598             :   // Variant of version done on multiply, except mul by a power of 2 is turned
    5599             :   // into a shift.
    5600      142076 :   if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
    5601      139874 :       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
    5602        3710 :       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
    5603        3990 :     SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
    5604        3990 :     SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    5605         798 :     AddToWorklist(Shl0.getNode());
    5606         798 :     AddToWorklist(Shl1.getNode());
    5607        3192 :     return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
    5608             :   }
    5609             : 
    5610             :   // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
    5611      135637 :   if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
    5612      135519 :       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
    5613         160 :       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
    5614         115 :     SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    5615          23 :     if (isConstantOrConstantVector(Shl))
    5616         115 :       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
    5617             :   }
    5618             : 
                         // Last, try visitShiftByConstant when the amount is a non-opaque constant.
    5619      262427 :   if (N1C && !N1C->isOpaque())
    5620      127095 :     if (SDValue NewSHL = visitShiftByConstant(N, N1C))
    5621         167 :       return NewSHL;
    5622             : 
    5623      135165 :   return SDValue();
    5624             : }
    5625             : 
    5626       23648 : SDValue DAGCombiner::visitSRA(SDNode *N) {
    5627       47296 :   SDValue N0 = N->getOperand(0);
    5628       47296 :   SDValue N1 = N->getOperand(1);
    5629       47296 :   EVT VT = N0.getValueType();
    5630       23648 :   unsigned OpSizeInBits = VT.getScalarSizeInBits();
    5631             : 
    5632             :   // Arithmetic shifting an all-sign-bit value is a no-op.
    5633             :   // fold (sra 0, x) -> 0
    5634             :   // fold (sra -1, x) -> -1
    5635       23648 :   if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
    5636         433 :     return N0;
    5637             : 
    5638             :   // fold vector ops
    5639       23215 :   if (VT.isVector())
    5640        2096 :     if (SDValue FoldedVOp = SimplifyVBinOp(N))
    5641           0 :       return FoldedVOp;
    5642             : 
    5643       23215 :   ConstantSDNode *N1C = isConstOrConstSplat(N1);
    5644             : 
    5645             :   // fold (sra c1, c2) -> c1 >> c2 (arithmetic shift, constant-folded)
    5646       46430 :   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
    5647       23215 :   if (N0C && N1C && !N1C->isOpaque())
    5648           0 :     return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
    5649             :   // fold (sra x, c >= size(x)) -> undef
    5650             :   // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
    5651       21244 :   auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
    5652       42488 :     return Val->getAPIntValue().uge(OpSizeInBits);
    5653       44459 :   };
    5654       46430 :   if (matchUnaryPredicate(N1, MatchShiftTooBig))
    5655          21 :     return DAG.getUNDEF(VT);
    5656             :   // fold (sra x, 0) -> x
    5657       44250 :   if (N1C && N1C->isNullValue())
    5658           8 :     return N0;
    5659             : 
    5660       23186 :   if (SDValue NewSel = foldBinOpIntoSelect(N))
    5661           2 :     return NewSel;
    5662             : 
    5663             :   // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
    5664             :   // sext_inreg.
    5665       51296 :   if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
    5666        4488 :     unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
    5667        2244 :     EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
    5668        2244 :     if (VT.isVector())
    5669         126 :       ExtVT = EVT::getVectorVT(*DAG.getContext(),
    5670         126 :                                ExtVT, VT.getVectorNumElements());
    5671        2244 :     if ((!LegalOperations ||
    5672        1755 :          TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
    5673        1479 :       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
    5674        1972 :                          N0.getOperand(0), DAG.getValueType(ExtVT));
    5675             :   }
    5676             : 
    5677             :   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
    5678       45382 :   if (N0.getOpcode() == ISD::SRA) {
    5679         313 :     SDLoc DL(N);
    5680         560 :     EVT ShiftVT = N1.getValueType();
    5681             : 
    5682             :     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
    5683         518 :                                           ConstantSDNode *RHS) {
    5684        1036 :       APInt c1 = LHS->getAPIntValue();
    5685        1036 :       APInt c2 = RHS->getAPIntValue();
    5686         259 :       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
    5687        1813 :       return (c1 + c2).uge(OpSizeInBits);
    5688         280 :     };
    5689         840 :     if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
    5690         717 :       return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
    5691         478 :                          DAG.getConstant(OpSizeInBits - 1, DL, ShiftVT));
    5692             : 
    5693             :     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
    5694          50 :                                        ConstantSDNode *RHS) {
    5695         100 :       APInt c1 = LHS->getAPIntValue();
    5696         100 :       APInt c2 = RHS->getAPIntValue();
    5697          25 :       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
    5698         150 :       return (c1 + c2).ult(OpSizeInBits);
    5699          41 :     };
    5700         123 :     if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
    5701          24 :       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
    5702          24 :       return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), Sum);
    5703             :     }
    5704             :   }
    5705             : 
    5706             :   // fold (sra (shl X, m), (sub result_size, n))
    5707             :   // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
    5708             :   // result_size - n != m.
    5709             :   // If truncate is free for the target sext(shl) is likely to result in better
    5710             :   // code.
    5711       44888 :   if (N0.getOpcode() == ISD::SHL && N1C) {
    5712             :     // Get the two constants of the shifts, CN0 = m, CN = n.
    5713        8658 :     const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
    5714        4329 :     if (N01C) {
    5715        4329 :       LLVMContext &Ctx = *DAG.getContext();
    5716             :       // Determine what the truncate's result bitsize and type would be.
    5717        8658 :       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
    5718             : 
    5719        4329 :       if (VT.isVector())
    5720          55 :         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
    5721             : 
    5722             :       // Determine the residual right-shift amount.
    5723       12987 :       int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
    5724             : 
    5725             :       // If the shift is not a no-op (in which case this should be just a sign
    5726             :       // extend already), the truncated to type is legal, sign_extend is legal
    5727             :       // on that type, and the truncate to that type is both legal and free,
    5728             :       // perform the transform.
    5729             :       if ((ShiftAmt > 0) &&
    5730        2559 :           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
    5731        4495 :           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
    5732          83 :           TLI.isTruncateFree(VT, TruncVT)) {
    5733             : 
    5734          26 :         SDLoc DL(N);
    5735          13 :         SDValue Amt = DAG.getConstant(ShiftAmt, DL,
    5736          39 :             getShiftAmountTy(N0.getOperand(0).getValueType()));
    5737          13 :         SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
    5738          26 :                                     N0.getOperand(0), Amt);
    5739          13 :         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
    5740          13 :                                     Shift);
    5741          13 :         return DAG.getNode(ISD::SIGN_EXTEND, DL,
    5742          26 :                            N->getValueType(0), Trunc);
    5743             :       }
    5744             :     }
    5745             :   }
    5746             : 
    5747             :   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
    5748       45162 :   if (N1.getOpcode() == ISD::TRUNCATE &&
    5749         600 :       N1.getOperand(0).getOpcode() == ISD::AND) {
    5750          15 :     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
    5751          52 :       return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
    5752             :   }
    5753             : 
    5754             :   // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
    5755             :   //      if c1 is equal to the number of bits the trunc removes
    5756       27556 :   if (N0.getOpcode() == ISD::TRUNCATE &&
    5757       12849 :       (N0.getOperand(0).getOpcode() == ISD::SRL ||
    5758        7725 :        N0.getOperand(0).getOpcode() == ISD::SRA) &&
    5759        7316 :       N0.getOperand(0).hasOneUse() &&
    5760       29000 :       N0.getOperand(0).getOperand(1).hasOneUse() &&
    5761             :       N1C) {
    5762         206 :     SDValue N0Op0 = N0.getOperand(0);
    5763         206 :     if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
    5764         206 :       unsigned LargeShiftVal = LargeShift->getZExtValue();
    5765         206 :       EVT LargeVT = N0Op0.getValueType();
    5766             : 
    5767         103 :       if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
    5768         198 :         SDLoc DL(N);
    5769             :         SDValue Amt =
    5770         198 :           DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
    5771         396 :                           getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
    5772          99 :         SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
    5773         198 :                                   N0Op0.getOperand(0), Amt);
    5774         198 :         return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
    5775             :       }
    5776             :     }
    5777             :   }
    5778             : 
    5779             :   // Simplify, based on bits shifted out of the LHS.
    5780       43516 :   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    5781         999 :     return SDValue(N, 0);
    5782             : 
    5783             : 
    5784             :   // If the sign bit is known to be zero, switch this to a SRL.
    5785       21320 :   if (DAG.SignBitIsZero(N0))
    5786         484 :     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
    5787             : 
    5788       40392 :   if (N1C && !N1C->isOpaque())
    5789       19193 :     if (SDValue NewSRA = visitShiftByConstant(N, N1C))
    5790           0 :       return NewSRA;
    5791             : 
    5792       21199 :   return SDValue();
    5793             : }
    5794             : 
    5795      128022 : SDValue DAGCombiner::visitSRL(SDNode *N) {
                         // Combine an ISD::SRL node (the folds below treat it as an unsigned
                         // ">>u" shift of operand 0 by operand 1). Returns the replacement value
                         // when a fold applies, SDValue(N, 0) when SimplifyDemandedBits reports a
                         // change, and an empty SDValue() when nothing matched.
                         // The folds are tried strictly in the order written below.
    5796      256044 :   SDValue N0 = N->getOperand(0);
    5797      256044 :   SDValue N1 = N->getOperand(1);
    5798      256044 :   EVT VT = N0.getValueType();
    5799      128022 :   unsigned OpSizeInBits = VT.getScalarSizeInBits();
    5800             : 
    5801             :   // fold vector ops
    5802      128022 :   if (VT.isVector())
    5803        3023 :     if (SDValue FoldedVOp = SimplifyVBinOp(N))
    5804           2 :       return FoldedVOp;
    5805             : 
                         // N1C is non-null only when the shift amount is a constant or a splat of
                         // one constant; most folds below are gated on it.
    5806      128020 :   ConstantSDNode *N1C = isConstOrConstSplat(N1);
    5807             : 
    5808             :   // fold (srl c1, c2) -> c1 >>u c2
    5809      256040 :   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
    5810      130535 :   if (N0C && N1C && !N1C->isOpaque())
    5811        7545 :     return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
    5812             :   // fold (srl 0, x) -> 0
    5813      125505 :   if (isNullConstantOrNullSplatConstant(N0))
    5814          54 :     return N0;
    5815             :   // fold (srl x, c >= size(x)) -> undef
    5816             :   // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
    5817      118211 :   auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
    5818      236422 :     return Val->getAPIntValue().uge(OpSizeInBits);
    5819      243662 :   };
    5820      250902 :   if (matchUnaryPredicate(N1, MatchShiftTooBig))
    5821          20 :     return DAG.getUNDEF(VT);
    5822             :   // fold (srl x, 0) -> x
    5823      243296 :   if (N1C && N1C->isNullValue())
    5824        1712 :     return N0;
    5825             : 
    5826      123719 :   if (SDValue NewSel = foldBinOpIntoSelect(N))
    5827           3 :     return NewSel;
    5828             : 
    5829             :   // if (srl x, c) is known to be zero, return 0
    5830      479732 :   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
    5831      363236 :                                    APInt::getAllOnesValue(OpSizeInBits)))
    5832        1038 :     return DAG.getConstant(0, SDLoc(N), VT);
    5833             : 
    5834             :   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
    5835      246740 :   if (N0.getOpcode() == ISD::SRL) {
                           // Both predicates widen c1/c2 to a common width plus one extra bit
                           // before adding, so the sum c1 + c2 itself cannot wrap.
    5836             :     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
    5837        8572 :                                           ConstantSDNode *RHS) {
    5838       17144 :       APInt c1 = LHS->getAPIntValue();
    5839       17144 :       APInt c2 = RHS->getAPIntValue();
    5840        4286 :       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
    5841       30002 :       return (c1 + c2).uge(OpSizeInBits);
    5842        4414 :     };
    5843       13242 :     if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
    5844          12 :       return DAG.getConstant(0, SDLoc(N), VT);
    5845             : 
    5846             :     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
    5847        8578 :                                        ConstantSDNode *RHS) {
    5848       17156 :       APInt c1 = LHS->getAPIntValue();
    5849       17156 :       APInt c2 = RHS->getAPIntValue();
    5850        4289 :       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
    5851       25734 :       return (c1 + c2).ult(OpSizeInBits);
    5852        4410 :     };
    5853       13230 :     if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
    5854        8548 :       SDLoc DL(N);
    5855        8548 :       EVT ShiftVT = N1.getValueType();
    5856       12822 :       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
    5857       12822 :       return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
    5858             :     }
    5859             :   }
    5860             : 
    5861             :   // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
    5862      245004 :   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
    5863       28764 :       N0.getOperand(0).getOpcode() == ISD::SRL) {
    5864       11616 :     if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
    5865        7744 :       uint64_t c1 = N001C->getZExtValue();
    5866        7744 :       uint64_t c2 = N1C->getZExtValue();
    5867       11616 :       EVT InnerShiftVT = N0.getOperand(0).getValueType();
    5868       15488 :       EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
    5869        3872 :       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
    5870             :       // This is only valid if the OpSizeInBits + c1 = size of inner shift.
    5871        3872 :       if (c1 + OpSizeInBits == InnerShiftSize) {
    5872        6612 :         SDLoc DL(N0);
    5873        3306 :         if (c1 + c2 >= InnerShiftSize)
    5874           0 :           return DAG.getConstant(0, DL, VT);
    5875        3306 :         return DAG.getNode(ISD::TRUNCATE, DL, VT,
    5876        3306 :                            DAG.getNode(ISD::SRL, DL, InnerShiftVT,
    5877        9918 :                                        N0.getOperand(0).getOperand(0),
    5878             :                                        DAG.getConstant(c1 + c2, DL,
    5879        9918 :                                                        ShiftCountVT)));
    5880             :       }
    5881             :     }
    5882             :   }
    5883             : 
    5884             :   // fold (srl (shl x, c), c) -> (and x, cst2)
                         // cst2 is built as (srl all-ones, c) and queued on the worklist.
    5885      232509 :   if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
    5886         210 :       isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
    5887         360 :     SDLoc DL(N);
    5888             :     SDValue Mask =
    5889         360 :         DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
    5890         180 :     AddToWorklist(Mask.getNode());
    5891         540 :     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
    5892             :   }
    5893             : 
    5894             :   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
    5895      331698 :   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    5896             :     // Shifting in all undef bits?
    5897        1668 :     EVT SmallVT = N0.getOperand(0).getValueType();
    5898         556 :     unsigned BitSize = SmallVT.getScalarSizeInBits();
    5899        1112 :     if (N1C->getZExtValue() >= BitSize)
    5900         102 :       return DAG.getUNDEF(VT);
    5901             : 
    5902         556 :     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
    5903         204 :       uint64_t ShiftAmt = N1C->getZExtValue();
    5904         204 :       SDLoc DL0(N0);
    5905         102 :       SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
    5906         204 :                                        N0.getOperand(0),
    5907             :                           DAG.getConstant(ShiftAmt, DL0,
    5908         204 :                                           getShiftAmountTy(SmallVT)));
    5909         102 :       AddToWorklist(SmallShift.getNode());
                             // The mask keeps the low (OpSizeInBits - ShiftAmt) bits of the result.
    5910         204 :       APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
    5911         204 :       SDLoc DL(N);
    5912         102 :       return DAG.getNode(ISD::AND, DL, VT,
    5913         102 :                          DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
    5914         204 :                          DAG.getConstant(Mask, DL, VT));
    5915             :     }
    5916             :   }
    5917             : 
    5918             :   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
    5919             :   // bit, which is unmodified by sra ("31" stands for OpSizeInBits - 1).
    5920      223448 :   if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
    5921        7558 :     if (N0.getOpcode() == ISD::SRA)
    5922        2270 :       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
    5923             :   }
    5924             : 
    5925             :   // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
                         // ("5" stands for Log2_32(OpSizeInBits), as checked below.)
    5926      222590 :   if (N1C && N0.getOpcode() == ISD::CTLZ &&
    5927         100 :       N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
    5928          98 :     KnownBits Known;
    5929         100 :     DAG.computeKnownBits(N0.getOperand(0), Known);
    5930             : 
    5931             :     // If any of the input bits are KnownOne, then the input couldn't be all
    5932             :     // zeros, thus the result of the srl will always be zero.
    5933          50 :     if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
    5934             : 
    5935             :     // If all of the bits input to the ctlz node are known to be zero, then
    5936             :     // the result of the ctlz is "32" and the result of the shift is one.
    5937         198 :     APInt UnknownBits = ~Known.Zero;
    5938          50 :     if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
    5939             : 
    5940             :     // Otherwise, check to see if there is exactly one bit input to the ctlz.
    5941          50 :     if (UnknownBits.isPowerOf2()) {
    5942             :       // Okay, we know that only the single bit specified by UnknownBits
    5943             :       // could be set on input to the CTLZ node. If this bit is set, the SRL
    5944             :       // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
    5945             :       // to an SRL/XOR pair, which is likely to simplify more.
    5946           2 :       unsigned ShAmt = UnknownBits.countTrailingZeros();
    5947           4 :       SDValue Op = N0.getOperand(0);
    5948             : 
    5949           2 :       if (ShAmt) {
    5950           4 :         SDLoc DL(N0);
    5951           4 :         Op = DAG.getNode(ISD::SRL, DL, VT, Op,
    5952             :                   DAG.getConstant(ShAmt, DL,
    5953           6 :                                   getShiftAmountTy(Op.getValueType())));
    5954           2 :         AddToWorklist(Op.getNode());
    5955             :       }
    5956             : 
    5957           4 :       SDLoc DL(N);
    5958           2 :       return DAG.getNode(ISD::XOR, DL, VT,
    5959           2 :                          Op, DAG.getConstant(1, DL, VT));
    5960             :     }
    5961             :   }
    5962             : 
    5963             :   // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
    5964      230811 :   if (N1.getOpcode() == ISD::TRUNCATE &&
    5965        1430 :       N1.getOperand(0).getOpcode() == ISD::AND) {
    5966          33 :     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
    5967          80 :       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
    5968             :   }
    5969             : 
    5970             :   // fold operands of srl based on knowledge that the low bits are not
    5971             :   // demanded.
    5972      230506 :   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    5973        7990 :     return SDValue(N, 0);
    5974             : 
    5975      306034 :   if (N1C && !N1C->isOpaque())
    5976       99498 :     if (SDValue NewSRL = visitShiftByConstant(N, N1C))
    5977         535 :       return NewSRL;
    5978             : 
    5979             :   // Attempt to convert a srl of a load into a narrower zero-extending load.
    5980      106503 :   if (SDValue NarrowLoad = ReduceLoadWidth(N))
    5981          34 :     return NarrowLoad;
    5982             : 
    5983             :   // Here is a common situation. We want to optimize:
    5984             :   //
    5985             :   //   %a = ...
    5986             :   //   %b = and i32 %a, 2
    5987             :   //   %c = srl i32 %b, 1
    5988             :   //   brcond i32 %c ...
    5989             :   //
    5990             :   // into
    5991             :   //
    5992             :   //   %a = ...
    5993             :   //   %b = and %a, 2
    5994             :   //   %c = setcc eq %b, 0
    5995             :   //   brcond %c ...
    5996             :   //
    5997             :   // However, after the source operand of SRL is optimized into AND, the SRL
    5998             :   // itself may not be optimized further. Look for it and add the BRCOND into
    5999             :   // the worklist.
    6000      101455 :   if (N->hasOneUse()) {
    6001      304365 :     SDNode *Use = *N->use_begin();
    6002      101455 :     if (Use->getOpcode() == ISD::BRCOND)
    6003          10 :       AddToWorklist(Use);
    6004      125093 :     else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
    6005             :       // Also look past the truncate.
    6006       47296 :       Use = *Use->use_begin();
    6007       23648 :       if (Use->getOpcode() == ISD::BRCOND)
    6008          53 :         AddToWorklist(Use);
    6009             :     }
    6010             :   }
    6011             : 
    6012      106469 :   return SDValue();
    6013             : }
    6014             : 
    6015         763 : SDValue DAGCombiner::visitABS(SDNode *N) {
                         // Combine an ISD::ABS node. Returns the operand itself when it is already
                         // an ABS or its sign bit is known to be zero, re-requests the node through
                         // DAG.getNode for constant operands, and otherwise returns an empty
                         // SDValue().
    6016        1526 :   SDValue N0 = N->getOperand(0);
    6017        1526 :   EVT VT = N->getValueType(0);
    6018             : 
    6019             :   // fold (abs c1) -> c2
                         // NOTE(review): presumably DAG.getNode performs the actual constant fold
                         // here -- confirm against SelectionDAG::getNode.
    6020         763 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    6021           0 :     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
    6022             :   // fold (abs (abs x)) -> (abs x)
    6023        1526 :   if (N0.getOpcode() == ISD::ABS)
    6024           8 :     return N0;
    6025             :   // fold (abs x) -> x iff not-negative
    6026         755 :   if (DAG.SignBitIsZero(N0))
    6027           8 :     return N0;
    6028         747 :   return SDValue();
    6029             : }
    6030             : 
    6031        1254 : SDValue DAGCombiner::visitBSWAP(SDNode *N) {
                         // Combine an ISD::BSWAP node. A bswap of a bswap yields the inner operand;
                         // constant operands are re-requested through DAG.getNode; anything else
                         // returns an empty SDValue().
    6032        2508 :   SDValue N0 = N->getOperand(0);
    6033        2508 :   EVT VT = N->getValueType(0);
    6034             : 
    6035             :   // fold (bswap c1) -> c2
                         // NOTE(review): presumably DAG.getNode performs the actual constant fold
                         // here -- confirm against SelectionDAG::getNode.
    6036        1254 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    6037           0 :     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
    6038             :   // fold (bswap (bswap x)) -> x
    6039        2508 :   if (N0.getOpcode() == ISD::BSWAP)
    6040          56 :     return N0->getOperand(0);
    6041        1226 :   return SDValue();
    6042             : }
    6043             : 
    6044         462 : SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
                         // Combine an ISD::BITREVERSE node. A bitreverse of a bitreverse yields the
                         // inner operand; constant operands are re-requested through DAG.getNode;
                         // anything else returns an empty SDValue().
    6045         924 :   SDValue N0 = N->getOperand(0);
    6046         924 :   EVT VT = N->getValueType(0);
    6047             : 
    6048             :   // fold (bitreverse c1) -> c2
                         // NOTE(review): presumably DAG.getNode performs the actual constant fold
                         // here -- confirm against SelectionDAG::getNode.
    6049         462 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    6050           0 :     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
    6051             :   // fold (bitreverse (bitreverse x)) -> x
    6052         924 :   if (N0.getOpcode() == ISD::BITREVERSE)
    6053           8 :     return N0.getOperand(0);
    6054         458 :   return SDValue();
    6055             : }
    6056             : 
    6057         920 : SDValue DAGCombiner::visitCTLZ(SDNode *N) {
                         // Combine an ISD::CTLZ node. The only fold handles operands accepted by
                         // DAG.isConstantIntBuildVectorOrConstantInt; every other operand returns
                         // an empty SDValue().
    6058        1840 :   SDValue N0 = N->getOperand(0);
    6059        1840 :   EVT VT = N->getValueType(0);
    6060             : 
    6061             :   // fold (ctlz c1) -> c2
                         // NOTE(review): presumably DAG.getNode performs the actual constant fold
                         // here -- confirm against SelectionDAG::getNode.
    6062         920 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    6063           0 :     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
    6064         920 :   return SDValue();
    6065             : }
    6066             : 
    6067         459 : SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
                         // Combine an ISD::CTLZ_ZERO_UNDEF node. The only fold handles operands
                         // accepted by DAG.isConstantIntBuildVectorOrConstantInt; every other
                         // operand returns an empty SDValue().
    6068         918 :   SDValue N0 = N->getOperand(0);
    6069         918 :   EVT VT = N->getValueType(0);
    6070             : 
    6071             :   // fold (ctlz_zero_undef c1) -> c2
                         // NOTE(review): presumably DAG.getNode performs the actual constant fold
                         // here -- confirm against SelectionDAG::getNode.
    6072         459 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    6073           0 :     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
    6074         459 :   return SDValue();
    6075             : }
    6076             : 
    6077         354 : SDValue DAGCombiner::visitCTTZ(SDNode *N) {
                         // Combine an ISD::CTTZ node. The only fold handles operands accepted by
                         // DAG.isConstantIntBuildVectorOrConstantInt; every other operand returns
                         // an empty SDValue().
    6078         708 :   SDValue N0 = N->getOperand(0);
    6079         708 :   EVT VT = N->getValueType(0);
    6080             : 
    6081             :   // fold (cttz c1) -> c2
                         // NOTE(review): presumably DAG.getNode performs the actual constant fold
                         // here -- confirm against SelectionDAG::getNode.
    6082         354 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    6083           0 :     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
    6084         354 :   return SDValue();
    6085             : }
    6086             : 
    6087         349 : SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
                         // Combine an ISD::CTTZ_ZERO_UNDEF node. The only fold handles operands
                         // accepted by DAG.isConstantIntBuildVectorOrConstantInt; every other
                         // operand returns an empty SDValue().
    6088         698 :   SDValue N0 = N->getOperand(0);
    6089         698 :   EVT VT = N->getValueType(0);
    6090             : 
    6091             :   // fold (cttz_zero_undef c1) -> c2
                         // NOTE(review): presumably DAG.getNode performs the actual constant fold
                         // here -- confirm against SelectionDAG::getNode.
    6092         349 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    6093           0 :     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
    6094         349 :   return SDValue();
    6095             : }
    6096             : 
    6097        1202 : SDValue DAGCombiner::visitCTPOP(SDNode *N) {
                         // Combine an ISD::CTPOP node. The only fold handles operands accepted by
                         // DAG.isConstantIntBuildVectorOrConstantInt; every other operand returns
                         // an empty SDValue().
    6098        2404 :   SDValue N0 = N->getOperand(0);
    6099        2404 :   EVT VT = N->getValueType(0);
    6100             : 
    6101             :   // fold (ctpop c1) -> c2
                         // NOTE(review): presumably DAG.getNode performs the actual constant fold
                         // here -- confirm against SelectionDAG::getNode.
    6102        1202 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    6103           0 :     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
    6104        1202 :   return SDValue();
    6105             : }
    6106             : 
    6107             : 
    6108             : /// \brief Generate Min/Max node
                       ///
                       /// Tries to express a select between \p True and \p False, guarded by the
                       /// comparison (\p LHS \p CC \p RHS), as a single FMINNUM or FMAXNUM node.
                       ///
                       /// The compared operands must be exactly the selected values, in either
                       /// order; otherwise an empty SDValue() is returned. For the "less than"
                       /// family of condition codes the node is FMINNUM when \p LHS is the true
                       /// value and FMAXNUM otherwise; the "greater than" family is the mirror
                       /// image. The node is only created when \p TLI reports the chosen opcode as
                       /// legal for \p VT. Any other condition code returns an empty SDValue().
    6109          99 : static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
    6110             :                                    SDValue RHS, SDValue True, SDValue False,
    6111             :                                    ISD::CondCode CC, const TargetLowering &TLI,
    6112             :                                    SelectionDAG &DAG) {
                         // Bail out unless {LHS, RHS} is {True, False} in one order or the other.
    6113          84 :   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
    6114          16 :     return SDValue();
    6115             : 
    6116             :   switch (CC) {
    6117          43 :   case ISD::SETOLT:
    6118             :   case ISD::SETOLE:
    6119             :   case ISD::SETLT:
    6120             :   case ISD::SETLE:
    6121             :   case ISD::SETULT:
    6122             :   case ISD::SETULE: {
                           // "LHS < RHS ? LHS : RHS" is a minimum; with the arms swapped, a maximum.
    6123          26 :     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
    6124          58 :     if (TLI.isOperationLegal(Opcode, VT))
    6125          15 :       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
    6126          28 :     return SDValue();
    6127             :   }
    6128          40 :   case ISD::SETOGT:
    6129             :   case ISD::SETOGE:
    6130             :   case ISD::SETGT:
    6131             :   case ISD::SETGE:
    6132             :   case ISD::SETUGT:
    6133             :   case ISD::SETUGE: {
                           // "LHS > RHS ? LHS : RHS" is a maximum; with the arms swapped, a minimum.
    6134          23 :     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
    6135          52 :     if (TLI.isOperationLegal(Opcode, VT))
    6136          12 :       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
    6137          28 :     return SDValue();
    6138             :   }
    6139           0 :   default:
    6140           0 :     return SDValue();
    6141             :   }
    6142             : }
    6143             : 
    6144       50570 : SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
                         // Try to replace a select whose two value operands are both scalar integer
                         // constants with not/extend/add/xor arithmetic on the condition.
                         // Returns an empty SDValue() when the result type is not an integer type,
                         // when either value operand is not a ConstantSDNode, or when none of the
                         // patterns below match.
    6145      101140 :   SDValue Cond = N->getOperand(0);
    6146      101140 :   SDValue N1 = N->getOperand(1);
    6147      101140 :   SDValue N2 = N->getOperand(2);
    6148      101140 :   EVT VT = N->getValueType(0);
    6149      101140 :   EVT CondVT = Cond.getValueType();
    6150      101140 :   SDLoc DL(N);
    6151             : 
    6152       50570 :   if (!VT.isInteger())
    6153        4459 :     return SDValue();
    6154             : 
    6155       46111 :   auto *C1 = dyn_cast<ConstantSDNode>(N1);
    6156       46111 :   auto *C2 = dyn_cast<ConstantSDNode>(N2);
    6157       46111 :   if (!C1 || !C2)
    6158       18222 :     return SDValue();
    6159             : 
    6160             :   // Only do this before legalization to avoid conflicting with target-specific
    6161             :   // transforms in the other direction (create a select from a zext/sext). There
    6162             :   // is also a target-independent combine here in DAGCombiner in the other
    6163             :   // direction for (select Cond, -1, 0) when the condition is not i1.
    6164       54107 :   if (CondVT == MVT::i1 && !LegalOperations) {
    6165       24621 :     if (C1->isNullValue() && C2->isOne()) {
    6166             :       // select Cond, 0, 1 --> zext (!Cond)
    6167          62 :       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
    6168          31 :       if (VT != MVT::i1)
    6169          60 :         NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
    6170          31 :       return NotCond;
    6171             :     }
    6172       24559 :     if (C1->isNullValue() && C2->isAllOnesValue()) {
    6173             :       // select Cond, 0, -1 --> sext (!Cond)
    6174          50 :       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
    6175          25 :       if (VT != MVT::i1)
    6176          50 :         NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
    6177          25 :       return NotCond;
    6178             :     }
    6179       24574 :     if (C1->isOne() && C2->isNullValue()) {
    6180             :       // select Cond, 1, 0 --> zext (Cond)
    6181          35 :       if (VT != MVT::i1)
    6182          70 :         Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
    6183          35 :       return Cond;
    6184             :     }
    6185       24526 :     if (C1->isAllOnesValue() && C2->isNullValue()) {
    6186             :       // select Cond, -1, 0 --> sext (Cond)
    6187          56 :       if (VT != MVT::i1)
    6188         112 :         Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
    6189          56 :       return Cond;
    6190             :     }
    6191             : 
    6192             :     // For any constants that differ by 1, we can transform the select into an
    6193             :     // extend and add. Use a target hook because some targets may prefer to
    6194             :     // transform in the other direction.
    6195       24371 :     if (TLI.convertSelectOfConstantsToMath(VT)) {
    6196      144216 :       if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
    6197             :         // select Cond, C1, C1-1 --> add (zext Cond), C1-1
    6198          19 :         if (VT != MVT::i1)
    6199          38 :           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
    6200          38 :         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
    6201             :       }
    6202      144102 :       if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
    6203             :         // select Cond, C1, C1+1 --> add (sext Cond), C1+1
    6204          94 :         if (VT != MVT::i1)
    6205         188 :           Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
    6206         188 :         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
    6207             :       }
    6208             :     }
    6209             : 
                           // An i1 condition before operation legalization never falls through to
                           // the xor fold below.
    6210       24258 :     return SDValue();
    6211             :   }
    6212             : 
    6213             :   // fold (select Cond, 0, 1) -> (xor Cond, 1)
    6214             :   // We can't do this reliably if integer based booleans have different contents
    6215             :   // to floating point based booleans. This is because we can't tell whether we
    6216             :   // have an integer-based boolean or a floating-point-based boolean unless we
    6217             :   // can find the SETCC that produced it and inspect its operands. This is
    6218             :   // fairly easy if Cond is the SETCC node, but it can potentially be
    6219             :   // undiscoverable (or not reasonably discoverable). For example, it could be
    6220             :   // in another basic block or it could require searching a complicated
    6221             :   // expression.
    6222        6742 :   if (CondVT.isInteger() &&
    6223        6742 :       TLI.getBooleanContents(false, true) ==
    6224        1537 :           TargetLowering::ZeroOrOneBooleanContent &&
    6225        3074 :       TLI.getBooleanContents(false, false) ==
    6226        1537 :           TargetLowering::ZeroOrOneBooleanContent &&
    6227        3668 :       C1->isNullValue() && C2->isOne()) {
    6228             :     SDValue NotCond =
    6229           0 :         DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
                           // The xor is built in the condition's type; resize it if the select's
                           // result type has a different width.
    6230           0 :     if (VT.bitsEq(CondVT))
    6231           0 :       return NotCond;
    6232           0 :     return DAG.getZExtOrTrunc(NotCond, DL, VT);
    6233             :   }
    6234             : 
    6235        3371 :   return SDValue();
    6236             : }
    6237             : 
    6238       50696 : SDValue DAGCombiner::visitSELECT(SDNode *N) {
    6239      101392 :   SDValue N0 = N->getOperand(0);
    6240      101392 :   SDValue N1 = N->getOperand(1);
    6241      101392 :   SDValue N2 = N->getOperand(2);
    6242      101392 :   EVT VT = N->getValueType(0);
    6243      101392 :   EVT VT0 = N0.getValueType();
    6244      101392 :   SDLoc DL(N);
    6245             : 
    6246             :   // fold (select C, X, X) -> X
    6247       50696 :   if (N1 == N2)
    6248          59 :     return N1;
    6249             : 
    6250          58 :   if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
    6251             :     // fold (select true, X, Y) -> X
    6252             :     // fold (select false, X, Y) -> Y
    6253          58 :     return !N0C->isNullValue() ? N1 : N2;
    6254             :   }
    6255             : 
    6256             :   // fold (select X, X, Y) -> (or X, Y)
    6257             :   // fold (select X, 1, Y) -> (or C, Y)
    6258       54347 :   if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
    6259          18 :     return DAG.getNode(ISD::OR, DL, VT, N0, N2);
    6260             : 
    6261       50570 :   if (SDValue V = foldSelectOfConstants(N))
    6262         260 :     return V;
    6263             : 
    6264             :   // fold (select C, 0, X) -> (and (not C), X)
    6265       54064 :   if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
    6266          18 :     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    6267           6 :     AddToWorklist(NOTNode.getNode());
    6268          12 :     return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
    6269             :   }
    6270             :   // fold (select C, X, 1) -> (or (not C), X)
    6271       54046 :   if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
    6272           6 :     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    6273           2 :     AddToWorklist(NOTNode.getNode());
    6274           4 :     return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
    6275             :   }
    6276             :   // fold (select X, Y, X) -> (and X, Y)
    6277             :   // fold (select X, Y, 0) -> (and X, Y)
    6278       54038 :   if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
    6279           6 :     return DAG.getNode(ISD::AND, DL, VT, N0, N1);
    6280             : 
    6281             :   // If we can fold this based on the true/false value, do so.
    6282       50299 :   if (SimplifySelectOps(N, N1, N2))
    6283          82 :     return SDValue(N, 0); // Don't revisit N.
    6284             : 
    6285       93435 :   if (VT0 == MVT::i1) {
    6286             :     // The code in this block deals with the following 2 equivalences:
    6287             :     //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
    6288             :     //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
    6289             :     // The target can specify its preferred form with the
    6290             :     // shouldNormalizeToSelectSequence() callback. However we always transform
    6291             :     // to the right anyway if we find the inner select exists in the DAG anyway
    6292             :     // and we always transform to the left side if we know that we can further
    6293             :     // optimize the combination of the conditions.
    6294             :     bool normalizeToSequence =
    6295       43218 :         TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
    6296             :     // select (and Cond0, Cond1), X, Y
    6297             :     //   -> select Cond0, (select Cond1, X, Y), Y
    6298       43510 :     if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
    6299         584 :       SDValue Cond0 = N0->getOperand(0);
    6300         584 :       SDValue Cond1 = N0->getOperand(1);
    6301             :       SDValue InnerSelect =
    6302         584 :           DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
    6303         561 :       if (normalizeToSequence || !InnerSelect.use_empty())
    6304          23 :         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
    6305          46 :                            InnerSelect, N2);
    6306             :     }
    6307             :     // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
    6308       43353 :     if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
    6309         316 :       SDValue Cond0 = N0->getOperand(0);
    6310         316 :       SDValue Cond1 = N0->getOperand(1);
    6311             :       SDValue InnerSelect =
    6312         316 :           DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
    6313         286 :       if (normalizeToSequence || !InnerSelect.use_empty())
    6314          31 :         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
    6315          62 :                            InnerSelect);
    6316             :     }
    6317             : 
    6318             :     // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
    6319       43630 :     if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
    6320         932 :       SDValue N1_0 = N1->getOperand(0);
    6321         932 :       SDValue N1_1 = N1->getOperand(1);
    6322         932 :       SDValue N1_2 = N1->getOperand(2);
    6323         159 :       if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
    6324             :         // Create the actual and node if we can generate good code for it.
    6325          53 :         if (!normalizeToSequence) {
    6326           0 :           SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
    6327           0 :           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2);
    6328             :         }
    6329             :         // Otherwise see if we can optimize the "and" to a better pattern.
    6330          53 :         if (SDValue Combined = visitANDLike(N0, N1_0, N))
    6331           2 :           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
    6332           4 :                              N2);
    6333             :       }
    6334             :     }
    6335             :     // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
    6336       44016 :     if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
    6337        1708 :       SDValue N2_0 = N2->getOperand(0);
    6338        1708 :       SDValue N2_1 = N2->getOperand(1);
    6339        1708 :       SDValue N2_2 = N2->getOperand(2);
    6340         162 :       if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
    6341             :         // Create the actual or node if we can generate good code for it.
    6342          54 :         if (!normalizeToSequence) {
    6343          51 :           SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
    6344          34 :           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2);
    6345             :         }
    6346             :         // Otherwise see if we can optimize to a better pattern.
    6347          37 :         if (SDValue Combined = visitORLike(N0, N2_0, N))
    6348           6 :           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
    6349          12 :                              N2_2);
    6350             :       }
    6351             :     }
    6352             :   }
    6353             : 
    6354             :   // select (xor Cond, 1), X, Y -> select Cond, Y, X
    6355       93277 :   if (VT0 == MVT::i1) {
    6356       43139 :     if (N0->getOpcode() == ISD::XOR) {
    6357         304 :       if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) {
    6358          16 :         SDValue Cond0 = N0->getOperand(0);
    6359           8 :         if (C->isOne())
    6360          16 :           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N2, N1);
    6361             :       }
    6362             :     }
    6363             :   }
    6364             : 
    6365             :   // fold selects based on a setcc into other things, such as min/max/abs
    6366      100260 :   if (N0.getOpcode() == ISD::SETCC) {
    6367             :     // select x, y (fcmp lt x, y) -> fminnum x, y
    6368             :     // select x, y (fcmp gt x, y) -> fmaxnum x, y
    6369             :     //
    6370             :     // This is OK if we don't care about what happens if either operand is a
    6371             :     // NaN.
    6372             :     //
    6373             : 
    6374             :     // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
    6375             :     // no signed zeros as well as no nans.
    6376       43171 :     const TargetOptions &Options = DAG.getTarget().Options;
    6377       43950 :     if (Options.UnsafeFPMath && VT.isFloatingPoint() && N0.hasOneUse() &&
    6378       43479 :         DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
    6379         297 :       ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    6380             : 
    6381          99 :       if (SDValue FMinMax = combineMinNumMaxNum(
    6382         297 :               DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
    6383          27 :         return FMinMax;
    6384             :     }
    6385             : 
    6386       43144 :     if ((!LegalOperations &&
    6387       43144 :          TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
    6388       40227 :         TLI.isOperationLegal(ISD::SELECT_CC, VT))
    6389        8802 :       return DAG.getNode(ISD::SELECT_CC, DL, VT, N0.getOperand(0),
    6390       11736 :                          N0.getOperand(1), N1, N2, N0.getOperand(2));
    6391       40210 :     return SimplifySelect(DL, N0, N1, N2);
    6392             :   }
    6393             : 
    6394        6959 :   return SDValue();
    6395             : }
    6396             : 
    6397             : static
    6398           8 : std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
                         // Splits node N into a low and a high half and returns the two new
                         // nodes as {Lo, Hi}. Each half is rebuilt with N's own opcode, the
                         // split result type, the matching halves of operands 0 and 1, and N's
                         // operand 2 reused unchanged. The callers in this file only pass a
                         // mask node whose opcode is ISD::SETCC.
    6399          16 :   SDLoc DL(N);
    6400             :   EVT LoVT, HiVT;
    6401          32 :   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
    6402             : 
    6403             :   // Split the two vector inputs (operands 0 and 1) into low/high parts.
    6404           8 :   SDValue Lo, Hi, LL, LH, RL, RH;
    6405          24 :   std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
    6406          24 :   std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
    6407             :   // Rebuild the operation on each half; operand 2 is shared by both halves.
    6408          24 :   Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
    6409          24 :   Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
    6410             : 
    6411          16 :   return std::make_pair(Lo, Hi);
    6412             : }
    6413             : 
    6414             : // Try to rewrite a vselect of two CONCAT_VECTORS as one CONCAT_VECTORS that
    6415             : // takes each half from whichever operand the condition selects for that half.
                       // This function assumes all the vselect's arguments are CONCAT_VECTOR
                       // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
                       // Returns an empty SDValue when either concat does not have exactly two
                       // operands, or when the non-undef condition elements within one half are
                       // not all the same node.
    6416          42 : static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
    6417          84 :   SDLoc DL(N);
    6418          84 :   SDValue Cond = N->getOperand(0);
    6419          84 :   SDValue LHS = N->getOperand(1);
    6420          84 :   SDValue RHS = N->getOperand(2);
    6421          84 :   EVT VT = N->getValueType(0);
    6422          42 :   int NumElems = VT.getVectorNumElements();
    6423             :   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
    6424             :          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
    6425             :          Cond.getOpcode() == ISD::BUILD_VECTOR);
    6426             : 
    6427             :   // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
    6428             :   // binary ones here.
    6429          42 :   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
    6430          10 :     return SDValue();
    6431             : 
    6432             :   // We're sure we have an even number of elements due to the
    6433             :   // concat_vectors we have as arguments to vselect.
    6434             :   // Skip BV elements until we find one that's not an UNDEF.
    6435             :   // After we find a non-UNDEF element, keep looping until we get to half the
    6436             :   // length of the BV and see if all the non-undef nodes are the same.
                         // Elements are compared by node identity, not by constant value.
    6437             :   ConstantSDNode *BottomHalf = nullptr;
    6438         164 :   for (int i = 0; i < NumElems / 2; ++i) {
    6439         170 :     if (Cond->getOperand(i)->isUndef())
    6440           0 :       continue;
    6441             : 
    6442          85 :     if (BottomHalf == nullptr)
    6443             :       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
    6444         106 :     else if (Cond->getOperand(i).getNode() != BottomHalf)
    6445          19 :       return SDValue();
    6446             :   }
    6447             : 
    6448             :   // Do the same for the second half of the BuildVector
    6449             :   ConstantSDNode *TopHalf = nullptr;
    6450          83 :   for (int i = NumElems / 2; i < NumElems; ++i) {
    6451          76 :     if (Cond->getOperand(i)->isUndef())
    6452           0 :       continue;
    6453             : 
    6454          38 :     if (TopHalf == nullptr)
    6455             :       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
    6456          50 :     else if (Cond->getOperand(i).getNode() != TopHalf)
    6457           3 :       return SDValue();
    6458             :   }
    6459             :   // Precondition: each half contains at least one non-UNDEF element.
                         // NOTE(review): this is only enforced by the assert; if assertions are
                         // compiled out, a null TopHalf/BottomHalf would be dereferenced below.
    6460             :   assert(TopHalf && BottomHalf &&
    6461             :          "One half of the selector was all UNDEFs and the other was all the "
    6462             :          "same value. This should have been addressed before this function.");
                         // For each half, a zero selector picks that half of RHS (operand 2 of the
                         // select); any other selector value picks that half of LHS (operand 1).
    6463             :   return DAG.getNode(
    6464             :       ISD::CONCAT_VECTORS, DL, VT,
    6465          30 :       BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
    6466          40 :       TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
    6467             : }
    6468             : 
    6469         282 : SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
    6470             :   // Combine for masked scatter nodes. When the mask is a SETCC and the data
                         // type must be split, the scatter is replaced by two half-width scatters
                         // joined by a TokenFactor. Returns an empty SDValue in every other case.
                         // Nothing is done once Level has reached AfterLegalizeTypes.
    6471         282 :   if (Level >= AfterLegalizeTypes)
    6472         217 :     return SDValue();
    6473             : 
    6474          65 :   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
    6475         130 :   SDValue Mask = MSC->getMask();
    6476         130 :   SDValue Data  = MSC->getValue();
    6477          65 :   SDLoc DL(N);
    6478             : 
    6479             :   // If the MSCATTER data type requires splitting and the mask is provided by a
    6480             :   // SETCC, then split both nodes and its operands before legalization. This
    6481             :   // prevents the type legalizer from unrolling SETCC into scalar comparisons
    6482             :   // and enables future optimizations (e.g. min/max pattern matching on X86).
    6483         130 :   if (Mask.getOpcode() != ISD::SETCC)
    6484          65 :     return SDValue();
    6485             : 
    6486             :   // Check if any splitting is required.
    6487           0 :   if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
    6488             :       TargetLowering::TypeSplitVector)
    6489           0 :     return SDValue();
    6490           0 :   SDValue MaskLo, MaskHi, Lo, Hi;
    6491           0 :   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
    6492             : 
                         // NOTE(review): LoVT and HiVT are computed here but not read again in
                         // this function.
    6493             :   EVT LoVT, HiVT;
    6494           0 :   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
    6495             : 
    6496           0 :   SDValue Chain = MSC->getChain();
    6497             : 
    6498           0 :   EVT MemoryVT = MSC->getMemoryVT();
    6499           0 :   unsigned Alignment = MSC->getOriginalAlignment();
    6500             : 
    6501           0 :   EVT LoMemVT, HiMemVT;
    6502           0 :   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
    6503             : 
    6504             :   SDValue DataLo, DataHi;
    6505           0 :   std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
    6506             :   // The base pointer is shared by both halves; only the index vector is split.
    6507           0 :   SDValue BasePtr = MSC->getBasePtr();
    6508             :   SDValue IndexLo, IndexHi;
    6509           0 :   std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
    6510             :   // A single memory operand, sized for the low half, is used by both scatters.
    6511           0 :   MachineMemOperand *MMO = DAG.getMachineFunction().
    6512           0 :     getMachineMemOperand(MSC->getPointerInfo(),
    6513           0 :                           MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
    6514           0 :                           Alignment, MSC->getAAInfo(), MSC->getRanges());
    6515             : 
    6516           0 :   SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo };
    6517           0 :   Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
    6518           0 :                             DL, OpsLo, MMO);
    6519             : 
    6520           0 :   SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi};
    6521           0 :   Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
    6522           0 :                             DL, OpsHi, MMO);
    6523             : 
    6524           0 :   AddToWorklist(Lo.getNode());
    6525           0 :   AddToWorklist(Hi.getNode());
    6526             :   // Both scatters take the same input chain; join their output chains.
    6527           0 :   return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
    6528             : }
    6529             : 
    6530         369 : SDValue DAGCombiner::visitMSTORE(SDNode *N) {
    6531             :   // Combine for masked stores. When the mask is a SETCC and the data type must
                         // be split, the store is replaced by two half-width masked stores joined
                         // by a TokenFactor. Returns an empty SDValue in every other case.
                         // Nothing is done once Level has reached AfterLegalizeTypes.
    6532         369 :   if (Level >= AfterLegalizeTypes)
    6533         239 :     return SDValue();
    6534             : 
                         // cast<> rather than dyn_cast<>: the result is dereferenced unconditionally,
                         // so a type mismatch should assert instead of yielding a null pointer.
    6535         130 :   MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
    6536         260 :   SDValue Mask = MST->getMask();
    6537         130 :   SDValue Data  = MST->getValue();
    6538         260 :   EVT VT = Data.getValueType();
    6539         130 :   SDLoc DL(N);
    6540             : 
    6541             :   // If the MSTORE data type requires splitting and the mask is provided by a
    6542             :   // SETCC, then split both nodes and its operands before legalization. This
    6543             :   // prevents the type legalizer from unrolling SETCC into scalar comparisons
    6544             :   // and enables future optimizations (e.g. min/max pattern matching on X86).
    6545         260 :   if (Mask.getOpcode() == ISD::SETCC) {
    6546             : 
    6547             :     // Check if any splitting is required.
    6548          72 :     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
    6549             :         TargetLowering::TypeSplitVector)
    6550          34 :       return SDValue();
    6551             : 
    6552           2 :     SDValue MaskLo, MaskHi, Lo, Hi;
    6553           6 :     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
    6554             : 
    6555           4 :     SDValue Chain = MST->getChain();
    6556           4 :     SDValue Ptr   = MST->getBasePtr();
    6557             : 
    6558           2 :     EVT MemoryVT = MST->getMemoryVT();
    6559           4 :     unsigned Alignment = MST->getOriginalAlignment();
    6560             : 
    6561             :     // If the alignment equals the vector size in bytes, the second half gets
    6562             :     // half of that alignment; otherwise the original alignment is kept.
    6563             :     unsigned SecondHalfAlignment =
    6564           2 :       (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;
    6565             : 
    6566           2 :     EVT LoMemVT, HiMemVT;
    6567           6 :     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
    6568             : 
    6569             :     SDValue DataLo, DataHi;
    6570           6 :     std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
    6571             :     // Low half: stored at the original pointer with the original alignment.
    6572           2 :     MachineMemOperand *MMO = DAG.getMachineFunction().
    6573           4 :       getMachineMemOperand(MST->getPointerInfo(),
    6574           2 :                            MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
    6575           6 :                            Alignment, MST->getAAInfo(), MST->getRanges());
    6576             : 
    6577           4 :     Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
    6578             :                             MST->isTruncatingStore(),
    6579           4 :                             MST->isCompressingStore());
    6580             :     // Compute the high half's address from the low half's mask and memory type.
    6581           4 :     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
    6582           2 :                                      MST->isCompressingStore());
    6583             :     // High half: stored at the updated pointer with SecondHalfAlignment.
    6584           4 :     MMO = DAG.getMachineFunction().
    6585           4 :       getMachineMemOperand(MST->getPointerInfo(),
    6586           2 :                            MachineMemOperand::MOStore,  HiMemVT.getStoreSize(),
    6587           6 :                            SecondHalfAlignment, MST->getAAInfo(),
    6588             :                            MST->getRanges());
    6589             : 
    6590           4 :     Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
    6591             :                             MST->isTruncatingStore(),
    6592           4 :                             MST->isCompressingStore());
    6593             : 
    6594           2 :     AddToWorklist(Lo.getNode());
    6595           2 :     AddToWorklist(Hi.getNode());
    6596             :     // Both stores take the same input chain; join their output chains.
    6597           6 :     return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
    6598             :   }
    6599          94 :   return SDValue();
    6600             : }
    6601             : 
    6602         642 : SDValue DAGCombiner::visitMGATHER(SDNode *N) {
    6603             :   // Combine for masked gather nodes. When the mask is a SETCC and the result
                         // type must be split, the gather is replaced by two half-width gathers
                         // whose results are concatenated and whose chains are joined by a
                         // TokenFactor. Returns an empty SDValue in every other case.
                         // Nothing is done once Level has reached AfterLegalizeTypes.
    6604         642 :   if (Level >= AfterLegalizeTypes)
    6605         435 :     return SDValue();
    6606             : 
                         // cast<> rather than dyn_cast<>: the result is dereferenced unconditionally,
                         // so a type mismatch should assert instead of yielding a null pointer.
    6607         207 :   MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
    6608         414 :   SDValue Mask = MGT->getMask();
    6609         207 :   SDLoc DL(N);
    6610             : 
    6611             :   // If the MGATHER result requires splitting and the mask is provided by a
    6612             :   // SETCC, then split both nodes and its operands before legalization. This
    6613             :   // prevents the type legalizer from unrolling SETCC into scalar comparisons
    6614             :   // and enables future optimizations (e.g. min/max pattern matching on X86).
    6615             : 
    6616         414 :   if (Mask.getOpcode() != ISD::SETCC)
    6617         207 :     return SDValue();
    6618             : 
    6619           0 :   EVT VT = N->getValueType(0);
    6620             : 
    6621             :   // Check if any splitting is required.
    6622           0 :   if (TLI.getTypeAction(*DAG.getContext(), VT) !=
    6623             :       TargetLowering::TypeSplitVector)
    6624           0 :     return SDValue();
    6625             : 
    6626           0 :   SDValue MaskLo, MaskHi, Lo, Hi;
    6627           0 :   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
    6628             :   // Split the value operand that is passed through to each half-width gather.
    6629           0 :   SDValue Src0 = MGT->getValue();
    6630             :   SDValue Src0Lo, Src0Hi;
    6631           0 :   std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
    6632             : 
    6633             :   EVT LoVT, HiVT;
    6634           0 :   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
    6635             : 
    6636           0 :   SDValue Chain = MGT->getChain();
    6637           0 :   EVT MemoryVT = MGT->getMemoryVT();
    6638           0 :   unsigned Alignment = MGT->getOriginalAlignment();
    6639             : 
    6640           0 :   EVT LoMemVT, HiMemVT;
    6641           0 :   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
    6642             :   // The base pointer is shared by both halves; only the index vector is split.
    6643           0 :   SDValue BasePtr = MGT->getBasePtr();
    6644           0 :   SDValue Index = MGT->getIndex();
    6645             :   SDValue IndexLo, IndexHi;
    6646           0 :   std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
    6647             :   // A single memory operand, sized for the low half, is used by both gathers.
    6648           0 :   MachineMemOperand *MMO = DAG.getMachineFunction().
    6649           0 :     getMachineMemOperand(MGT->getPointerInfo(),
    6650           0 :                           MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
    6651           0 :                           Alignment, MGT->getAAInfo(), MGT->getRanges());
    6652             : 
    6653           0 :   SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo };
    6654           0 :   Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
    6655           0 :                             MMO);
    6656             : 
    6657           0 :   SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi};
    6658           0 :   Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
    6659           0 :                             MMO);
    6660             : 
    6661           0 :   AddToWorklist(Lo.getNode());
    6662           0 :   AddToWorklist(Hi.getNode());
    6663             : 
    6664             :   // Build a factor node to remember that this load is independent of the
    6665             :   // other one.
    6666           0 :   Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
    6667           0 :                       Hi.getValue(1));
    6668             : 
    6669             :   // Legalized the chain result - switch anything that used the old chain to
    6670             :   // use the new one.
    6671           0 :   DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
    6672             :   // The gathered value is the concatenation of the two halves.
    6673           0 :   SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
    6674             :   // Return both results of the original node: the value and the new chain.
    6675           0 :   SDValue RetOps[] = { GatherRes, Chain };
    6676           0 :   return DAG.getMergeValues(RetOps, DL);
    6677             : }
    6678             : 
    6679         725 : SDValue DAGCombiner::visitMLOAD(SDNode *N) {
    6680             :   // Combine for masked loads. When the mask is a SETCC and the result type
                         // must be split, the load is replaced by two half-width masked loads
                         // whose results are concatenated and whose chains are joined by a
                         // TokenFactor. Returns an empty SDValue in every other case.
                         // Nothing is done once Level has reached AfterLegalizeTypes.
    6681         725 :   if (Level >= AfterLegalizeTypes)
    6682         463 :     return SDValue();
    6683             : 
                         // cast<> rather than dyn_cast<>: the result is dereferenced unconditionally,
                         // so a type mismatch should assert instead of yielding a null pointer.
    6684         262 :   MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
    6685         524 :   SDValue Mask = MLD->getMask();
    6686         262 :   SDLoc DL(N);
    6687             : 
    6688             :   // If the MLOAD result requires splitting and the mask is provided by a
    6689             :   // SETCC, then split both nodes and its operands before legalization. This
    6690             :   // prevents the type legalizer from unrolling SETCC into scalar comparisons
    6691             :   // and enables future optimizations (e.g. min/max pattern matching on X86).
    6692             : 
    6693         524 :   if (Mask.getOpcode() == ISD::SETCC) {
    6694         152 :     EVT VT = N->getValueType(0);
    6695             : 
    6696             :     // Check if any splitting is required.
    6697         152 :     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
    6698             :         TargetLowering::TypeSplitVector)
    6699          70 :       return SDValue();
    6700             : 
    6701           6 :     SDValue MaskLo, MaskHi, Lo, Hi;
    6702          18 :     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
    6703             :     // Split the Src0 operand that is passed through to each half-width load.
    6704           6 :     SDValue Src0 = MLD->getSrc0();
    6705             :     SDValue Src0Lo, Src0Hi;
    6706          18 :     std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
    6707             : 
    6708             :     EVT LoVT, HiVT;
    6709          24 :     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
    6710             : 
    6711          12 :     SDValue Chain = MLD->getChain();
    6712          12 :     SDValue Ptr   = MLD->getBasePtr();
    6713           6 :     EVT MemoryVT = MLD->getMemoryVT();
    6714          12 :     unsigned Alignment = MLD->getOriginalAlignment();
    6715             : 
    6716             :     // If the alignment equals the vector size in bytes, the second half gets
    6717             :     // half of that alignment; otherwise the original alignment is kept.
    6718             :     unsigned SecondHalfAlignment =
    6719          18 :       (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
    6720           6 :          Alignment/2 : Alignment;
    6721             : 
    6722           6 :     EVT LoMemVT, HiMemVT;
    6723          18 :     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
    6724             :     // Low half: loaded from the original pointer with the original alignment.
    6725           6 :     MachineMemOperand *MMO = DAG.getMachineFunction().
    6726          12 :     getMachineMemOperand(MLD->getPointerInfo(),
    6727           6 :                          MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
    6728          18 :                          Alignment, MLD->getAAInfo(), MLD->getRanges());
    6729             : 
                           // NOTE(review): both halves are created with ISD::NON_EXTLOAD rather
                           // than an extension type read from MLD; confirm that extending masked
                           // loads cannot reach this path.
    6730          12 :     Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
    6731           6 :                            ISD::NON_EXTLOAD, MLD->isExpandingLoad());
    6732             :     // Compute the high half's address from the low half's mask and memory type.
    6733          12 :     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
    6734           6 :                                      MLD->isExpandingLoad());
    6735             :     // High half: loaded from the updated pointer with SecondHalfAlignment.
    6736          12 :     MMO = DAG.getMachineFunction().
    6737          12 :     getMachineMemOperand(MLD->getPointerInfo(),
    6738           6 :                          MachineMemOperand::MOLoad,  HiMemVT.getStoreSize(),
    6739          18 :                          SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
    6740             : 
    6741          12 :     Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
    6742           6 :                            ISD::NON_EXTLOAD, MLD->isExpandingLoad());
    6743             : 
    6744           6 :     AddToWorklist(Lo.getNode());
    6745           6 :     AddToWorklist(Hi.getNode());
    6746             : 
    6747             :     // Build a factor node to remember that this load is independent of the
    6748             :     // other one.
    6749          12 :     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
    6750          30 :                         Hi.getValue(1));
    6751             : 
    6752             :     // Legalized the chain result - switch anything that used the old chain to
    6753             :     // use the new one.
    6754          12 :     DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
    6755             :     // The loaded value is the concatenation of the two halves.
    6756          12 :     SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
    6757             :     // Return both results of the original node: the value and the new chain.
    6758           6 :     SDValue RetOps[] = { LoadRes, Chain };
    6759          12 :     return DAG.getMergeValues(RetOps, DL);
    6760             :   }
    6761         186 :   return SDValue();
    6762             : }
    6763             : 
    6764             : /// A vector select of 2 constant vectors can be simplified to math/logic to
    6765             : /// avoid a variable select instruction and possibly avoid constant loads.
                       /// Returns the replacement ADD node when every non-undef element pair of the
                       /// two arms differs by exactly +1 or exactly -1, and an empty SDValue
                       /// otherwise.
    6766       25870 : SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
    6767       51740 :   SDValue Cond = N->getOperand(0);
    6768       51740 :   SDValue N1 = N->getOperand(1);
    6769       51740 :   SDValue N2 = N->getOperand(2);
    6770       51740 :   EVT VT = N->getValueType(0);
                         // Bail out unless the condition has a single use and 1-bit elements, the
                         // target opts in through convertSelectOfConstantsToMath, and both select
                         // arms are build-vectors of constants.
    6771       65273 :   if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
    6772       17574 :       !TLI.convertSelectOfConstantsToMath(VT) ||
    6773       27479 :       !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
    6774         209 :       !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
    6775       25712 :     return SDValue();
    6776             : 
    6777             :   // Check if we can use the condition value to increment/decrement a single
    6778             :   // constant value. This simplifies a select to an add and removes a constant
    6779             :   // load/materialization from the general case.
    6780         158 :   bool AllAddOne = true;
    6781         158 :   bool AllSubOne = true;
    6782         158 :   unsigned Elts = VT.getVectorNumElements();
    6783         910 :   for (unsigned i = 0; i != Elts; ++i) {
    6784        1504 :     SDValue N1Elt = N1.getOperand(i);
    6785        1504 :     SDValue N2Elt = N2.getOperand(i);
                           // A pair with an undef element places no constraint on either pattern.
    6786        2256 :     if (N1Elt.isUndef() || N2Elt.isUndef())
    6787           0 :       continue;
    6788             : 
    6789        1504 :     const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
    6790        1504 :     const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
    6791        3760 :     if (C1 != C2 + 1)
    6792         662 :       AllAddOne = false;
    6793        3760 :     if (C1 != C2 - 1)
    6794         612 :       AllSubOne = false;
    6795             :   }
    6796             : 
    6797             :   // Further simplifications for the extra-special cases where the constants are
    6798             :   // all 0 or all -1 should be implemented as folds of these patterns.
    6799         158 :   SDLoc DL(N);
    6800         158 :   if (AllAddOne || AllSubOne) {
    6801             :     // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
    6802             :     // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
                           // If both flags are still set, the zero-extend form is the one used.
    6803          40 :     auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
    6804          80 :     SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
    6805          80 :     return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
    6806             :   }
    6807             : 
    6808             :   // The general case for select-of-constants:
    6809             :   // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
    6810             :   // ...but that only makes sense if a vselect is slower than 2 logic ops, so
    6811             :   // leave that to a machine-specific pass.
    6812         118 :   return SDValue();
    6813             : }
    6814             : 
    6815       26306 : SDValue DAGCombiner::visitVSELECT(SDNode *N) {
    6816       52612 :   SDValue N0 = N->getOperand(0);
    6817       52612 :   SDValue N1 = N->getOperand(1);
    6818       52612 :   SDValue N2 = N->getOperand(2);
    6819       52612 :   SDLoc DL(N);
    6820             : 
    6821             :   // fold (vselect C, X, X) -> X
    6822       26306 :   if (N1 == N2)
    6823           4 :     return N1;
    6824             : 
    6825             :   // Canonicalize integer abs.
    6826             :   // vselect (setg[te] X,  0),  X, -X ->
    6827             :   // vselect (setgt    X, -1),  X, -X ->
    6828             :   // vselect (setl[te] X,  0), -X,  X ->
    6829             :   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
    6830       52604 :   if (N0.getOpcode() == ISD::SETCC) {
    6831       13647 :     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    6832       13647 :     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    6833        4549 :     bool isAbs = false;
    6834        4549 :     bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
    6835             : 
    6836        5161 :     if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
    6837        4347 :          (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
    6838        5533 :         N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
    6839         492 :       isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
    6840         630 :     else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
    6841        4627 :              N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
    6842         162 :       isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
    6843             : 
    6844         327 :     if (isAbs) {
    6845         654 :       EVT VT = LHS.getValueType();
    6846         327 :       if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
    6847         494 :         return DAG.getNode(ISD::ABS, DL, VT, LHS);
    6848             : 
    6849          80 :       SDValue Shift = DAG.getNode(
    6850             :           ISD::SRA, DL, VT, LHS,
    6851         160 :           DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
    6852         160 :       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
    6853          80 :       AddToWorklist(Shift.getNode());
    6854          80 :       AddToWorklist(Add.getNode());
    6855         160 :       return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
    6856             :     }
    6857             :   }
    6858             : 
    6859       25975 :   if (SimplifySelectOps(N, N1, N2))
    6860           6 :     return SDValue(N, 0);  // Don't revisit N.
    6861             : 
    6862             :   // Fold (vselect (build_vector all_ones), N1, N2) -> N1
    6863       25969 :   if (ISD::isBuildVectorAllOnes(N0.getNode()))
    6864          33 :     return N1;
    6865             :   // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
    6866       25936 :   if (ISD::isBuildVectorAllZeros(N0.getNode()))
    6867          56 :     return N2;
    6868             : 
    6869             :   // The ConvertSelectToConcatVector function is assuming both the above
    6870             :   // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
    6871             :   // and addressed.
    6872       26564 :   if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
    6873       26264 :       N2.getOpcode() == ISD::CONCAT_VECTORS &&
    6874         384 :       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
    6875          42 :     if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
    6876          10 :       return CV;
    6877             :   }
    6878             : 
    6879       25870 :   if (SDValue V = foldVSelectOfConstants(N))
    6880          40 :     return V;
    6881             : 
    6882       25830 :   return SDValue();
    6883             : }
    6884             : 
    6885       15094 : SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
    6886       30188 :   SDValue N0 = N->getOperand(0);
    6887       30188 :   SDValue N1 = N->getOperand(1);
    6888       30188 :   SDValue N2 = N->getOperand(2);
    6889       30188 :   SDValue N3 = N->getOperand(3);
    6890       30188 :   SDValue N4 = N->getOperand(4);
    6891       15094 :   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
    6892             : 
    6893             :   // fold select_cc lhs, rhs, x, x, cc -> x
    6894       15094 :   if (N2 == N3)
    6895          24 :     return N2;
    6896             : 
    6897             :   // Determine if the condition we're dealing with is constant
    6898       15070 :   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
    6899       60280 :                                   CC, SDLoc(N), false)) {
    6900         271 :     AddToWorklist(SCC.getNode());
    6901             : 
    6902         542 :     if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
    6903          27 :       if (!SCCC->isNullValue())
    6904         272 :         return N2;    // cond always true -> true val
    6905             :       else
    6906          10 :         return N3;    // cond always false -> false val
    6907         244 :     } else if (SCC->isUndef()) {
    6908             :       // When the condition is UNDEF, just return the first operand. This is
    6909             :       // coherent with DAG creation: no setcc node is created in this case.
    6910           0 :       return N2;
    6911         488 :     } else if (SCC.getOpcode() == ISD::SETCC) {
    6912             :       // Fold to a simpler select_cc
    6913         684 :       return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
    6914         684 :                          SCC.getOperand(0), SCC.getOperand(1), N2, N3,
    6915        1140 :                          SCC.getOperand(2));
    6916             :     }
    6917             :   }
    6918             : 
    6919             :   // If we can fold this based on the true/false value, do so.
    6920       14815 :   if (SimplifySelectOps(N, N2, N3))
    6921           0 :     return SDValue(N, 0);  // Don't revisit N.
    6922             : 
    6923             :   // fold select_cc into other things, such as min/max/abs
    6924       29630 :   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
    6925             : }
    6926             : 
    6927      152726 : SDValue DAGCombiner::visitSETCC(SDNode *N) {
    6928      458178 :   return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
    6929             :                        cast<CondCodeSDNode>(N->getOperand(2))->get(),
    6930     1069082 :                        SDLoc(N));
    6931             : }
    6932             : 
    6933          89 : SDValue DAGCombiner::visitSETCCE(SDNode *N) {
    6934         178 :   SDValue LHS = N->getOperand(0);
    6935         178 :   SDValue RHS = N->getOperand(1);
    6936         178 :   SDValue Carry = N->getOperand(2);
    6937         178 :   SDValue Cond = N->getOperand(3);
    6938             : 
    6939             :   // If Carry is false, fold to a regular SETCC.
    6940         178 :   if (Carry.getOpcode() == ISD::CARRY_FALSE)
    6941           0 :     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
    6942             : 
    6943          89 :   return SDValue();
    6944             : }
    6945             : 
    6946         155 : SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
    6947         310 :   SDValue LHS = N->getOperand(0);
    6948         310 :   SDValue RHS = N->getOperand(1);
    6949         310 :   SDValue Carry = N->getOperand(2);
    6950         310 :   SDValue Cond = N->getOperand(3);
    6951             : 
    6952             :   // If Carry is false, fold to a regular SETCC.
    6953         155 :   if (isNullConstant(Carry))
    6954          28 :     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
    6955             : 
    6956         148 :   return SDValue();
    6957             : }
    6958             : 
    6959             : /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
    6960             : /// a build_vector of constants.
    6961             : /// This function is called by the DAGCombiner when visiting sext/zext/aext
    6962             : /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
    6963             : /// Vector extends are not folded if operations are legal; this is to
    6964             : /// avoid introducing illegal build_vector dag nodes.
    6965      194649 : static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
    6966             :                                          SelectionDAG &DAG, bool LegalTypes,
    6967             :                                          bool LegalOperations) {
    6968      389298 :   unsigned Opcode = N->getOpcode();
    6969      389298 :   SDValue N0 = N->getOperand(0);
    6970      389298 :   EVT VT = N->getValueType(0);
    6971             : 
    6972             :   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
    6973             :          Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
    6974             :          Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
    6975             :          && "Expected EXTEND dag node in input!");
    6976             : 
    6977             :   // fold (sext c1) -> c1
    6978             :   // fold (zext c1) -> c1
    6979             :   // fold (aext c1) -> c1
    6980      194395 :   if (isa<ConstantSDNode>(N0))
    6981         762 :     return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();
    6982             : 
    6983             :   // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
    6984             :   // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
    6985             :   // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
    6986      194395 :   EVT SVT = VT.getScalarType();
    6987      209375 :   if (!(VT.isVector() &&
    6988        6066 :       (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
    6989       14980 :       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
    6990             :     return nullptr;
    6991             : 
    6992             :   // We can fold this node into a build_vector.
    6993         221 :   unsigned VTBits = SVT.getSizeInBits();
    6994         663 :   unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
    6995         221 :   SmallVector<SDValue, 8> Elts;
    6996         221 :   unsigned NumElts = VT.getVectorNumElements();
    6997         442 :   SDLoc DL(N);
    6998             : 
    6999        1327 :   for (unsigned i=0; i != NumElts; ++i) {
    7000        2212 :     SDValue Op = N0->getOperand(i);
    7001        1192 :     if (Op->isUndef()) {
    7002          86 :       Elts.push_back(DAG.getUNDEF(SVT));
    7003          86 :       continue;
    7004             :     }
    7005             : 
    7006        2040 :     SDLoc DL(Op);
    7007             :     // Get the constant value and if needed trunc it to the size of the type.
    7008             :     // Nodes like build_vector might have constants wider than the scalar type.
    7009        3060 :     APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
    7010        1020 :     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
    7011         960 :       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
    7012             :     else
    7013        1080 :       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
    7014             :   }
    7015             : 
    7016         221 :   return DAG.getBuildVector(VT, DL, Elts).getNode();
    7017             : }
    7018             : 
    7019             : // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
    7020             : // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
    7021             : // transformation. Returns true if the extensions are possible and the above
    7022             : // mentioned transformation is profitable.
    7023        1504 : static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
    7024             :                                     unsigned ExtOpc,
    7025             :                                     SmallVectorImpl<SDNode *> &ExtendNodes,
    7026             :                                     const TargetLowering &TLI) {
    7027        1504 :   bool HasCopyToRegUses = false;
    7028        4512 :   bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
    7029        1504 :   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
    7030        4512 :                             UE = N0.getNode()->use_end();
    7031        4487 :        UI != UE; ++UI) {
    7032        3329 :     SDNode *User = *UI;
    7033        3329 :     if (User == N)
    7034        2695 :       continue;
    7035        4040 :     if (UI.getUse().getResNo() != N0.getResNo())
    7036             :       continue;
    7037             :     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
    7038        1194 :     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
    7039        1872 :       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
    7040        1193 :       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
    7041             :         // Sign bits will be lost after a zext.
    7042         346 :         return false;
    7043             :       bool Add = false;
    7044        2899 :       for (unsigned i = 0; i != 2; ++i) {
    7045        2382 :         SDValue UseOp = User->getOperand(i);
    7046        1191 :         if (UseOp == N0)
    7047         588 :           continue;
    7048          43 :         if (!isa<ConstantSDNode>(UseOp))
    7049          43 :           return false;
    7050         560 :         Add = true;
    7051             :       }
    7052         560 :       if (Add)
    7053         560 :         ExtendNodes.push_back(User);
    7054         560 :       continue;
    7055             :     }
    7056             :     // If truncates aren't free and there are users we can't
    7057             :     // extend, it isn't worthwhile.
    7058         570 :     if (!isTruncFree)
    7059             :       return false;
    7060             :     // Remember if this value is live-out.
    7061         288 :     if (User->getOpcode() == ISD::CopyToReg)
    7062          56 :       HasCopyToRegUses = true;
    7063             :   }
    7064             : 
    7065        1158 :   if (HasCopyToRegUses) {
    7066          56 :     bool BothLiveOut = false;
    7067          56 :     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
    7068          77 :          UI != UE; ++UI) {
    7069          66 :       SDUse &Use = UI.getUse();
    7070          66 :       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
    7071             :         BothLiveOut = true;
    7072             :         break;
    7073             :       }
    7074             :     }
    7075          56 :     if (BothLiveOut)
    7076             :       // Both unextended and extended values are live out. There had better be
    7077             :       // a good reason for the transformation.
    7078          90 :       return ExtendNodes.size();
    7079             :   }
    7080             :   return true;
    7081             : }
    7082             : 
    7083        8115 : void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
    7084             :                                   SDValue Trunc, SDValue ExtLoad,
    7085             :                                   const SDLoc &DL, ISD::NodeType ExtType) {
    7086             :   // Extend SetCC uses if necessary.
    7087       16788 :   for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
    7088        1116 :     SDNode *SetCC = SetCCs[i];
    7089        1116 :     SmallVector<SDValue, 4> Ops;
    7090             : 
    7091        1674 :     for (unsigned j = 0; j != 2; ++j) {
    7092        2232 :       SDValue SOp = SetCC->getOperand(j);
    7093        1116 :       if (SOp == Trunc)
    7094           0 :         Ops.push_back(ExtLoad);
    7095             :       else
    7096        3348 :         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
    7097             :     }
    7098             : 
    7099        1116 :     Ops.push_back(SetCC->getOperand(2));
    7100        2790 :     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
    7101             :   }
    7102        8115 : }
    7103             : 
    7104             : // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
    7105      135058 : SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
    7106      270116 :   SDValue N0 = N->getOperand(0);
    7107      270116 :   EVT DstVT = N->getValueType(0);
    7108      270116 :   EVT SrcVT = N0.getValueType();
    7109             : 
    7110             :   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
    7111             :           N->getOpcode() == ISD::ZERO_EXTEND) &&
    7112             :          "Unexpected node type (not an extend)!");
    7113             : 
    7114             :   // fold (sext (load x)) to multiple smaller sextloads; same for zext.
    7115             :   // For example, on a target with legal v4i32, but illegal v8i32, turn:
    7116             :   //   (v8i32 (sext (v8i16 (load x))))
    7117             :   // into:
    7118             :   //   (v8i32 (concat_vectors (v4i32 (sextload x)),
    7119             :   //                          (v4i32 (sextload (x + 16)))))
    7120             :   // Where uses of the original load, i.e.:
    7121             :   //   (v8i16 (load x))
    7122             :   // are replaced with:
    7123             :   //   (v8i16 (truncate
    7124             :   //     (v8i32 (concat_vectors (v4i32 (sextload x)),
    7125             :   //                            (v4i32 (sextload (x + 16)))))))
    7126             :   //
    7127             :   // This combine is only applicable to illegal, but splittable, vectors.
    7128             :   // All legal types, and illegal non-vector types, are handled elsewhere.
    7129             :   // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
    7130             :   //
    7131      270116 :   if (N0->getOpcode() != ISD::LOAD)
    7132      132165 :     return SDValue();
    7133             : 
    7134        2893 :   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    7135             : 
    7136        8319 :   if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
    7137       12530 :       !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
    7138        8995 :       !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
    7139        2495 :     return SDValue();
    7140             : 
    7141         398 :   SmallVector<SDNode *, 4> SetCCs;
    7142         796 :   if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI))
    7143           0 :     return SDValue();
    7144             : 
    7145             :   ISD::LoadExtType ExtType =
    7146         398 :       N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
    7147             : 
    7148             :   // Try to split the vector types to get down to legal types.
    7149         398 :   EVT SplitSrcVT = SrcVT;
    7150         398 :   EVT SplitDstVT = DstVT;
    7151        2629 :   while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
    7152         889 :          SplitSrcVT.getVectorNumElements() > 1) {
    7153         671 :     SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
    7154         671 :     SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
    7155             :   }
    7156             : 
    7157         398 :   if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
    7158         218 :     return SDValue();
    7159             : 
    7160         180 :   SDLoc DL(N);
    7161             :   const unsigned NumSplits =
    7162         180 :       DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
    7163         180 :   const unsigned Stride = SplitSrcVT.getStoreSize();
    7164         360 :   SmallVector<SDValue, 4> Loads;
    7165         360 :   SmallVector<SDValue, 4> Chains;
    7166             : 
    7167         180 :   SDValue BasePtr = LN0->getBasePtr();
    7168         631 :   for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
    7169         451 :     const unsigned Offset = Idx * Stride;
    7170        1353 :     const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
    7171             : 
    7172         451 :     SDValue SplitLoad = DAG.getExtLoad(
    7173         902 :         ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr,
    7174         451 :         LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
    7175        2255 :         LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
    7176             : 
    7177         902 :     BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
    7178        1353 :                           DAG.getConstant(Stride, DL, BasePtr.getValueType()));
    7179             : 
    7180         902 :     Loads.push_back(SplitLoad.getValue(0));
    7181         902 :     Chains.push_back(SplitLoad.getValue(1));
    7182             :   }
    7183             : 
    7184         720 :   SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
    7185         540 :   SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
    7186             : 
    7187             :   // Simplify TF.
    7188         180 :   AddToWorklist(NewChain.getNode());
    7189             : 
    7190         180 :   CombineTo(N, NewValue);
    7191             : 
    7192             :   // Replace uses of the original load (before extension)
    7193             :   // with a truncate of the concatenated sextloaded vectors.
    7194             :   SDValue Trunc =
    7195         900 :       DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
    7196         360 :   CombineTo(N0.getNode(), Trunc, NewChain);
    7197         180 :   ExtendSetCCUses(SetCCs, Trunc, NewValue, DL,
    7198         180 :                   (ISD::NodeType)N->getOpcode());
    7199         180 :   return SDValue(N, 0); // Return N so it doesn't get rechecked!
    7200             : }
    7201             : 
    7202             : /// If we're narrowing or widening the result of a vector select and the final
    7203             : /// size is the same size as a setcc (compare) feeding the select, then try to
    7204             : /// apply the cast operation to the select's operands because matching vector
    7205             : /// sizes for a select condition and other operands should be more efficient.
    7206      257874 : SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
    7207      515748 :   unsigned CastOpcode = Cast->getOpcode();
    7208             :   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
    7209             :           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
    7210             :           CastOpcode == ISD::FP_ROUND) &&
    7211             :          "Unexpected opcode for vector select narrowing/widening");
    7212             : 
    7213             :   // We only do this transform before legal ops because the pattern may be
    7214             :   // obfuscated by target-specific operations after legalization. Do not create
    7215             :   // an illegal select op, however, because that may be difficult to lower.
    7216      515748 :   EVT VT = Cast->getValueType(0);
    7217      257874 :   if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
    7218      118985 :     return SDValue();
    7219             : 
    7220      277778 :   SDValue VSel = Cast->getOperand(0);
    7221      277835 :   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
    7222          38 :       VSel.getOperand(0).getOpcode() != ISD::SETCC)
    7223      138870 :     return SDValue();
    7224             : 
    7225             :   // Does the setcc have the same vector size as the casted select?
    7226          38 :   SDValue SetCC = VSel.getOperand(0);
    7227          57 :   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
    7228          19 :   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
    7229           3 :     return SDValue();
    7230             : 
    7231             :   // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
    7232          32 :   SDValue A = VSel.getOperand(1);
    7233          32 :   SDValue B = VSel.getOperand(2);
    7234          16 :   SDValue CastA, CastB;
    7235          16 :   SDLoc DL(Cast);
    7236          16 :   if (CastOpcode == ISD::FP_ROUND) {
    7237             :     // FP_ROUND (fptrunc) has an extra flag operand to pass along.
    7238          12 :     CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
    7239          12 :     CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
    7240             :   } else {
    7241          24 :     CastA = DAG.getNode(CastOpcode, DL, VT, A);
    7242          24 :     CastB = DAG.getNode(CastOpcode, DL, VT, B);
    7243             :   }
    7244          16 :   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
    7245             : }
    7246             : 
    7247       22642 : SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
    7248       45284 :   SDValue N0 = N->getOperand(0);
    7249       45284 :   EVT VT = N->getValueType(0);
    7250       45284 :   SDLoc DL(N);
    7251             : 
    7252       45284 :   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
    7253       45284 :                                               LegalOperations))
    7254         147 :     return SDValue(Res, 0);
    7255             : 
    7256             :   // fold (sext (sext x)) -> (sext x)
    7257             :   // fold (sext (aext x)) -> (sext x)
    7258       67483 :   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    7259          18 :     return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
    7260             : 
    7261       44978 :   if (N0.getOpcode() == ISD::TRUNCATE) {
    7262             :     // fold (sext (truncate (load x))) -> (sext (smaller load x))
    7263             :     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
    7264        5266 :     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
    7265          24 :       SDNode *oye = N0.getOperand(0).getNode();
    7266          12 :       if (NarrowLoad.getNode() != N0.getNode()) {
    7267          24 :         CombineTo(N0.getNode(), NarrowLoad);
    7268             :         // CombineTo deleted the truncate, if needed, but not what's under it.
    7269          12 :         AddToWorklist(oye);
    7270             :       }
    7271          12 :       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    7272             :     }
    7273             : 
    7274             :     // See if the value being truncated is already sign extended.  If so, just
    7275             :     // eliminate the trunc/sext pair.
    7276       10508 :     SDValue Op = N0.getOperand(0);
    7277        5254 :     unsigned OpBits   = Op.getScalarValueSizeInBits();
    7278        5254 :     unsigned MidBits  = N0.getScalarValueSizeInBits();
    7279        5254 :     unsigned DestBits = VT.getScalarSizeInBits();
    7280        5254 :     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
    7281             : 
    7282        5254 :     if (OpBits == DestBits) {
    7283             :       // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
    7284             :       // bits, it is already ready.
    7285        2937 :       if (NumSignBits > DestBits-MidBits)
    7286         789 :         return Op;
    7287        2317 :     } else if (OpBits < DestBits) {
    7288             :       // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
    7289             :       // bits, just sext from i32.
    7290        1822 :       if (NumSignBits > OpBits-MidBits)
    7291          48 :         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
    7292             :     } else {
    7293             :       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
    7294             :       // bits, just truncate to i32.
    7295         495 :       if (NumSignBits > OpBits-MidBits)
    7296          38 :         return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
    7297             :     }
    7298             : 
    7299             :     // fold (sext (truncate x)) -> (sextinreg x).
    7300        4645 :     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
    7301             :                                                  N0.getValueType())) {
    7302        4422 :       if (OpBits < DestBits)
    7303        7192 :         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
    7304        2624 :       else if (OpBits > DestBits)
    7305        1904 :         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
    7306        4422 :       return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
    7307        8844 :                          DAG.getValueType(N0.getValueType()));
    7308             :     }
    7309             :   }
    7310             : 
    7311             :   // fold (sext (load x)) -> (sext (truncate (sextload x)))
    7312             :   // Only generate vector extloads when 1) they're legal, and 2) they are
    7313             :   // deemed desirable by the target.
    7314       27762 :   if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
    7315        8748 :       ((!LegalOperations && !VT.isVector() &&
    7316        3517 :         !cast<LoadSDNode>(N0)->isVolatile()) ||
    7317        5004 :        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) {
    7318        2850 :     bool DoXform = true;
    7319        3524 :     SmallVector<SDNode*, 4> SetCCs;
    7320        5700 :     if (!N0.hasOneUse())
    7321         213 :       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
    7322        2850 :     if (VT.isVector())
    7323        1852 :       DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
    7324        2850 :     if (DoXform) {
    7325        2176 :       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    7326        6528 :       SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
    7327        2176 :                                        LN0->getBasePtr(), N0.getValueType(),
    7328        6528 :                                        LN0->getMemOperand());
    7329        6528 :       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
    7330        8704 :                                   N0.getValueType(), ExtLoad);
    7331        2176 :       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
    7332             :       // If the load value is used only by N, replace it via CombineTo N.
    7333        4352 :       bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
    7334        2176 :       CombineTo(N, ExtLoad);
    7335        2176 :       if (NoReplaceTrunc)
    7336        6327 :         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
    7337             :       else
    7338         134 :         CombineTo(LN0, Trunc, ExtLoad.getValue(1));
    7339        2176 :       return SDValue(N, 0);
    7340             :     }
    7341             :   }
    7342             : 
    7343             :   // fold (sext (load x)) to multiple smaller sextloads.
    7344             :   // Only on illegal but splittable vectors.
    7345       15047 :   if (SDValue ExtLoad = CombineExtLoad(N))
    7346         119 :     return ExtLoad;
    7347             : 
    7348             :   // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
    7349             :   // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
    7350       29817 :   if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
    7351         117 :       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    7352          33 :     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    7353          33 :     EVT MemVT = LN0->getMemoryVT();
    7354          66 :     if ((!LegalOperations && !LN0->isVolatile()) ||
    7355          62 :         TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
    7356           9 :       SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
    7357           3 :                                        LN0->getBasePtr(), MemVT,
    7358           6 :                                        LN0->getMemOperand());
    7359           3 :       CombineTo(N, ExtLoad);
    7360           3 :       CombineTo(N0.getNode(),
    7361           9 :                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
    7362             :                             N0.getValueType(), ExtLoad),
    7363          18 :                 ExtLoad.getValue(1));
    7364           3 :       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    7365             :     }
    7366             :   }
    7367             : 
    7368             :   // fold (sext (and/or/xor (load x), cst)) ->
    7369             :   //      (and/or/xor (sextload x), (sext cst))
    7370       58266 :   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
    7371       15149 :        N0.getOpcode() == ISD::XOR) &&
    7372        2847 :       isa<LoadSDNode>(N0.getOperand(0)) &&
    7373           6 :       N0.getOperand(1).getOpcode() == ISD::Constant &&
    7374       14925 :       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) &&
    7375           0 :       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    7376           0 :     LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
    7377           0 :     if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) {
    7378           0 :       bool DoXform = true;
    7379           0 :       SmallVector<SDNode*, 4> SetCCs;
    7380           0 :       if (!N0.hasOneUse())
    7381           0 :         DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
    7382             :                                           SetCCs, TLI);
    7383           0 :       if (DoXform) {
    7384           0 :         SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
    7385           0 :                                          LN0->getChain(), LN0->getBasePtr(),
    7386             :                                          LN0->getMemoryVT(),
    7387           0 :                                          LN0->getMemOperand());
    7388           0 :         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    7389           0 :         Mask = Mask.sext(VT.getSizeInBits());
    7390           0 :         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
    7391           0 :                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
    7392           0 :         SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
    7393           0 :                                     SDLoc(N0.getOperand(0)),
    7394           0 :                                     N0.getOperand(0).getValueType(), ExtLoad);
    7395           0 :         ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
    7396           0 :         bool NoReplaceTruncAnd = !N0.hasOneUse();
    7397           0 :         bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
    7398           0 :         CombineTo(N, And);
    7399             :         // If N0 has multiple uses, change other uses as well.
    7400           0 :         if (NoReplaceTruncAnd) {
    7401             :           SDValue TruncAnd =
    7402           0 :               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
    7403           0 :           CombineTo(N0.getNode(), TruncAnd);
    7404             :         }
    7405           0 :         if (NoReplaceTrunc)
    7406           0 :           DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
    7407             :         else
    7408           0 :           CombineTo(LN0, Trunc, ExtLoad.getValue(1));
    7409           0 :         return SDValue(N,0); // Return N so it doesn't get rechecked!
    7410             :       }
    7411             :     }
    7412             :   }
    7413             : 
    7414       29850 :   if (N0.getOpcode() == ISD::SETCC) {
    7415        7022 :     SDValue N00 = N0.getOperand(0);
    7416        7022 :     SDValue N01 = N0.getOperand(1);
    7417       10533 :     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    7418       10533 :     EVT N00VT = N0.getOperand(0).getValueType();
    7419             : 
    7420             :     // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
    7421             :     // Only do this before legalize for now.
    7422        5707 :     if (VT.isVector() && !LegalOperations &&
    7423        2196 :         TLI.getBooleanContents(N00VT) ==
    7424             :             TargetLowering::ZeroOrNegativeOneBooleanContent) {
    7425             :       // On some architectures (such as SSE/NEON/etc) the SETCC result type is
    7426             :       // of the same size as the compared operands. Only optimize sext(setcc())
    7427             :       // if this is the case.
    7428        2196 :       EVT SVT = getSetCCResultType(N00VT);
    7429             : 
    7430             :       // We know that the # elements of the results is the same as the
    7431             :       // # elements of the compare (and the # elements of the compare result
    7432             :       // for that matter).  Check to see that they are the same size.  If so,
    7433             :       // we know that the element size of the sext'd result matches the
    7434             :       // element size of the compare operands.
    7435        2196 :       if (VT.getSizeInBits() == SVT.getSizeInBits())
    7436        3934 :         return DAG.getSetCC(DL, VT, N00, N01, CC);
    7437             : 
    7438             :       // If the desired elements are smaller or larger than the source
    7439             :       // elements, we can use a matching integer vector type and then
    7440             :       // truncate/sign extend.
    7441         335 :       EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
    7442         337 :       if (SVT == MatchingVecType) {
    7443         212 :         SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
    7444         212 :         return DAG.getSExtOrTrunc(VsetCC, DL, VT);
    7445             :       }
    7446             :     }
    7447             : 
    7448             :     // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
    7449             :     // Here, T can be 1 or -1, depending on the type of the setcc and
    7450             :     // getBooleanContents().
    7451        1438 :     unsigned SetCCWidth = N0.getScalarValueSizeInBits();
    7452             : 
    7453             :     // To determine the "true" side of the select, we need to know the high bit
    7454             :     // of the value returned by the setcc if it evaluates to true.
    7455             :     // If the type of the setcc is i1, then the true case of the select is just
    7456             :     // sext(i1 1), that is, -1.
    7457             :     // If the type of the setcc is larger (say, i8) then the value of the high
    7458             :     // bit depends on getBooleanContents(), so ask TLI for a real "true" value
    7459             :     // of the appropriate width.
    7460        1437 :     SDValue ExtTrueVal = (SetCCWidth == 1) ? DAG.getAllOnesConstant(DL, VT)
    7461        2875 :                                            : TLI.getConstTrueVal(DAG, VT, DL);
    7462        1438 :     SDValue Zero = DAG.getConstant(0, DL, VT);
    7463        1438 :     if (SDValue SCC =
    7464        1438 :             SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
    7465           1 :       return SCC;
    7466             : 
    7467        1437 :     if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
    7468         743 :       EVT SetCCVT = getSetCCResultType(N00VT);
    7469             :       // Don't do this transform for i1 because there's a select transform
    7470             :       // that would reverse it.
    7471             :       // TODO: We should not do this transform at all without a target hook
    7472             :       // because a sext is likely cheaper than a select?
    7473         838 :       if (SetCCVT.getScalarSizeInBits() != 1 &&
    7474          95 :           (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
    7475          95 :         SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
    7476          95 :         return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
    7477             :       }
    7478             :     }
    7479             :   }
    7480             : 
    7481             :   // fold (sext x) -> (zext x) if the sign bit is known zero.
    7482       25512 :   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
    7483       12756 :       DAG.SignBitIsZero(N0))
    7484         776 :     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
    7485             : 
    7486       12368 :   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    7487           4 :     return NewVSel;
    7488             : 
    7489       12364 :   return SDValue();
    7490             : }
    7491             : 
    7492             : // isTruncateOf - If N is a truncate of some other value, return true, record
    7493             : // the value being truncated in Op and which of Op's bits are zero/one in Known.
    7494             : // This function computes KnownBits to avoid a duplicated call to
    7495             : // computeKnownBits in the caller.
    7496      126081 : static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
    7497             :                          KnownBits &Known) {
    7498      252162 :   if (N->getOpcode() == ISD::TRUNCATE) {
    7499       12718 :     Op = N->getOperand(0);
    7500        6359 :     DAG.computeKnownBits(Op, Known);
    7501             :     return true;
    7502             :   }
    7503             : 
    7504      237017 :   if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
    7505       76413 :       cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
    7506             :     return false;
    7507             : 
    7508       14290 :   SDValue Op0 = N->getOperand(0);
    7509       14290 :   SDValue Op1 = N->getOperand(1);
    7510             :   assert(Op0.getValueType() == Op1.getValueType());
    7511             : 
    7512        7145 :   if (isNullConstant(Op0))
    7513           0 :     Op = Op1;
    7514        7145 :   else if (isNullConstant(Op1))
    7515        4728 :     Op = Op0;
    7516             :   else
    7517             :     return false;
    7518             : 
    7519        4728 :   DAG.computeKnownBits(Op, Known);
    7520             : 
    7521       23640 :   if (!(Known.Zero | 1).isAllOnesValue())
    7522             :     return false;
    7523             : 
    7524             :   return true;
    7525             : }
    7526             : 
    7527      130342 : SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
    7528      260684 :   SDValue N0 = N->getOperand(0);
    7529      260684 :   EVT VT = N->getValueType(0);
    7530             : 
    7531      260684 :   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
    7532      260684 :                                               LegalOperations))
    7533         126 :     return SDValue(Res, 0);
    7534             : 
    7535             :   // fold (zext (zext x)) -> (zext x)
    7536             :   // fold (zext (aext x)) -> (zext x)
    7537      390636 :   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    7538         132 :     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
    7539         176 :                        N0.getOperand(0));
    7540             : 
    7541             :   // fold (zext (truncate x)) -> (zext x) or
    7542             :   //      (zext (truncate x)) -> (truncate x)
    7543             :   // This is valid when the truncated bits of x are already zero.
    7544             :   // FIXME: We should extend this to work for vectors too.
    7545      130172 :   SDValue Op;
    7546      260344 :   KnownBits Known;
    7547      130172 :   if (!VT.isVector() && isTruncateOf(DAG, N0, Op, Known)) {
    7548             :     APInt TruncatedBits =
    7549        6510 :       (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
    7550             :       APInt(Op.getValueSizeInBits(), 0) :
    7551             :       APInt::getBitsSet(Op.getValueSizeInBits(),
    7552             :                         N0.getValueSizeInBits(),
    7553       13018 :                         std::min(Op.getValueSizeInBits(),
    7554       35432 :                                  VT.getSizeInBits()));
    7555        6510 :     if (TruncatedBits.isSubsetOf(Known.Zero))
    7556       10866 :       return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
    7557             :   }
    7558             : 
    7559             :   // fold (zext (truncate (load x))) -> (zext (smaller load x))
    7560             :   // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
    7561      253100 :   if (N0.getOpcode() == ISD::TRUNCATE) {
    7562        2972 :     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
    7563          26 :       SDNode *oye = N0.getOperand(0).getNode();
    7564          13 :       if (NarrowLoad.getNode() != N0.getNode()) {
    7565          26 :         CombineTo(N0.getNode(), NarrowLoad);
    7566             :         // CombineTo deleted the truncate, if needed, but not what's under it.
    7567          13 :         AddToWorklist(oye);
    7568             :       }
    7569          13 :       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    7570             :     }
    7571             :   }
    7572             : 
    7573             :   // fold (zext (truncate x)) -> (and x, mask)
    7574      253074 :   if (N0.getOpcode() == ISD::TRUNCATE) {
    7575             :     // fold (zext (truncate (load x))) -> (zext (smaller load x))
    7576             :     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
    7577        2959 :     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
    7578           0 :       SDNode *oye = N0.getOperand(0).getNode();
    7579           0 :       if (NarrowLoad.getNode() != N0.getNode()) {
    7580           0 :         CombineTo(N0.getNode(), NarrowLoad);
    7581             :         // CombineTo deleted the truncate, if needed, but not what's under it.
    7582           0 :         AddToWorklist(oye);
    7583             :       }
    7584           0 :       return SDValue(N, 0); // Return N so it doesn't get rechecked!
    7585             :     }
    7586             : 
    7587        8877 :     EVT SrcVT = N0.getOperand(0).getValueType();
    7588        5918 :     EVT MinVT = N0.getValueType();
    7589             : 
    7590             :     // Try to mask before the extension to avoid having to generate a larger mask,
    7591             :     // possibly over several sub-vectors.
    7592        2959 :     if (SrcVT.bitsLT(VT)) {
    7593         726 :       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
    7594          31 :                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
    7595        1390 :         SDValue Op = N0.getOperand(0);
    7596        2085 :         Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
    7597         695 :         AddToWorklist(Op.getNode());
    7598        2085 :         return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
    7599             :       }
    7600             :     }
    7601             : 
    7602        2264 :     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
    7603        9056 :       SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
    7604        2264 :       AddToWorklist(Op.getNode());
    7605        6792 :       return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
    7606             :     }
    7607             :   }
    7608             : 
    7609             :   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
    7610             :   // if either of the casts is not free.
    7611      126822 :   if (N0.getOpcode() == ISD::AND &&
    7612        6914 :       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
    7613      124814 :       N0.getOperand(1).getOpcode() == ISD::Constant &&
    7614        2304 :       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
    7615         428 :                            N0.getValueType()) ||
    7616          88 :        !TLI.isZExtFree(N0.getValueType(), VT))) {
    7617        1143 :     SDValue X = N0.getOperand(0).getOperand(0);
    7618        1143 :     X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
    7619        2286 :     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    7620        1143 :     Mask = Mask.zext(VT.getSizeInBits());
    7621         762 :     SDLoc DL(N);
    7622         381 :     return DAG.getNode(ISD::AND, DL, VT,
    7623         381 :                        X, DAG.getConstant(Mask, DL, VT));
    7624             :   }
    7625             : 
    7626             :   // fold (zext (load x)) -> (zext (truncate (zextload x)))
    7627             :   // Only generate vector extloads when 1) they're legal, and 2) they are
    7628             :   // deemed desirable by the target.
    7629      137063 :   if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
    7630       11775 :       ((!LegalOperations && !VT.isVector() &&
    7631        4609 :         !cast<LoadSDNode>(N0)->isVolatile()) ||
    7632        5307 :        TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) {
    7633        3893 :     bool DoXform = true;
    7634        4600 :     SmallVector<SDNode*, 4> SetCCs;
    7635        7786 :     if (!N0.hasOneUse())
    7636         722 :       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
    7637        3893 :     if (VT.isVector())
    7638        1690 :       DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
    7639        3893 :     if (DoXform) {
    7640        3186 :       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    7641        9558 :       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
    7642        6372 :                                        LN0->getChain(),
    7643        3186 :                                        LN0->getBasePtr(), N0.getValueType(),
    7644        9558 :                                        LN0->getMemOperand());
    7645             : 
    7646        9558 :       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
    7647       12744 :                                   N0.getValueType(), ExtLoad);
    7648        6372 :       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ZERO_EXTEND);
    7649             :       // If the load value is used only by N, replace it via CombineTo N.
    7650        6372 :       bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
    7651        3186 :       CombineTo(N, ExtLoad);
    7652        3186 :       if (NoReplaceTrunc)
    7653        9231 :         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
    7654             :       else
    7655         218 :         CombineTo(LN0, Trunc, ExtLoad.getValue(1));
    7656        3186 :       return SDValue(N, 0); // Return N so it doesn't get rechecked!
    7657             :     }
    7658             :   }
    7659             : 
    7660             :   // fold (zext (load x)) to multiple smaller zextloads.
    7661             :   // Only on illegal but splittable vectors.
    7662      120011 :   if (SDValue ExtLoad = CombineExtLoad(N))
    7663          61 :     return ExtLoad;
    7664             : 
    7665             :   // fold (zext (and/or/xor (load x), cst)) ->
    7666             :   //      (and/or/xor (zextload x), (zext cst))
    7667             :   // Unless (and (load x) cst) will match as a zextload already and has
    7668             :   // additional users.
    7669      470358 :   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
    7670      149728 :        N0.getOpcode() == ISD::XOR) &&
    7671      100693 :       isa<LoadSDNode>(N0.getOperand(0)) &&
    7672         537 :       N0.getOperand(1).getOpcode() == ISD::Constant &&
    7673      120362 :       TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) &&
    7674         215 :       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    7675         138 :     LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
    7676          92 :     if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
    7677          46 :       bool DoXform = true;
    7678          49 :       SmallVector<SDNode*, 4> SetCCs;
    7679          92 :       if (!N0.hasOneUse()) {
    7680          16 :         if (N0.getOpcode() == ISD::AND) {
    7681          12 :           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
    7682           4 :           auto NarrowLoad = false;
    7683           8 :           EVT LoadResultTy = AndC->getValueType(0);
    7684           4 :           EVT ExtVT, LoadedVT;
    7685           4 :           if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT,
    7686             :                                NarrowLoad))
    7687           1 :             DoXform = false;
    7688             :         }
    7689             :         if (DoXform)
    7690          14 :           DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0),
    7691             :                                             ISD::ZERO_EXTEND, SetCCs, TLI);
    7692             :       }
    7693           8 :       if (DoXform) {
    7694         129 :         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
    7695          86 :                                          LN0->getChain(), LN0->getBasePtr(),
    7696             :                                          LN0->getMemoryVT(),
    7697          86 :                                          LN0->getMemOperand());
    7698         258 :         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    7699         129 :         Mask = Mask.zext(VT.getSizeInBits());
    7700          86 :         SDLoc DL(N);
    7701          43 :         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
    7702          86 :                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
    7703          43 :         SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
    7704         172 :                                     SDLoc(N0.getOperand(0)),
    7705         172 :                                     N0.getOperand(0).getValueType(), ExtLoad);
    7706          43 :         ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::ZERO_EXTEND);
    7707          86 :         bool NoReplaceTruncAnd = !N0.hasOneUse();
    7708          86 :         bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
    7709          43 :         CombineTo(N, And);
    7710             :         // If N0 has multiple uses, change other uses as well.
    7711          43 :         if (NoReplaceTruncAnd) {
    7712             :           SDValue TruncAnd =
    7713          15 :               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
    7714          10 :           CombineTo(N0.getNode(), TruncAnd);
    7715             :         }
    7716          43 :         if (NoReplaceTrunc)
    7717          84 :           DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
    7718             :         else
    7719          30 :           CombineTo(LN0, Trunc, ExtLoad.getValue(1));
    7720          43 :         return SDValue(N,0); // Return N so it doesn't get rechecked!
    7721             :       }
    7722             :     }
    7723             :   }
    7724             : 
    7725             :   // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
    7726             :   // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
    7727      239740 :   if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
    7728         222 :       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    7729          63 :     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    7730          63 :     EVT MemVT = LN0->getMemoryVT();
    7731         129 :     if ((!LegalOperations && !LN0->isVolatile()) ||
    7732          70 :         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) {
    7733          99 :       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
    7734          66 :                                        LN0->getChain(),
    7735          33 :                                        LN0->getBasePtr(), MemVT,
    7736          66 :                                        LN0->getMemOperand());
    7737          33 :       CombineTo(N, ExtLoad);
    7738          33 :       CombineTo(N0.getNode(),
    7739          99 :                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(),
    7740             :                             ExtLoad),
    7741         198 :                 ExtLoad.getValue(1));
    7742          33 :       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    7743             :     }
    7744             :   }
    7745             : 
    7746      239748 :   if (N0.getOpcode() == ISD::SETCC) {
    7747             :     // Only do this before legalize for now.
    7748       90965 :     if (!LegalOperations && VT.isVector() &&
    7749       46043 :         N0.getValueType().getVectorElementType() == MVT::i1) {
    7750         279 :       EVT N00VT = N0.getOperand(0).getValueType();
    7751         279 :       if (getSetCCResultType(N00VT) == N0.getValueType())
    7752          13 :         return SDValue();
    7753             : 
    7754             :       // We know that the # elements of the results is the same as the #
    7755             :       // elements of the compare (and the # elements of the compare result for
    7756             :       // that matter). Check to see that they are the same size. If so, we know
    7757             :       // that the element size of the sext'd result matches the element size of
    7758             :       // the compare operands.
    7759          80 :       SDLoc DL(N);
    7760          80 :       SDValue VecOnes = DAG.getConstant(1, DL, VT);
    7761          80 :       if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
    7762             :         // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
    7763         222 :         SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
    7764         296 :                                      N0.getOperand(1), N0.getOperand(2));
    7765         148 :         return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
    7766             :       }
    7767             : 
    7768             :       // If the desired elements are smaller or larger than the source
    7769             :       // elements we can use a matching integer vector type and then
    7770             :       // truncate/sign extend.
    7771             :       EVT MatchingElementType = EVT::getIntegerVT(
    7772           6 :           *DAG.getContext(), N00VT.getScalarSizeInBits());
    7773             :       EVT MatchingVectorType = EVT::getVectorVT(
    7774           6 :           *DAG.getContext(), MatchingElementType, N00VT.getVectorNumElements());
    7775             :       SDValue VsetCC =
    7776          18 :           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
    7777          24 :                       N0.getOperand(1), N0.getOperand(2));
    7778           6 :       return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
    7779           6 :                          VecOnes);
    7780             :     }
    7781             : 
    7782             :     // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    7783       91493 :     SDLoc DL(N);
    7784       45762 :     if (SDValue SCC = SimplifySelectCC(
    7785      183048 :             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
    7786       45762 :             DAG.getConstant(0, DL, VT),
    7787      274572 :             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
    7788          31 :       return SCC;
    7789             :   }
    7790             : 
    7791             :   // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
    7792      354208 :   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
    7793       16701 :       isa<ConstantSDNode>(N0.getOperand(1)) &&
    7794      130774 :       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
    7795          14 :       N0.hasOneUse()) {
    7796          26 :     SDValue ShAmt = N0.getOperand(1);
    7797          26 :     unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
    7798          26 :     if (N0.getOpcode() == ISD::SHL) {
    7799          18 :       SDValue InnerZExt = N0.getOperand(0);
    7800             :       // If the original shl may be shifting out bits, do not perform this
    7801             :       // transformation.
    7802           9 :       unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
    7803          18 :         InnerZExt.getOperand(0).getValueSizeInBits();
    7804           9 :       if (ShAmtVal > KnownZeroBits)
    7805           0 :         return SDValue();
    7806             :     }
    7807             : 
    7808          13 :     SDLoc DL(N);
    7809             : 
    7810             :     // Ensure that the shift amount is wide enough for the shifted value.
    7811          13 :     if (VT.getSizeInBits() >= 256)
    7812           0 :       ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
    7813             : 
    7814          13 :     return DAG.getNode(N0.getOpcode(), DL, VT,
    7815          39 :                        DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
    7816          52 :                        ShAmt);
    7817             :   }
    7818             : 
    7819      119737 :   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    7820           2 :     return NewVSel;
    7821             : 
    7822      119735 :   return SDValue();
    7823             : }
    7824             : 
    7825       35147 : SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
    7826       70294 :   SDValue N0 = N->getOperand(0);
    7827       70294 :   EVT VT = N->getValueType(0);
    7828             : 
    7829       70294 :   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
    7830       70294 :                                               LegalOperations))
    7831         143 :     return SDValue(Res, 0);
    7832             : 
    7833             :   // fold (aext (aext x)) -> (aext x)
    7834             :   // fold (aext (zext x)) -> (zext x)
    7835             :   // fold (aext (sext x)) -> (sext x)
    7836       70008 :   if (N0.getOpcode() == ISD::ANY_EXTEND  ||
    7837       70000 :       N0.getOpcode() == ISD::ZERO_EXTEND ||
    7838       34996 :       N0.getOpcode() == ISD::SIGN_EXTEND)
    7839          78 :     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
    7840             : 
    7841             :   // fold (aext (truncate (load x))) -> (aext (smaller load x))
    7842             :   // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
    7843       69982 :   if (N0.getOpcode() == ISD::TRUNCATE) {
    7844        4784 :     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
    7845          26 :       SDNode *oye = N0.getOperand(0).getNode();
    7846          13 :       if (NarrowLoad.getNode() != N0.getNode()) {
    7847          26 :         CombineTo(N0.getNode(), NarrowLoad);
    7848             :         // CombineTo deleted the truncate, if needed, but not what's under it.
    7849          13 :         AddToWorklist(oye);
    7850             :       }
    7851          13 :       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    7852             :     }
    7853             :   }
    7854             : 
    7855             :   // fold (aext (truncate x))
    7856       69956 :   if (N0.getOpcode() == ISD::TRUNCATE)
    7857       19084 :     return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
    7858             : 
    7859             :   // Fold (aext (and (trunc x), cst)) -> (and x, cst)
    7860             :   // if the trunc is not free.
    7861       32635 :   if (N0.getOpcode() == ISD::AND &&
    7862        6751 :       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
    7863       35852 :       N0.getOperand(1).getOpcode() == ISD::Constant &&
    7864       11130 :       !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
    7865        1855 :                           N0.getValueType())) {
    7866          82 :     SDLoc DL(N);
    7867         123 :     SDValue X = N0.getOperand(0).getOperand(0);
    7868          41 :     X = DAG.getAnyExtOrTrunc(X, DL, VT);
    7869         246 :     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    7870         123 :     Mask = Mask.zext(VT.getSizeInBits());
    7871          41 :     return DAG.getNode(ISD::AND, DL, VT,
    7872          41 :                        X, DAG.getConstant(Mask, DL, VT));
    7873             :   }
    7874             : 
    7875             :   // fold (aext (load x)) -> (aext (truncate (extload x)))
    7876             :   // None of the supported targets knows how to perform load and any_ext
    7877             :   // on vectors in one instruction.  We only perform this transformation on
    7878             :   // scalars.
    7879       36200 :   if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
    7880        8939 :       ISD::isUNINDEXEDLoad(N0.getNode()) &&
    7881        8939 :       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    7882        2670 :     bool DoXform = true;
    7883        2810 :     SmallVector<SDNode*, 4> SetCCs;
    7884        5340 :     if (!N0.hasOneUse())
    7885         164 :       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
    7886         164 :     if (DoXform) {
    7887        2530 :       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    7888        7590 :       SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
    7889        5060 :                                        LN0->getChain(),
    7890        2530 :                                        LN0->getBasePtr(), N0.getValueType(),
    7891        7590 :                                        LN0->getMemOperand());
    7892        7590 :       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
    7893       10120 :                                   N0.getValueType(), ExtLoad);
    7894        5060 :       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
    7895             :                       ISD::ANY_EXTEND);
    7896             :       // If the load value is used only by N, replace it via CombineTo N.
    7897        5060 :       bool NoReplaceTrunc = N0.hasOneUse();
    7898        2530 :       CombineTo(N, ExtLoad);
    7899        2530 :       if (NoReplaceTrunc)
    7900        7518 :         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
    7901             :       else
    7902          48 :         CombineTo(LN0, Trunc, ExtLoad.getValue(1));
    7903        2530 :       return SDValue(N, 0); // Return N so it doesn't get rechecked!
    7904             :     }
    7905             :   }
    7906             : 
    7907             :   // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
    7908             :   // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
    7909             :   // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
    7910       27636 :   if (N0.getOpcode() == ISD::LOAD &&
    7911       28464 :       !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
    7912         276 :       N0.hasOneUse()) {
    7913         176 :     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    7914         176 :     ISD::LoadExtType ExtType = LN0->getExtensionType();
    7915         176 :     EVT MemVT = LN0->getMemoryVT();
    7916         209 :     if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
    7917         465 :       SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
    7918         310 :                                        VT, LN0->getChain(), LN0->getBasePtr(),
    7919         310 :                                        MemVT, LN0->getMemOperand());
    7920         155 :       CombineTo(N, ExtLoad);
    7921         155 :       CombineTo(N0.getNode(),
    7922         465 :                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
    7923             :                             N0.getValueType(), ExtLoad),
    7924         930 :                 ExtLoad.getValue(1));
    7925         155 :       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    7926             :     }
    7927             :   }
    7928             : 
    7929       54962 :   if (N0.getOpcode() == ISD::SETCC) {
    7930             :     // For vectors:
    7931             :     // aext(setcc) -> vsetcc
    7932             :     // aext(setcc) -> truncate(vsetcc)
    7933             :     // aext(setcc) -> aext(vsetcc)
    7934             :     // Only do this before legalize for now.
    7935        1542 :     if (VT.isVector() && !LegalOperations) {
    7936         756 :       EVT N0VT = N0.getOperand(0).getValueType();
    7937             :         // We know that the # elements of the results is the same as the
    7938             :         // # elements of the compare (and the # elements of the compare result
    7939             :         // for that matter).  Check to see that they are the same size.  If so,
    7940             :         // we know that the element size of the sext'd result matches the
    7941             :         // element size of the compare operands.
    7942         252 :       if (VT.getSizeInBits() == N0VT.getSizeInBits())
    7943         775 :         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
    7944         310 :                              N0.getOperand(1),
    7945         620 :                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
    7946             :       // If the desired elements are smaller or larger than the source
    7947             :       // elements we can use a matching integer vector type and then
    7948             :       // truncate/any extend
    7949             :       else {
    7950          97 :         EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
    7951             :         SDValue VsetCC =
    7952         485 :           DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
    7953         194 :                         N0.getOperand(1),
    7954         388 :                         cast<CondCodeSDNode>(N0.getOperand(2))->get());
    7955         291 :         return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
    7956             :       }
    7957             :     }
    7958             : 
    7959             :     // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    7960        2566 :     SDLoc DL(N);
    7961        1290 :     if (SDValue SCC = SimplifySelectCC(
    7962        5160 :             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
    7963        1290 :             DAG.getConstant(0, DL, VT),
    7964        7740 :             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
    7965          14 :       return SCC;
    7966             :   }
    7967             : 
    7968       27215 :   return SDValue();
    7969             : }
    7970             : 
    7971       46858 : SDValue DAGCombiner::visitAssertZext(SDNode *N) {
    7972       93716 :   SDValue N0 = N->getOperand(0);
    7973       93716 :   SDValue N1 = N->getOperand(1);
    7974       46858 :   EVT EVT = cast<VTSDNode>(N1)->getVT();
    7975             : 
    7976             :   // fold (assertzext (assertzext x, vt), vt) -> (assertzext x, vt)
    7977       93716 :   if (N0.getOpcode() == ISD::AssertZext &&
    7978        1704 :       EVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
    7979         470 :     return N0;
    7980             : 
    7981       46388 :   return SDValue();
    7982             : }
    7983             : 
    7984             : /// If the result of a wider load is shifted to right of N  bits and then
    7985             : /// truncated to a narrower type and where N is a multiple of number of bits of
    7986             : /// the narrower type, transform it to a narrower load from address + N / num of
    7987             : /// bits of new type. If the result is to be extended, also fold the extension
    7988             : /// to form a extending load.
    7989      289424 : SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
    7990      578848 :   unsigned Opc = N->getOpcode();
    7991             : 
    7992      289424 :   ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
    7993      578848 :   SDValue N0 = N->getOperand(0);
    7994      578848 :   EVT VT = N->getValueType(0);
    7995      289424 :   EVT ExtVT = VT;
    7996             : 
    7997             :   // This transformation isn't valid for vector loads.
    7998      289424 :   if (VT.isVector())
    7999       12997 :     return SDValue();
    8000             : 
    8001             :   // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
    8002             :   // extended to VT.
    8003      276427 :   if (Opc == ISD::SIGN_EXTEND_INREG) {
    8004       35258 :     ExtType = ISD::SEXTLOAD;
    8005      105774 :     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
    8006      241169 :   } else if (Opc == ISD::SRL) {
    8007             :     // Another special-case: SRL is basically zero-extending a narrower value.
    8008      103606 :     ExtType = ISD::ZEXTLOAD;
    8009      103606 :     N0 = SDValue(N, 0);
    8010      304734 :     ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    8011        6084 :     if (!N01) return SDValue();
    8012       97522 :     ExtVT = EVT::getIntegerVT(*DAG.getContext(),
    8013      195044 :                               VT.getSizeInBits() - N01->getZExtValue());
    8014             :   }
    8015      388716 :   if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT))
    8016       67262 :     return SDValue();
    8017             : 
    8018      203081 :   unsigned EVTBits = ExtVT.getSizeInBits();
    8019             : 
    8020             :   // Do not generate loads of non-round integer types since these can
    8021             :   // be expensive (and would be wrong if the type is not byte sized).
    8022      203081 :   if (!ExtVT.isRound())
    8023       32898 :     return SDValue();
    8024             : 
    8025      170183 :   unsigned ShAmt = 0;
    8026      401646 :   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    8027      172192 :     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
    8028       57360 :       ShAmt = N01->getZExtValue();
    8029             :       // Is the shift amount a multiple of size of VT?
    8030       57360 :       if ((ShAmt & (EVTBits-1)) == 0) {
    8031      100588 :         N0 = N0.getOperand(0);
    8032             :         // Is the load width a multiple of size of VT?
    8033       50294 :         if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
    8034          13 :           return SDValue();
    8035             :       }
    8036             : 
    8037             :       // At this point, we must have a load or else we can't do the transform.
    8038       57347 :       if (!isa<LoadSDNode>(N0)) return SDValue();
    8039             : 
    8040             :       // Because a SRL must be assumed to *need* to zero-extend the high bits
    8041             :       // (as opposed to anyext the high bits), we can't combine the zextload
    8042             :       // lowering of SRL and an sextload.
    8043        6240 :       if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
    8044          35 :         return SDValue();
    8045             : 
    8046             :       // If the shift amount is larger than the input type then we're not
    8047             :       // accessing any of the loaded bytes.  If the load was a zextload/extload
    8048             :       // then the result of the shift+trunc is zero/undef (handled elsewhere).
    8049        3085 :       if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
    8050           2 :         return SDValue();
    8051             :     }
    8052             :   }
    8053             : 
    8054             :   // If the load is shifted left (and the result isn't shifted back right),
    8055             :   // we can fold the truncate through the shift.
    8056        3083 :   unsigned ShLeftAmt = 0;
    8057      226176 :   if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
    8058        4329 :       ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
    8059         483 :     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
    8060           9 :       ShLeftAmt = N01->getZExtValue();
    8061          18 :       N0 = N0.getOperand(0);
    8062             :     }
    8063             :   }
    8064             : 
    8065             :   // If we haven't found a load, we can't narrow it.  Don't transform one with
    8066             :   // multiple uses, this would require adding a new load.
    8067      123900 :   if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
    8068      113751 :     return SDValue();
    8069             : 
    8070             :   // Don't change the width of a volatile load.
    8071        2155 :   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    8072        4310 :   if (LN0->isVolatile())
    8073         140 :     return SDValue();
    8074             : 
    8075             :   // Verify that we are actually reducing a load width here.
    8076        2015 :   if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
    8077         290 :     return SDValue();
    8078             : 
    8079             :   // For the transform to be legal, the load must produce only two values
    8080             :   // (the value loaded and the chain).  Don't transform a pre-increment
    8081             :   // load, for example, which produces an extra value.  Otherwise the
    8082             :   // transformation is not equivalent, and the downstream logic to replace
    8083             :   // uses gets things wrong.
    8084        1725 :   if (LN0->getNumValues() > 2)
    8085           0 :     return SDValue();
    8086             : 
    8087             :   // If the load that we're shrinking is an extload and we're not just
    8088             :   // discarding the extension we can't simply shrink the load. Bail.
    8089             :   // TODO: It would be possible to merge the extensions in some cases.
    8090        2248 :   if (LN0->getExtensionType() != ISD::NON_EXTLOAD &&
    8091        2248 :       LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
    8092           2 :     return SDValue();
    8093             : 
    8094        1723 :   if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT))
    8095         928 :     return SDValue();
    8096             : 
    8097        2385 :   EVT PtrType = N0.getOperand(1).getValueType();
    8098             : 
    8099        1590 :   if (PtrType == MVT::Untyped || PtrType.isExtended())
    8100             :     // It's not possible to generate a constant of extended or untyped type.
    8101           0 :     return SDValue();
    8102             : 
    8103             :   // For big endian targets, we need to adjust the offset to the pointer to
    8104             :   // load the correct bytes.
    8105        1590 :   if (DAG.getDataLayout().isBigEndian()) {
    8106         198 :     unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
    8107          99 :     unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
    8108          99 :     ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
    8109             :   }
    8110             : 
    8111         795 :   uint64_t PtrOff = ShAmt / 8;
    8112        2385 :   unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
    8113         795 :   SDLoc DL(LN0);
    8114             :   // The original load itself didn't wrap, so an offset within it doesn't.
    8115         795 :   SDNodeFlags Flags;
    8116         795 :   Flags.setNoUnsignedWrap(true);
    8117         795 :   SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
    8118         795 :                                PtrType, LN0->getBasePtr(),
    8119             :                                DAG.getConstant(PtrOff, DL, PtrType),
    8120        1590 :                                Flags);
    8121         795 :   AddToWorklist(NewPtr.getNode());
    8122             : 
    8123         795 :   SDValue Load;
    8124         795 :   if (ExtType == ISD::NON_EXTLOAD)
    8125        3135 :     Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
    8126         627 :                        LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
    8127        3135 :                        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
    8128             :   else
    8129         840 :     Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
    8130         168 :                           LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
    8131         168 :                           NewAlign, LN0->getMemOperand()->getFlags(),
    8132         840 :                           LN0->getAAInfo());
    8133             : 
    8134             :   // Replace the old load's chain with the new load's chain.
    8135        1590 :   WorklistRemover DeadNodes(*this);
    8136        2385 :   DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
    8137             : 
    8138             :   // Shift the result left, if we've swallowed a left shift.
    8139         795 :   SDValue Result = Load;
    8140         795 :   if (ShLeftAmt != 0) {
    8141          18 :     EVT ShImmTy = getShiftAmountTy(Result.getValueType());
    8142           9 :     if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
    8143           0 :       ShImmTy = VT;
    8144             :     // If the shift amount is as large as the result size (but, presumably,
    8145             :     // no larger than the source) then the useful bits of the result are
    8146             :     // zero; we can't simply return the shortened shift, because the result
    8147             :     // of that operation is undefined.
    8148          18 :     SDLoc DL(N0);
    8149           9 :     if (ShLeftAmt >= VT.getSizeInBits())
    8150           7 :       Result = DAG.getConstant(0, DL, VT);
    8151             :     else
    8152           4 :       Result = DAG.getNode(ISD::SHL, DL, VT,
    8153           4 :                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
    8154             :   }
    8155             : 
    8156             :   // Return the new loaded value.
    8157         795 :   return Result;
    8158             : }
    8159             : 
    8160       40913 : SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
    8161       81826 :   SDValue N0 = N->getOperand(0);
    8162       81826 :   SDValue N1 = N->getOperand(1);
    8163       81826 :   EVT VT = N->getValueType(0);
    8164       40913 :   EVT EVT = cast<VTSDNode>(N1)->getVT();
    8165       40913 :   unsigned VTBits = VT.getScalarSizeInBits();
    8166       40913 :   unsigned EVTBits = EVT.getScalarSizeInBits();
    8167             : 
    8168       81826 :   if (N0.isUndef())
    8169           1 :     return DAG.getUNDEF(VT);
    8170             : 
    8171             :   // fold (sext_in_reg c1) -> c1
    8172       40912 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    8173         136 :     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
    8174             : 
    8175             :   // If the input is already sign extended, just drop the extension.
    8176       40878 :   if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
    8177        1168 :     return N0;
    8178             : 
    8179             :   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
    8180       79422 :   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
    8181           6 :       EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
    8182           6 :     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
    8183           8 :                        N0.getOperand(0), N1);
    8184             : 
    8185             :   // fold (sext_in_reg (sext x)) -> (sext x)
    8186             :   // fold (sext_in_reg (aext x)) -> (sext x)
    8187             :   // if x is small enough.
    8188      158832 :   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
    8189       13198 :     SDValue N00 = N0.getOperand(0);
    8190        6604 :     if (N00.getScalarValueSizeInBits() <= EVTBits &&
    8191           8 :         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
    8192          12 :       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
    8193             :   }
    8194             : 
    8195             :   // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_in_reg x)
    8196       79350 :   if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
    8197       79290 :        N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
    8198       79410 :        N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
    8199         120 :       N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
    8200          29 :     if (!LegalOperations ||
    8201           0 :         TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
    8202         116 :       return DAG.getSignExtendVectorInReg(N0.getOperand(0), SDLoc(N), VT);
    8203             :   }
    8204             : 
    8205             :   // fold (sext_in_reg (zext x)) -> (sext x)
    8206             :   // iff we are extending the source sign bit.
    8207       79352 :   if (N0.getOpcode() == ISD::ZERO_EXTEND) {
    8208          10 :     SDValue N00 = N0.getOperand(0);
    8209           9 :     if (N00.getScalarValueSizeInBits() == EVTBits &&
    8210           4 :         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
    8211          16 :       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
    8212             :   }
    8213             : 
    8214             :   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
    8215       79344 :   if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
    8216           9 :     return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
    8217             : 
    8218             :   // fold operands of sext_in_reg based on knowledge that the top bits are not
    8219             :   // demanded.
    8220       39669 :   if (SimplifyDemandedBits(SDValue(N, 0)))
    8221        2047 :     return SDValue(N, 0);
    8222             : 
    8223             :   // fold (sext_in_reg (load x)) -> (smaller sextload x)
    8224             :   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
    8225       37622 :   if (SDValue NarrowLoad = ReduceLoadWidth(N))
    8226         134 :     return NarrowLoad;
    8227             : 
    8228             :   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
    8229             :   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
    8230             :   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
    8231       74976 :   if (N0.getOpcode() == ISD::SRL) {
    8232       30344 :     if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
    8233        9258 :       if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
    8234             :         // We can turn this into an SRA iff the input to the SRL is already sign
    8235             :         // extended enough.
    8236       18514 :         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
    8237        9257 :         if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
    8238        5481 :           return DAG.getNode(ISD::SRA, SDLoc(N), VT,
    8239        9135 :                              N0.getOperand(0), N0.getOperand(1));
    8240             :       }
    8241             :   }
    8242             : 
    8243             :   // fold (sext_inreg (extload x)) -> (sextload x)
    8244       37512 :   if (ISD::isEXTLoad(N0.getNode()) &&
    8245        3723 :       ISD::isUNINDEXEDLoad(N0.getNode()) &&
    8246        3546 :       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
    8247        4125 :       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
    8248        1826 :        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    8249         825 :     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    8250        2475 :     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
    8251        1650 :                                      LN0->getChain(),
    8252         825 :                                      LN0->getBasePtr(), EVT,
    8253        1650 :                                      LN0->getMemOperand());
    8254         825 :     CombineTo(N, ExtLoad);
    8255        2475 :     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    8256         825 :     AddToWorklist(ExtLoad.getNode());
    8257         825 :     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    8258             :   }
    8259             :   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
    8260       35010 :   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
    8261          58 :       N0.hasOneUse() &&
    8262           0 :       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
    8263           0 :       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
    8264           0 :        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    8265           0 :     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    8266           0 :     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
    8267           0 :                                      LN0->getChain(),
    8268           0 :                                      LN0->getBasePtr(), EVT,
    8269           0 :                                      LN0->getMemOperand());
    8270           0 :     CombineTo(N, ExtLoad);
    8271           0 :     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    8272           0 :     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    8273             :   }
    8274             : 
    8275             :   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
    8276      103230 :   if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
    8277         162 :     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
    8278         162 :                                            N0.getOperand(1), false))
    8279          24 :       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
    8280          24 :                          BSwap, N1);
    8281             :   }
    8282             : 
    8283       34828 :   return SDValue();
    8284             : }
    8285             : 
    8286        2926 : SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
    8287        5852 :   SDValue N0 = N->getOperand(0);
    8288        5852 :   EVT VT = N->getValueType(0);
    8289             : 
    8290        5852 :   if (N0.isUndef())
    8291           0 :     return DAG.getUNDEF(VT);
    8292             : 
    8293        5852 :   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
    8294        5852 :                                               LegalOperations))
    8295          28 :     return SDValue(Res, 0);
    8296             : 
    8297        2898 :   return SDValue();
    8298             : }
    8299             : 
    8300        3592 : SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
                         // Combine hook for a ZERO_EXTEND_VECTOR_INREG node \p N.
                         // Returns the replacement value, or an empty SDValue when no fold
                         // applies.
    8301        7184 :   SDValue N0 = N->getOperand(0);
    8302        7184 :   EVT VT = N->getValueType(0);
    8303             : 
                         // Extending an undef operand yields undef of the result type.
    8304        7184 :   if (N0.isUndef())
    8305           0 :     return DAG.getUNDEF(VT);
    8306             : 
                         // Otherwise defer to the shared extend-of-constant helper; a non-null
                         // result is the node that replaces N.
    8307        7184 :   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
    8308        7184 :                                               LegalOperations))
    8309          31 :     return SDValue(Res, 0);
    8310             : 
    8311        3561 :   return SDValue();
    8312             : }
    8313             : 
    8314      154503 : SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
                         // Combine hook for a TRUNCATE node \p N. The folds below are tried in
                         // order and the first one that matches returns its replacement value.
                         // An empty SDValue is returned when nothing applies; SDValue(N, 0) is
                         // returned when SimplifyDemandedBits reports a change.
    8315      309006 :   SDValue N0 = N->getOperand(0);
    8316      309006 :   EVT VT = N->getValueType(0);
                         // Endianness selects which narrow vector element holds the low bits in
                         // the extract/bitcast folds further down.
    8317      309006 :   bool isLE = DAG.getDataLayout().isLittleEndian();
    8318             : 
    8319             :   // noop truncate
    8320      463934 :   if (N0.getValueType() == N->getValueType(0))
    8321           0 :     return N0;
    8322             :   // fold (truncate c1) -> c1
    8323      154503 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    8324       11068 :     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
    8325             :   // fold (truncate (truncate x)) -> (truncate x)
    8326      303472 :   if (N0.getOpcode() == ISD::TRUNCATE)
    8327        5535 :     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
    8328             :   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
    8329      300180 :   if (N0.getOpcode() == ISD::ZERO_EXTEND ||
    8330      299688 :       N0.getOpcode() == ISD::SIGN_EXTEND ||
    8331      149059 :       N0.getOpcode() == ISD::ANY_EXTEND) {
    8332             :     // if the source is smaller than the dest, we still need an extend.
    8333       10836 :     if (N0.getOperand(0).getValueType().bitsLT(VT))
    8334         546 :       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
    8335             :     // if the source is larger than the dest, then we just need the truncate.
    8336       10563 :     if (N0.getOperand(0).getValueType().bitsGT(VT))
    8337        8180 :       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
    8338             :     // if the source and dest are the same type, we can drop both the extend
    8339             :     // and the truncate.
    8340        3770 :     return N0.getOperand(0);
    8341             :   }
    8342             : 
    8343             :   // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
    8344      398283 :   if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
    8345        1348 :     return SDValue();
    8346             : 
    8347             :   // Fold extract-and-trunc into a narrow extract. For example:
    8348             :   //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
    8349             :   //   i32 y = TRUNCATE(i64 x)
    8350             :   //        -- becomes --
    8351             :   //   v16i8 b = BITCAST (v2i64 val)
    8352             :   //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
    8353             :   //
    8354             :   // Note: We only run this optimization after type legalization (which often
    8355             :   // creates this pattern) and before operation legalization after which
    8356             :   // we need to be more careful about the vector instructions that we generate.
    8357      159028 :   if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
    8358      187259 :       LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
    8359             : 
    8360       22506 :     EVT VecTy = N0.getOperand(0).getValueType();
    8361       15004 :     EVT ExTy = N0.getValueType();
    8362       15004 :     EVT TrTy = N->getValueType(0);
    8363             : 
    8364        7502 :     unsigned NumElem = VecTy.getVectorNumElements();
    8365        7502 :     unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
    8366             : 
    8367        7502 :     EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
    8368             :     assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
    8369             : 
    8370       15004 :     SDValue EltNo = N0->getOperand(1);
    8371        7501 :     if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
    8372       14574 :       int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
    8373       21861 :       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
                             // Each wide element covers SizeRatio narrow elements: pick the first
                             // of them on little-endian layouts and the last one otherwise.
    8374        7287 :       int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
    8375             : 
    8376       14574 :       SDLoc DL(N);
    8377        7287 :       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
    8378       14574 :                          DAG.getBitcast(NVT, N0.getOperand(0)),
    8379       14574 :                          DAG.getConstant(Index, DL, IndexTy));
    8380             :     }
    8381             :   }
    8382             : 
    8383             :   // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
    8384      276930 :   if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
    8385         208 :     EVT SrcVT = N0.getValueType();
    8386         208 :     if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
    8387         104 :         TLI.isTruncateFree(SrcVT, VT)) {
    8388          96 :       SDLoc SL(N0);
    8389          96 :       SDValue Cond = N0.getOperand(0);
    8390         144 :       SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
    8391         144 :       SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
    8392         144 :       return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
    8393             :     }
    8394             :   }
    8395             : 
    8396             :   // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
    8397      140852 :   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
    8398      140448 :       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
    8399         822 :       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
    8400        1410 :     SDValue Amt = N0.getOperand(1);
    8401         942 :     KnownBits Known;
    8402         705 :     DAG.computeKnownBits(Amt, Known);
    8403         705 :     unsigned Size = VT.getScalarSizeInBits();
                           // Only fold when known-bits analysis shows the shift amount has at
                           // most Log2_32(Size) significant (possibly non-zero) low bits.
    8404        2115 :     if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
    8405         936 :       SDLoc SL(N);
    8406         936 :       EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
    8407             : 
    8408        1404 :       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
                             // Convert the amount to the shift-amount type the target reports
                             // for VT when it differs from the amount's current type.
    8409         936 :       if (AmtVT != Amt.getValueType()) {
    8410           4 :         Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
    8411           4 :         AddToWorklist(Amt.getNode());
    8412             :       }
    8413         936 :       return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
    8414             :     }
    8415             :   }
    8416             : 
    8417             :   // Fold a series of buildvector, bitcast, and truncate if possible.
    8418             :   // For example fold
    8419             :   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
    8420             :   //   (2xi32 (buildvector x, y)).
    8421      156619 :   if (Level == AfterLegalizeVectorOps && VT.isVector() &&
    8422        4296 :       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
    8423      139497 :       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
    8424        1042 :       N0.getOperand(0).hasOneUse()) {
    8425             : 
    8426        1042 :     SDValue BuildVect = N0.getOperand(0);
    8427        1042 :     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
    8428         521 :     EVT TruncVecEltTy = VT.getVectorElementType();
    8429             : 
    8430             :     // Check that the element types match.
    8431         521 :     if (BuildVectEltTy == TruncVecEltTy) {
    8432             :       // Now we only need to compute the offset of the truncated elements.
    8433           2 :       unsigned BuildVecNumElts =  BuildVect.getNumOperands();
    8434           1 :       unsigned TruncVecNumElts = VT.getVectorNumElements();
    8435           1 :       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
    8436             : 
    8437             :       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
    8438             :              "Invalid number of elements");
    8439             : 
                             // Keep every TruncEltOffset-th build_vector operand, starting at
                             // operand 0.
    8440           2 :       SmallVector<SDValue, 8> Opnds;
    8441           3 :       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
    8442           4 :         Opnds.push_back(BuildVect.getOperand(i));
    8443             : 
    8444           4 :       return DAG.getBuildVector(VT, SDLoc(N), Opnds);
    8445             :     }
    8446             :   }
    8447             : 
    8448             :   // See if we can simplify the input to this truncate through knowledge that
    8449             :   // only the low bits are being used.
    8450             :   // For example "trunc (or (shl x, 8), y)" -> "trunc y"
    8451             :   // Currently we only perform this optimization on scalars because vectors
    8452             :   // may have different active low bits.
    8453      137865 :   if (!VT.isVector()) {
    8454             :     APInt Mask =
    8455      259610 :         APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
    8456      130456 :     if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
    8457        5208 :       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
    8458             :   }
    8459             : 
    8460             :   // fold (truncate (load x)) -> (smaller load x)
    8461             :   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
    8462      235281 :   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
    8463      129318 :     if (SDValue Reduced = ReduceLoadWidth(N))
    8464         589 :       return Reduced;
    8465             : 
    8466             :     // Handle the case where the load remains an extending load even
    8467             :     // after truncation.
    8468      259045 :     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
    8469        1587 :       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    8470        4651 :       if (!LN0->isVolatile() &&
    8471        4253 :           LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
    8472         864 :         SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
    8473         576 :                                          VT, LN0->getChain(), LN0->getBasePtr(),
    8474             :                                          LN0->getMemoryVT(),
    8475         576 :                                          LN0->getMemOperand());
                               // Redirect users of the old load's result #1 to result #1 of the
                               // new, narrower extending load.
    8476         864 :         DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
    8477         288 :         return NewLoad;
    8478             :       }
    8479             :     }
    8480             :   }
    8481             : 
    8482             :   // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...),
    8483             :   // where ... are all 'undef'.
    8484      271372 :   if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
    8485         311 :     SmallVector<EVT, 8> VTs;
    8486         159 :     SDValue V;
    8487         159 :     unsigned Idx = 0;
    8488         159 :     unsigned NumDefs = 0;
    8489             : 
                           // Scan the concat operands, remembering the last non-undef one (V at
                           // position Idx) and recording the truncated type of each operand.
    8490         500 :     for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
    8491         668 :       SDValue X = N0.getOperand(i);
    8492         668 :       if (!X.isUndef()) {
    8493         311 :         V = X;
    8494         311 :         Idx = i;
    8495         311 :         NumDefs++;
    8496             :       }
    8497             :       // Stop if more than one member is non-undef.
    8498         334 :       if (NumDefs > 1)
    8499             :         break;
    8500         182 :       VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
    8501             :                                      VT.getVectorElementType(),
    8502         546 :                                      X.getValueType().getVectorNumElements()));
    8503             :     }
    8504             : 
                           // All operands undef: the truncate itself is undef.
    8505         159 :     if (NumDefs == 0)
    8506           7 :       return DAG.getUNDEF(VT);
    8507             : 
    8508         159 :     if (NumDefs == 1) {
    8509             :       assert(V.getNode() && "The single defined operand is empty!");
    8510          14 :       SmallVector<SDValue, 8> Opnds;
    8511          44 :       for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
    8512          53 :         if (i != Idx) {
    8513          46 :           Opnds.push_back(DAG.getUNDEF(VTs[i]));
    8514          23 :           continue;
    8515             :         }
    8516          35 :         SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
    8517           7 :         AddToWorklist(NV.getNode());
    8518           7 :         Opnds.push_back(NV);
    8519             :       }
    8520          35 :       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
    8521             :     }
    8522             :   }
    8523             : 
    8524             :   // Fold truncate of a bitcast of a vector to an extract of the low vector
    8525             :   // element.
    8526             :   //
    8527             :   // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
    8528      284079 :   if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
    8529       23626 :     SDValue VecSrc = N0.getOperand(0);
    8530       23626 :     EVT SrcVT = VecSrc.getValueType();
    8531       33351 :     if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
    8532        9725 :         (!LegalOperations ||
    8533        8827 :          TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
    8534       19384 :       SDLoc SL(N);
    8535             : 
    8536       29076 :       EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
                             // The low element is element 0 on little-endian layouts and the
                             // last element otherwise.
    8537        9692 :       unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
    8538        9692 :       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
    8539        9692 :                          VecSrc, DAG.getConstant(Idx, SL, IdxVT));
    8540             :     }
    8541             :   }
    8542             : 
    8543             :   // Simplify the operands using demanded-bits information.
    8544      244573 :   if (!VT.isVector() &&
    8545      244573 :       SimplifyDemandedBits(SDValue(N, 0)))
    8546        9079 :     return SDValue(N, 0);
    8547             : 
    8548             :   // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
    8549             :   // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
    8550             :   // When the adde's carry is not used.
    8551      350733 :   if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
    8552      116935 :       N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
    8553           9 :       (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) {
    8554          18 :     SDLoc SL(N);
    8555          27 :     auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
    8556          27 :     auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
    8557          18 :     auto VTs = DAG.getVTList(VT, N0->getValueType(1));
    8558          27 :     return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
    8559             :   }
    8560             : 
                         // Last attempt: hand the node to matchVSelectOpSizesWithSetCC.
    8561      116899 :   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    8562           4 :     return NewVSel;
    8563             : 
    8564      116895 :   return SDValue();
    8565             : }
    8566             : 
    8567             : static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
                         // Return the node that produces operand \p i of \p N. When that operand
                         // is a MERGE_VALUES node, look through it to the MERGE_VALUES operand
                         // selected by the result number that N references.
    8568       75668 :   SDValue Elt = N->getOperand(i);
    8569       75668 :   if (Elt.getOpcode() != ISD::MERGE_VALUES)
    8570             :     return Elt.getNode();
    8571         196 :   return Elt.getOperand(Elt.getResNo()).getNode();
    8572             : }
    8573             : 
    8574             : /// build_pair (load, load) -> load
    8575             : /// if load locations are consecutive.
                       ///
                       /// \p N must be a BUILD_PAIR node and \p VT is the type of the combined
                       /// load. The fold requires both operands to be non-extending loads with a
                       /// single use, in the same address space, that
                       /// DAG.areNonVolatileConsecutiveLoads accepts. It also requires that the
                       /// ABI alignment of \p VT does not exceed the first load's alignment and,
                       /// when LegalOperations is set, that a load of \p VT is legal.
                       /// Returns the combined load, or an empty SDValue if the fold does not
                       /// apply.
    8576       18917 : SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
    8577             :   assert(N->getOpcode() == ISD::BUILD_PAIR);
    8578             : 
                         // getBuildPairElt looks through MERGE_VALUES operands; dyn_cast yields
                         // null for operands that are not loads.
    8579       37834 :   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
    8580       37834 :   LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
    8581       25329 :   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
    8582        6084 :       LD1->getAddressSpace() != LD2->getAddressSpace())
    8583       16889 :     return SDValue();
    8584        4056 :   EVT LD1VT = LD1->getValueType(0);
                         // Size of the first load in bytes, passed to the consecutiveness check.
    8585        2028 :   unsigned LD1Bytes = LD1VT.getSizeInBits() / 8;
    8586        6084 :   if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
    8587        2028 :       DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
    8588        3778 :     unsigned Align = LD1->getAlignment();
    8589        3778 :     unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
    8590        3778 :         VT.getTypeForEVT(*DAG.getContext()));
    8591             : 
                           // The wide load reuses LD1's chain, base pointer, pointer info and
                           // alignment, so LD1's alignment must already satisfy VT's ABI
                           // alignment.
    8592        3629 :     if (NewAlign <= Align &&
    8593        1740 :         (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
    8594        8700 :       return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
    8595        6960 :                          LD1->getPointerInfo(), Align);
    8596             :   }
    8597             : 
    8598         288 :   return SDValue();
    8599             : }
    8600             : 
    8601             : static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
    8602             :   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
    8603             :   // and Lo parts; on big-endian machines it doesn't.
                         // Returns 1 for a big-endian data layout and 0 otherwise. Callers in
                         // this file pass the result as the index operand of an EXTRACT_ELEMENT
                         // node applied to the bitcast value.
    8604          20 :   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
    8605             : }
    8606             : 
    8607      376994 : static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
    8608             :                                     const TargetLowering &TLI) {
                         // Try to turn a bitcast-to-FP of an integer AND/XOR with a sign-mask
                         // constant into FABS/FNEG of the original FP value. \p N is the node
                         // being combined (its operand 0 is the integer logic op). Returns the
                         // new FABS/FNEG node, or an empty SDValue when the pattern does not
                         // match.
                         //
    8609             :   // If this is not a bitcast to an FP type or if the target doesn't have
    8610             :   // IEEE754-compliant FP logic, we're done.
    8611      753988 :   EVT VT = N->getValueType(0);
    8612      376994 :   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
    8613      363872 :     return SDValue();
    8614             : 
    8615             :   // TODO: Use splat values for the constant-checking below and remove this
    8616             :   // restriction.
    8617       26244 :   SDValue N0 = N->getOperand(0);
    8618       26244 :   EVT SourceVT = N0.getValueType();
    8619       13122 :   if (SourceVT.isVector())
    8620       12317 :     return SDValue();
    8621             : 
                         // Choose the FP opcode and the integer constant that the logic op must
                         // use: AND with every bit except the sign bit maps to FABS, XOR with
                         // only the sign bit maps to FNEG.
    8622             :   unsigned FPOpcode;
    8623         805 :   APInt SignMask;
    8624        1610 :   switch (N0.getOpcode()) {
    8625          40 :   case ISD::AND:
    8626          40 :     FPOpcode = ISD::FABS;
    8627         240 :     SignMask = ~APInt::getSignMask(SourceVT.getSizeInBits());
    8628          40 :     break;
    8629          29 :   case ISD::XOR:
    8630          29 :     FPOpcode = ISD::FNEG;
    8631         116 :     SignMask = APInt::getSignMask(SourceVT.getSizeInBits());
    8632          29 :     break;
    8633             :   // TODO: ISD::OR --> ISD::FNABS?
    8634         736 :   default:
    8635         736 :     return SDValue();
    8636             :   }
    8637             : 
    8638             :   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
    8639             :   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
    8640         138 :   SDValue LogicOp0 = N0.getOperand(0);
    8641         153 :   ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
                         // The constant must equal the expected mask exactly, and the inner
                         // bitcast's source must already have the result FP type VT.
    8642          34 :   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
    8643           4 :       LogicOp0.getOpcode() == ISD::BITCAST &&
    8644          12 :       LogicOp0->getOperand(0).getValueType() == VT)
    8645          16 :     return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0));
    8646             : 
    8647          65 :   return SDValue();
    8648             : }
    8649             : 
    8650      465355 : SDValue DAGCombiner::visitBITCAST(SDNode *N) {
    8651      930710 :   SDValue N0 = N->getOperand(0);
    8652      930710 :   EVT VT = N->getValueType(0);
    8653             : 
    8654      930710 :   if (N0.isUndef())
    8655         109 :     return DAG.getUNDEF(VT);
    8656             : 
    8657             :   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
    8658             :   // Only do this before legalize, since afterward the target may be depending
    8659             :   // on the bitconvert.
    8660             :   // First check to see if this is all constant.
    8661      568658 :   if (!LegalTypes &&
    8662      656284 :       N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
    8663       63472 :       VT.isVector()) {
    8664       62812 :     bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();
    8665             : 
    8666      125624 :     EVT DestEltVT = N->getValueType(0).getVectorElementType();
    8667             :     assert(!DestEltVT.isVector() &&
    8668             :            "Element type of vector ValueType must not be vector!");
    8669       62812 :     if (isSimple)
    8670       61987 :       return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
    8671             :   }
    8672             : 
    8673             :   // If the input is a constant, let getNode fold it.
    8674      400866 :   if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
    8675             :     // If we can't allow illegal operations, we need to check that this is just
    8676             :     // a fp -> int or int -> conversion and that the resulting operation will
    8677             :     // be legal.
    8678        2461 :     if (!LegalOperations ||
    8679        2272 :         (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
    8680        2468 :          TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
    8681          88 :         (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
    8682          26 :          TLI.isOperationLegal(ISD::Constant, VT)))
    8683         190 :       return DAG.getBitcast(VT, N0);
    8684             :   }
    8685             : 
    8686             :   // (conv (conv x, t1), t2) -> (conv x, t2)
    8687      806138 :   if (N0.getOpcode() == ISD::BITCAST)
    8688       45440 :     return DAG.getBitcast(VT, N0.getOperand(0));
    8689             : 
    8690             :   // fold (conv (load x)) -> (load (conv*)x)
    8691             :   // If the resultant load doesn't need a higher alignment than the original!
    8692      579152 :   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
    8693             :       // Do not change the width of a volatile load.
    8694      125919 :       !cast<LoadSDNode>(N0)->isVolatile() &&
    8695             :       // Do not remove the cast if the types differ in endian layout.
    8696      243728 :       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
    8697      243726 :           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
    8698      122766 :       (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
    8699       16772 :       TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
    8700        3387 :     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    8701        6774 :     unsigned OrigAlign = LN0->getAlignment();
    8702             : 
    8703        3387 :     bool Fast = false;
    8704       10161 :     if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
    8705        3387 :                                LN0->getAddressSpace(), OrigAlign, &Fast) &&
    8706             :         Fast) {
    8707             :       SDValue Load =
    8708       16775 :           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
    8709        6710 :                       LN0->getPointerInfo(), OrigAlign,
    8710       13420 :                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
    8711       10065 :       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
    8712        3355 :       return Load;
    8713             :     }
    8714             :   }
    8715             : 
    8716      376994 :   if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
    8717           4 :     return V;
    8718             : 
    8719             :   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
    8720             :   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
    8721             :   //
    8722             :   // For ppc_fp128:
    8723             :   // fold (bitcast (fneg x)) ->
    8724             :   //     flipbit = signbit
    8725             :   //     (xor (bitcast x) (build_pair flipbit, flipbit))
    8726             :   //
    8727             :   // fold (bitcast (fabs x)) ->
    8728             :   //     flipbit = (and (extract_element (bitcast x), 0), signbit)
    8729             :   //     (xor (bitcast x) (build_pair flipbit, flipbit))
    8730             :   // This often reduces constant pool loads.
    8731      754619 :   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
    8732      377233 :        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
    8733         383 :       N0.getNode()->hasOneUse() && VT.isInteger() &&
    8734      377303 :       !VT.isVector() && !N0.getValueType().isVector()) {
    8735          48 :     SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
    8736          24 :     AddToWorklist(NewConv.getNode());
    8737             : 
    8738          48 :     SDLoc DL(N);
    8739          58 :     if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
    8740             :       assert(VT.getSizeInBits() == 128);
    8741          10 :       SDValue SignBit = DAG.getConstant(
    8742          50 :           APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
    8743          10 :       SDValue FlipBit;
    8744          20 :       if (N0.getOpcode() == ISD::FNEG) {
    8745           5 :         FlipBit = SignBit;
    8746           5 :         AddToWorklist(FlipBit.getNode());
    8747             :       } else {
    8748             :         assert(N0.getOpcode() == ISD::FABS);
    8749             :         SDValue Hi =
    8750          15 :             DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
    8751          15 :                         DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
    8752          35 :                                               SDLoc(NewConv)));
    8753           5 :         AddToWorklist(Hi.getNode());
    8754          25 :         FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
    8755           5 :         AddToWorklist(FlipBit.getNode());
    8756             :       }
    8757             :       SDValue FlipBits =
    8758          40 :           DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
    8759          10 :       AddToWorklist(FlipBits.getNode());
    8760          20 :       return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
    8761             :     }
    8762          28 :     APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
    8763          28 :     if (N0.getOpcode() == ISD::FNEG)
    8764           4 :       return DAG.getNode(ISD::XOR, DL, VT,
    8765           4 :                          NewConv, DAG.getConstant(SignBit, DL, VT));
    8766             :     assert(N0.getOpcode() == ISD::FABS);
    8767          10 :     return DAG.getNode(ISD::AND, DL, VT,
    8768          50 :                        NewConv, DAG.getConstant(~SignBit, DL, VT));
    8769             :   }
    8770             : 
    8771             :   // fold (bitconvert (fcopysign cst, x)) ->
    8772             :   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
    8773             :   // Note that we don't handle (copysign x, cst) because this can always be
    8774             :   // folded to an fneg or fabs.
    8775             :   //
    8776             :   // For ppc_fp128:
    8777             :   // fold (bitcast (fcopysign cst, x)) ->
    8778             :   //     flipbit = (and (extract_element
    8779             :   //                     (xor (bitcast cst), (bitcast x)), 0),
    8780             :   //                    signbit)
    8781             :   //     (xor (bitcast cst) (build_pair flipbit, flipbit))
    8782      377284 :   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
    8783         306 :       isa<ConstantFPSDNode>(N0.getOperand(0)) &&
    8784      376978 :       VT.isInteger() && !VT.isVector()) {
    8785          12 :     unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
    8786           6 :     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
    8787           6 :     if (isTypeLegal(IntXVT)) {
    8788          12 :       SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
    8789           6 :       AddToWorklist(X.getNode());
    8790             : 
    8791             :       // If X has a different width than the result/lhs, sext it or truncate it.
    8792           6 :       unsigned VTWidth = VT.getSizeInBits();
    8793           6 :       if (OrigXWidth < VTWidth) {
    8794           0 :         X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
    8795           0 :         AddToWorklist(X.getNode());
    8796           6 :       } else if (OrigXWidth > VTWidth) {
    8797             :         // To get the sign bit in the right place, we have to shift it right
    8798             :         // before truncating.
    8799           0 :         SDLoc DL(X);
    8800           0 :         X = DAG.getNode(ISD::SRL, DL,
    8801             :                         X.getValueType(), X,
    8802           0 :                         DAG.getConstant(OrigXWidth-VTWidth, DL,
    8803           0 :                                         X.getValueType()));
    8804           0 :         AddToWorklist(X.getNode());
    8805           0 :         X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
    8806           0 :         AddToWorklist(X.getNode());
    8807             :       }
    8808             : 
    8809          17 :       if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
    8810          15 :         APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
    8811          10 :         SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
    8812           5 :         AddToWorklist(Cst.getNode());
    8813          10 :         SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
    8814           5 :         AddToWorklist(X.getNode());
    8815          20 :         SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
    8816           5 :         AddToWorklist(XorResult.getNode());
    8817           5 :         SDValue XorResult64 = DAG.getNode(
    8818          10 :             ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
    8819          15 :             DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
    8820          35 :                                   SDLoc(XorResult)));
    8821           5 :         AddToWorklist(XorResult64.getNode());
    8822             :         SDValue FlipBit =
    8823          15 :             DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
    8824          35 :                         DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
    8825           5 :         AddToWorklist(FlipBit.getNode());
    8826             :         SDValue FlipBits =
    8827          20 :             DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
    8828           5 :         AddToWorklist(FlipBits.getNode());
    8829          20 :         return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
    8830             :       }
    8831           2 :       APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
    8832           3 :       X = DAG.getNode(ISD::AND, SDLoc(X), VT,
    8833           3 :                       X, DAG.getConstant(SignBit, SDLoc(X), VT));
    8834           1 :       AddToWorklist(X.getNode());
    8835             : 
    8836           2 :       SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
    8837           3 :       Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
    8838           7 :                         Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
    8839           1 :       AddToWorklist(Cst.getNode());
    8840             : 
    8841           4 :       return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
    8842             :     }
    8843             :   }
    8844             : 
    8845             :   // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
    8846      753920 :   if (N0.getOpcode() == ISD::BUILD_PAIR)
    8847        2721 :     if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
    8848          38 :       return CombineLD;
    8849             : 
    8850             :   // Remove double bitcasts from shuffles - this is often a legacy of
    8851             :   // XformToShuffleWithZero being used to combine bitmaskings (of
    8852             :   // float vectors bitcast to integer vectors) into shuffles.
    8853             :   // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
    8854      820034 :   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
    8855      104289 :       N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
    8856      772744 :       VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
    8857      377970 :       !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
    8858         524 :     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
    8859             : 
    8860             :     // If operands are a bitcast, peek through if it casts the original VT.
    8861             :     // If operands are a constant, just bitcast back to original VT.
    8862        1048 :     auto PeekThroughBitcast = [&](SDValue Op) {
    8863        2096 :       if (Op.getOpcode() == ISD::BITCAST &&
    8864        1831 :           Op.getOperand(0).getValueType() == VT)
    8865         538 :         return SDValue(Op.getOperand(0));
    8866        2496 :       if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
    8867         464 :           ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
    8868         644 :         return DAG.getBitcast(VT, Op);
    8869         457 :       return SDValue();
    8870         524 :     };
    8871             : 
    8872             :     // FIXME: If either input vector is bitcast, try to convert the shuffle to
    8873             :     // the result type of this bitcast. This would eliminate at least one
    8874             :     // bitcast. See the transform in InstCombine.
    8875        1048 :     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
    8876        1048 :     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
    8877         524 :     if (!(SV0 && SV1))
    8878         378 :       return SDValue();
    8879             : 
    8880             :     int MaskScale =
    8881         292 :         VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
    8882         146 :     SmallVector<int, 8> NewMask;
    8883        1602 :     for (int M : SVN->getMask())
    8884        1860 :       for (int i = 0; i != MaskScale; ++i)
    8885        1278 :         NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
    8886             : 
    8887         292 :     bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
    8888         146 :     if (!LegalMask) {
    8889           0 :       std::swap(SV0, SV1);
    8890           0 :       ShuffleVectorSDNode::commuteMask(NewMask);
    8891           0 :       LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
    8892             :     }
    8893             : 
    8894         146 :     if (LegalMask)
    8895         584 :       return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
    8896             :   }
    8897             : 
    8898      376398 :   return SDValue();
    8899             : }
    8900             : 
    8901             : SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
                         // Combine entry point for a BUILD_PAIR node. The only transform tried
                         // here is CombineConsecutiveLoads, invoked with the node's first result
                         // type; whatever it returns is handed back to the caller unchanged.
    8902       32392 :   EVT VT = N->getValueType(0);
    8903       16196 :   return CombineConsecutiveLoads(N, VT);
    8904             : }
    8905             : 
    8906             : /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
    8907             : /// operands. DstEltVT indicates the destination element value type.
                       ///
                       /// Rebuilds BV as a vector whose elements have type DstEltVT and returns
                       /// the result:
                       ///  - if the element types already match, BV itself is returned;
                       ///  - if the element sizes are equal, each operand is bitcast on its own;
                       ///  - otherwise floating-point element types are first routed through
                       ///    same-width integers (via recursive calls), and then either several
                       ///    integer inputs are packed into each wider output or each integer
                       ///    input is split into several narrower outputs, following the
                       ///    endianness reported by the data layout.
    8908       62110 : SDValue DAGCombiner::
    8909             : ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
    8910      124220 :   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
    8911             : 
    8912             :   // If this is already the right type, we're done.
    8913       62110 :   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
    8914             : 
    8915       62110 :   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
    8916       62110 :   unsigned DstBitSize = DstEltVT.getSizeInBits();
    8917             : 
    8918             :   // If this is a conversion of N elements of one type to N elements of another
    8919             :   // type, convert each element.  This handles FP<->INT cases.
    8920       62110 :   if (SrcBitSize == DstBitSize) {
    8921         146 :     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
    8922         438 :                               BV->getValueType(0).getVectorNumElements());
    8923             : 
    8924             :     // Due to the FP element handling below calling this routine recursively,
    8925             :     // we can end up with a scalar-to-vector node here.
    8926         146 :     if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
    8927           0 :       return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
    8928           0 :                          DAG.getBitcast(DstEltVT, BV->getOperand(0)));
    8929             : 
    8930         146 :     SmallVector<SDValue, 8> Ops;
    8931        1178 :     for (SDValue Op : BV->op_values()) {
    8932             :       // If the vector element type is not legal, the BUILD_VECTOR operands
    8933             :       // are promoted and implicitly truncated.  Make that explicit here.
    8934         886 :       if (Op.getValueType() != SrcEltVT)
    8935           0 :         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
    8936         443 :       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
    8937         886 :       AddToWorklist(Ops.back().getNode());
    8938             :     }
    8939         584 :     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
    8940             :   }
    8941             : 
    8942             :   // Otherwise, we're growing or shrinking the elements.  To avoid having to
    8943             :   // handle annoying details of growing/shrinking FP values, we convert them to
    8944             :   // int first.
    8945       61964 :   if (SrcEltVT.isFloatingPoint()) {
    8946             :     // Convert the input float vector to an int vector where the elements are
    8947             :     // the same sizes.
                           // BV is re-pointed at the folded integer vector, so SrcEltVT is updated
                           // to match it.
    8948          41 :     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
    8949          41 :     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
    8950          41 :     SrcEltVT = IntVT;
    8951             :   }
    8952             : 
    8953             :   // Now we know the input is an integer vector.  If the output is a FP type,
    8954             :   // convert to integer first, then to FP of the right size.
    8955       61964 :   if (DstEltVT.isFloatingPoint()) {
    8956          41 :     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
    8957          41 :     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
    8958             : 
    8959             :     // Next, convert to FP elements of the same size.
    8960          41 :     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
    8961             :   }
    8962             : 
    8963       61923 :   SDLoc DL(BV);
    8964             : 
    8965             :   // Okay, we know the src/dst types are both integers of differing types.
    8966             :   // Handle the growing case first.
    8967             :   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
    8968       61923 :   if (SrcBitSize < DstBitSize) {
                           // Each output element is assembled from NumInputsPerOutput consecutive
                           // input operands.
    8969       61818 :     unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
    8970             : 
    8971      123636 :     SmallVector<SDValue, 8> Ops;
    8972      247436 :     for (unsigned i = 0, e = BV->getNumOperands(); i != e;
    8973      123800 :          i += NumInputsPerOutput) {
                             // On a little-endian layout the inputs of a group are visited from
                             // the highest index down, so the lowest-indexed input ends up in the
                             // low bits of NewBits; on big-endian they are visited in order.
    8974      247600 :       bool isLE = DAG.getDataLayout().isLittleEndian();
    8975      123800 :       APInt NewBits = APInt(DstBitSize, 0);
    8976             :       bool EltIsUndef = true;
    8977      619896 :       for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
    8978             :         // Shift the previously computed bits over.
    8979      248048 :         NewBits <<= SrcBitSize;
    8980      496096 :         SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
                               // An undef input contributes no bits: its slot stays zero after the
                               // shift above.
    8981      496096 :         if (Op.isUndef()) continue;
    8982      247902 :         EltIsUndef = false;
    8983             : 
                               // Normalize the constant to exactly SrcBitSize bits before widening
                               // it (presumably because operands may be wider than SrcEltVT, as
                               // with the promoted operands noted in the equal-size path above).
    8984      495804 :         NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
    8985      743706 :                    zextOrTrunc(SrcBitSize).zext(DstBitSize);
    8986             :       }
    8987             : 
                             // Only a group made up entirely of undef inputs yields an undef
                             // output element.
    8988      123800 :       if (EltIsUndef)
    8989          12 :         Ops.push_back(DAG.getUNDEF(DstEltVT));
    8990             :       else
    8991      123788 :         Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
    8992             :     }
    8993             : 
    8994       61818 :     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
    8995      123636 :     return DAG.getBuildVector(VT, DL, Ops);
    8996             :   }
    8997             : 
    8998             :   // Finally, this must be the case where we are shrinking elements: each input
    8999             :   // turns into multiple outputs.
    9000         105 :   unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
    9001         105 :   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
    9002         315 :                             NumOutputsPerInput*BV->getNumOperands());
    9003         105 :   SmallVector<SDValue, 8> Ops;
    9004             : 
    9005        1126 :   for (const SDValue &Op : BV->op_values()) {
                           // An undef input expands to NumOutputsPerInput undef outputs.
    9006         468 :     if (Op.isUndef()) {
    9007          10 :       Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
    9008          10 :       continue;
    9009             :     }
    9010             : 
                           // NOTE(review): the cast assumes every non-undef operand is an integer
                           // constant at this point, per the precondition in the header comment.
    9011             :     APInt OpVal = cast<ConstantSDNode>(Op)->
    9012        1344 :                   getAPIntValue().zextOrTrunc(SrcBitSize);
    9013             : 
                           // Peel the pieces off starting from the low bits of the input.
    9014        1828 :     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
    9015        2760 :       APInt ThisVal = OpVal.trunc(DstBitSize);
    9016        1380 :       Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
    9017        1380 :       OpVal.lshrInPlace(DstBitSize);
    9018             :     }
    9019             : 
    9020             :     // For big endian targets, swap the order of the pieces of each element.
    9021         896 :     if (DAG.getDataLayout().isBigEndian())
    9022          32 :       std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
    9023             :   }
    9024             : 
    9025         210 :   return DAG.getBuildVector(VT, DL, Ops);
    9026             : }
    9027             : 
    9028             : static bool isContractable(SDNode *N) {
                         // True when N's node flags have either the allow-contract flag or the
                         // unsafe-algebra flag set.
    9029       14666 :   SDNodeFlags F = N->getFlags();
    9030        7343 :   return F.hasAllowContract() || F.hasUnsafeAlgebra();
    9031             : }
    9032             : 
    9033             : /// Try to perform FMA combining on a given FADD node.
                       ///
                       /// Looks for an FMUL feeding either operand of N (directly, through an
                       /// FP_EXTEND, or as the addend of an existing fused node) and rewrites the
                       /// addition as an FMAD or FMA node. Returns the new node, or an empty
                       /// SDValue when no fused opcode is usable, fusion is not permitted for N,
                       /// or none of the patterns below match.
    9034       26656 : SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
    9035       53312 :   SDValue N0 = N->getOperand(0);
    9036       53312 :   SDValue N1 = N->getOperand(1);
    9037       53312 :   EVT VT = N->getValueType(0);
    9038       53312 :   SDLoc SL(N);
    9039             : 
    9040       26656 :   const TargetOptions &Options = DAG.getTarget().Options;
    9041             : 
    9042             :   // Floating-point multiply-add with intermediate rounding.
                         // Only considered when LegalOperations is set and the target reports
                         // FMAD as legal for VT.
    9043       53312 :   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
    9044             : 
    9045             :   // Floating-point multiply-add without intermediate rounding.
    9046             :   bool HasFMA =
    9047       34342 :       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
    9048       37591 :       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
    9049             : 
    9050             :   // No valid opcode, do not combine.
    9051       26656 :   if (!HasFMAD && !HasFMA)
    9052       14698 :     return SDValue();
    9053             : 
                         // Fusion is allowed for every node when the target options select fast
                         // FP-op fusion or unsafe FP math, or when FMAD is usable; otherwise it is
                         // decided per node from the node flags (see isContractable).
    9054       29227 :   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
    9055       22235 :                               Options.UnsafeFPMath || HasFMAD);
    9056             :   // If the addition is not contractable, do not combine.
    9057        6083 :   if (!AllowFusionGlobally && !isContractable(N))
    9058        6083 :     return SDValue();
    9059             : 
                         // Bail out when the subtarget reports that FMAs are generated in the
                         // machine combiner for this OptLevel (presumably so that they are formed
                         // there instead of here).
    9060       11750 :   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
    9061       11750 :   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
    9062          56 :     return SDValue();
    9063             : 
    9064             :   // Always prefer FMAD to FMA for precision.
    9065       11638 :   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
    9066        5819 :   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
    9067        5819 :   bool LookThroughFPExt = TLI.isFPExtFree(VT);
    9068             : 
    9069             :   // Is the node an FMUL and contractable either due to global flags or
    9070             :   // SDNodeFlags.
                         // Note: the lambda parameter N shadows this function's N; inside the
                         // lambda it names the candidate operand being tested.
    9071             :   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
    9072       27540 :     if (N.getOpcode() != ISD::FMUL)
    9073             :       return false;
    9074        2996 :     return AllowFusionGlobally || isContractable(N.getNode());
    9075        5819 :   };
    9076             :   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
    9077             :   // prefer to fold the multiply with fewer uses.
    9078        6768 :   if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
    9079         612 :     if (N0.getNode()->use_size() > N1.getNode()->use_size())
    9080             :       std::swap(N0, N1);
    9081             :   }
    9082             : 
    9083             :   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
                         // Unless the target asks for aggressive fusion, the multiply must have a
                         // single use.
    9084        7373 :   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
    9085        1497 :     return DAG.getNode(PreferredFusedOpcode, SL, VT,
    9086        4491 :                        N0.getOperand(0), N0.getOperand(1), N1);
    9087             :   }
    9088             : 
    9089             :   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
    9090             :   // Note: Commutes FADD operands.
    9091        4588 :   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
    9092         239 :     return DAG.getNode(PreferredFusedOpcode, SL, VT,
    9093         717 :                        N1.getOperand(0), N1.getOperand(1), N0);
    9094             :   }
    9095             : 
    9096             :   // Look through FP_EXTEND nodes to do more combining.
                         // Only attempted when the target reports FP_EXTEND as free for VT.
    9097        4083 :   if (LookThroughFPExt) {
    9098             :     // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
    9099         428 :     if (N0.getOpcode() == ISD::FP_EXTEND) {
    9100          20 :       SDValue N00 = N0.getOperand(0);
    9101          10 :       if (isContractableFMUL(N00))
    9102           2 :         return DAG.getNode(PreferredFusedOpcode, SL, VT,
    9103           2 :                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
    9104           4 :                                        N00.getOperand(0)),
    9105           2 :                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
    9106           8 :                                        N00.getOperand(1)), N1);
    9107             :     }
    9108             : 
    9109             :     // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
    9110             :     // Note: Commutes FADD operands.
    9111         424 :     if (N1.getOpcode() == ISD::FP_EXTEND) {
    9112          20 :       SDValue N10 = N1.getOperand(0);
    9113          10 :       if (isContractableFMUL(N10))
    9114           2 :         return DAG.getNode(PreferredFusedOpcode, SL, VT,
    9115           2 :                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
    9116           4 :                                        N10.getOperand(0)),
    9117           2 :                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
    9118           8 :                                        N10.getOperand(1)), N0);
    9119             :     }
    9120             :   }
    9121             : 
    9122             :   // More folding opportunities when target permits.
    9123        4079 :   if (Aggressive) {
                           // NOTE(review): the next two folds test for a plain ISD::FMUL under
                           // Options.UnsafeFPMath rather than going through isContractableFMUL, so
                           // per-node contract flags are not consulted for them.
    9124             :     // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
    9125             :     // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
    9126             :     // are currently only supported on binary nodes.
    9127         248 :     if (Options.UnsafeFPMath &&
    9128        2095 :         N0.getOpcode() == PreferredFusedOpcode &&
    9129          34 :         N0.getOperand(2).getOpcode() == ISD::FMUL &&
    9130        1626 :         N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
    9131           7 :       return DAG.getNode(PreferredFusedOpcode, SL, VT,
    9132          21 :                          N0.getOperand(0), N0.getOperand(1),
    9133             :                          DAG.getNode(PreferredFusedOpcode, SL, VT,
    9134          21 :                                      N0.getOperand(2).getOperand(0),
    9135          21 :                                      N0.getOperand(2).getOperand(1),
    9136          14 :                                      N1));
    9137             :     }
    9138             : 
    9139             :     // fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x))
    9140             :     // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
    9141             :     // are currently only supported on binary nodes.
    9142         241 :     if (Options.UnsafeFPMath &&
    9143         502 :         N1->getOpcode() == PreferredFusedOpcode &&
    9144          40 :         N1.getOperand(2).getOpcode() == ISD::FMUL &&
    9145        1599 :         N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
    9146           2 :       return DAG.getNode(PreferredFusedOpcode, SL, VT,
    9147           6 :                          N1.getOperand(0), N1.getOperand(1),
    9148             :                          DAG.getNode(PreferredFusedOpcode, SL, VT,
    9149           6 :                                      N1.getOperand(2).getOperand(0),
    9150           6 :                                      N1.getOperand(2).getOperand(1),
    9151           4 :                                      N0));
    9152             :     }
    9153             : 
    9154        1573 :     if (LookThroughFPExt) {
    9155             :       // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
    9156             :       //   -> (fma x, y, (fma (fpext u), (fpext v), z))
                             // The helper below builds the replacement; it is reused further down
                             // for the commuted form with N1 as the fused node.
    9157             :       auto FoldFAddFMAFPExtFMul = [&] (
    9158           8 :           SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
    9159          48 :         return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
    9160             :                            DAG.getNode(PreferredFusedOpcode, SL, VT,
    9161           8 :                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
    9162           8 :                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
    9163          56 :                                        Z));
    9164         222 :       };
    9165         412 :       if (N0.getOpcode() == PreferredFusedOpcode) {
    9166          36 :         SDValue N02 = N0.getOperand(2);
    9167          36 :         if (N02.getOpcode() == ISD::FP_EXTEND) {
    9168           8 :           SDValue N020 = N02.getOperand(0);
    9169           4 :           if (isContractableFMUL(N020))
    9170          12 :             return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
    9171          12 :                                         N020.getOperand(0), N020.getOperand(1),
    9172           4 :                                         N1);
    9173             :         }
    9174             :       }
    9175             : 
    9176             :       // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
    9177             :       //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
    9178             :       // FIXME: This turns two single-precision and one double-precision
    9179             :       // operation into two double-precision operations, which might not be
    9180             :       // interesting for all targets, especially GPUs.
                             // The helper below builds the replacement; it is reused further down
                             // for the commuted form with N1 as the extended fused node.
    9181             :       auto FoldFAddFPExtFMAFMul = [&] (
    9182           8 :           SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
    9183          80 :         return DAG.getNode(PreferredFusedOpcode, SL, VT,
    9184           8 :                            DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
    9185           8 :                            DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
    9186             :                            DAG.getNode(PreferredFusedOpcode, SL, VT,
    9187           8 :                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
    9188           8 :                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
    9189          88 :                                        Z));
    9190         218 :       };
    9191         404 :       if (N0.getOpcode() == ISD::FP_EXTEND) {
    9192          16 :         SDValue N00 = N0.getOperand(0);
    9193          16 :         if (N00.getOpcode() == PreferredFusedOpcode) {
    9194           8 :           SDValue N002 = N00.getOperand(2);
    9195           4 :           if (isContractableFMUL(N002))
    9196          12 :             return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
    9197          12 :                                         N002.getOperand(0), N002.getOperand(1),
    9198           4 :                                         N1);
    9199             :         }
    9200             :       }
    9201             : 
    9202             :       // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
    9203             :       //   -> (fma y, z, (fma (fpext u), (fpext v), x))
    9204         396 :       if (N1.getOpcode() == PreferredFusedOpcode) {
    9205          32 :         SDValue N12 = N1.getOperand(2);
    9206          32 :         if (N12.getOpcode() == ISD::FP_EXTEND) {
    9207           8 :           SDValue N120 = N12.getOperand(0);
    9208           4 :           if (isContractableFMUL(N120))
    9209          12 :             return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
    9210          12 :                                         N120.getOperand(0), N120.getOperand(1),
    9211           4 :                                         N0);
    9212             :         }
    9213             :       }
    9214             : 
    9215             :       // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
    9216             :       //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
    9217             :       // FIXME: This turns two single-precision and one double-precision
    9218             :       // operation into two double-precision operations, which might not be
    9219             :       // interesting for all targets, especially GPUs.
    9220         388 :       if (N1.getOpcode() == ISD::FP_EXTEND) {
    9221          16 :         SDValue N10 = N1.getOperand(0);
    9222          16 :         if (N10.getOpcode() == PreferredFusedOpcode) {
    9223           8 :           SDValue N102 = N10.getOperand(2);
    9224           4 :           if (isContractableFMUL(N102))
    9225          12 :             return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
    9226          12 :                                         N102.getOperand(0), N102.getOperand(1),
    9227           4 :                                         N0);
    9228             :         }
    9229             :       }
    9230             :     }
    9231             :   }
    9232             : 
                         // No pattern matched: leave the FADD as it is.
    9233        4054 :   return SDValue();
    9234             : }
    9235             : 
    9236             : /// Try to perform FMA combining on a given FSUB node.
    9237        7305 : SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
    9238       14610 :   SDValue N0 = N->getOperand(0);
    9239       14610 :   SDValue N1 = N->getOperand(1);
    9240       14610 :   EVT VT = N->getValueType(0);
    9241       14610 :   SDLoc SL(N);
    9242             : 
    9243        7305 :   const TargetOptions &Options = DAG.getTarget().Options;
    9244             :   // Floating-point multiply-add with intermediate rounding.
    9245       14610 :   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
    9246             : 
    9247             :   // Floating-point multiply-add without intermediate rounding.
    9248             :   bool HasFMA =
    9249        9686 :       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
    9250       10480 :       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
    9251             : 
    9252             :   // No valid opcode, do not combine.
    9253        7305 :   if (!HasFMAD && !HasFMA)
    9254        4225 :     return SDValue();
    9255             : 
    9256        6477 :   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
    9257        4877 :                               Options.UnsafeFPMath || HasFMAD);
    9258             :   // If the subtraction is not contractable, do not combine.
    9259        1202 :   if (!AllowFusionGlobally && !isContractable(N))
    9260        1202 :     return SDValue();
    9261             : 
    9262        3756 :   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
    9263        3756 :   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
    9264          58 :     return SDValue();
    9265             : 
    9266             :   // Always prefer FMAD to FMA for precision.
    9267        3640 :   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
    9268        1820 :   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
    9269        1820 :   bool LookThroughFPExt = TLI.isFPExtFree(VT);
    9270             : 
    9271             :   // Is the node an FMUL and contractable either due to global flags or
    9272             :   // SDNodeFlags.
    9273             :   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
    9274        7068 :     if (N.getOpcode() != ISD::FMUL)
    9275             :       return false;
    9276         958 :     return AllowFusionGlobally || isContractable(N.getNode());
    9277        1820 :   };
    9278             : 
    9279             :   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
    9280        2155 :   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
    9281         283 :     return DAG.getNode(PreferredFusedOpcode, SL, VT,
    9282         849 :                        N0.getOperand(0), N0.getOperand(1),
    9283         849 :                        DAG.getNode(ISD::FNEG, SL, VT, N1));
    9284             :   }
    9285             : 
    9286             :   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
    9287             :   // Note: Commutes FSUB operands.
    9288        2029 :   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse()))
    9289         468 :     return DAG.getNode(PreferredFusedOpcode, SL, VT,
    9290         468 :                        DAG.getNode(ISD::FNEG, SL, VT,
    9291         936 :                                    N1.getOperand(0)),
    9292        1872 :                        N1.getOperand(1), N0);
    9293             : 
    9294             :   // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
    9295        2336 :   if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
    9296         264 :       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
    9297         258 :     SDValue N00 = N0.getOperand(0).getOperand(0);
    9298         258 :     SDValue N01 = N0.getOperand(0).getOperand(1);
    9299          86 :     return DAG.getNode(PreferredFusedOpcode, SL, VT,
    9300          86 :                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
    9301         258 :                        DAG.getNode(ISD::FNEG, SL, VT, N1));
    9302             :   }
    9303             : 
    9304             :   // Look through FP_EXTEND nodes to do more combining.
    9305         983 :   if (LookThroughFPExt) {
    9306             :     // fold (fsub (fpext (fmul x, y)), z)
    9307             :     //   -> (fma (fpext x), (fpext y), (fneg z))
    9308         124 :     if (N0.getOpcode() == ISD::FP_EXTEND) {
    9309          24 :       SDValue N00 = N0.getOperand(0);
    9310          12 :       if (isContractableFMUL(N00))
    9311           2 :         return DAG.getNode(PreferredFusedOpcode, SL, VT,
    9312           2 :                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
    9313           4 :                                        N00.getOperand(0)),
    9314           2 :                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
    9315           4 :                                        N00.getOperand(1)),
    9316          12 :                            DAG.getNode(ISD::FNEG, SL, VT, N1));
    9317             :     }
    9318             : 
    9319             :     // fold (fsub x, (fpext (fmul y, z)))
    9320             :     //   -> (fma (fneg (fpext y)), (fpext z), x)
    9321             :     // Note: Commutes FSUB operands.
    9322         120 :     if (N1.getOpcode() == ISD::FP_EXTEND) {
    9323          20 :       SDValue N10 = N1.getOperand(0);
    9324          10 :       if (isContractableFMUL(N10))
    9325           2 :         return DAG.getNode(PreferredFusedOpcode, SL, VT,
    9326           2 :                            DAG.getNode(ISD::FNEG, SL, VT,
    9327           2 :                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
    9328           4 :                                                    N10.getOperand(0))),
    9329           2 :                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
    9330           4 :                                        N10.getOperand(1)),
    9331          10 :                            N0);
    9332             :     }
    9333             : 
    9334             :     // fold (fsub (fpext (fneg (fmul x, y))), z)
    9335             :     //   -> (fneg (fma (fpext x), (fpext y), z))
    9336             :     // Note: This could be removed with appropriate canonicalization of the
    9337             :     // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
    9338             :     // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
    9339             :     // us from implementing the canonicalization in visitFSUB.
    9340         116 :     if (N0.getOpcode() == ISD::FP_EXTEND) {
    9341          20 :       SDValue N00 = N0.getOperand(0);
    9342          20 :       if (N00.getOpcode() == ISD::FNEG) {
    9343           4 :         SDValue N000 = N00.getOperand(0);
    9344           2 :         if (isContractableFMUL(N000)) {
    9345           2 :           return DAG.getNode(ISD::FNEG, SL, VT,
    9346             :                              DAG.getNode(PreferredFusedOpcode, SL, VT,
    9347           2 :                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
    9348           4 :                                                      N000.getOperand(0)),
    9349           2 :                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
    9350           4 :                                                      N000.getOperand(1)),
    9351          10 :                                          N1));
    9352             :         }
    9353             :       }
    9354             :     }
    9355             : 
    9356             :     // fold (fsub (fneg (fpext (fmul x, y))), z)
    9357             :     //   -> (fneg (fma (fpext x), (fpext y), z))
    9358             :     // Note: This could be removed with appropriate canonicalization of the
    9359             :     // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
    9360             :     // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
    9361             :     // us from implementing the canonicalization in visitFSUB.
    9362         112 :     if (N0.getOpcode() == ISD::FNEG) {
    9363           4 :       SDValue N00 = N0.getOperand(0);
    9364           4 :       if (N00.getOpcode() == ISD::FP_EXTEND) {
    9365           4 :         SDValue N000 = N00.getOperand(0);
    9366           2 :         if (isContractableFMUL(N000)) {
    9367           2 :           return DAG.getNode(ISD::FNEG, SL, VT,
    9368             :                              DAG.getNode(PreferredFusedOpcode, SL, VT,
    9369           2 :                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
    9370           4 :                                                      N000.getOperand(0)),
    9371           2 :                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
    9372           4 :                                                      N000.getOperand(1)),
    9373          10 :                                          N1));
    9374             :         }
    9375             :       }
    9376             :     }
    9377             : 
    9378             :   }
    9379             : 
    9380             :   // More folding opportunities when target permits.
    9381         975 :   if (Aggressive) {
    9382             :     // fold (fsub (fma x, y, (fmul u, v)), z)
    9383             :     //   -> (fma x, y, (fma u, v, (fneg z)))
    9384             :     // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
    9385             :     // are currently only supported on binary nodes.
    9386         787 :     if (Options.UnsafeFPMath && N0.getOpcode() == PreferredFusedOpcode &&
    9387         665 :         isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
    9388          29 :         N0.getOperand(2)->hasOneUse()) {
    9389           7 :       return DAG.getNode(PreferredFusedOpcode, SL, VT,
    9390          21 :                          N0.getOperand(0), N0.getOperand(1),
    9391             :                          DAG.getNode(PreferredFusedOpcode, SL, VT,
    9392          21 :                                      N0.getOperand(2).getOperand(0),
    9393          21 :                                      N0.getOperand(2).getOperand(1),
    9394           7 :                                      DAG.getNode(ISD::FNEG, SL, VT,
    9395          21 :                                                  N1)));
    9396             :     }
    9397             : 
    9398             :     // fold (fsub x, (fma y, z, (fmul u, v)))
    9399             :     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
    9400             :     // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
    9401             :     // are currently only supported on binary nodes.
    9402         766 :     if (Options.UnsafeFPMath && N1.getOpcode() == PreferredFusedOpcode &&
    9403          12 :         isContractableFMUL(N1.getOperand(2))) {
    9404          12 :       SDValue N20 = N1.getOperand(2).getOperand(0);
    9405          12 :       SDValue N21 = N1.getOperand(2).getOperand(1);
    9406           4 :       return DAG.getNode(PreferredFusedOpcode, SL, VT,
    9407           4 :                          DAG.getNode(ISD::FNEG, SL, VT,
    9408           8 :                                      N1.getOperand(0)),
    9409           8 :                          N1.getOperand(1),
    9410             :                          DAG.getNode(PreferredFusedOpcode, SL, VT,
    9411           4 :                                      DAG.getNode(ISD::FNEG, SL, VT, N20),
    9412             : 
    9413          12 :                                      N21, N0));
    9414             :     }
    9415             : 
    9416         594 :     if (LookThroughFPExt) {
    9417             :       // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
    9418             :       //   -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
    9419         100 :       if (N0.getOpcode() == PreferredFusedOpcode) {
    9420          16 :         SDValue N02 = N0.getOperand(2);
    9421          16 :         if (N02.getOpcode() == ISD::FP_EXTEND) {
    9422           8 :           SDValue N020 = N02.getOperand(0);
    9423           4 :           if (isContractableFMUL(N020))
    9424           4 :             return DAG.getNode(PreferredFusedOpcode, SL, VT,
    9425          12 :                                N0.getOperand(0), N0.getOperand(1),
    9426             :                                DAG.getNode(PreferredFusedOpcode, SL, VT,
    9427           4 :                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
    9428           8 :                                                        N020.getOperand(0)),
    9429           4 :                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
    9430           8 :                                                        N020.getOperand(1)),
    9431           4 :                                            DAG.getNode(ISD::FNEG, SL, VT,
    9432          24 :                                                        N1)));
    9433             :         }
    9434             :       }
    9435             : 
    9436             :       // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
    9437             :       //   -> (fma (fpext x), (fpext y),
    9438             :       //           (fma (fpext u), (fpext v), (fneg z)))
    9439             :       // FIXME: This turns two single-precision and one double-precision
    9440             :       // operation into two double-precision operations, which might not be
    9441             :       // interesting for all targets, especially GPUs.
    9442          92 :       if (N0.getOpcode() == ISD::FP_EXTEND) {
    9443          16 :         SDValue N00 = N0.getOperand(0);
    9444          16 :         if (N00.getOpcode() == PreferredFusedOpcode) {
    9445           8 :           SDValue N002 = N00.getOperand(2);
    9446           4 :           if (isContractableFMUL(N002))
    9447           4 :             return DAG.getNode(PreferredFusedOpcode, SL, VT,
    9448           4 :                                DAG.getNode(ISD::FP_EXTEND, SL, VT,
    9449           8 :                                            N00.getOperand(0)),
    9450           4 :                                DAG.getNode(ISD::FP_EXTEND, SL, VT,
    9451           8 :                                            N00.getOperand(1)),
    9452             :                                DAG.getNode(PreferredFusedOpcode, SL, VT,
    9453           4 :                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
    9454           8 :                                                        N002.getOperand(0)),
    9455           4 :                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
    9456           8 :                                                        N002.getOperand(1)),
    9457           4 :                                            DAG.getNode(ISD::FNEG, SL, VT,
    9458          36 :                                                        N1)));
    9459             :         }
    9460             :       }
    9461             : 
    9462             :       // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
    9463             :       //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
    9464          92 :       if (N1.getOpcode() == PreferredFusedOpcode &&
    9465          16 :         N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
    9466          12 :         SDValue N120 = N1.getOperand(2).getOperand(0);
    9467           4 :         if (isContractableFMUL(N120)) {
    9468           8 :           SDValue N1200 = N120.getOperand(0);
    9469           8 :           SDValue N1201 = N120.getOperand(1);
    9470           4 :           return DAG.getNode(PreferredFusedOpcode, SL, VT,
    9471          12 :                              DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
    9472           8 :                              N1.getOperand(1),
    9473             :                              DAG.getNode(PreferredFusedOpcode, SL, VT,
    9474           4 :                                          DAG.getNode(ISD::FNEG, SL, VT,
    9475           4 :                                              DAG.getNode(ISD::FP_EXTEND, SL,
    9476             :                                                          VT, N1200)),
    9477           4 :                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
    9478             :                                                      N1201),
    9479          20 :                                          N0));
    9480             :         }
    9481             :       }
    9482             : 
    9483             :       // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
    9484             :       //   -> (fma (fneg (fpext y)), (fpext z),
    9485             :       //           (fma (fneg (fpext u)), (fpext v), x))
    9486             :       // FIXME: This turns two single-precision and one double-precision
    9487             :       // operation into two double-precision operations, which might not be
    9488             :       // interesting for all targets, especially GPUs.
    9489          84 :       if (N1.getOpcode() == ISD::FP_EXTEND &&
    9490          16 :         N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
    9491          12 :         SDValue N100 = N1.getOperand(0).getOperand(0);
    9492          12 :         SDValue N101 = N1.getOperand(0).getOperand(1);
    9493          12 :         SDValue N102 = N1.getOperand(0).getOperand(2);
    9494           4 :         if (isContractableFMUL(N102)) {
    9495           8 :           SDValue N1020 = N102.getOperand(0);
    9496           8 :           SDValue N1021 = N102.getOperand(1);
    9497           4 :           return DAG.getNode(PreferredFusedOpcode, SL, VT,
    9498           4 :                              DAG.getNode(ISD::FNEG, SL, VT,
    9499           4 :                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
    9500             :                                                      N100)),
    9501           4 :                              DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
    9502             :                              DAG.getNode(PreferredFusedOpcode, SL, VT,
    9503           4 :                                          DAG.getNode(ISD::FNEG, SL, VT,
    9504           4 :                                              DAG.getNode(ISD::FP_EXTEND, SL,
    9505             :                                                          VT, N1020)),
    9506           4 :                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
    9507             :                                                      N1021),
    9508          24 :                                          N0));
    9509             :         }
    9510             :       }
    9511             :     }
    9512             :   }
    9513             : 
    9514         948 :   return SDValue();
    9515             : }
    9516             : 
    9517             : /// Try to perform FMA combining on a given FMUL node based on the distributive
    9518             : /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
    9519             : /// subtraction instead of addition). Returns the fused node on success and an empty SDValue() when no fold applies.
    9520       16142 : SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
    9521       32284 :   SDValue N0 = N->getOperand(0);
    9522       32284 :   SDValue N1 = N->getOperand(1);
    9523       32284 :   EVT VT = N->getValueType(0);
    9524       32284 :   SDLoc SL(N);
    9525             : 
    9526             :   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
    9527             : 
    9528       16142 :   const TargetOptions &Options = DAG.getTarget().Options;
    9529             : 
    9530             :   // The transforms below are incorrect when x == 0 and y == inf, because the
    9531             :   // intermediate multiplication produces a nan.
    9532       16142 :   if (!Options.NoInfsFPMath) // so bail out unless infinities are declared absent
    9533       15691 :     return SDValue();
    9534             : 
    9535             :   // Floating-point multiply-add without intermediate rounding.
    9536             :   bool HasFMA =
    9537        1128 :       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
    9538        1228 :       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
    9539         851 :       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
    9540             : 
    9541             :   // Floating-point multiply-add with intermediate rounding. This can result
    9542             :   // in a less precise result due to the changed rounding order.
    9543         774 :   bool HasFMAD = Options.UnsafeFPMath &&
    9544        1350 :                  (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); // FMAD is only considered once LegalOperations is set
    9545             : 
    9546             :   // No valid opcode, do not combine.
    9547         451 :   if (!HasFMAD && !HasFMA)
    9548         125 :     return SDValue();
    9549             : 
    9550             :   // Always prefer FMAD to FMA for precision.
    9551         326 :   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
    9552         326 :   bool Aggressive = TLI.enableAggressiveFMAFusion(VT); // when set, fuse even if the FADD/FSUB has other users
    9553             : 
    9554             :   // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
    9555             :   // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
    9556         634 :   auto FuseFADD = [&](SDValue X, SDValue Y) { // X: candidate FADD operand, Y: the other FMUL operand
    9557        1268 :     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
    9558          94 :       auto XC1 = isConstOrConstSplatFP(X.getOperand(1)); // only the FADD's RHS is inspected; visitFADD moves constants to the RHS
    9559          47 :       if (XC1 && XC1->isExactlyValue(+1.0))
    9560          90 :         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
    9561          48 :       if (XC1 && XC1->isExactlyValue(-1.0))
    9562          54 :         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
    9563          72 :                            DAG.getNode(ISD::FNEG, SL, VT, Y));
    9564             :     }
    9565         598 :     return SDValue(); // empty value: pattern did not match
    9566         326 :   };
    9567             : 
    9568         326 :   if (SDValue FMA = FuseFADD(N0, N1)) // FADD as the first FMUL operand
    9569          18 :     return FMA;
    9570         308 :   if (SDValue FMA = FuseFADD(N1, N0)) // commuted: FADD as the second FMUL operand
    9571          18 :     return FMA;
    9572             : 
    9573             :   // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
    9574             :   // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
    9575             :   // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
    9576             :   // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
    9577         544 :   auto FuseFSUB = [&](SDValue X, SDValue Y) { // X: candidate FSUB operand, Y: the other FMUL operand
    9578        1088 :     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
    9579         216 :       auto XC0 = isConstOrConstSplatFP(X.getOperand(0)); // constant on the LHS of the FSUB: (fsub C, x)
    9580         108 :       if (XC0 && XC0->isExactlyValue(+1.0))
    9581         336 :         return DAG.getNode(PreferredFusedOpcode, SL, VT,
    9582         156 :                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
    9583         156 :                            Y);
    9584          76 :       if (XC0 && XC0->isExactlyValue(-1.0))
    9585          18 :         return DAG.getNode(PreferredFusedOpcode, SL, VT,
    9586          54 :                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
    9587         108 :                            DAG.getNode(ISD::FNEG, SL, VT, Y));
    9588             : 
    9589          76 :       auto XC1 = isConstOrConstSplatFP(X.getOperand(1)); // constant on the RHS of the FSUB: (fsub x, C)
    9590          74 :       if (XC1 && XC1->isExactlyValue(+1.0))
    9591          54 :         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
    9592          72 :                            DAG.getNode(ISD::FNEG, SL, VT, Y));
    9593          38 :       if (XC1 && XC1->isExactlyValue(-1.0))
    9594          54 :         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
    9595             :     }
    9596         438 :     return SDValue(); // empty value: pattern did not match
    9597         290 :   };
    9598             : 
    9599         290 :   if (SDValue FMA = FuseFSUB(N0, N1)) // FSUB as the first FMUL operand
    9600          36 :     return FMA;
    9601         254 :   if (SDValue FMA = FuseFSUB(N1, N0)) // commuted: FSUB as the second FMUL operand
    9602          70 :     return FMA;
    9603             : 
    9604         184 :   return SDValue(); // nothing matched; leave the FMUL unchanged
    9605             : }
    9606             : 
    9607       53465 : static bool isFMulNegTwo(SDValue &N) { // true iff N is an FMUL whose operand 1 is an FP constant (or constant splat) exactly equal to -2.0
    9608      106930 :   if (N.getOpcode() != ISD::FMUL)
    9609             :     return false;
    9610       12296 :   if (ConstantFPSDNode *CFP = isConstOrConstSplatFP(N.getOperand(1))) // only the RHS operand is checked for the constant
    9611         621 :     return CFP->isExactlyValue(-2.0);
    9612             :   return false; // RHS is not a recognized FP constant
    9613             : }
    9614             : 
    9615       27222 : SDValue DAGCombiner::visitFADD(SDNode *N) {
    9616       54444 :   SDValue N0 = N->getOperand(0);
    9617       54444 :   SDValue N1 = N->getOperand(1);
    9618       54444 :   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
    9619       54444 :   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
    9620       54444 :   EVT VT = N->getValueType(0);
    9621       54444 :   SDLoc DL(N);
    9622       27222 :   const TargetOptions &Options = DAG.getTarget().Options;
    9623       54444 :   const SDNodeFlags Flags = N->getFlags();
    9624             : 
    9625             :   // fold vector ops
    9626       27222 :   if (VT.isVector())
    9627        9492 :     if (SDValue FoldedVOp = SimplifyVBinOp(N))
    9628           2 :       return FoldedVOp;
    9629             : 
    9630             :   // fold (fadd c1, c2) -> c1 + c2
    9631       27220 :   if (N0CFP && N1CFP)
    9632           3 :     return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
    9633             : 
    9634             :   // canonicalize constant to RHS
    9635       27217 :   if (N0CFP && !N1CFP)
    9636         205 :     return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
    9637             : 
    9638       27012 :   if (SDValue NewSel = foldBinOpIntoSelect(N))
    9639           5 :     return NewSel;
    9640             : 
    9641             :   // fold (fadd A, (fneg B)) -> (fsub A, B)
    9642       51366 :   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
    9643       24359 :       isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
    9644         123 :     return DAG.getNode(ISD::FSUB, DL, VT, N0,
    9645         123 :                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
    9646             : 
    9647             :   // fold (fadd (fneg A), B) -> (fsub B, A)
    9648       63911 :   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
    9649       24236 :       isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
    9650         159 :     return DAG.getNode(ISD::FSUB, DL, VT, N1,
    9651         159 :                        GetNegatedExpression(N0, DAG, LegalOperations), Flags);
    9652             : 
    9653             :   // fold (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B))
    9654             :   // fold (fadd (fmul B, -2.0), A) -> (fsub A, (fadd B, B))
    9655       53464 :   if ((isFMulNegTwo(N0) && N0.hasOneUse()) ||
    9656       26745 :       (isFMulNegTwo(N1) && N1.hasOneUse())) {
    9657          29 :     bool N1IsFMul = isFMulNegTwo(N1);
    9658          73 :     SDValue AddOp = N1IsFMul ? N1.getOperand(0) : N0.getOperand(0);
    9659          29 :     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, AddOp, AddOp, Flags);