LCOV - code coverage report
Current view: top level - lib/CodeGen/SelectionDAG - DAGCombiner.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 7106 8489 83.7 %
Date: 2018-10-20 13:21:21 Functions: 217 282 77.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
      11             : // both before and after the DAG is legalized.
      12             : //
      13             : // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
      14             : // primarily intended to handle simplification opportunities that are implicit
      15             : // in the LLVM IR and exposed by the various codegen lowering phases.
      16             : //
      17             : //===----------------------------------------------------------------------===//
      18             : 
      19             : #include "llvm/ADT/APFloat.h"
      20             : #include "llvm/ADT/APInt.h"
      21             : #include "llvm/ADT/ArrayRef.h"
      22             : #include "llvm/ADT/DenseMap.h"
      23             : #include "llvm/ADT/None.h"
      24             : #include "llvm/ADT/Optional.h"
      25             : #include "llvm/ADT/STLExtras.h"
      26             : #include "llvm/ADT/SetVector.h"
      27             : #include "llvm/ADT/SmallBitVector.h"
      28             : #include "llvm/ADT/SmallPtrSet.h"
      29             : #include "llvm/ADT/SmallSet.h"
      30             : #include "llvm/ADT/SmallVector.h"
      31             : #include "llvm/ADT/Statistic.h"
      32             : #include "llvm/Analysis/AliasAnalysis.h"
      33             : #include "llvm/Analysis/MemoryLocation.h"
      34             : #include "llvm/CodeGen/DAGCombine.h"
      35             : #include "llvm/CodeGen/ISDOpcodes.h"
      36             : #include "llvm/CodeGen/MachineFrameInfo.h"
      37             : #include "llvm/CodeGen/MachineFunction.h"
      38             : #include "llvm/CodeGen/MachineMemOperand.h"
      39             : #include "llvm/CodeGen/RuntimeLibcalls.h"
      40             : #include "llvm/CodeGen/SelectionDAG.h"
      41             : #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
      42             : #include "llvm/CodeGen/SelectionDAGNodes.h"
      43             : #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
      44             : #include "llvm/CodeGen/TargetLowering.h"
      45             : #include "llvm/CodeGen/TargetRegisterInfo.h"
      46             : #include "llvm/CodeGen/TargetSubtargetInfo.h"
      47             : #include "llvm/CodeGen/ValueTypes.h"
      48             : #include "llvm/IR/Attributes.h"
      49             : #include "llvm/IR/Constant.h"
      50             : #include "llvm/IR/DataLayout.h"
      51             : #include "llvm/IR/DerivedTypes.h"
      52             : #include "llvm/IR/Function.h"
      53             : #include "llvm/IR/LLVMContext.h"
      54             : #include "llvm/IR/Metadata.h"
      55             : #include "llvm/Support/Casting.h"
      56             : #include "llvm/Support/CodeGen.h"
      57             : #include "llvm/Support/CommandLine.h"
      58             : #include "llvm/Support/Compiler.h"
      59             : #include "llvm/Support/Debug.h"
      60             : #include "llvm/Support/ErrorHandling.h"
      61             : #include "llvm/Support/KnownBits.h"
      62             : #include "llvm/Support/MachineValueType.h"
      63             : #include "llvm/Support/MathExtras.h"
      64             : #include "llvm/Support/raw_ostream.h"
      65             : #include "llvm/Target/TargetMachine.h"
      66             : #include "llvm/Target/TargetOptions.h"
      67             : #include <algorithm>
      68             : #include <cassert>
      69             : #include <cstdint>
      70             : #include <functional>
      71             : #include <iterator>
      72             : #include <string>
      73             : #include <tuple>
      74             : #include <utility>
      75             : 
      76             : using namespace llvm;
      77             : 
      78             : #define DEBUG_TYPE "dagcombine"
      79             : 
      80             : STATISTIC(NodesCombined   , "Number of dag nodes combined");
      81             : STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
      82             : STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
      83             : STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
      84             : STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
      85             : STATISTIC(SlicedLoads, "Number of load sliced");
      86             : STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
      87             : 
      88             : static cl::opt<bool>
      89             : CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
      90             :                  cl::desc("Enable DAG combiner's use of IR alias analysis"));
      91             : 
      92             : static cl::opt<bool>
      93             : UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
      94             :         cl::desc("Enable DAG combiner's use of TBAA"));
      95             : 
      96             : #ifndef NDEBUG
      97             : static cl::opt<std::string>
      98             : CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
      99             :                    cl::desc("Only use DAG-combiner alias analysis in this"
     100             :                             " function"));
     101             : #endif
     102             : 
     103             : /// Hidden option to stress test load slicing, i.e., when this option
     104             : /// is enabled, load slicing bypasses most of its profitability guards.
     105             : static cl::opt<bool>
     106             : StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
     107             :                   cl::desc("Bypass the profitability model of load slicing"),
     108             :                   cl::init(false));
     109             : 
     110             : static cl::opt<bool>
     111             :   MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
     112             :                     cl::desc("DAG combiner may split indexing from loads"));
     113             : 
     114             : namespace {
     115             : 
     116             :   class DAGCombiner {
     117             :     SelectionDAG &DAG;
     118             :     const TargetLowering &TLI;
     119             :     CombineLevel Level;
     120             :     CodeGenOpt::Level OptLevel;
     121             :     bool LegalOperations = false;
     122             :     bool LegalTypes = false;
     123             :     bool ForCodeSize;
     124             : 
     125             :     /// Worklist of all of the nodes that need to be simplified.
     126             :     ///
     127             :     /// This must behave as a stack -- new nodes to process are pushed onto the
     128             :     /// back and when processing we pop off of the back.
     129             :     ///
     130             :     /// The worklist will not contain duplicates but may contain null entries
     131             :     /// due to nodes being deleted from the underlying DAG.
     132             :     SmallVector<SDNode *, 64> Worklist;
     133             : 
     134             :     /// Mapping from an SDNode to its position on the worklist.
     135             :     ///
     136             :     /// This is used to find and remove nodes from the worklist (by nulling
     137             :     /// them) when they are deleted from the underlying DAG. It relies on
     138             :     /// stable indices of nodes within the worklist.
     139             :     DenseMap<SDNode *, unsigned> WorklistMap;
     140             : 
     141             :     /// Set of nodes which have been combined (at least once).
     142             :     ///
     143             :     /// This is used to allow us to reliably add any operands of a DAG node
     144             :     /// which have not yet been combined to the worklist.
     145             :     SmallPtrSet<SDNode *, 32> CombinedNodes;
     146             : 
     147             :     // AA - Used for DAG load/store alias analysis.
     148             :     AliasAnalysis *AA;
     149             : 
     150             :     /// When an instruction is simplified, add all users of the instruction to
     151             :     /// the worklist because they might be simplified further now.
     152             :     void AddUsersToWorklist(SDNode *N) {
     153     9474537 :       for (SDNode *Node : N->uses())
     154     6277668 :         AddToWorklist(Node);
     155             :     }
     156             : 
     157             :     /// Call the node-specific routine that folds each particular type of node.
     158             :     SDValue visit(SDNode *N);
     159             : 
     160             :   public:
     161     2767992 :     DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
     162     5535984 :         : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
     163     5535984 :           OptLevel(OL), AA(AA) {
     164     2767992 :       ForCodeSize = DAG.getMachineFunction().getFunction().optForSize();
     165             : 
     166     2767992 :       MaximumLegalStoreInBits = 0;
     167   315551088 :       for (MVT VT : MVT::all_valuetypes())
     168   312783096 :         if (EVT(VT).isSimple() && VT != MVT::Other &&
     169   310015104 :             TLI.isTypeLegal(EVT(VT)) &&
     170    41626409 :             VT.getSizeInBits() >= MaximumLegalStoreInBits)
     171    34327350 :           MaximumLegalStoreInBits = VT.getSizeInBits();
     172     2767992 :     }
     173             : 
     174             :     /// Add N to the back of the worklist (next to be processed) unless it is
     175             :     /// already present, in which case its existing position is kept.
     176   249948491 :     void AddToWorklist(SDNode *N) {
     177             :       assert(N->getOpcode() != ISD::DELETED_NODE &&
     178             :              "Deleted Node added to Worklist");
     179             : 
     180             :       // Skip handle nodes as they can't usefully be combined and confuse the
     181             :       // zero-use deletion strategy.
     182   249948491 :       if (N->getOpcode() == ISD::HANDLENODE)
     183             :         return;
     184             : 
     185   249922932 :       if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
     186    93035691 :         Worklist.push_back(N);
     187             :     }
     188             : 
     189             :     /// Remove all instances of N from the worklist.
     190     7859353 :     void removeFromWorklist(SDNode *N) {
     191             :       CombinedNodes.erase(N);
     192             : 
     193     7859353 :       auto It = WorklistMap.find(N);
     194     7859353 :       if (It == WorklistMap.end())
     195     5403160 :         return; // Not in the worklist.
     196             : 
     197             :       // Null out the entry rather than erasing it to avoid a linear operation.
     198     4912386 :       Worklist[It->second] = nullptr;
     199             :       WorklistMap.erase(It);
     200             :     }
     201             : 
     202             :     void deleteAndRecombine(SDNode *N);
     203             :     bool recursivelyDeleteUnusedNodes(SDNode *N);
     204             : 
     205             :     /// Replaces all uses of the results of one DAG node with new values.
     206             :     SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
     207             :                       bool AddTo = true);
     208             : 
     209             :     /// Replaces all uses of the results of one DAG node with new values.
     210             :     SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
     211      457951 :       return CombineTo(N, &Res, 1, AddTo);
     212             :     }
     213             : 
     214             :     /// Replaces all uses of the results of one DAG node with new values.
     215             :     SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
     216             :                       bool AddTo = true) {
     217      234659 :       SDValue To[] = { Res0, Res1 };
     218        7250 :       return CombineTo(N, To, 2, AddTo);
     219             :     }
     220             : 
     221             :     void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
     222             : 
     223             :   private:
     224             :     unsigned MaximumLegalStoreInBits;
     225             : 
     226             :     /// Check the specified integer node value to see if it can be simplified or
     227             :     /// if things it uses can be simplified by bit propagation.
     228             :     /// If so, return true.
     229     4835936 :     bool SimplifyDemandedBits(SDValue Op) {
     230     4835936 :       unsigned BitWidth = Op.getScalarValueSizeInBits();
     231     4835936 :       APInt Demanded = APInt::getAllOnesValue(BitWidth);
     232     4835936 :       return SimplifyDemandedBits(Op, Demanded);
     233             :     }
     234             : 
     235             :     /// Check the specified vector node value to see if it can be simplified or
     236             :     /// if things it uses can be simplified as it only uses some of the
     237             :     /// elements. If so, return true.
     238      135492 :     bool SimplifyDemandedVectorElts(SDValue Op) {
     239      270984 :       unsigned NumElts = Op.getValueType().getVectorNumElements();
     240      135492 :       APInt Demanded = APInt::getAllOnesValue(NumElts);
     241      135492 :       return SimplifyDemandedVectorElts(Op, Demanded);
     242             :     }
     243             : 
     244             :     bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
     245             :     bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
     246             :                                     bool AssumeSingleUse = false);
     247             : 
     248             :     bool CombineToPreIndexedLoadStore(SDNode *N);
     249             :     bool CombineToPostIndexedLoadStore(SDNode *N);
     250             :     SDValue SplitIndexingFromLoad(LoadSDNode *LD);
     251             :     bool SliceUpLoad(SDNode *N);
     252             : 
     253             :     // Scalars have size 0 to distinguish from singleton vectors.
     254             :     SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
     255             :     bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
     256             :     bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
     257             : 
     258             :     /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
     259             :     ///   load.
     260             :     ///
     261             :     /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
     262             :     /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
     263             :     /// \param EltNo index of the vector element to load.
     264             :     /// \param OriginalLoad load that EVE came from to be replaced.
     265             :     /// \returns EVE on success, SDValue() on failure.
     266             :     SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
     267             :         SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
     268             :     void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
     269             :     SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
     270             :     SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
     271             :     SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
     272             :     SDValue PromoteIntBinOp(SDValue Op);
     273             :     SDValue PromoteIntShiftOp(SDValue Op);
     274             :     SDValue PromoteExtend(SDValue Op);
     275             :     bool PromoteLoad(SDValue Op);
     276             : 
     277             :     /// Call the node-specific routine that knows how to fold each
     278             :     /// particular type of node. If that doesn't do anything, try the
     279             :     /// target-specific DAG combines.
     280             :     SDValue combine(SDNode *N);
     281             : 
     282             :     // Visitation implementation - Implement dag node combining for different
     283             :     // node types.  The semantics are as follows:
     284             :     // Return Value:
     285             :     //   SDValue.getNode() == 0 - No change was made
     286             :     //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
     287             :     //   otherwise              - N should be replaced by the returned Operand.
     288             :     //
     289             :     SDValue visitTokenFactor(SDNode *N);
     290             :     SDValue visitMERGE_VALUES(SDNode *N);
     291             :     SDValue visitADD(SDNode *N);
     292             :     SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
     293             :     SDValue visitSUB(SDNode *N);
     294             :     SDValue visitADDC(SDNode *N);
     295             :     SDValue visitUADDO(SDNode *N);
     296             :     SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
     297             :     SDValue visitSUBC(SDNode *N);
     298             :     SDValue visitUSUBO(SDNode *N);
     299             :     SDValue visitADDE(SDNode *N);
     300             :     SDValue visitADDCARRY(SDNode *N);
     301             :     SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
     302             :     SDValue visitSUBE(SDNode *N);
     303             :     SDValue visitSUBCARRY(SDNode *N);
     304             :     SDValue visitMUL(SDNode *N);
     305             :     SDValue useDivRem(SDNode *N);
     306             :     SDValue visitSDIV(SDNode *N);
     307             :     SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
     308             :     SDValue visitUDIV(SDNode *N);
     309             :     SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
     310             :     SDValue visitREM(SDNode *N);
     311             :     SDValue visitMULHU(SDNode *N);
     312             :     SDValue visitMULHS(SDNode *N);
     313             :     SDValue visitSMUL_LOHI(SDNode *N);
     314             :     SDValue visitUMUL_LOHI(SDNode *N);
     315             :     SDValue visitSMULO(SDNode *N);
     316             :     SDValue visitUMULO(SDNode *N);
     317             :     SDValue visitIMINMAX(SDNode *N);
     318             :     SDValue visitAND(SDNode *N);
     319             :     SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
     320             :     SDValue visitOR(SDNode *N);
     321             :     SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
     322             :     SDValue visitXOR(SDNode *N);
     323             :     SDValue SimplifyVBinOp(SDNode *N);
     324             :     SDValue visitSHL(SDNode *N);
     325             :     SDValue visitSRA(SDNode *N);
     326             :     SDValue visitSRL(SDNode *N);
     327             :     SDValue visitRotate(SDNode *N);
     328             :     SDValue visitABS(SDNode *N);
     329             :     SDValue visitBSWAP(SDNode *N);
     330             :     SDValue visitBITREVERSE(SDNode *N);
     331             :     SDValue visitCTLZ(SDNode *N);
     332             :     SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
     333             :     SDValue visitCTTZ(SDNode *N);
     334             :     SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
     335             :     SDValue visitCTPOP(SDNode *N);
     336             :     SDValue visitSELECT(SDNode *N);
     337             :     SDValue visitVSELECT(SDNode *N);
     338             :     SDValue visitSELECT_CC(SDNode *N);
     339             :     SDValue visitSETCC(SDNode *N);
     340             :     SDValue visitSETCCCARRY(SDNode *N);
     341             :     SDValue visitSIGN_EXTEND(SDNode *N);
     342             :     SDValue visitZERO_EXTEND(SDNode *N);
     343             :     SDValue visitANY_EXTEND(SDNode *N);
     344             :     SDValue visitAssertExt(SDNode *N);
     345             :     SDValue visitSIGN_EXTEND_INREG(SDNode *N);
     346             :     SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
     347             :     SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
     348             :     SDValue visitTRUNCATE(SDNode *N);
     349             :     SDValue visitBITCAST(SDNode *N);
     350             :     SDValue visitBUILD_PAIR(SDNode *N);
     351             :     SDValue visitFADD(SDNode *N);
     352             :     SDValue visitFSUB(SDNode *N);
     353             :     SDValue visitFMUL(SDNode *N);
     354             :     SDValue visitFMA(SDNode *N);
     355             :     SDValue visitFDIV(SDNode *N);
     356             :     SDValue visitFREM(SDNode *N);
     357             :     SDValue visitFSQRT(SDNode *N);
     358             :     SDValue visitFCOPYSIGN(SDNode *N);
     359             :     SDValue visitFPOW(SDNode *N);
     360             :     SDValue visitSINT_TO_FP(SDNode *N);
     361             :     SDValue visitUINT_TO_FP(SDNode *N);
     362             :     SDValue visitFP_TO_SINT(SDNode *N);
     363             :     SDValue visitFP_TO_UINT(SDNode *N);
     364             :     SDValue visitFP_ROUND(SDNode *N);
     365             :     SDValue visitFP_ROUND_INREG(SDNode *N);
     366             :     SDValue visitFP_EXTEND(SDNode *N);
     367             :     SDValue visitFNEG(SDNode *N);
     368             :     SDValue visitFABS(SDNode *N);
     369             :     SDValue visitFCEIL(SDNode *N);
     370             :     SDValue visitFTRUNC(SDNode *N);
     371             :     SDValue visitFFLOOR(SDNode *N);
     372             :     SDValue visitFMINNUM(SDNode *N);
     373             :     SDValue visitFMAXNUM(SDNode *N);
     374             :     SDValue visitBRCOND(SDNode *N);
     375             :     SDValue visitBR_CC(SDNode *N);
     376             :     SDValue visitLOAD(SDNode *N);
     377             : 
     378             :     SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
     379             :     SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
     380             : 
     381             :     SDValue visitSTORE(SDNode *N);
     382             :     SDValue visitINSERT_VECTOR_ELT(SDNode *N);
     383             :     SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
     384             :     SDValue visitBUILD_VECTOR(SDNode *N);
     385             :     SDValue visitCONCAT_VECTORS(SDNode *N);
     386             :     SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
     387             :     SDValue visitVECTOR_SHUFFLE(SDNode *N);
     388             :     SDValue visitSCALAR_TO_VECTOR(SDNode *N);
     389             :     SDValue visitINSERT_SUBVECTOR(SDNode *N);
     390             :     SDValue visitMLOAD(SDNode *N);
     391             :     SDValue visitMSTORE(SDNode *N);
     392             :     SDValue visitMGATHER(SDNode *N);
     393             :     SDValue visitMSCATTER(SDNode *N);
     394             :     SDValue visitFP_TO_FP16(SDNode *N);
     395             :     SDValue visitFP16_TO_FP(SDNode *N);
     396             : 
     397             :     SDValue visitFADDForFMACombine(SDNode *N);
     398             :     SDValue visitFSUBForFMACombine(SDNode *N);
     399             :     SDValue visitFMULForFMADistributiveCombine(SDNode *N);
     400             : 
     401             :     SDValue XformToShuffleWithZero(SDNode *N);
     402             :     SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
     403             :                            SDValue N1, SDNodeFlags Flags);
     404             : 
     405             :     SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
     406             : 
     407             :     SDValue foldSelectOfConstants(SDNode *N);
     408             :     SDValue foldVSelectOfConstants(SDNode *N);
     409             :     SDValue foldBinOpIntoSelect(SDNode *BO);
     410             :     bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
     411             :     SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
     412             :     SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
     413             :     SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
     414             :                              SDValue N2, SDValue N3, ISD::CondCode CC,
     415             :                              bool NotExtCompare = false);
     416             :     SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
     417             :                                    SDValue N2, SDValue N3, ISD::CondCode CC);
     418             :     SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
     419             :                               const SDLoc &DL);
     420             :     SDValue unfoldMaskedMerge(SDNode *N);
     421             :     SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
     422             :     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
     423             :                           const SDLoc &DL, bool foldBooleans);
     424             :     SDValue rebuildSetCC(SDValue N);
     425             : 
     426             :     bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
     427             :                            SDValue &CC) const;
     428             :     bool isOneUseSetCC(SDValue N) const;
     429             : 
     430             :     SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
     431             :                                          unsigned HiOp);
     432             :     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
     433             :     SDValue CombineExtLoad(SDNode *N);
     434             :     SDValue CombineZExtLogicopShiftLoad(SDNode *N);
     435             :     SDValue combineRepeatedFPDivisors(SDNode *N);
     436             :     SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
     437             :     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
     438             :     SDValue BuildSDIV(SDNode *N);
     439             :     SDValue BuildSDIVPow2(SDNode *N);
     440             :     SDValue BuildUDIV(SDNode *N);
     441             :     SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
     442             :     SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
     443             :     SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
     444             :     SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
     445             :     SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
     446             :     SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
     447             :                                 SDNodeFlags Flags, bool Reciprocal);
     448             :     SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
     449             :                                 SDNodeFlags Flags, bool Reciprocal);
     450             :     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
     451             :                                bool DemandHighBits = true);
     452             :     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
     453             :     SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
     454             :                               SDValue InnerPos, SDValue InnerNeg,
     455             :                               unsigned PosOpcode, unsigned NegOpcode,
     456             :                               const SDLoc &DL);
     457             :     SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
     458             :     SDValue MatchLoadCombine(SDNode *N);
     459             :     SDValue ReduceLoadWidth(SDNode *N);
     460             :     SDValue ReduceLoadOpStoreWidth(SDNode *N);
     461             :     SDValue splitMergedValStore(StoreSDNode *ST);
     462             :     SDValue TransformFPLoadStorePair(SDNode *N);
     463             :     SDValue convertBuildVecZextToZext(SDNode *N);
     464             :     SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
     465             :     SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
     466             :     SDValue reduceBuildVecToShuffle(SDNode *N);
     467             :     SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
     468             :                                   ArrayRef<int> VectorMask, SDValue VecIn1,
     469             :                                   SDValue VecIn2, unsigned LeftIdx);
     470             :     SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
     471             : 
     472             :     /// Walk up chain skipping non-aliasing memory nodes,
     473             :     /// looking for aliasing nodes and adding them to the Aliases vector.
     474             :     void GatherAllAliases(SDNode *N, SDValue OriginalChain,
     475             :                           SmallVectorImpl<SDValue> &Aliases);
     476             : 
     477             :     /// Return true if there is any possibility that the two addresses overlap.
     478             :     bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
     479             : 
     480             :     /// Walk up chain skipping non-aliasing memory nodes, looking for a better
     481             :     /// chain (aliasing node).
     482             :     SDValue FindBetterChain(SDNode *N, SDValue Chain);
     483             : 
     484             :     /// Try to replace a store and any possibly adjacent stores on
     485             :     /// consecutive chains with better chains. Return true only if St is
     486             :     /// replaced.
     487             :     ///
     488             :     /// Notice that other chains may still be replaced even if the function
     489             :     /// returns false.
     490             :     bool findBetterNeighborChains(StoreSDNode *St);
     491             : 
     492             :     /// Holds a pointer to an LSBaseSDNode together with its position, expressed
     493             :     /// as an offset from a base pointer, in a chain-connected sequence of memory operations.
     494             :     struct MemOpLink {
     495             :       // The load/store node this link describes.
     496             :       LSBaseSDNode *MemNode;
     497             : 
     498             :       // Offset of this memory operation from the base pointer.
     499             :       int64_t OffsetFromBase;
     500             : 
     501             :       MemOpLink(LSBaseSDNode *N, int64_t Offset) // Stores both values verbatim.
     502     1004148 :           : MemNode(N), OffsetFromBase(Offset) {}
     503             :     };
     504             : 
     505             :     /// This is a helper function for visitMUL to check the profitability
     506             :     /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
     507             :     /// MulNode is the original multiply, AddNode is (add x, c1),
     508             :     /// and ConstNode is c2.
     509             :     bool isMulAddWithConstProfitable(SDNode *MulNode,
     510             :                                      SDValue &AddNode,
     511             :                                      SDValue &ConstNode);
     512             : 
     513             :     /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
     514             :     /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
     515             :     /// the type of the loaded value to be extended.
     516             :     bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
     517             :                           EVT LoadResultTy, EVT &ExtVT);
     518             : 
     519             :     /// Helper function to calculate whether the given Load/Store can have its
     520             :     /// width reduced to \p MemVT.
     521             :     bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
     522             :                            EVT &MemVT, unsigned ShAmt = 0);
     523             : 
     524             :     /// Used by BackwardsPropagateMask to find suitable loads.
     525             :     bool SearchForAndLoads(SDNode *N, SmallPtrSetImpl<LoadSDNode*> &Loads,
     526             :                            SmallPtrSetImpl<SDNode*> &NodesWithConsts,
     527             :                            ConstantSDNode *Mask, SDNode *&NodeToMask);
     528             :     /// Attempt to propagate a given AND node back to load leaves so that they
     529             :     /// can be combined into narrow loads.
     530             :     bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
     531             : 
     532             :     /// Helper function for MergeConsecutiveStores which merges the
     533             :     /// component store chains.
     534             :     SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
     535             :                                 unsigned NumStores);
     536             : 
     537             :     /// This is a helper function for MergeConsecutiveStores. When the
     538             :     /// source elements of the consecutive stores are all constants or
     539             :     /// all extracted vector elements, try to merge them into one
     540             :     /// larger store introducing bitcasts if necessary.  \return True
     541             :     /// if a merged store was created.
     542             :     bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
     543             :                                          EVT MemVT, unsigned NumStores,
     544             :                                          bool IsConstantSrc, bool UseVector,
     545             :                                          bool UseTrunc);
     546             : 
     547             :     /// This is a helper function for MergeConsecutiveStores. Stores
     548             :     /// that potentially may be merged with St are placed in
     549             :     /// StoreNodes. \p Root is a chain predecessor to all store
     550             :     /// candidates.
     551             :     void getStoreMergeCandidates(StoreSDNode *St,
     552             :                                  SmallVectorImpl<MemOpLink> &StoreNodes,
     553             :                                  SDNode *&Root);
     554             : 
     555             :     /// Helper function for MergeConsecutiveStores. Checks if
     556             :     /// candidate stores have indirect dependency through their
     557             :     /// operands. RootNode is the predecessor to all stores calculated
     558             :     /// by getStoreMergeCandidates and is used to prune the dependency check.
     559             :     /// \return True if safe to merge.
     560             :     bool checkMergeStoreCandidatesForDependencies(
     561             :         SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
     562             :         SDNode *RootNode);
     563             : 
     564             :     /// Merge consecutive store operations into a wide store.
     565             :     /// This optimization uses wide integers or vectors when possible.
     566             :     /// \return true if at least one group of stores was merged into a wider
     567             :     /// store.
     568             :     bool MergeConsecutiveStores(StoreSDNode *St);
     569             : 
     570             :     /// Try to transform a truncation where C is a constant:
     571             :     ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
     572             :     ///
     573             :     /// \p N needs to be a truncation and its first operand an AND. Other
     574             :     /// requirements are checked by the function (e.g. that trunc is
     575             :     /// single-use) and if missed an empty SDValue is returned.
     576             :     SDValue distributeTruncateThroughAnd(SDNode *N);
     577             : 
     578             :     /// Helper function to determine whether the target supports the operation
     579             :     /// given by \p Opcode for type \p VT: before operation legalization the
     580             :     /// operation may be legal or custom; once LegalOperations is set it must be
     581             :     /// legal (custom is no longer accepted).
     582           0 :     bool hasOperation(unsigned Opcode, EVT VT) {
     583           0 :       if (LegalOperations) // Stricter query after operation legalization.
     584           0 :         return TLI.isOperationLegal(Opcode, VT);
     585           0 :       return TLI.isOperationLegalOrCustom(Opcode, VT);
     586             :     }
     587             : 
     588             :   public:
     589             :     /// Runs the dag combiner on all nodes in the work list
     590             :     void Run(CombineLevel AtLevel);
     591             : 
     592           0 :     SelectionDAG &getDAG() const { return DAG; } // Accessor for the combiner's DAG member.
     593             : 
     594             :     /// Returns a type large enough to hold any valid shift amount for \p LHSTy -
     595             :     /// before type legalization these can be huge. \p LHSTy must be an integer type.
     596        8588 :     EVT getShiftAmountTy(EVT LHSTy) {
     597             :       assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
     598        8588 :       return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes); // Forwarded to the target, passing LegalTypes.
     599             :     }
     600             : 
     601             :     /// Returns true if we are running before type legalization (LegalTypes is
     602             :     /// false) or if the target reports the specified \p VT as legal.
     603           0 :     bool isTypeLegal(const EVT &VT) {
     604           0 :       if (!LegalTypes) return true; // Any type is acceptable before type legalization.
     605       15639 :       return TLI.isTypeLegal(VT);
     606             :     }
     607             : 
     608             :     /// Convenience wrapper around TargetLowering::getSetCCResultType that supplies the DAG's data layout and context.
     609           0 :     EVT getSetCCResultType(EVT VT) const {
     610           0 :       return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
     611             :     }
     612             : 
     613             :     void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
     614             :                          SDValue OrigLoad, SDValue ExtLoad,
     615             :                          ISD::NodeType ExtType);
     616             :   };
     617             : 
     618             : /// This class is a DAGUpdateListener that removes any deleted
     619             : /// nodes from the owning DAGCombiner's worklist.
     620        4652 : class WorklistRemover : public SelectionDAG::DAGUpdateListener {
     621             :   DAGCombiner &DC; // Combiner whose worklist is updated on node deletion.
     622             : 
     623             : public:
     624             :   explicit WorklistRemover(DAGCombiner &dc) // Listens on dc's own SelectionDAG.
     625    89098941 :     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
     626             : 
     627       24661 :   void NodeDeleted(SDNode *N, SDNode *E) override { // E is not used by this listener.
     628       24661 :     DC.removeFromWorklist(N);
     629       24661 :   }
     630             : };
     631             : 
     632             : } // end anonymous namespace
     633             : 
     634             : //===----------------------------------------------------------------------===//
     635             : //  TargetLowering::DAGCombinerInfo implementation
     636             : //===----------------------------------------------------------------------===//
     637             : 
     638       12454 : void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
     639       12454 :   static_cast<DAGCombiner *>(DC)->AddToWorklist(N); // Forward to the DAGCombiner behind DC.
     640       12454 : }
     641             : 
     642         864 : SDValue TargetLowering::DAGCombinerInfo:: // Multi-result CombineTo: forwards the replacement values in To for N.
     643             : CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
     644         864 :   return static_cast<DAGCombiner *>(DC)->CombineTo(N, To.data(), To.size(), AddTo); // data() does no element access, unlike &To[0], so an empty To is not indexed here.
     645             : }
     646             : 
     647        1079 : SDValue TargetLowering::DAGCombinerInfo:: // Single-result CombineTo forwarder.
     648             : CombineTo(SDNode *N, SDValue Res, bool AddTo) {
     649        1079 :   return static_cast<DAGCombiner *>(DC)->CombineTo(N, Res, AddTo);
     650             : }
     651             : 
     652        5893 : SDValue TargetLowering::DAGCombinerInfo:: // Two-result CombineTo forwarder.
     653             : CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
     654        5893 :   return static_cast<DAGCombiner *>(DC)->CombineTo(N, Res0, Res1, AddTo);
     655             : }
     656             : 
     657        2445 : void TargetLowering::DAGCombinerInfo:: // Forwards TLO to the DAGCombiner behind DC.
     658             : CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
     659        2445 :   static_cast<DAGCombiner *>(DC)->CommitTargetLoweringOpt(TLO);
     660             : }
     661             : 
     662             : //===----------------------------------------------------------------------===//
     663             : // Helper Functions
     664             : //===----------------------------------------------------------------------===//
     665             : 
     666     1452273 : void DAGCombiner::deleteAndRecombine(SDNode *N) { // Delete N and queue operands that may now be dead or simplifiable.
     667     1452273 :   removeFromWorklist(N); // N must not be visited again once it is deleted below.
     668             : 
     669             :   // If the operands of this node are only used by the node, they will now be
     670             :   // dead. Make sure to re-visit them and recursively delete dead nodes.
     671     5512790 :   for (const SDValue &Op : N->ops())
     672             :     // For an operand generating multiple values, one of the values may
     673             :     // become dead allowing further simplification (e.g. split index
     674             :     // arithmetic from an indexed load).
     675     7900142 :     if (Op->hasOneUse() || Op->getNumValues() > 1)
     676     1096096 :       AddToWorklist(Op.getNode());
     677             : 
     678     1452273 :   DAG.DeleteNode(N); // Operands are queued above, before the node itself is destroyed.
     679     1452273 : }
     680             : 
     681             : /// Return 0 if the negated form is not available for free, 1 if we can compute
     682             : /// the negated form of the specified expression for the same cost as the
     683             : /// expression itself, or 2 if we can compute it more cheaply than the expression itself.
     684      214881 : static char isNegatibleForFree(SDValue Op, bool LegalOperations,
     685             :                                const TargetLowering &TLI,
     686             :                                const TargetOptions *Options,
     687             :                                unsigned Depth = 0) { // Depth is the current recursion depth (bounded below).
     688             :   // fneg is removable even if it has multiple uses.
     689      214881 :   if (Op.getOpcode() == ISD::FNEG) return 2;
     690             : 
     691             :   // Don't allow anything with multiple uses unless we know it is free.
     692      213930 :   EVT VT = Op.getValueType();
     693      213930 :   const SDNodeFlags Flags = Op->getFlags();
     694      213930 :   if (!Op.hasOneUse())
     695       59743 :     if (!(Op.getOpcode() == ISD::FP_EXTEND &&
     696         957 :           TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
     697       59418 :       return 0;
     698             : 
     699             :   // Don't recurse exponentially.
     700      154512 :   if (Depth > 6) return 0;
     701             : 
     702      151467 :   switch (Op.getOpcode()) {
     703             :   default: return 0; // Unhandled opcode: not negatible for free (0, matching the other failure paths).
     704        4578 :   case ISD::ConstantFP: {
     705        4578 :     if (!LegalOperations)
     706             :       return 1;
     707             : 
     708             :     // Don't invert constant FP values after legalization unless the target says
     709             :     // the negated constant is legal.
     710         209 :     return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
     711        1800 :       TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
     712             :   }
     713        8935 :   case ISD::FADD:
     714        8935 :     if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
     715             :       return 0;
     716             : 
     717             :     // After operation legalization, it might not be legal to create new FSUBs.
     718        1545 :     if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
     719           0 :       return 0;
     720             : 
     721             :     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
     722        3090 :     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
     723             :                                     Options, Depth + 1))
     724        1523 :       return V;
     725             :     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
     726        1523 :     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
     727        1523 :                               Depth + 1);
     728        2571 :   case ISD::FSUB:
     729             :     // We can't turn -(A-B) into B-A when we honor signed zeros.
     730        2571 :     if (!Options->NoSignedZerosFPMath &&
     731             :         !Flags.hasNoSignedZeros())
     732        1706 :       return 0;
     733             : 
     734             :     // fold (fneg (fsub A, B)) -> (fsub B, A)
     735             :     return 1;
     736             : 
     737       36030 :   case ISD::FMUL:
     738             :   case ISD::FDIV:
     739             :     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
     740       72060 :     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
     741             :                                     Options, Depth + 1))
     742       34963 :       return V;
     743             : 
     744       34963 :     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
     745       34963 :                               Depth + 1);
     746             : 
     747        2957 :   case ISD::FP_EXTEND:
     748             :   case ISD::FP_ROUND:
     749             :   case ISD::FSIN:
     750        5914 :     return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, // Result is that of the single value operand.
     751        2957 :                               Depth + 1);
     752             :   }
     753             : }
     754             : 
     755             : /// If isNegatibleForFree returns non-zero for \p Op, build and return the negated expression.
     756        1032 : static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
     757             :                                     bool LegalOperations, unsigned Depth = 0) {
     758        1032 :   const TargetOptions &Options = DAG.getTarget().Options;
     759             :   // fneg is removable even if it has multiple uses.
     760        1032 :   if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
     761             : 
     762             :   assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
     763             : 
     764         537 :   const SDNodeFlags Flags = Op.getNode()->getFlags();
     765             : 
     766         537 :   switch (Op.getOpcode()) {
     767           0 :   default: llvm_unreachable("Unknown code"); // Only the opcodes handled below are expected here.
     768             :   case ISD::ConstantFP: {
     769         211 :     APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
     770         211 :     V.changeSign();
     771         422 :     return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
     772             :   }
     773          19 :   case ISD::FADD:
     774             :     assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());
     775             : 
     776             :     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
     777          38 :     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
     778             :                            DAG.getTargetLoweringInfo(), &Options, Depth+1))
     779          19 :       return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
     780             :                          GetNegatedExpression(Op.getOperand(0), DAG,
     781             :                                               LegalOperations, Depth+1),
     782          38 :                          Op.getOperand(1), Flags);
     783             :     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
     784           0 :     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
     785             :                        GetNegatedExpression(Op.getOperand(1), DAG,
     786             :                                             LegalOperations, Depth+1),
     787           0 :                        Op.getOperand(0), Flags);
     788          21 :   case ISD::FSUB:
     789             :     // fold (fneg (fsub 0, B)) -> B
     790             :     if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
     791          10 :       if (N0CFP->isZero())
     792           5 :         return Op.getOperand(1);
     793             : 
     794             :     // fold (fneg (fsub A, B)) -> (fsub B, A)
     795          16 :     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
     796          16 :                        Op.getOperand(1), Op.getOperand(0), Flags);
     797             : 
     798         242 :   case ISD::FMUL:
     799             :   case ISD::FDIV:
     800             :     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
     801         484 :     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
     802             :                            DAG.getTargetLoweringInfo(), &Options, Depth+1))
     803         115 :       return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
     804             :                          GetNegatedExpression(Op.getOperand(0), DAG,
     805             :                                               LegalOperations, Depth+1),
     806         230 :                          Op.getOperand(1), Flags);
     807             : 
     808             :     // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
     809         127 :     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
     810             :                        Op.getOperand(0),
     811             :                        GetNegatedExpression(Op.getOperand(1), DAG,
     812         254 :                                             LegalOperations, Depth+1), Flags);
     813             : 
     814             :   case ISD::FP_EXTEND:
     815             :   case ISD::FSIN:
     816          20 :     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), // Rebuilt without passing Flags.
     817             :                        GetNegatedExpression(Op.getOperand(0), DAG,
     818          80 :                                             LegalOperations, Depth+1));
     819             :   case ISD::FP_ROUND:
     820          24 :       return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
     821             :                          GetNegatedExpression(Op.getOperand(0), DAG,
     822             :                                               LegalOperations, Depth+1),
     823          96 :                          Op.getOperand(1)); // Operand 1 of the FP_ROUND is carried over unchanged.
     824             :   }
     825             : }
     826             : 
     827             : // APInts must have the same bit width for most operations. This helper zero
     828             : // extends both operands to Offset + max(LHS width, RHS width) bits so that they
     829             : // match; a non-zero Offset lets callers create bitwidths that won't overflow.
     830        9664 : static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
     831        9664 :   unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
     832        9664 :   LHS = LHS.zextOrSelf(Bits); // Both operands are updated in place.
     833        9664 :   RHS = RHS.zextOrSelf(Bits);
     834        9664 : }
     835             : 
     836             : // Return true if this node is a setcc, or is a select_cc
     837             : // that selects between the target values used for true and false, making it
     838             : // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
     839             : // the appropriate nodes based on the type of node we are checking. This
     840             : // simplifies life a bit for the callers. LHS, RHS and CC are left untouched when false is returned.
     841           0 : bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
     842             :                                     SDValue &CC) const {
     843           0 :   if (N.getOpcode() == ISD::SETCC) {
     844           0 :     LHS = N.getOperand(0);
     845           0 :     RHS = N.getOperand(1);
     846           0 :     CC  = N.getOperand(2);
     847           0 :     return true;
     848             :   }
     849             : 
     850           0 :   if (N.getOpcode() != ISD::SELECT_CC ||
     851           0 :       !TLI.isConstTrueVal(N.getOperand(2).getNode()) || // Operand 2 must be the target's constant true value.
     852           0 :       !TLI.isConstFalseVal(N.getOperand(3).getNode())) // Operand 3 must be the target's constant false value.
     853           0 :     return false;
     854             : 
     855           0 :   if (TLI.getBooleanContents(N.getValueType()) ==
     856             :       TargetLowering::UndefinedBooleanContent)
     857           0 :     return false;
     858             : 
     859           0 :   LHS = N.getOperand(0);
     860           0 :   RHS = N.getOperand(1);
     861           0 :   CC  = N.getOperand(4); // For select_cc the condition code is operand 4, not 2.
     862           0 :   return true;
     863             : }
     864             : 
     865             : /// Return true if this is a SetCC-equivalent operation (per isSetCCEquivalent)
     866             : /// whose node has only one use. If this is true, it allows the users to invert
     867             : /// the operation for free when it is profitable to do so.
     868           0 : bool DAGCombiner::isOneUseSetCC(SDValue N) const {
     869           0 :   SDValue N0, N1, N2; // Outputs of isSetCCEquivalent; their values are not needed here.
     870           0 :   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
     871           0 :     return true;
     872             :   return false;
     873             : }
     874             : 
     875             : // Returns the SDNode if it is a constant float BuildVector
     876             : // or constant float; returns nullptr otherwise.
     877           0 : static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
     878             :   if (isa<ConstantFPSDNode>(N))
     879             :     return N.getNode();
     880      149586 :   if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
     881           0 :     return N.getNode();
     882             :   return nullptr;
     883             : }
     884             : 
     885             : // Determines if it is a constant integer or a build vector of constant
     886             : // integers (and undefs). When NoOpaques is true, opaque constants are rejected.
     887             : // Do not permit build vector implicit truncation.
     888     3407251 : static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
     889             :   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
     890     2892631 :     return !(Const->isOpaque() && NoOpaques);
     891      514620 :   if (N.getOpcode() != ISD::BUILD_VECTOR)
     892             :     return false;
     893      104759 :   unsigned BitWidth = N.getScalarValueSizeInBits();
     894      357702 :   for (const SDValue &Op : N->op_values()) {
     895      507308 :     if (Op.isUndef())
     896             :       continue;
     897             :     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
     898      252069 :     if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth || // Width mismatch means implicit truncation.
     899      251965 :         (Const->isOpaque() && NoOpaques))
     900             :       return false;
     901             :   }
     902             :   return true;
     903             : }
     904             : 
     905             : // Determines if it is a constant null integer or a splatted vector of a
     906             : // constant null integer (with no undefs), as found by isConstOrConstSplat.
     907             : // Build vector implicit truncation is not an issue for null values.
     908      831862 : static bool isNullConstantOrNullSplatConstant(SDValue N) {
     909             :   // TODO: may want to use peekThroughBitcast() here.
     910      831862 :   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
     911       27452 :     return Splat->isNullValue();
     912             :   return false;
     913             : }
     914             : 
     915             : // Determines if it is a constant integer of one or a splatted vector of a
     916             : // constant integer of one (with no undefs), as found by isConstOrConstSplat.
     917             : // Do not permit build vector implicit truncation.
     918        1093 : static bool isOneConstantOrOneSplatConstant(SDValue N) {
     919             :   // TODO: may want to use peekThroughBitcast() here.
     920        1093 :   unsigned BitWidth = N.getScalarValueSizeInBits();
     921        1093 :   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
     922        2150 :     return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth; // Width check rejects implicitly truncated elements.
     923             :   return false;
     924             : }
     925             : 
     926             : // Determines if it is a constant integer of all ones or a splatted vector of a
     927             : // constant integer of all ones (with no undefs), looking through bitcasts first.
     928             : // Do not permit build vector implicit truncation.
     929      618075 : static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
     930      618075 :   N = peekThroughBitcasts(N);
     931      618075 :   unsigned BitWidth = N.getScalarValueSizeInBits(); // Measured on the value found after peeking through bitcasts.
     932      618075 :   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
     933      155004 :     return Splat->isAllOnesValue() &&
     934             :            Splat->getAPIntValue().getBitWidth() == BitWidth;
     935             :   return false;
     936             : }
     937             : 
     938             : // Determines if a BUILD_VECTOR is composed of all-constants (either integer
     939             : // or floating-point constant nodes) possibly mixed with undef's.
     940       25434 : static bool isAnyConstantBuildVector(const SDNode *N) {
     941       48272 :   return ISD::isBuildVectorOfConstantSDNodes(N) ||
     942       22838 :          ISD::isBuildVectorOfConstantFPSDNodes(N);
     943             : }
     944             : 
     945     4018534 : SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
     946             :                                     SDValue N1, SDNodeFlags Flags) { // Reassociate nested Opc nodes around constants; returns an empty SDValue if nothing applies.
     947             :   // Don't reassociate reductions.
     948     4018534 :   if (Flags.hasVectorReduction())
     949         468 :     return SDValue();
     950             : 
     951     4018066 :   EVT VT = N0.getValueType();
     952     4018066 :   if (N0.getOpcode() == Opc && !N0->getFlags().hasVectorReduction()) {
     953     1023756 :     if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
     954      226574 :       if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
     955             :         // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
     956      223392 :         if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
     957      446726 :           return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
     958          29 :         return SDValue(); // Constants could not be folded; give up on this pattern.
     959             :       }
     960        3182 :       if (N0.hasOneUse()) {
     961             :         // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff (op x, c1) has
     962             :         // one use
     963        3301 :         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
     964        2047 :         if (!OpNode.getNode())
     965           0 :           return SDValue();
     966        2047 :         AddToWorklist(OpNode.getNode());
     967        4094 :         return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
     968             :       }
     969             :     }
     970             :   }
     971             : 
     972     3792627 :   if (N1.getOpcode() == Opc && !N1->getFlags().hasVectorReduction()) {
     973       14972 :     if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
     974        1645 :       if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
     975             :         // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
     976           0 :         if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
     977           0 :           return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
     978           0 :         return SDValue();
     979             :       }
     980        1645 :       if (N1.hasOneUse()) {
     981             :         // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff (op y, c1) has
     982             :         // one use
     983        1197 :         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
     984        1019 :         if (!OpNode.getNode())
     985           0 :           return SDValue();
     986        1019 :         AddToWorklist(OpNode.getNode());
     987        2038 :         return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
     988             :       }
     989             :     }
     990             :   }
     991             : 
     992     3791608 :   return SDValue();
     993             : }
     994             : 
     995      731867 : SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
     996             :                                bool AddTo) { // Replace all uses of N's NumTo results with To[0..NumTo); returns SDValue(N, 0).
     997             :   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
     998             :   ++NodesCombined;
     999             :   LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
    1000             :              To[0].getNode()->dump(&DAG);
    1001             :              dbgs() << " and " << NumTo - 1 << " other values\n");
    1002             :   for (unsigned i = 0, e = NumTo; i != e; ++i)
    1003             :     assert((!To[i].getNode() ||
    1004             :             N->getValueType(i) == To[i].getValueType()) &&
    1005             :            "Cannot combine value to value of different type!");
    1006             : 
    1007             :   WorklistRemover DeadNodes(*this); // Drops nodes deleted during the replacement from the worklist.
    1008      731867 :   DAG.ReplaceAllUsesWith(N, To);
    1009      731867 :   if (AddTo) {
    1010             :     // Push the new nodes and any users onto the worklist
    1011      859501 :     for (unsigned i = 0, e = NumTo; i != e; ++i) {
    1012      547753 :       if (To[i].getNode()) { // Entries with a null node are skipped.
    1013      547753 :         AddToWorklist(To[i].getNode());
    1014      547753 :         AddUsersToWorklist(To[i].getNode());
    1015             :       }
    1016             :     }
    1017             :   }
    1018             : 
    1019             :   // Finally, if the node is now dead, remove it from the graph.  The node
    1020             :   // may not be dead if the replacement process recursively simplified to
    1021             :   // something else needing this node.
    1022      731867 :   if (N->use_empty())
    1023      731825 :     deleteAndRecombine(N);
    1024     1463734 :   return SDValue(N, 0);
    1025             : }
    1026             : 
    1027      160144 : void DAGCombiner::
    1028             : CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
                         // Applies the replacement recorded in TLO: every use of TLO.Old is
                         // redirected to TLO.New, TLO.New and its users are queued on the worklist,
                         // and TLO.Old's node is handed to deleteAndRecombine if nothing uses it
                         // afterwards.
    1029             :   // Replace all uses.  If any nodes become isomorphic to other nodes and
    1030             :   // are deleted, make sure to remove them from our worklist.
    1031             :   WorklistRemover DeadNodes(*this);
    1032      160144 :   DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
    1033             : 
    1034             :   // Push the new node and any (possibly new) users onto the worklist.
    1035      160144 :   AddToWorklist(TLO.New.getNode());
    1036      160144 :   AddUsersToWorklist(TLO.New.getNode());
    1037             : 
    1038             :   // Finally, if the node is now dead, remove it from the graph.  The node
    1039             :   // may not be dead if the replacement process recursively simplified to
    1040             :   // something else needing this node.
    1041      160144 :   if (TLO.Old.getNode()->use_empty())
    1042      159243 :     deleteAndRecombine(TLO.Old.getNode());
    1043      160144 : }
    1044             : 
    1045             : /// Check the specified integer node value to see if it can be simplified or if
    1046             : /// things it uses can be simplified by bit propagation. If so, return true.
                       ///
                       /// The decision is delegated to TLI.SimplifyDemandedBits, which receives
                       /// \p Op and \p Demanded and records any replacement in a local
                       /// TargetLoweringOpt. When it reports a change, Op's node is re-queued,
                       /// NodesCombined is bumped and the recorded replacement is committed via
                       /// CommitTargetLoweringOpt. Returns false without touching the DAG here when
                       /// the target reports no simplification.
    1047     4887719 : bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
    1048     4887719 :   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
                         // Known is only an out-parameter for the target hook; it is not read here.
    1049     4887719 :   KnownBits Known;
    1050     4887719 :   if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
    1051             :     return false;
    1052             : 
    1053             :   // Revisit the node.
    1054      155126 :   AddToWorklist(Op.getNode());
    1055             : 
    1056             :   // Replace the old value with the new one.
    1057             :   ++NodesCombined;
    1058             :   LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
    1059             :              dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
    1060             :              dbgs() << '\n');
    1061             : 
    1062      155126 :   CommitTargetLoweringOpt(TLO);
    1063      155126 :   return true;
    1064             : }
    1065             : 
    1066             : /// Check the specified vector node value to see if it can be simplified or
    1067             : /// if things it uses can be simplified as it only uses some of the elements.
    1068             : /// If so, return true.
                       ///
                       /// The decision is delegated to TLI.SimplifyDemandedVectorElts, which
                       /// receives \p Op, \p Demanded and \p AssumeSingleUse and records any
                       /// replacement in a local TargetLoweringOpt. When it reports a change, Op's
                       /// node is re-queued, NodesCombined is bumped and the recorded replacement
                       /// is committed via CommitTargetLoweringOpt.
    1069      407069 : bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
    1070             :                                              bool AssumeSingleUse) {
    1071      407069 :   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
                         // KnownUndef/KnownZero are only out-parameters for the target hook; they
                         // are not read here.
    1072             :   APInt KnownUndef, KnownZero;
                         // NOTE(review): the literal 0 is presumably the initial recursion depth --
                         // confirm against the TargetLowering declaration.
    1073      407069 :   if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO,
    1074             :                                       0, AssumeSingleUse))
    1075             :     return false;
    1076             : 
    1077             :   // Revisit the node.
    1078        2573 :   AddToWorklist(Op.getNode());
    1079             : 
    1080             :   // Replace the old value with the new one.
    1081             :   ++NodesCombined;
                         // The debug label below is the same "Replacing.2" string that
                         // SimplifyDemandedBits prints.
    1082             :   LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
    1083             :              dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
    1084             :              dbgs() << '\n');
    1085             : 
    1086        2573 :   CommitTargetLoweringOpt(TLO);
    1087        2573 :   return true;
    1088             : }
    1089             : 
    1090         309 : void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
                         // Rewrites the users of Load in terms of ExtLoad: users of Load's value 0
                         // receive a TRUNCATE of ExtLoad's value 0 back to Load's original type, and
                         // users of Load's value 1 receive ExtLoad's value 1 (presumably the chain
                         // result -- confirm). Load is then handed to deleteAndRecombine and the
                         // new truncate is queued on the worklist.
    1091             :   SDLoc DL(Load);
    1092         309 :   EVT VT = Load->getValueType(0);
    1093         618 :   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
    1094             : 
    1095             :   LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
    1096             :              Trunc.getNode()->dump(&DAG); dbgs() << '\n');
    1097             :   WorklistRemover DeadNodes(*this);
    1098         618 :   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
    1099         618 :   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
    1100         309 :   deleteAndRecombine(Load);
    1101         309 :   AddToWorklist(Trunc.getNode());
    1102         309 : }
    1103             : 
    1104        8689 : SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
                         // Builds a PVT-typed version of Op and returns it, or an empty SDValue
                         // when the fallback ANY_EXTEND is not legal for PVT.
                         //
                         // Replace is an out-parameter: it is cleared on entry and set to true only
                         // when Op is an unindexed load and an extending load is returned in its
                         // place. This function does not rewrite the original load's users itself.
    1105        8689 :   Replace = false;
    1106             :   SDLoc DL(Op);
    1107             :   if (ISD::isUNINDEXEDLoad(Op.getNode())) {
    1108             :     LoadSDNode *LD = cast<LoadSDNode>(Op);
    1109         399 :     EVT MemVT = LD->getMemoryVT();
                           // A non-extending load becomes an EXTLOAD; a load that already extends
                           // keeps its extension kind. The memory type is unchanged.
    1110             :     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
    1111             :                                                       : LD->getExtensionType();
    1112         399 :     Replace = true;
    1113         399 :     return DAG.getExtLoad(ExtType, DL, PVT,
    1114             :                           LD->getChain(), LD->getBasePtr(),
    1115         798 :                           MemVT, LD->getMemOperand());
    1116             :   }
    1117             : 
    1118             :   unsigned Opc = Op.getOpcode();
    1119        8290 :   switch (Opc) {
    1120             :   default: break;
                         // AssertSext/AssertZext: re-emit the assertion at PVT on top of the
                         // sign-/zero-extended promoted input. If that helper returns an empty
                         // value, fall out of the switch to the ANY_EXTEND path below.
    1121           0 :   case ISD::AssertSext:
    1122           0 :     if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
    1123           0 :       return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
    1124           0 :     break;
    1125          36 :   case ISD::AssertZext:
    1126          36 :     if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
    1127          72 :       return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
    1128           0 :     break;
    1129        1703 :   case ISD::Constant: {
                           // Constants are sign-extended when their type is byte sized and
                           // zero-extended otherwise.
    1130             :     unsigned ExtOpc =
    1131        3406 :       Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    1132        3406 :     return DAG.getNode(ExtOpc, DL, PVT, Op);
    1133             :   }
    1134             :   }
    1135             : 
                         // Everything else is any-extended, provided the target supports it at PVT.
    1136        6551 :   if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    1137           0 :     return SDValue();
    1138       13102 :   return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
    1139             : }
    1140             : 
    1141           0 : SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
                         // Promotes Op to PVT via PromoteOperand and wraps the result in a
                         // SIGN_EXTEND_INREG from Op's original type. Returns an empty SDValue when
                         // SIGN_EXTEND_INREG is not legal for PVT or when PromoteOperand fails.
    1142           0 :   if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
    1143           0 :     return SDValue();
    1144           0 :   EVT OldVT = Op.getValueType();
    1145             :   SDLoc DL(Op);
    1146           0 :   bool Replace = false;
    1147           0 :   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
    1148           0 :   if (!NewOp.getNode())
    1149           0 :     return SDValue();
    1150           0 :   AddToWorklist(NewOp.getNode());
    1151             : 
                         // Replace is set by PromoteOperand when Op was a load that got an
                         // extending-load replacement; rewrite the old load's users in that case.
    1152           0 :   if (Replace)
    1153           0 :     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
    1154           0 :   return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
    1155           0 :                      DAG.getValueType(OldVT));
    1156             : }
    1157             : 
    1158         532 : SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
                         // Promotes Op to PVT via PromoteOperand and returns
                         // DAG.getZeroExtendInReg of the result with Op's original type. Returns an
                         // empty SDValue when PromoteOperand fails. Unlike SExtPromoteOperand there
                         // is no up-front legality check here.
    1159         532 :   EVT OldVT = Op.getValueType();
    1160             :   SDLoc DL(Op);
    1161         532 :   bool Replace = false;
    1162         532 :   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
    1163         532 :   if (!NewOp.getNode())
    1164           0 :     return SDValue();
    1165         532 :   AddToWorklist(NewOp.getNode());
    1166             : 
                         // Replace is set by PromoteOperand when Op was a load that got an
                         // extending-load replacement; rewrite the old load's users in that case.
    1167         532 :   if (Replace)
    1168          78 :     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
    1169         532 :   return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
    1170             : }
    1171             : 
    1172             : /// Promote the specified integer binary operation if the target indicates it is
    1173             : /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
    1174             : /// i32 since i16 instructions are longer.
                       ///
                       /// Returns an empty SDValue unless LegalOperations is set, \p Op is a scalar
                       /// integer whose type the target calls undesirable for this opcode, and the
                       /// target asks for promotion. On success Op is replaced (via CombineTo) by
                       /// TRUNCATE(Opc(PVT, promoted operands)) and \p Op itself is returned.
    1175     3975682 : SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
    1176     3975682 :   if (!LegalOperations)
    1177     2255948 :     return SDValue();
    1178             : 
    1179     1719734 :   EVT VT = Op.getValueType();
    1180     1719734 :   if (VT.isVector() || !VT.isInteger())
    1181      182037 :     return SDValue();
    1182             : 
    1183             :   // If operation type is 'undesirable', e.g. i16 on x86, consider
    1184             :   // promoting it.
    1185             :   unsigned Opc = Op.getOpcode();
    1186     1537697 :   if (TLI.isTypeDesirableForOp(Opc, VT))
    1187     1533219 :     return SDValue();
    1188             : 
    1189        4478 :   EVT PVT = VT;
    1190             :   // Consult target whether it is a good idea to promote this operation and
    1191             :   // what's the right type to promote it to.
    1192        4478 :   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    1193             :     assert(PVT != VT && "Don't know what type to promote to!");
    1194             : 
    1195             :     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
    1196             : 
                           // NOTE(review): PromoteOperand can return an empty SDValue (when
                           // ANY_EXTEND is not legal for PVT); NN0/NN1 are used below without a
                           // null check -- confirm that cannot happen on this path.
    1197        3519 :     bool Replace0 = false;
    1198        3519 :     SDValue N0 = Op.getOperand(0);
    1199        3519 :     SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
    1200             : 
    1201        3519 :     bool Replace1 = false;
    1202        3519 :     SDValue N1 = Op.getOperand(1);
    1203        3519 :     SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
    1204             :     SDLoc DL(Op);
    1205             : 
    1206             :     SDValue RV =
    1207        7038 :         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
    1208             : 
    1209             :     // We are always replacing N0/N1's use in N and only need
    1210             :     // additional replacements if there are additional uses.
                           // The (N0 != N1) term keeps a shared operand from being replaced twice.
    1211        3519 :     Replace0 &= !N0->hasOneUse();
    1212        3519 :     Replace1 &= (N0 != N1) && !N1->hasOneUse();
    1213             : 
    1214             :     // Combine Op here so it is preserved past replacements.
    1215        3519 :     CombineTo(Op.getNode(), RV);
    1216             : 
    1217             :     // If operands have a use ordering, make sure we deal with
    1218             :     // predecessor first.
                           // Both Replace flags are true inside this branch, so only the node
                           // pairs need swapping, not the flags.
    1219        3521 :     if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
    1220             :       std::swap(N0, N1);
    1221             :       std::swap(NN0, NN1);
    1222             :     }
    1223             : 
    1224        3519 :     if (Replace0) {
    1225          89 :       AddToWorklist(NN0.getNode());
    1226          89 :       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    1227             :     }
    1228        3519 :     if (Replace1) {
    1229          88 :       AddToWorklist(NN1.getNode());
    1230          88 :       ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
    1231             :     }
    1232        3519 :     return Op;
    1233             :   }
    1234         959 :   return SDValue();
    1235             : }
    1236             : 
    1237             : /// Promote the specified integer shift operation if the target indicates it is
    1238             : /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
    1239             : /// i32 since i16 instructions are longer.
                       ///
                       /// Returns an empty SDValue unless LegalOperations is set, \p Op is a scalar
                       /// integer whose type the target calls undesirable for this opcode, the
                       /// target asks for promotion, and the shifted operand can be promoted. On
                       /// success returns TRUNCATE(Opc(PVT, promoted operand 0, operand 1)); the
                       /// second operand is passed through unpromoted.
    1240      259809 : SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
    1241      259809 :   if (!LegalOperations)
    1242      111529 :     return SDValue();
    1243             : 
    1244      148280 :   EVT VT = Op.getValueType();
    1245      148280 :   if (VT.isVector() || !VT.isInteger())
    1246        2580 :     return SDValue();
    1247             : 
    1248             :   // If operation type is 'undesirable', e.g. i16 on x86, consider
    1249             :   // promoting it.
    1250             :   unsigned Opc = Op.getOpcode();
    1251      145700 :   if (TLI.isTypeDesirableForOp(Opc, VT))
    1252      140181 :     return SDValue();
    1253             : 
    1254        5519 :   EVT PVT = VT;
    1255             :   // Consult target whether it is a good idea to promote this operation and
    1256             :   // what's the right type to promote it to.
    1257        5519 :   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    1258             :     assert(PVT != VT && "Don't know what type to promote to!");
    1259             : 
    1260             :     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
    1261             : 
    1262        1615 :     bool Replace = false;
    1263        1615 :     SDValue N0 = Op.getOperand(0);
    1264        1615 :     SDValue N1 = Op.getOperand(1);
                           // SRA uses the sign-extending helper and SRL the zero-extending one;
                           // any other opcode goes through the generic PromoteOperand. Only that
                           // last path can set Replace here -- the two helpers handle their own
                           // load replacement internally.
    1265        1615 :     if (Opc == ISD::SRA)
    1266           0 :       N0 = SExtPromoteOperand(N0, PVT);
    1267        1615 :     else if (Opc == ISD::SRL)
    1268         496 :       N0 = ZExtPromoteOperand(N0, PVT);
    1269             :     else
    1270        1119 :       N0 = PromoteOperand(N0, PVT, Replace);
    1271             : 
    1272        1615 :     if (!N0.getNode())
    1273           0 :       return SDValue();
    1274             : 
    1275             :     SDLoc DL(Op);
    1276             :     SDValue RV =
    1277        3230 :         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
    1278             : 
    1279        1615 :     AddToWorklist(N0.getNode());
                           // N0 now names the promoted value, so the original operand is re-read
                           // from Op.
    1280        1615 :     if (Replace)
    1281          54 :       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
    1282             : 
    1283             :     // Deal with Op being deleted.
    1284        1615 :     if (Op && Op.getOpcode() != ISD::DELETED_NODE)
    1285        1615 :       return RV;
    1286             :   }
    1287        3904 :   return SDValue();
    1288             : }
    1289             : 
    1290      189609 : SDValue DAGCombiner::PromoteExtend(SDValue Op) {
                         // Returns an empty SDValue unless LegalOperations is set, Op is a scalar
                         // integer whose type the target calls undesirable for this opcode, and the
                         // target asks for promotion. In that case the node is rebuilt through
                         // DAG.getNode with the same opcode, the same result type VT and the same
                         // first operand.
    1291      189609 :   if (!LegalOperations)
    1292      117783 :     return SDValue();
    1293             : 
    1294       71826 :   EVT VT = Op.getValueType();
    1295       71826 :   if (VT.isVector() || !VT.isInteger())
    1296        1469 :     return SDValue();
    1297             : 
    1298             :   // If operation type is 'undesirable', e.g. i16 on x86, consider
    1299             :   // promoting it.
    1300             :   unsigned Opc = Op.getOpcode();
    1301       70357 :   if (TLI.isTypeDesirableForOp(Opc, VT))
    1302       70090 :     return SDValue();
    1303             : 
    1304         267 :   EVT PVT = VT;
    1305             :   // Consult target whether it is a good idea to promote this operation and
    1306             :   // what's the right type to promote it to.
    1307         267 :   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    1308             :     assert(PVT != VT && "Don't know what type to promote to!");
    1309             :     // fold (aext (aext x)) -> (aext x)
    1310             :     // fold (aext (zext x)) -> (zext x)
    1311             :     // fold (aext (sext x)) -> (sext x)
                           // NOTE(review): the node is rebuilt with VT, not PVT; PVT is only
                           // consulted by the assert above. Presumably the folds listed above are
                           // expected to happen inside DAG.getNode -- confirm.
    1312             :     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
    1313         506 :     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
    1314             :   }
    1315          14 :   return SDValue();
    1316             : }
    1317             : 
    1318     6143598 : bool DAGCombiner::PromoteLoad(SDValue Op) {
                         // Replaces an unindexed scalar-integer load of an undesirable type with an
                         // extending load at the target-chosen type PVT followed by a TRUNCATE back
                         // to the original type. Returns true when the load was replaced, false
                         // when any precondition fails or the target declines promotion.
    1319     6143598 :   if (!LegalOperations)
    1320             :     return false;
    1321             : 
    1322             :   if (!ISD::isUNINDEXEDLoad(Op.getNode()))
    1323             :     return false;
    1324             : 
    1325     2619814 :   EVT VT = Op.getValueType();
    1326     2619814 :   if (VT.isVector() || !VT.isInteger())
    1327             :     return false;
    1328             : 
    1329             :   // If operation type is 'undesirable', e.g. i16 on x86, consider
    1330             :   // promoting it.
    1331             :   unsigned Opc = Op.getOpcode();
    1332     2338745 :   if (TLI.isTypeDesirableForOp(Opc, VT))
    1333             :     return false;
    1334             : 
    1335        4662 :   EVT PVT = VT;
    1336             :   // Consult target whether it is a good idea to promote this operation and
    1337             :   // what's the right type to promote it to.
    1338        4662 :   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    1339             :     assert(PVT != VT && "Don't know what type to promote to!");
    1340             : 
    1341             :     SDLoc DL(Op);
    1342             :     SDNode *N = Op.getNode();
    1343             :     LoadSDNode *LD = cast<LoadSDNode>(N);
    1344           0 :     EVT MemVT = LD->getMemoryVT();
                           // A non-extending load becomes an EXTLOAD; a load that already extends
                           // keeps its extension kind. The memory type is unchanged.
    1345             :     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
    1346             :                                                       : LD->getExtensionType();
    1347           0 :     SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
    1348             :                                    LD->getChain(), LD->getBasePtr(),
    1349           0 :                                    MemVT, LD->getMemOperand());
    1350           0 :     SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
    1351             : 
    1352             :     LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
    1353             :                Result.getNode()->dump(&DAG); dbgs() << '\n');
    1354             :     WorklistRemover DeadNodes(*this);
                           // Value 0 users get the truncated result; value 1 users are moved to
                           // the new load's value 1.
    1355           0 :     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    1356           0 :     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    1357           0 :     deleteAndRecombine(N);
    1358           0 :     AddToWorklist(Result.getNode());
    1359             :     return true;
    1360             :   }
    1361             :   return false;
    1362             : }
    1363             : 
    1364             : /// Recursively delete a node which has no uses and any operands for
    1365             : /// which it is the only use.
    1366             : ///
    1367             : /// Note that this both deletes the nodes and removes them from the worklist.
    1368             : /// It also adds any node that has had a user deleted to the worklist, as it
    1369             : /// may now have only one use and be subject to other combines.
                       ///
                       /// Returns false, leaving everything untouched, when \p N still has uses;
                       /// returns true otherwise.
    1370    92486438 : bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
    1371    92486438 :   if (!N->use_empty())
    1372             :     return false;
    1373             : 
                         // Explicit stack of candidates; the set semantics keep a node from being
                         // queued more than once at a time.
    1374             :   SmallSetVector<SDNode *, 16> Nodes;
    1375     3949125 :   Nodes.insert(N);
    1376             :   do {
    1377    12725499 :     N = Nodes.pop_back_val();
                           // `continue` in a do-while jumps to the loop condition, so a null
                           // entry is simply skipped.
    1378    12725499 :     if (!N)
    1379             :       continue;
    1380             : 
    1381    12725499 :     if (N->use_empty()) {
                             // Queue the operands before deleting N: each of them is about to
                             // lose a user and may become dead in turn.
    1382    15971072 :       for (const SDValue &ChildN : N->op_values())
    1383     9588653 :         Nodes.insert(ChildN.getNode());
    1384             : 
    1385     6382419 :       removeFromWorklist(N);
    1386     6382419 :       DAG.DeleteNode(N);
    1387             :     } else {
                             // Still used elsewhere: keep it, but give the combiner another look.
    1388     6343080 :       AddToWorklist(N);
    1389             :     }
    1390    12725498 :   } while (!Nodes.empty());
    1391             :   return true;
    1392             : }
    1393             : 
    1394             : //===----------------------------------------------------------------------===//
    1395             : //  Main DAG Combiner implementation
    1396             : //===----------------------------------------------------------------------===//
    1397             : 
    1398     2767992 : void DAGCombiner::Run(CombineLevel AtLevel) {
                         // Drives the combiner at the given level: seeds the worklist with every
                         // node in the DAG, then repeatedly pops a node, deletes it if it is
                         // unused, re-legalizes it when running at AfterLegalizeDAG, calls
                         // combine() on it and replaces it with the result. Once the worklist is
                         // empty the DAG root is refreshed from the Dummy handle and dead nodes are
                         // removed.
    1399             :   // set the instance variables, so that the various visit routines may use it.
    1400     2767992 :   Level = AtLevel;
    1401     2767992 :   LegalOperations = Level >= AfterLegalizeVectorOps;
    1402     2767992 :   LegalTypes = Level >= AfterLegalizeTypes;
    1403             : 
    1404             :   // Add all the dag nodes to the worklist.
    1405    85262820 :   for (SDNode &Node : DAG.allnodes())
    1406    82494828 :     AddToWorklist(&Node);
    1407             : 
    1408             :   // Create a dummy node (which is not added to allnodes), that adds a reference
    1409             :   // to the root node, preventing it from being deleted, and tracking any
    1410             :   // changes of the root.
    1411     8303976 :   HandleSDNode Dummy(DAG.getRoot());
    1412             : 
    1413             :   // While the worklist isn't empty, find a node and try to combine it.
                         // The loop condition tests WorklistMap while nodes are popped from
                         // Worklist; the assert below checks that the two stay in step.
    1414    93347489 :   while (!WorklistMap.empty()) {
    1415             :     SDNode *N;
    1416             :     // The Worklist holds the SDNodes in order, but it may contain null entries.
    1417             :     do {
    1418    93035690 :       N = Worklist.pop_back_val();
    1419    93035690 :     } while (!N);
    1420             : 
    1421    90579497 :     bool GoodWorklistEntry = WorklistMap.erase(N);
                           // The void cast keeps the variable "used" when the assert is compiled out.
    1422             :     (void)GoodWorklistEntry;
    1423             :     assert(GoodWorklistEntry &&
    1424             :            "Found a worklist entry without a corresponding map entry!");
    1425             : 
    1426             :     // If N has no uses, it is dead.  Make sure to revisit all N's operands once
    1427             :     // N is deleted from the DAG, since they too may now be dead or may have a
    1428             :     // reduced number of uses, allowing other xforms.
    1429    90579498 :     if (recursivelyDeleteUnusedNodes(N))
    1430    88673233 :       continue;
    1431             : 
    1432             :     WorklistRemover DeadNodes(*this);
    1433             : 
    1434             :     // If this combine is running after legalizing the DAG, re-legalize any
    1435             :     // nodes pulled off the worklist.
    1436    88536543 :     if (Level == AfterLegalizeDAG) {
    1437             :       SmallSetVector<SDNode *, 16> UpdatedNodes;
    1438    39041762 :       bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
    1439             : 
                             // Every node reported in UpdatedNodes, and its users, gets revisited.
    1440    39066393 :       for (SDNode *LN : UpdatedNodes) {
    1441       24631 :         AddToWorklist(LN);
    1442             :         AddUsersToWorklist(LN);
    1443             :       }
                             // When LegalizeOp reports N as no longer valid, skip combining it.
    1444    39041762 :       if (!NIsValid)
    1445             :         continue;
    1446             :     }
    1447             : 
    1448             :     LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
    1449             : 
    1450             :     // Add any operands of the new node which have not yet been combined to the
    1451             :     // worklist as well. Because the worklist uniques things already, this
    1452             :     // won't repeatedly process the same operand.
    1453    88525903 :     CombinedNodes.insert(N);
    1454   250096475 :     for (const SDValue &ChildN : N->op_values())
    1455   161570572 :       if (!CombinedNodes.count(ChildN.getNode()))
    1456   149634654 :         AddToWorklist(ChildN.getNode());
    1457             : 
    1458    88525903 :     SDValue RV = combine(N);
    1459             : 
                           // An empty result means no combine applied to N.
    1460    88525902 :     if (!RV.getNode())
    1461             :       continue;
    1462             : 
    1463             :     ++NodesCombined;
    1464             : 
    1465             :     // If we get back the same node we passed in, rather than a new node or
    1466             :     // zero, we know that the node must have defined multiple values and
    1467             :     // CombineTo was used.  Since CombineTo takes care of the worklist
    1468             :     // mechanics for us, we have no work to do in this case.
    1469     3305870 :     if (RV.getNode() == N)
    1470             :       continue;
    1471             : 
    1472             :     assert(N->getOpcode() != ISD::DELETED_NODE &&
    1473             :            RV.getOpcode() != ISD::DELETED_NODE &&
    1474             :            "Node was deleted but visit returned new node!");
    1475             : 
    1476             :     LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
    1477             : 
                           // Same number of results: replace node for node. Otherwise N must be
                           // single-valued (asserted) and its one value is replaced by RV.
    1478     1906264 :     if (N->getNumValues() == RV.getNode()->getNumValues())
    1479     1718196 :       DAG.ReplaceAllUsesWith(N, RV.getNode());
    1480             :     else {
    1481             :       assert(N->getValueType(0) == RV.getValueType() &&
    1482             :              N->getNumValues() == 1 && "Type mismatch");
    1483      188068 :       DAG.ReplaceAllUsesWith(N, &RV);
    1484             :     }
    1485             : 
    1486             :     // Push the new node and any users onto the worklist
    1487     1906264 :     AddToWorklist(RV.getNode());
    1488     1906264 :     AddUsersToWorklist(RV.getNode());
    1489             : 
    1490             :     // Finally, if the node is now dead, remove it from the graph.  The node
    1491             :     // may not be dead if the replacement process recursively simplified to
    1492             :     // something else needing this node. This will also take care of adding any
    1493             :     // operands which have lost a user to the worklist.
    1494     1906264 :     recursivelyDeleteUnusedNodes(N);
    1495             :   }
    1496             : 
    1497             :   // If the root changed (e.g. it was a dead load), update the root.
    1498     2767992 :   DAG.setRoot(Dummy.getValue());
    1499     2767992 :   DAG.RemoveDeadNodes();
    1500     2767992 : }
    1501             : 
    1502    88526629 : SDValue DAGCombiner::visit(SDNode *N) {
    1503   177053258 :   switch (N->getOpcode()) {
    1504             :   default: break;
    1505     4725134 :   case ISD::TokenFactor:        return visitTokenFactor(N);
    1506      436984 :   case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
    1507     3332748 :   case ISD::ADD:                return visitADD(N);
    1508      532128 :   case ISD::SUB:                return visitSUB(N);
    1509         522 :   case ISD::ADDC:               return visitADDC(N);
    1510      130962 :   case ISD::UADDO:              return visitUADDO(N);
    1511          89 :   case ISD::SUBC:               return visitSUBC(N);
    1512        1913 :   case ISD::USUBO:              return visitUSUBO(N);
    1513        2544 :   case ISD::ADDE:               return visitADDE(N);
    1514      156807 :   case ISD::ADDCARRY:           return visitADDCARRY(N);
    1515          79 :   case ISD::SUBE:               return visitSUBE(N);
    1516         839 :   case ISD::SUBCARRY:           return visitSUBCARRY(N);
    1517       39729 :   case ISD::MUL:                return visitMUL(N);
    1518        5883 :   case ISD::SDIV:               return visitSDIV(N);
    1519        4531 :   case ISD::UDIV:               return visitUDIV(N);
    1520        4801 :   case ISD::SREM:
    1521        4801 :   case ISD::UREM:               return visitREM(N);
    1522        6357 :   case ISD::MULHU:              return visitMULHU(N);
    1523        1513 :   case ISD::MULHS:              return visitMULHS(N);
    1524         723 :   case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
    1525        5613 :   case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
    1526          99 :   case ISD::SMULO:              return visitSMULO(N);
    1527         228 :   case ISD::UMULO:              return visitUMULO(N);
    1528       25437 :   case ISD::SMIN:
    1529             :   case ISD::SMAX:
    1530             :   case ISD::UMIN:
    1531       25437 :   case ISD::UMAX:               return visitIMINMAX(N);
    1532      400002 :   case ISD::AND:                return visitAND(N);
    1533      146434 :   case ISD::OR:                 return visitOR(N);
    1534       87480 :   case ISD::XOR:                return visitXOR(N);
    1535      123493 :   case ISD::SHL:                return visitSHL(N);
    1536       30600 :   case ISD::SRA:                return visitSRA(N);
    1537      164810 :   case ISD::SRL:                return visitSRL(N);
    1538        2760 :   case ISD::ROTR:
    1539        2760 :   case ISD::ROTL:               return visitRotate(N);
    1540        1036 :   case ISD::ABS:                return visitABS(N);
    1541        1476 :   case ISD::BSWAP:              return visitBSWAP(N);
    1542         559 :   case ISD::BITREVERSE:         return visitBITREVERSE(N);
    1543        1221 :   case ISD::CTLZ:               return visitCTLZ(N);
    1544        1564 :   case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
    1545         500 :   case ISD::CTTZ:               return visitCTTZ(N);
    1546         652 :   case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
    1547        2141 :   case ISD::CTPOP:              return visitCTPOP(N);
    1548       32115 :   case ISD::SELECT:             return visitSELECT(N);
    1549       39265 :   case ISD::VSELECT:            return visitVSELECT(N);
    1550       16226 :   case ISD::SELECT_CC:          return visitSELECT_CC(N);
    1551      298267 :   case ISD::SETCC:              return visitSETCC(N);
    1552         289 :   case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
    1553       51003 :   case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
    1554      142725 :   case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
    1555       88162 :   case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
    1556      187971 :   case ISD::AssertSext:
    1557      187971 :   case ISD::AssertZext:         return visitAssertExt(N);
    1558       46974 :   case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
    1559        3049 :   case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
    1560        5864 :   case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
    1561      858614 :   case ISD::TRUNCATE:           return visitTRUNCATE(N);
    1562      821241 :   case ISD::BITCAST:            return visitBITCAST(N);
    1563             :   case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
    1564       43573 :   case ISD::FADD:               return visitFADD(N);
    1565       12783 :   case ISD::FSUB:               return visitFSUB(N);
    1566       27386 :   case ISD::FMUL:               return visitFMUL(N);
    1567        9771 :   case ISD::FMA:                return visitFMA(N);
    1568       10233 :   case ISD::FDIV:               return visitFDIV(N);
    1569         283 :   case ISD::FREM:               return visitFREM(N);
    1570        2134 :   case ISD::FSQRT:              return visitFSQRT(N);
    1571        1440 :   case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
    1572         250 :   case ISD::FPOW:               return visitFPOW(N);
    1573       21933 :   case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
    1574       12560 :   case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
    1575        8957 :   case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
    1576        6297 :   case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
    1577        4291 :   case ISD::FP_ROUND:           return visitFP_ROUND(N);
    1578           0 :   case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
    1579        9165 :   case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
    1580        6789 :   case ISD::FNEG:               return visitFNEG(N);
    1581        5148 :   case ISD::FABS:               return visitFABS(N);
    1582        1642 :   case ISD::FFLOOR:             return visitFFLOOR(N);
    1583        3382 :   case ISD::FMINNUM:            return visitFMINNUM(N);
    1584        3298 :   case ISD::FMAXNUM:            return visitFMAXNUM(N);
    1585        4301 :   case ISD::FCEIL:              return visitFCEIL(N);
    1586        1548 :   case ISD::FTRUNC:             return visitFTRUNC(N);
    1587      258065 :   case ISD::BRCOND:             return visitBRCOND(N);
    1588        6356 :   case ISD::BR_CC:              return visitBR_CC(N);
    1589     6474249 :   case ISD::LOAD:               return visitLOAD(N);
    1590     8005537 :   case ISD::STORE:              return visitSTORE(N);
    1591       52048 :   case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
    1592      416312 :   case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
    1593      698113 :   case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
    1594       30650 :   case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
    1595       74221 :   case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
    1596       70837 :   case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
    1597       23174 :   case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
    1598       19322 :   case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
    1599         994 :   case ISD::MGATHER:            return visitMGATHER(N);
    1600        1291 :   case ISD::MLOAD:              return visitMLOAD(N);
    1601         287 :   case ISD::MSCATTER:           return visitMSCATTER(N);
    1602         761 :   case ISD::MSTORE:             return visitMSTORE(N);
    1603             :   case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
    1604        6740 :   case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
    1605             :   }
    1606    59162749 :   return SDValue();
    1607             : }
    1608             : 
    1609    88526629 : SDValue DAGCombiner::combine(SDNode *N) { // Try to simplify N: generic visit, then target combine, then promotion, then CSE against the commuted form.
    1610    88526629 :   SDValue RV = visit(N);
    1611             : 
    1612             :   // If the generic visit produced no replacement, try a target-specific DAG combine.
    1613    88526629 :   if (!RV.getNode()) {
    1614             :     assert(N->getOpcode() != ISD::DELETED_NODE &&
    1615             :            "Node was deleted but visit returned NULL!");
    1616             : 
    1617   171056554 :     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
    1618    80949613 :         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
    1619             : 
    1620             :       // Expose the DAG combiner to the target combiner implementations.
    1621             :       TargetLowering::DAGCombinerInfo
    1622    24931877 :         DagCombineInfo(DAG, Level, false, this);
    1623             : 
    1624    24931877 :       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
    1625             :     }
    1626             :   }
    1627             : 
    1628             :   // If there is still no replacement, try promoting the operation.
    1629    88526629 :   if (!RV.getNode()) {
    1630   170452332 :     switch (N->getOpcode()) {
    1631             :     default: break;
    1632             :     case ISD::ADD:
    1633             :     case ISD::SUB:
    1634             :     case ISD::MUL:
    1635             :     case ISD::AND:
    1636             :     case ISD::OR:
    1637             :     case ISD::XOR:
    1638     3975682 :       RV = PromoteIntBinOp(SDValue(N, 0));
    1639     3975682 :       break;
    1640             :     case ISD::SHL:
    1641             :     case ISD::SRA:
    1642             :     case ISD::SRL:
    1643      259809 :       RV = PromoteIntShiftOp(SDValue(N, 0));
    1644      259809 :       break;
    1645             :     case ISD::SIGN_EXTEND:
    1646             :     case ISD::ZERO_EXTEND:
    1647             :     case ISD::ANY_EXTEND:
    1648      189609 :       RV = PromoteExtend(SDValue(N, 0));
    1649      189609 :       break;
    1650             :     case ISD::LOAD:
    1651     6143598 :       if (PromoteLoad(SDValue(N, 0)))
    1652           0 :         RV = SDValue(N, 0);
    1653             :       break;
    1654             :     }
    1655             :   }
    1656             : 
    1657             :   // If N is a commutative binary node, try to eliminate it if the commuted
    1658             :   // version is already present in the DAG.
    1659    88526629 :   if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
    1660     3695149 :       N->getNumValues() == 1) {
    1661     3556035 :     SDValue N0 = N->getOperand(0);
    1662     3556035 :     SDValue N1 = N->getOperand(1);
    1663             : 
    1664             :     // Constant operands are canonicalized to RHS, so skip the commuted lookup when only N1 is a ConstantSDNode (or when both operands are the same value).
    1665             :     if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
    1666      635593 :       SDValue Ops[] = {N1, N0};
    1667     2542372 :       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
    1668             :                                             N->getFlags());
    1669      635593 :       if (CSENode)
    1670          20 :         return SDValue(CSENode, 0);
    1671             :     }
    1672             :   }
    1673             : 
    1674    88526609 :   return RV;
    1675             : }
    1676             : 
    1677             : /// Given a node, return its input chain (an operand whose value type is
    1678             : /// MVT::Other) if it has one; otherwise return a null SDValue.
    1679     7171608 : static SDValue getInputChainForNode(SDNode *N) {
    1680    14343216 :   if (unsigned NumOps = N->getNumOperands()) {
    1681     7150080 :     if (N->getOperand(0).getValueType() == MVT::Other) // Check the first operand first,
    1682     7100244 :       return N->getOperand(0);
    1683       99672 :     if (N->getOperand(NumOps-1).getValueType() == MVT::Other) // then the last operand,
    1684       49836 :       return N->getOperand(NumOps-1);
    1685           0 :     for (unsigned i = 1; i < NumOps-1; ++i) // and finally the operands in between.
    1686           0 :       if (N->getOperand(i).getValueType() == MVT::Other)
    1687           0 :         return N->getOperand(i);
    1688             :   }
    1689       21528 :   return SDValue();
    1690             : }
    1691             : 
    1692     4725134 : SDValue DAGCombiner::visitTokenFactor(SDNode *N) { // Simplify a TokenFactor by dropping redundant, duplicate, and already-chained operands.
    1693             :   // If N has two operands, where one has an input chain equal to the other,
    1694             :   // the 'other' chain is redundant.
    1695     4725134 :   if (N->getNumOperands() == 2) {
    1696     3593192 :     if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
    1697       14776 :       return N->getOperand(0);
    1698     3578416 :     if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
    1699       87635 :       return N->getOperand(1);
    1700             :   }
    1701             : 
    1702             :   // Don't simplify token factors if optnone.
    1703     4622723 :   if (OptLevel == CodeGenOpt::None)
    1704     2513835 :     return SDValue();
    1705             : 
    1706             :   SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
    1707             :   SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
    1708             :   SmallPtrSet<SDNode*, 16> SeenOps;
    1709     2108888 :   bool Changed = false;             // If we should replace this token factor.
    1710             : 
    1711             :   // Start out with this token factor.
    1712     2108888 :   TFs.push_back(N);
    1713             : 
    1714             :   // Iterate through token factors.  The TFs list grows when new token factors
    1715             :   // are encountered.
    1716     4950900 :   for (unsigned i = 0; i < TFs.size(); ++i) {
    1717     2842012 :     SDNode *TF = TFs[i];
    1718             : 
    1719             :     // Check each of the operands.
    1720    11192010 :     for (const SDValue &Op : TF->op_values()) {
    1721    16699996 :       switch (Op.getOpcode()) {
    1722       32291 :       case ISD::EntryToken:
    1723             :         // Entry tokens don't need to be added to the list. They are
    1724             :         // redundant.
    1725       32291 :         Changed = true;
    1726       32291 :         break;
    1727             : 
    1728     1288988 :       case ISD::TokenFactor:
    1729     1288988 :         if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
    1730             :           // Queue up for processing.
    1731      733124 :           TFs.push_back(Op.getNode());
    1732             :           // Clean up in case the token factor is removed.
    1733      733124 :           AddToWorklist(Op.getNode());
    1734      733124 :           Changed = true;
    1735      733124 :           break;
    1736             :         }
    1737             :         LLVM_FALLTHROUGH;
    1738             : 
    1739             :       default:
    1740             :         // Only add if it isn't already in the list.
    1741     7584583 :         if (SeenOps.insert(Op.getNode()).second)
    1742     7460977 :           Ops.push_back(Op);
    1743             :         else
    1744      123606 :           Changed = true;
    1745             :         break;
    1746             :       }
    1747             :     }
    1748             :   }
    1749             : 
    1750             :   // Remove nodes that are chained to another node in the list. Do so
    1751             :   // by walking up chains breadth-first, stopping when we've seen
    1752             :   // another operand. In general we must climb to the EntryNode, but we can exit
    1753             :   // early if we find all remaining work is associated with just one operand as
    1754             :   // no further pruning is possible.
    1755             : 
    1756             :   // List of nodes to search through and original Ops from which they originate.
    1757             :   SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
    1758             :   SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
    1759             :   SmallPtrSet<SDNode *, 16> SeenChains;
    1760     2108888 :   bool DidPruneOps = false;
    1761             : 
    1762     2108888 :   unsigned NumLeftToConsider = 0;
    1763     9569865 :   for (const SDValue &Op : Ops) {
    1764     7460977 :     Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
    1765     7460977 :     OpWorkCount.push_back(1);
    1766             :   }
    1767             : 
    1768             :   auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) { // NOTE: this local lambda shadows the one-argument AddToWorklist called earlier in this function.
    1769             :     // If this is an Op, we can remove the op from the list. Re-mark any
    1770             :     // search associated with it as from the current OpNumber.
    1771             :     if (SeenOps.count(Op) != 0) {
    1772             :       Changed = true;
    1773             :       DidPruneOps = true;
    1774             :       unsigned OrigOpNumber = 0;
    1775             :       while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
    1776             :         OrigOpNumber++;
    1777             :       assert((OrigOpNumber != Ops.size()) &&
    1778             :              "expected to find TokenFactor Operand");
    1779             :       // Re-mark the not-yet-visited worklist entries from OrigOpNumber to OpNumber.
    1780             :       for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
    1781             :         if (Worklist[i].second == OrigOpNumber) {
    1782             :           Worklist[i].second = OpNumber;
    1783             :         }
    1784             :       }
    1785             :       OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
    1786             :       OpWorkCount[OrigOpNumber] = 0;
    1787             :       NumLeftToConsider--;
    1788             :     }
    1789             :     // Add if it's a new chain.
    1790             :     if (SeenChains.insert(Op).second) {
    1791             :       OpWorkCount[OpNumber]++;
    1792             :       Worklist.push_back(std::make_pair(Op, OpNumber));
    1793             :     }
    1794     2108888 :   };
    1795             : 
    1796    14624216 :   for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) { // The search is capped at 1024 worklist entries.
    1797             :     // We need to be considering at least 2 Ops to prune.
    1798    14153618 :     if (NumLeftToConsider <= 1)
    1799             :       break;
    1800    12515328 :     auto CurNode = Worklist[i].first;
    1801    12515328 :     auto CurOpNumber = Worklist[i].second;
    1802             :     assert((OpWorkCount[CurOpNumber] > 0) &&
    1803             :            "Node should not appear in worklist");
    1804    25030656 :     switch (CurNode->getOpcode()) {
    1805      489576 :     case ISD::EntryToken:
    1806             :       // Hitting EntryToken is the only way for the search to terminate without
    1807             :       // hitting another operand's search.
    1808             :       // Prevent us from marking this operand as considered: the increment here
    1809             :       // offsets the decrement performed after the switch when its work count hits zero.
    1810      489576 :       NumLeftToConsider++;
    1811      489576 :       break;
    1812             :     case ISD::TokenFactor:
    1813     4264940 :       for (const SDValue &Op : CurNode->op_values())
    1814     3051420 :         AddToWorklist(i, Op.getNode(), CurOpNumber);
    1815             :       break;
    1816      978552 :     case ISD::CopyFromReg:
    1817             :     case ISD::CopyToReg:
    1818      978552 :       AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
    1819      978552 :       break;
    1820             :     default:
    1821             :       if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
    1822     9063493 :         AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
    1823             :       break;
    1824             :     }
    1825    12515328 :     OpWorkCount[CurOpNumber]--;
    1826    12515328 :     if (OpWorkCount[CurOpNumber] == 0)
    1827     5305132 :       NumLeftToConsider--;
    1828             :   }
    1829             : 
    1830             :   // If we've changed things around then replace token factor.
    1831     2108888 :   if (Changed) {
    1832             :     SDValue Result;
    1833      578983 :     if (Ops.empty()) {
    1834             :       // The entry token is the only possible outcome.
    1835         326 :       Result = DAG.getEntryNode();
    1836             :     } else {
    1837      578657 :       if (DidPruneOps) {
    1838             :         SmallVector<SDValue, 8> PrunedOps;
    1839             :         // Keep only the operands that were not reached while walking up another operand's chain.
    1840     1811057 :         for (const SDValue &Op : Ops) {
    1841     1473956 :           if (SeenChains.count(Op.getNode()) == 0)
    1842      914705 :             PrunedOps.push_back(Op);
    1843             :         }
    1844     1287894 :         Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
    1845             :       } else {
    1846      798626 :         Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
    1847             :       }
    1848             :     }
    1849      578983 :     return Result;
    1850             :   }
    1851     1529905 :   return SDValue();
    1852             : }
    1853             : 
    1854             : /// MERGE_VALUES can always be eliminated: replace all uses of N with N's own operands, then delete N.
    1855      436984 : SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
    1856             :   WorklistRemover DeadNodes(*this);
    1857             :   // Replacing results may cause a different MERGE_VALUES to suddenly
    1858             :   // be CSE'd with N, and carry its uses with it. Iterate until no
    1859             :   // uses remain, to ensure that the node can be safely deleted.
    1860             :   // First add the users of this node to the work list so that they
    1861             :   // can be tried again once they have new operands.
    1862             :   AddUsersToWorklist(N);
    1863             :   do {
    1864             :     // Do as a single replacement to avoid rewalking use lists.
    1865             :     SmallVector<SDValue, 8> Ops;
    1866     1316835 :     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
    1867     1759702 :       Ops.push_back(N->getOperand(i));
    1868      436984 :     DAG.ReplaceAllUsesWith(N, Ops.data());
    1869      436984 :   } while (!N->use_empty());
    1870      436984 :   deleteAndRecombine(N);
    1871      873968 :   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    1872             : }
    1873             : 
    1874             : /// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
    1875             : /// ConstantSDNode pointer; otherwise return nullptr.
    1876           0 : static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
    1877             :   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
    1878      112653 :   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
    1879             : }
    1880             : 
    1881           0 : SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { // Fold a binop of a one-use select-of-constants into a select of the folded arms; returns a null SDValue when the pattern does not match.
    1882             :   assert(ISD::isBinaryOp(BO) && "Unexpected binary operator");
    1883             : 
    1884             :   // Don't do this unless the old select is going away. We want to eliminate the
    1885             :   // binary operator, not replace a binop with a select.
    1886             :   // TODO: Handle ISD::SELECT_CC.
    1887             :   unsigned SelOpNo = 0;
    1888           0 :   SDValue Sel = BO->getOperand(0);
    1889           0 :   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
    1890             :     SelOpNo = 1;
    1891           0 :     Sel = BO->getOperand(1);
    1892             :   }
    1893             : 
    1894           0 :   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
    1895           0 :     return SDValue();
    1896             : 
    1897           0 :   SDValue CT = Sel.getOperand(1);
    1898           0 :   if (!isConstantOrConstantVector(CT, true) &&
    1899             :       !isConstantFPBuildVectorOrConstantFP(CT))
    1900           0 :     return SDValue();
    1901             : 
    1902           0 :   SDValue CF = Sel.getOperand(2);
    1903           0 :   if (!isConstantOrConstantVector(CF, true) &&
    1904             :       !isConstantFPBuildVectorOrConstantFP(CF))
    1905           0 :     return SDValue();
    1906             : 
    1907             :   // Bail out if any constants are opaque because we can't constant fold those.
    1908             :   // The exception is "and" and "or" with either 0 or -1, in which case we can
    1909             :   // propagate non-constant operands into the select. I.e.:
    1910             :   // and (select Cond, 0, -1), X --> select Cond, 0, X
    1911             :   // or X, (select Cond, -1, 0) --> select Cond, -1, X
    1912           0 :   auto BinOpcode = BO->getOpcode();
    1913           0 :   bool CanFoldNonConst = (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
    1914           0 :                          (isNullConstantOrNullSplatConstant(CT) ||
    1915           0 :                           isAllOnesConstantOrAllOnesSplatConstant(CT)) &&
    1916           0 :                          (isNullConstantOrNullSplatConstant(CF) ||
    1917           0 :                           isAllOnesConstantOrAllOnesSplatConstant(CF));
    1918             : 
    1919           0 :   SDValue CBO = BO->getOperand(SelOpNo ^ 1);
    1920           0 :   if (!CanFoldNonConst &&
    1921           0 :       !isConstantOrConstantVector(CBO, true) &&
    1922             :       !isConstantFPBuildVectorOrConstantFP(CBO))
    1923           0 :     return SDValue();
    1924             : 
    1925           0 :   EVT VT = Sel.getValueType();
    1926             : 
    1927             :   // For a shift, the value and the shift amount may have different VTs. For instance,
    1928             :   // on x86 the shift amount is i8 regardless of the LHS type. Bail out if we have
    1929             :   // swapped operands and value types do not match. NB: x86 is fine if operands
    1930             :   // are not swapped with shift amount VT being not bigger than shifted value.
    1931             :   // TODO: it is possible to check for a shift operation, correct the VTs, and
    1932             :   // still perform the optimization on x86 if needed.
    1933           0 :   if (SelOpNo && VT != CBO.getValueType())
    1934           0 :     return SDValue();
    1935             : 
    1936             :   // We have a select-of-constants followed by a binary operator with a
    1937             :   // constant. Eliminate the binop by pulling the constant math into the select.
    1938             :   // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
    1939           0 :   SDLoc DL(Sel);
    1940           0 :   SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
    1941           0 :                           : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
    1942           0 :   if (!CanFoldNonConst && !NewCT.isUndef() &&
    1943           0 :       !isConstantOrConstantVector(NewCT, true) &&
    1944             :       !isConstantFPBuildVectorOrConstantFP(NewCT))
    1945           0 :     return SDValue();
    1946             : 
    1947           0 :   SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
    1948           0 :                           : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
    1949           0 :   if (!CanFoldNonConst && !NewCF.isUndef() &&
    1950           0 :       !isConstantOrConstantVector(NewCF, true) &&
    1951             :       !isConstantFPBuildVectorOrConstantFP(NewCF))
    1952           0 :     return SDValue();
    1953             : 
    1954           0 :   return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
    1955             : }
    1956             : 
    1957     3626163 : static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) { // Fold add/sub of a constant and a zero-extended "(X & 1) == 0" into sub/add of the low bit with an adjusted constant.
    1958             :   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
    1959             :          "Expecting add or sub");
    1960             : 
    1961             :   // Match a constant operand and a zext operand for the math instruction:
    1962             :   // add Z, C
    1963             :   // sub C, Z
    1964     3626163 :   bool IsAdd = N->getOpcode() == ISD::ADD;
    1965     3626163 :   SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
    1966     3626163 :   SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
    1967             :   auto *CN = dyn_cast<ConstantSDNode>(C);
    1968     2641274 :   if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
    1969     3625337 :     return SDValue();
    1970             : 
    1971             :   // Match the zext operand as a setcc that produces an i1 boolean.
    1972        1652 :   if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
    1973         143 :       Z.getOperand(0).getValueType() != MVT::i1)
    1974         702 :     return SDValue();
    1975             : 
    1976             :   // Match the compare as: setcc (X & 1), 0, eq.
    1977             :   SDValue SetCC = Z.getOperand(0);
    1978         124 :   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
    1979          37 :   if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
    1980         175 :       SetCC.getOperand(0).getOpcode() != ISD::AND ||
    1981          23 :       !isOneConstant(SetCC.getOperand(0).getOperand(1)))
    1982         102 :     return SDValue();
    1983             : 
    1984             :   // We are adding/subtracting a constant and an inverted low bit. Turn that
    1985             :   // into a subtract/add of the low bit with incremented/decremented constant:
    1986             :   // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
    1987             :   // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
    1988          22 :   EVT VT = C.getValueType();
    1989             :   SDLoc DL(N);
    1990          22 :   SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT); // The (X & 1) value, resized to the constant's type.
    1991          61 :   SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
    1992          62 :                        DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
    1993          31 :   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
    1994             : }
    1995             : 
    1996             : /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
    1997             : /// a shift and add with a different constant. Returns a null SDValue if the pattern does not match.
    1998     3626141 : static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
    1999             :   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
    2000             :          "Expecting add or sub");
    2001             : 
    2002             :   // We need a constant operand for the add/sub, and the other operand is a
    2003             :   // logical shift right: add (srl), C or sub C, (srl).
    2004     3626141 :   bool IsAdd = N->getOpcode() == ISD::ADD;
    2005     3626141 :   SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
    2006     3626141 :   SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
    2007     3626141 :   ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
    2008     3626141 :   if (!C || ShiftOp.getOpcode() != ISD::SRL)
    2009     3624375 :     return SDValue();
    2010             : 
    2011             :   // The shift must be of a 'not' value that has no other users.
    2012        1766 :   SDValue Not = ShiftOp.getOperand(0);
    2013        1766 :   if (!Not.hasOneUse() || !isBitwiseNot(Not))
    2014        1747 :     return SDValue();
    2015             : 
    2016             :   // The shift must be moving the sign bit to the least-significant bit.
    2017          19 :   EVT VT = ShiftOp.getValueType();
    2018          19 :   SDValue ShAmt = ShiftOp.getOperand(1);
    2019          19 :   ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
    2020          38 :   if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1)
    2021           0 :     return SDValue();
    2022             : 
    2023             :   // Eliminate the 'not' by adjusting the shift and add/sub constant (31 below stands for the scalar bit width minus one):
    2024             :   // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
    2025             :   // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
    2026             :   SDLoc DL(N);
    2027          19 :   auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
    2028          19 :   SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
    2029          48 :   APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
    2030          19 :   return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
    2031             : }
    2032             : 
    2033     3332748 : SDValue DAGCombiner::visitADD(SDNode *N) { // Tries the ADD folds below in order; returns an empty SDValue if none applies.
    2034     3332748 :   SDValue N0 = N->getOperand(0);
    2035     3332748 :   SDValue N1 = N->getOperand(1);
    2036     3332748 :   EVT VT = N0.getValueType();
    2037             :   SDLoc DL(N);
    2038             : 
    2039             :   // fold vector ops
    2040     3332748 :   if (VT.isVector()) {
    2041      141605 :     if (SDValue FoldedVOp = SimplifyVBinOp(N))
    2042          17 :       return FoldedVOp;
    2043             : 
    2044             :     // fold (add x, 0) -> x, vector edition (an all-zeros build vector on either side)
    2045      141588 :     if (ISD::isBuildVectorAllZeros(N1.getNode()))
    2046         137 :       return N0;
    2047      141451 :     if (ISD::isBuildVectorAllZeros(N0.getNode()))
    2048         158 :       return N1;
    2049             :   }
    2050             : 
    2051             :   // fold (add x, undef) -> undef, with undef as either operand
    2052     3332436 :   if (N0.isUndef())
    2053           3 :     return N0;
    2054             : 
    2055     3332433 :   if (N1.isUndef())
    2056           9 :     return N1;
    2057             : 
    2058     3332424 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    2059             :     // canonicalize constant to RHS
    2060        2098 :     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
    2061        2926 :       return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
    2062             :     // fold (add c1, c2) -> c1+c2 (both operands are constant here)
    2063         635 :     return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
    2064         635 :                                       N1.getNode());
    2065             :   }
    2066             : 
    2067             :   // fold (add x, 0) -> x
    2068     3330326 :   if (isNullConstant(N1))
    2069         327 :     return N0;
    2070             : 
    2071     3329999 :   if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) { // folds that need a non-opaque constant RHS
    2072             :     // fold ((c1-A)+c2) -> (c1+c2)-A
    2073     2956591 :     if (N0.getOpcode() == ISD::SUB &&
    2074         644 :         isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
    2075             :       // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
    2076          31 :       return DAG.getNode(ISD::SUB, DL, VT,
    2077             :                          DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
    2078          31 :                          N0.getOperand(1));
    2079             :     }
    2080             : 
    2081             :     // add (sext i1 X), 1 -> zext (not i1 X)
    2082             :     // We don't transform this pattern:
    2083             :     //   add (zext i1 X), -1 -> sext (not i1 X)
    2084             :     // because most (?) targets generate better code for the zext form.
    2085     2957988 :     if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
    2086        1002 :         isOneConstantOrOneSplatConstant(N1)) {
    2087          41 :       SDValue X = N0.getOperand(0);
    2088          41 :       if ((!LegalOperations || // once LegalOperations is set, XOR and ZERO_EXTEND must be legal
    2089           3 :            (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
    2090          81 :             TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
    2091          40 :           X.getScalarValueSizeInBits() == 1) { // X's scalar type must be 1 bit wide
    2092          34 :         SDValue Not = DAG.getNOT(DL, X, X.getValueType());
    2093          34 :         return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
    2094             :       }
    2095             :     }
    2096             : 
    2097             :     // Undo the add -> or combine to merge constant offsets from a frame index.
    2098             :     if (N0.getOpcode() == ISD::OR &&
    2099             :         isa<FrameIndexSDNode>(N0.getOperand(0)) &&
    2100     2958723 :         isa<ConstantSDNode>(N0.getOperand(1)) &&
    2101        5648 :         DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
    2102        5648 :       SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1)); // sum of the two constants
    2103        5648 :       return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
    2104             :     }
    2105             :   }
    2106             : 
    2107     3327127 :   if (SDValue NewSel = foldBinOpIntoSelect(N))
    2108           8 :     return NewSel;
    2109             : 
    2110             :   // reassociate add
    2111     3327119 :   if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
    2112      223488 :     return RADD;
    2113             : 
    2114             :   // fold ((0-A) + B) -> B-A
    2115     3106447 :   if (N0.getOpcode() == ISD::SUB &&
    2116        2816 :       isNullConstantOrNullSplatConstant(N0.getOperand(0)))
    2117          54 :     return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
    2118             : 
    2119             :   // fold (A + (0-B)) -> A-B
    2120     3109784 :   if (N1.getOpcode() == ISD::SUB &&
    2121        6180 :       isNullConstantOrNullSplatConstant(N1.getOperand(0)))
    2122         620 :     return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
    2123             : 
    2124             :   // fold (A+(B-A)) -> B
    2125     3103294 :   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    2126           4 :     return N1.getOperand(0);
    2127             : 
    2128             :   // fold ((B-A)+A) -> B
    2129     3103290 :   if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    2130          31 :     return N0.getOperand(0);
    2131             : 
    2132             :   // fold (A+(B-(A+C))) to (B-C)
    2133     3103259 :   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
    2134          63 :       N0 == N1.getOperand(1).getOperand(0))
    2135           2 :     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
    2136           2 :                        N1.getOperand(1).getOperand(1));
    2137             : 
    2138             :   // fold (A+(B-(C+A))) to (B-C)
    2139     3103257 :   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
    2140          61 :       N0 == N1.getOperand(1).getOperand(1))
    2141           3 :     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
    2142           3 :                        N1.getOperand(1).getOperand(0));
    2143             : 
    2144             :   // fold (A+((B-A)+or-C)) to (B+or-C)
    2145     3103254 :   if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
    2146     3107248 :       N1.getOperand(0).getOpcode() == ISD::SUB &&
    2147         103 :       N0 == N1.getOperand(0).getOperand(1))
    2148           5 :     return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0), // reuse N1's own opcode (ADD or SUB)
    2149           5 :                        N1.getOperand(1));
    2150             : 
    2151             :   // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
    2152     3103249 :   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    2153          18 :     SDValue N00 = N0.getOperand(0);
    2154          18 :     SDValue N01 = N0.getOperand(1);
    2155          18 :     SDValue N10 = N1.getOperand(0);
    2156          18 :     SDValue N11 = N1.getOperand(1);
    2157             : 
    2158          18 :     if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
    2159           5 :       return DAG.getNode(ISD::SUB, DL, VT,
    2160           5 :                          DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
    2161          15 :                          DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
    2162             :   }
    2163             : 
    2164     3103244 :   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
    2165          13 :     return V;
    2166             : 
    2167     3103231 :   if (SDValue V = foldAddSubOfSignBit(N, DAG))
    2168          10 :     return V;
    2169             : 
    2170     3103221 :   if (SimplifyDemandedBits(SDValue(N, 0))) // NOTE(review): returning N itself presumably reports an in-place change - confirm
    2171        5103 :     return SDValue(N, 0);
    2172             : 
    2173             :   // fold (a+b) -> (a|b) iff a and b share no bits (and OR is legal once LegalOperations is set).
    2174     6190470 :   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
    2175     3092352 :       DAG.haveNoCommonBitsSet(N0, N1))
    2176       61872 :     return DAG.getNode(ISD::OR, DL, VT, N0, N1);
    2177             : 
    2178             :   // fold (add (xor a, -1), 1) -> (sub 0, a)
    2179     3067182 :   if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1))
    2180           9 :     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
    2181           9 :                        N0.getOperand(0));
    2182             : 
    2183     3067173 :   if (SDValue Combined = visitADDLike(N0, N1, N)) // the remaining folds are tried with both operand orders
    2184         804 :     return Combined;
    2185             : 
    2186     3066369 :   if (SDValue Combined = visitADDLike(N1, N0, N))
    2187          73 :     return Combined;
    2188             : 
    2189     3066296 :   return SDValue();
    2190             : }
    2191             : 
    2192     5826665 : static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) { // Returns the carry value underlying V, or an empty SDValue if V is not a usable carry.
    2193             :   bool Masked = false; // becomes true once an (and X, 1) has been peeled off
    2194             : 
    2195             :   // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
    2196             :   while (true) {
    2197     5837978 :     if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
    2198        9114 :       V = V.getOperand(0);
    2199        9114 :       continue;
    2200             :     }
    2201             : 
    2202     5828864 :     if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) { // only an AND with the constant 1 is peeled
    2203             :       Masked = true;
    2204        2199 :       V = V.getOperand(0);
    2205        2199 :       continue;
    2206             :     }
    2207             : 
    2208             :     break;
    2209             :   }
    2210             : 
    2211             :   // If this is not a carry, return. Only result number 1 of the opcodes below is accepted.
    2212     5826665 :   if (V.getResNo() != 1)
    2213     5813747 :     return SDValue();
    2214             : 
    2215       12646 :   if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
    2216       24932 :       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
    2217       12014 :     return SDValue();
    2218             : 
    2219             :   // If the result is masked, then no matter what kind of bool it is we can
    2220             :   // return. If it isn't, then we need to make sure the bool type is either 0 or
    2221             :   // 1 and not other values.
    2222         904 :   if (Masked ||
    2223         926 :       TLI.getBooleanContents(V.getValueType()) ==
    2224             :           TargetLoweringBase::ZeroOrOneBooleanContent)
    2225         904 :     return V;
    2226             : 
    2227           0 :   return SDValue();
    2228             : }
    2229             : 
    2230           0 : SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) { // Folds for (add N0, N1); LocReference is used here only to build the debug location.
    2231           0 :   EVT VT = N0.getValueType();
    2232             :   SDLoc DL(LocReference);
    2233             : 
    2234             :   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
    2235           0 :   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
    2236           0 :       isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
    2237           0 :     return DAG.getNode(ISD::SUB, DL, VT, N0,
    2238             :                        DAG.getNode(ISD::SHL, DL, VT,
    2239           0 :                                    N1.getOperand(0).getOperand(1),
    2240           0 :                                    N1.getOperand(1)));
    2241             : 
    2242           0 :   if (N1.getOpcode() == ISD::AND) {
    2243           0 :     SDValue AndOp0 = N1.getOperand(0);
    2244           0 :     unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
    2245             :     unsigned DestBits = VT.getScalarSizeInBits();
    2246             : 
    2247             :     // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
    2248             :     // and similar xforms where the inner op is either ~0 or 0.
    2249           0 :     if (NumSignBits == DestBits && // every bit is a sign bit, i.e. the value is 0 or ~0
    2250           0 :         isOneConstantOrOneSplatConstant(N1->getOperand(1)))
    2251           0 :       return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
    2252             :   }
    2253             : 
    2254             :   // add (sext i1), X -> sub X, (zext i1)
    2255             :   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
    2256           0 :       N0.getOperand(0).getValueType() == MVT::i1 &&
    2257           0 :       !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) { // only when TLI reports SIGN_EXTEND as not legal for i1
    2258           0 :     SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    2259           0 :     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
    2260             :   }
    2261             : 
    2262             :   // add X, (sextinreg Y i1) -> sub X, (and Y 1)
    2263           0 :   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    2264             :     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1)); // operand 1 carries the type being extended from
    2265           0 :     if (TN->getVT() == MVT::i1) {
    2266           0 :       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
    2267           0 :                                  DAG.getConstant(1, DL, VT));
    2268           0 :       return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
    2269             :     }
    2270             :   }
    2271             : 
    2272             :   // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
    2273           0 :   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
    2274           0 :       N1.getResNo() == 0) // N1 must be result 0 of the addcarry, not result 1
    2275           0 :     return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
    2276           0 :                        N0, N1.getOperand(0), N1.getOperand(2));
    2277             : 
    2278             :   // (add X, Carry) -> (addcarry X, 0, Carry), if ADDCARRY is legal or custom for VT
    2279           0 :   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
    2280           0 :     if (SDValue Carry = getAsCarry(TLI, N1))
    2281           0 :       return DAG.getNode(ISD::ADDCARRY, DL,
    2282           0 :                          DAG.getVTList(VT, Carry.getValueType()), N0,
    2283           0 :                          DAG.getConstant(0, DL, VT), Carry);
    2284             : 
    2285           0 :   return SDValue();
    2286             : }
    2287             : 
    2288         522 : SDValue DAGCombiner::visitADDC(SDNode *N) { // Simplifies ADDC when result 1 (the flag) is unused or the add provably cannot overflow.
    2289         522 :   SDValue N0 = N->getOperand(0);
    2290         522 :   SDValue N1 = N->getOperand(1);
    2291         522 :   EVT VT = N0.getValueType();
    2292             :   SDLoc DL(N);
    2293             : 
    2294             :   // If the flag result is dead, turn this into an ADD.
    2295         522 :   if (!N->hasAnyUseOfValue(1))
    2296          15 :     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
    2297          45 :                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
    2298             : 
    2299             :   // canonicalize constant to RHS.
    2300             :   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
    2301             :   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
    2302         507 :   if (N0C && !N1C)
    2303           0 :     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
    2304             : 
    2305             :   // fold (addc x, 0) -> x + no carry out
    2306         507 :   if (isNullConstant(N1))
    2307          11 :     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
    2308          22 :                                         DL, MVT::Glue));
    2309             : 
    2310             :   // If it cannot overflow, transform into an add (paired with a CARRY_FALSE flag).
    2311         496 :   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
    2312          11 :     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
    2313          33 :                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
    2314             : 
    2315         485 :   return SDValue();
    2316             : }
    2317             : 
    2318           7 : static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT, // Builds (xor V, C), with C chosen from VT's boolean contents.
    2319             :                            SelectionDAG &DAG, const TargetLowering &TLI) {
    2320           7 :   SDValue Cst;
    2321           7 :   switch (TLI.getBooleanContents(VT)) {
    2322           7 :   case TargetLowering::ZeroOrOneBooleanContent:
    2323             :   case TargetLowering::UndefinedBooleanContent: // handled the same way as zero-or-one: flip with 1
    2324           7 :     Cst = DAG.getConstant(1, DL, VT);
    2325           7 :     break;
    2326           0 :   case TargetLowering::ZeroOrNegativeOneBooleanContent: // flip with -1
    2327           0 :     Cst = DAG.getConstant(-1, DL, VT);
    2328           0 :     break;
    2329             :   }
    2330             : 
    2331           7 :   return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
    2332             : }
    2333             : 
    2334           0 : static bool isBooleanFlip(SDValue V, EVT VT, const TargetLowering &TLI) { // True if V is an XOR with a constant that flips a boolean of type VT.
    2335           0 :   if (V.getOpcode() != ISD::XOR) return false;
    2336             :   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V.getOperand(1)); // only operand 1 is examined for the constant
    2337           0 :   if (!Const) return false;
    2338             : 
    2339           0 :   switch(TLI.getBooleanContents(VT)) {
    2340           0 :     case TargetLowering::ZeroOrOneBooleanContent:
    2341           0 :       return Const->isOne();
    2342           0 :     case TargetLowering::ZeroOrNegativeOneBooleanContent:
    2343           0 :       return Const->isAllOnesValue();
    2344           0 :     case TargetLowering::UndefinedBooleanContent:
    2345           0 :       return (Const->getAPIntValue() & 0x01) == 1; // only the low bit of the constant is checked
    2346             :   }
    2347           0 :   llvm_unreachable("Unsupported boolean content");
    2348             : }
    2349             : 
    2350      130962 : SDValue DAGCombiner::visitUADDO(SDNode *N) { // Folds for scalar UADDO; result 1 is the carry of type CarryVT.
    2351      130962 :   SDValue N0 = N->getOperand(0);
    2352      130962 :   SDValue N1 = N->getOperand(1);
    2353      130962 :   EVT VT = N0.getValueType();
    2354      130962 :   if (VT.isVector()) // no folds are attempted for vector types
    2355           0 :     return SDValue();
    2356             : 
    2357      261924 :   EVT CarryVT = N->getValueType(1);
    2358             :   SDLoc DL(N);
    2359             : 
    2360             :   // If the flag result is dead, turn this into an ADD (the carry result becomes undef).
    2361      130962 :   if (!N->hasAnyUseOfValue(1))
    2362         405 :     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
    2363         810 :                      DAG.getUNDEF(CarryVT));
    2364             : 
    2365             :   // canonicalize constant to RHS.
    2366             :   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
    2367             :   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
    2368      130557 :   if (N0C && !N1C)
    2369          10 :     return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
    2370             : 
    2371             :   // fold (uaddo x, 0) -> x + no carry out
    2372      130552 :   if (isNullConstant(N1))
    2373         904 :     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
    2374             : 
    2375             :   // If it cannot overflow, transform into an add (with a constant-0 carry).
    2376      130100 :   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
    2377         103 :     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
    2378         206 :                      DAG.getConstant(0, DL, CarryVT));
    2379             : 
    2380             :   // fold (uaddo (xor a, -1), 1) -> (usubo 0, a) and flip carry.
    2381      129997 :   if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1)) {
    2382           4 :     SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
    2383             :                               DAG.getConstant(0, DL, VT),
    2384           4 :                               N0.getOperand(0));
    2385             :     return CombineTo(N, Sub,
    2386           4 :                      flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
    2387             :   }
    2388             : 
    2389      129993 :   if (SDValue Combined = visitUADDOLike(N0, N1, N)) // the remaining folds are tried with both operand orders
    2390         231 :     return Combined;
    2391             : 
    2392      129762 :   if (SDValue Combined = visitUADDOLike(N1, N0, N))
    2393           9 :     return Combined;
    2394             : 
    2395      129753 :   return SDValue();
    2396             : }
    2397             : 
    2398           0 : SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) { // Folds for (uaddo N0, N1); N supplies the result types and debug location.
    2399           0 :   auto VT = N0.getValueType();
    2400             : 
    2401             :   // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
    2402             :   // If Y + 1 cannot overflow.
    2403           0 :   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) { // NOTE(review): no N1.getResNo() == 0 check here, unlike the matching fold in visitADDLike - confirm this is intended
    2404           0 :     SDValue Y = N1.getOperand(0);
    2405           0 :     SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
    2406           0 :     if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
    2407           0 :       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
    2408           0 :                          N1.getOperand(2));
    2409             :   }
    2410             : 
    2411             :   // (uaddo X, Carry) -> (addcarry X, 0, Carry), if ADDCARRY is legal or custom for VT
    2412           0 :   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
    2413           0 :     if (SDValue Carry = getAsCarry(TLI, N1))
    2414           0 :       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
    2415           0 :                          DAG.getConstant(0, SDLoc(N), VT), Carry);
    2416             : 
    2417           0 :   return SDValue();
    2418             : }
    2419             : 
    2420           0 : SDValue DAGCombiner::visitADDE(SDNode *N) { // Canonicalizes a constant LHS to the RHS and turns ADDE with a CARRY_FALSE carry-in into ADDC.
    2421           0 :   SDValue N0 = N->getOperand(0);
    2422           0 :   SDValue N1 = N->getOperand(1);
    2423           0 :   SDValue CarryIn = N->getOperand(2);
    2424             : 
    2425             :   // canonicalize constant to RHS
    2426             :   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
    2427             :   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
    2428           0 :   if (N0C && !N1C)
    2429           0 :     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
    2430           0 :                        N1, N0, CarryIn);
    2431             : 
    2432             :   // fold (adde x, y, false) -> (addc x, y)
    2433           0 :   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    2434           0 :     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
    2435             : 
    2436           0 :   return SDValue();
    2437             : }
    2438             : 
    2439      156807 : SDValue DAGCombiner::visitADDCARRY(SDNode *N) { // Folds for (addcarry N0, N1, CarryIn); returns an empty SDValue if none applies.
    2440      156807 :   SDValue N0 = N->getOperand(0);
    2441      156807 :   SDValue N1 = N->getOperand(1);
    2442      156807 :   SDValue CarryIn = N->getOperand(2);
    2443             :   SDLoc DL(N);
    2444             : 
    2445             :   // canonicalize constant to RHS
    2446             :   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
    2447             :   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
    2448      156807 :   if (N0C && !N1C)
    2449         720 :     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
    2450             : 
    2451             :   // fold (addcarry x, y, false) -> (uaddo x, y)
    2452      156447 :   if (isNullConstant(CarryIn)) {
    2453         565 :     if (!LegalOperations || // once LegalOperations is set, UADDO must be legal or custom
    2454          76 :         TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
    2455        1130 :       return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
    2456             :   }
    2457             : 
    2458      155882 :   EVT CarryVT = CarryIn.getValueType();
    2459             : 
    2460             :   // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
    2461      155882 :   if (isNullConstant(N0) && isNullConstant(N1)) {
    2462         510 :     EVT VT = N0.getValueType();
    2463         510 :     SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
    2464         510 :     AddToWorklist(CarryExt.getNode());
    2465         510 :     return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
    2466             :                                     DAG.getConstant(1, DL, VT)),
    2467        1020 :                      DAG.getConstant(0, DL, CarryVT));
    2468             :   }
    2469             : 
    2470             :   // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
    2471      155380 :   if (isBitwiseNot(N0) && isNullConstant(N1) &&
    2472           8 :       isBooleanFlip(CarryIn, CarryVT, TLI)) {
    2473           3 :     SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
    2474             :                               DAG.getConstant(0, DL, N0.getValueType()),
    2475           3 :                               N0.getOperand(0), CarryIn.getOperand(0)); // CarryIn.getOperand(0) is the un-flipped b
    2476             :     return CombineTo(N, Sub,
    2477           3 :                      flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
    2478             :   }
    2479             : 
    2480      155369 :   if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N)) // the remaining folds are tried with both operand orders
    2481         234 :     return Combined;
    2482             : 
    2483      155135 :   if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
    2484           0 :     return Combined;
    2485             : 
    2486      155135 :   return SDValue();
    2487             : }
    2488             : 
    2489      310504 : SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, // Folds for (addcarry N0, N1, CarryIn); N is the node being combined.
    2490             :                                        SDNode *N) {
    2491             :   // Iff the flag result is dead:
    2492             :   // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
    2493      309060 :   if ((N0.getOpcode() == ISD::ADD ||
    2494       16535 :        (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
    2495      324322 :       isNullConstant(N1) && !N->hasAnyUseOfValue(1))
    2496         464 :     return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
    2497         232 :                        N0.getOperand(0), N0.getOperand(1), CarryIn);
    2498             : 
    2499             :   /**
    2500             :    * When one of the addcarry arguments is itself a carry, we may be facing
    2501             :    * a diamond carry propagation, in which case we try to transform the DAG
    2502             :    * to ensure linear carry propagation if that is possible.
    2503             :    *
    2504             :    * We are trying to get:
    2505             :    *   (addcarry X, 0, (addcarry A, B, Z):Carry)
    2506             :    */
    2507      310272 :   if (auto Y = getAsCarry(TLI, N1)) {
    2508             :     /**
    2509             :      *            (uaddo A, B)
    2510             :      *             /       \
    2511             :      *          Carry      Sum
    2512             :      *            |          \
    2513             :      *            | (addcarry *, 0, Z)
    2514             :      *            |       /
    2515             :      *             \   Carry
    2516             :      *              |   /
    2517             :      * (addcarry X, *, *)
    2518             :      */
    2519         621 :     if (Y.getOpcode() == ISD::UADDO &&
    2520         621 :         CarryIn.getResNo() == 1 &&
    2521          12 :         CarryIn.getOpcode() == ISD::ADDCARRY &&
    2522         900 :         isNullConstant(CarryIn.getOperand(1)) &&
    2523           4 :         CarryIn.getOperand(0) == Y.getValue(0)) { // the inner addcarry must consume the uaddo's result 0
    2524           2 :       auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
    2525             :                               Y.getOperand(0), Y.getOperand(1),
    2526           2 :                               CarryIn.getOperand(2));
    2527           2 :       AddToWorklist(NewY.getNode());
    2528           4 :       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
    2529           2 :                          DAG.getConstant(0, SDLoc(N), N0.getValueType()),
    2530           2 :                          NewY.getValue(1));
    2531             :     }
    2532             :   }
    2533             : 
    2534      310270 :   return SDValue();
    2535             : }
    2536             : 
    2537             : // Since it may not be valid to emit a fold to zero for vector initializers,
    2538             : // check if we can before folding. Returns an empty SDValue when we cannot.
    2539          51 : static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
    2540             :                              SelectionDAG &DAG, bool LegalOperations,
    2541             :                              bool LegalTypes) { // LegalTypes is not consulted in this function
    2542          51 :   if (!VT.isVector()) // scalars always fold to a zero constant
    2543          38 :     return DAG.getConstant(0, DL, VT);
    2544          13 :   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) // vectors need BUILD_VECTOR legal once LegalOperations is set
    2545          13 :     return DAG.getConstant(0, DL, VT);
    2546           0 :   return SDValue();
    2547             : }
    2548             : 
    2549      532128 : SDValue DAGCombiner::visitSUB(SDNode *N) {
    2550      532128 :   SDValue N0 = N->getOperand(0);
    2551      532128 :   SDValue N1 = N->getOperand(1);
    2552      532128 :   EVT VT = N0.getValueType();
    2553             :   SDLoc DL(N);
    2554             : 
    2555             :   // fold vector ops
    2556      532128 :   if (VT.isVector()) {
    2557      483137 :     if (SDValue FoldedVOp = SimplifyVBinOp(N))
    2558           4 :       return FoldedVOp;
    2559             : 
    2560             :     // fold (sub x, 0) -> x, vector edition
    2561      483133 :     if (ISD::isBuildVectorAllZeros(N1.getNode()))
    2562           7 :       return N0;
    2563             :   }
    2564             : 
    2565             :   // fold (sub x, x) -> 0
    2566             :   // FIXME: Refactor this and xor and other similar operations together.
    2567      532117 :   if (N0 == N1)
    2568          32 :     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes);
    2569      542117 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
    2570       10032 :       DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
    2571             :     // fold (sub c1, c2) -> c1-c2
    2572           3 :     return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
    2573           3 :                                       N1.getNode());
    2574             :   }
    2575             : 
    2576      532082 :   if (SDValue NewSel = foldBinOpIntoSelect(N))
    2577          11 :     return NewSel;
    2578             : 
    2579      532071 :   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
    2580             : 
    2581             :   // fold (sub x, c) -> (add x, -c)
    2582             :   if (N1C) {
    2583        8287 :     return DAG.getNode(ISD::ADD, DL, VT, N0,
    2584       24861 :                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
    2585             :   }
    2586             : 
    2587      523784 :   if (isNullConstantOrNullSplatConstant(N0)) {
    2588             :     unsigned BitWidth = VT.getScalarSizeInBits();
    2589             :     // Right-shifting everything out but the sign bit followed by negation is
    2590             :     // the same as flipping arithmetic/logical shift type without the negation:
    2591             :     // -(X >>u 31) -> (X >>s 31)
    2592             :     // -(X >>s 31) -> (X >>u 31)
    2593       10050 :     if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
    2594         135 :       ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
    2595         267 :       if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
    2596          11 :         auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
    2597          11 :         if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
    2598          22 :           return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
    2599             :       }
    2600             :     }
    2601             : 
    2602             :     // 0 - X --> 0 if the sub is NUW.
    2603        5014 :     if (N->getFlags().hasNoUnsignedWrap())
    2604           2 :       return N0;
    2605             : 
    2606       15036 :     if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
    2607             :       // N1 is either 0 or the minimum signed value. If the sub is NSW, then
    2608             :       // N1 must be 0 because negating the minimum signed value is undefined.
    2609           8 :       if (N->getFlags().hasNoSignedWrap())
    2610           2 :         return N0;
    2611             : 
    2612             :       // 0 - X --> X if X is 0 or the minimum signed value.
    2613           6 :       return N1;
    2614             :     }
    2615             :   }
    2616             : 
    2617             :   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
    2618      523763 :   if (isAllOnesConstantOrAllOnesSplatConstant(N0))
    2619         134 :     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
    2620             : 
    2621             :   // fold (A - (0-B)) -> A+B
    2622     1048146 :   if (N1.getOpcode() == ISD::SUB &&
    2623         754 :       isNullConstantOrNullSplatConstant(N1.getOperand(0)))
    2624          87 :     return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
    2625             : 
    2626             :   // fold A-(A-B) -> B
    2627     1047334 :   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
    2628         212 :     return N1.getOperand(1);
    2629             : 
    2630             :   // fold (A+B)-A -> B
    2631      523455 :   if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
    2632          22 :     return N0.getOperand(1);
    2633             : 
    2634             :   // fold (A+B)-B -> A
    2635      523433 :   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
    2636           3 :     return N0.getOperand(0);
    2637             : 
    2638             :   // fold C2-(A+C1) -> (C2-C1)-A
    2639      523430 :   if (N1.getOpcode() == ISD::ADD) {
    2640         597 :     SDValue N11 = N1.getOperand(1);
    2641         784 :     if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
    2642         187 :         isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
    2643         288 :       SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
    2644         432 :       return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
    2645             :     }
    2646             :   }
    2647             : 
    2648             :   // fold ((A+(B+or-C))-B) -> A+or-C
    2649        1777 :   if (N0.getOpcode() == ISD::ADD &&
    2650        1777 :       (N0.getOperand(1).getOpcode() == ISD::SUB ||
    2651      523286 :        N0.getOperand(1).getOpcode() == ISD::ADD) &&
    2652          46 :       N0.getOperand(1).getOperand(0) == N1)
    2653           7 :     return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
    2654           7 :                        N0.getOperand(1).getOperand(1));
    2655             : 
    2656             :   // fold ((A+(C+B))-B) -> A+C
    2657      523279 :   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
    2658           7 :       N0.getOperand(1).getOperand(1) == N1)
    2659           1 :     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
    2660           1 :                        N0.getOperand(1).getOperand(0));
    2661             : 
    2662             :   // fold ((A-(B-C))-C) -> A-B
    2663      523278 :   if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
    2664           3 :       N0.getOperand(1).getOperand(1) == N1)
    2665           3 :     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
    2666           3 :                        N0.getOperand(1).getOperand(0));
    2667             : 
    2668             :   // fold (A-(B-C)) -> A+(C-B)
    2669     1046550 :   if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
    2670         347 :     return DAG.getNode(ISD::ADD, DL, VT, N0,
    2671             :                        DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
    2672         694 :                                    N1.getOperand(0)));
    2673             : 
    2674             :   // fold (X - (-Y * Z)) -> (X + (Y * Z))
    2675     1045856 :   if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
    2676        6864 :     if (N1.getOperand(0).getOpcode() == ISD::SUB &&
    2677           9 :         isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0))) {
    2678           9 :       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
    2679           9 :                                 N1.getOperand(0).getOperand(1),
    2680          18 :                                 N1.getOperand(1));
    2681          18 :       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
    2682             :     }
    2683        6828 :     if (N1.getOperand(1).getOpcode() == ISD::SUB &&
    2684           0 :         isNullConstantOrNullSplatConstant(N1.getOperand(1).getOperand(0))) {
    2685           0 :       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
    2686             :                                 N1.getOperand(0),
    2687           0 :                                 N1.getOperand(1).getOperand(1));
    2688           0 :       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
    2689             :     }
    2690             :   }
    2691             : 
    2692             :   // If either operand of a sub is undef, the result is undef
    2693      522919 :   if (N0.isUndef())
    2694           0 :     return N0;
    2695     1045838 :   if (N1.isUndef())
    2696           0 :     return N1;
    2697             : 
    2698      522919 :   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
    2699           9 :     return V;
    2700             : 
    2701      522910 :   if (SDValue V = foldAddSubOfSignBit(N, DAG))
    2702           9 :     return V;
    2703             : 
    2704             :   // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
    2705      522901 :   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
    2706       30009 :     if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
    2707           2 :       SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
    2708           2 :       SDValue S0 = N1.getOperand(0);
    2709             :       if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
    2710             :         unsigned OpSizeInBits = VT.getScalarSizeInBits();
    2711           4 :         if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
    2712           4 :           if (C->getAPIntValue() == (OpSizeInBits - 1))
    2713           4 :             return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
    2714             :       }
    2715             :     }
    2716             :   }
    2717             : 
    2718             :   // If the relocation model supports it, consider symbol offsets.
    2719             :   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    2720          11 :     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
    2721             :       // fold (sub Sym, c) -> Sym-c
    2722             :       if (N1C && GA->getOpcode() == ISD::GlobalAddress)
    2723             :         return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
    2724             :                                     GA->getOffset() -
    2725             :                                         (uint64_t)N1C->getSExtValue());
    2726             :       // fold (sub Sym+c1, Sym+c2) -> c1-c2
    2727             :       if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
    2728           0 :         if (GA->getGlobal() == GB->getGlobal())
    2729           0 :           return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
    2730           0 :                                  DL, VT);
    2731             :     }
    2732             : 
    2733             :   // sub X, (sextinreg Y i1) -> add X, (and Y 1)
    2734     1045798 :   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    2735             :     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    2736             :     if (TN->getVT() == MVT::i1) {
    2737          58 :       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
    2738          58 :                                  DAG.getConstant(1, DL, VT));
    2739         116 :       return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
    2740             :     }
    2741             :   }
    2742             : 
    2743             :   // Prefer an add for more folding potential and possibly better codegen:
    2744             :   // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
    2745      522841 :   if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
    2746          75 :     SDValue ShAmt = N1.getOperand(1);
    2747          75 :     ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
    2748         150 :     if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) {
    2749          45 :       SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
    2750          30 :       return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
    2751             :     }
    2752             :   }
    2753             : 
    2754      522826 :   return SDValue();
    2755             : }
    2756             : 
    2757          89 : SDValue DAGCombiner::visitSUBC(SDNode *N) {
    2758          89 :   SDValue N0 = N->getOperand(0);
    2759          89 :   SDValue N1 = N->getOperand(1);
    2760          89 :   EVT VT = N0.getValueType();
    2761             :   SDLoc DL(N);
    2762             : 
    2763             :   // If the flag result is dead, turn this into an SUB.
    2764          89 :   if (!N->hasAnyUseOfValue(1))
    2765          24 :     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
    2766          72 :                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
    2767             : 
    2768             :   // fold (subc x, x) -> 0 + no borrow
    2769             :   if (N0 == N1)
    2770           0 :     return CombineTo(N, DAG.getConstant(0, DL, VT),
    2771           0 :                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
    2772             : 
    2773             :   // fold (subc x, 0) -> x + no borrow
    2774          65 :   if (isNullConstant(N1))
    2775           0 :     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
    2776             : 
    2777             :   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
    2778          65 :   if (isAllOnesConstant(N0))
    2779           0 :     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
    2780           0 :                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
    2781             : 
    2782          65 :   return SDValue();
    2783             : }
    2784             : 
    2785        1913 : SDValue DAGCombiner::visitUSUBO(SDNode *N) {
    2786        1913 :   SDValue N0 = N->getOperand(0);
    2787        1913 :   SDValue N1 = N->getOperand(1);
    2788        1913 :   EVT VT = N0.getValueType();
    2789        1913 :   if (VT.isVector())
    2790           0 :     return SDValue();
    2791             : 
    2792        3826 :   EVT CarryVT = N->getValueType(1);
    2793             :   SDLoc DL(N);
    2794             : 
    2795             :   // If the flag result is dead, turn this into an SUB.
    2796        1913 :   if (!N->hasAnyUseOfValue(1))
    2797         106 :     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
    2798         212 :                      DAG.getUNDEF(CarryVT));
    2799             : 
    2800             :   // fold (usubo x, x) -> 0 + no borrow
    2801             :   if (N0 == N1)
    2802           7 :     return CombineTo(N, DAG.getConstant(0, DL, VT),
    2803          14 :                      DAG.getConstant(0, DL, CarryVT));
    2804             : 
    2805             :   // fold (usubo x, 0) -> x + no borrow
    2806        1800 :   if (isNullConstant(N1))
    2807          32 :     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
    2808             : 
    2809             :   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
    2810        1784 :   if (isAllOnesConstant(N0))
    2811           4 :     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
    2812           8 :                      DAG.getConstant(0, DL, CarryVT));
    2813             : 
    2814        1780 :   return SDValue();
    2815             : }
    2816             : 
    2817           0 : SDValue DAGCombiner::visitSUBE(SDNode *N) {
    2818           0 :   SDValue N0 = N->getOperand(0);
    2819           0 :   SDValue N1 = N->getOperand(1);
    2820           0 :   SDValue CarryIn = N->getOperand(2);
    2821             : 
    2822             :   // fold (sube x, y, false) -> (subc x, y)
    2823           0 :   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    2824           0 :     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
    2825             : 
    2826           0 :   return SDValue();
    2827             : }
    2828             : 
    2829         839 : SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
    2830         839 :   SDValue N0 = N->getOperand(0);
    2831         839 :   SDValue N1 = N->getOperand(1);
    2832         839 :   SDValue CarryIn = N->getOperand(2);
    2833             : 
    2834             :   // fold (subcarry x, y, false) -> (usubo x, y)
    2835         839 :   if (isNullConstant(CarryIn)) {
    2836         120 :     if (!LegalOperations ||
    2837         109 :         TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
    2838         240 :       return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
    2839             :   }
    2840             : 
    2841         719 :   return SDValue();
    2842             : }
    2843             : 
    2844       39729 : SDValue DAGCombiner::visitMUL(SDNode *N) {
    2845       39729 :   SDValue N0 = N->getOperand(0);
    2846       39729 :   SDValue N1 = N->getOperand(1);
    2847       79458 :   EVT VT = N0.getValueType();
    2848             : 
    2849             :   // fold (mul x, undef) -> 0
    2850       39729 :   if (N0.isUndef() || N1.isUndef())
    2851          56 :     return DAG.getConstant(0, SDLoc(N), VT);
    2852             : 
    2853             :   bool N0IsConst = false;
    2854             :   bool N1IsConst = false;
    2855             :   bool N1IsOpaqueConst = false;
    2856             :   bool N0IsOpaqueConst = false;
    2857             :   APInt ConstValue0, ConstValue1;
    2858             :   // fold vector ops
    2859       39701 :   if (VT.isVector()) {
    2860        9791 :     if (SDValue FoldedVOp = SimplifyVBinOp(N))
    2861          12 :       return FoldedVOp;
    2862             : 
    2863        9779 :     N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
    2864        9779 :     N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
    2865             :     assert((!N0IsConst ||
    2866             :             ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
    2867             :            "Splat APInt should be element width");
    2868             :     assert((!N1IsConst ||
    2869             :             ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
    2870             :            "Splat APInt should be element width");
    2871             :   } else {
    2872             :     N0IsConst = isa<ConstantSDNode>(N0);
    2873             :     if (N0IsConst) {
    2874          12 :       ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
    2875           6 :       N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
    2876             :     }
    2877             :     N1IsConst = isa<ConstantSDNode>(N1);
    2878             :     if (N1IsConst) {
    2879       35058 :       ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
    2880       17529 :       N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
    2881             :     }
    2882             :   }
    2883             : 
    2884             :   // fold (mul c1, c2) -> c1*c2
    2885       39689 :   if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
    2886           6 :     return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
    2887           6 :                                       N0.getNode(), N1.getNode());
    2888             : 
    2889             :   // canonicalize constant to RHS (vector doesn't have to splat)
    2890       39709 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
    2891          26 :      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    2892          52 :     return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
    2893             :   // fold (mul x, 0) -> 0
    2894       58438 :   if (N1IsConst && ConstValue1.isNullValue())
    2895          25 :     return N1;
    2896             :   // fold (mul x, 1) -> x
    2897       58388 :   if (N1IsConst && ConstValue1.isOneValue())
    2898        2143 :     return N0;
    2899             : 
    2900       37489 :   if (SDValue NewSel = foldBinOpIntoSelect(N))
    2901           3 :     return NewSel;
    2902             : 
    2903             :   // fold (mul x, -1) -> 0-x
    2904       54096 :   if (N1IsConst && ConstValue1.isAllOnesValue()) {
    2905             :     SDLoc DL(N);
    2906          97 :     return DAG.getNode(ISD::SUB, DL, VT,
    2907          97 :                        DAG.getConstant(0, DL, VT), N0);
    2908             :   }
    2909             :   // fold (mul x, (1 << c)) -> x << c
    2910       54572 :   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
    2911       41555 :       DAG.isKnownToBeAPowerOfTwo(N1) &&
    2912         286 :       (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
    2913             :     SDLoc DL(N);
    2914        4166 :     SDValue LogBase2 = BuildLogBase2(N1, DL);
    2915        8332 :     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    2916        4166 :     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    2917        8332 :     return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
    2918             :   }
    2919             :   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
    2920       82682 :   if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
    2921          85 :     unsigned Log2Val = (-ConstValue1).logBase2();
    2922             :     SDLoc DL(N);
    2923             :     // FIXME: If the input is something that is easily negated (e.g. a
    2924             :     // single-use add), we should put the negate there.
    2925          85 :     return DAG.getNode(ISD::SUB, DL, VT,
    2926          85 :                        DAG.getConstant(0, DL, VT),
    2927             :                        DAG.getNode(ISD::SHL, DL, VT, N0,
    2928             :                             DAG.getConstant(Log2Val, DL,
    2929         170 :                                       getShiftAmountTy(N0.getValueType()))));
    2930             :   }
    2931             : 
    2932             :   // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
    2933             :   // mul x, (2^N + 1) --> add (shl x, N), x
    2934             :   // mul x, (2^N - 1) --> sub (shl x, N), x
    2935             :   // Examples: x * 33 --> (x << 5) + x
    2936             :   //           x * 15 --> (x << 4) - x
    2937             :   //           x * -33 --> -((x << 5) + x)
    2938             :   //           x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
    2939       33138 :   if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
    2940             :     // TODO: We could handle more general decomposition of any constant by
    2941             :     //       having the target set a limit on number of ops and making a
    2942             :     //       callback to determine that sequence (similar to sqrt expansion).
    2943             :     unsigned MathOp = ISD::DELETED_NODE;
    2944          66 :     APInt MulC = ConstValue1.abs();
    2945          66 :     if ((MulC - 1).isPowerOf2())
    2946             :       MathOp = ISD::ADD;
    2947          44 :     else if ((MulC + 1).isPowerOf2())
    2948             :       MathOp = ISD::SUB;
    2949             : 
    2950             :     if (MathOp != ISD::DELETED_NODE) {
    2951         154 :       unsigned ShAmt = MathOp == ISD::ADD ? (MulC - 1).logBase2()
    2952         154 :                                           : (MulC + 1).logBase2();
    2953             :       assert(ShAmt > 0 && ShAmt < VT.getScalarSizeInBits() &&
    2954             :              "Not expecting multiply-by-constant that could have simplified");
    2955             :       SDLoc DL(N);
    2956          66 :       SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N0,
    2957          66 :                                 DAG.getConstant(ShAmt, DL, VT));
    2958         132 :       SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
    2959          66 :       if (ConstValue1.isNegative())
    2960          32 :         R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
    2961          66 :       return R;
    2962             :     }
    2963             :   }
    2964             : 
    2965             :   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
    2966       33094 :   if (N0.getOpcode() == ISD::SHL &&
    2967       33077 :       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
    2968          10 :       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
    2969          10 :     SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
    2970           5 :     if (isConstantOrConstantVector(C3))
    2971          15 :       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
    2972             :   }
    2973             : 
    2974             :   // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
    2975             :   // use.
    2976             :   {
    2977             :     SDValue Sh(nullptr, 0), Y(nullptr, 0);
    2978             : 
    2979             :     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
    2980       33084 :     if (N0.getOpcode() == ISD::SHL &&
    2981       33067 :         isConstantOrConstantVector(N0.getOperand(1)) &&
    2982          15 :         N0.getNode()->hasOneUse()) {
    2983          12 :       Sh = N0; Y = N1;
    2984       33064 :     } else if (N1.getOpcode() == ISD::SHL &&
    2985       33055 :                isConstantOrConstantVector(N1.getOperand(1)) &&
    2986           6 :                N1.getNode()->hasOneUse()) {
    2987           3 :       Sh = N1; Y = N0;
    2988             :     }
    2989             : 
    2990       33067 :     if (Sh.getNode()) {
    2991          16 :       SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
    2992          31 :       return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
    2993             :     }
    2994             :   }
    2995             : 
    2996             :   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
    2997       46132 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
    2998       13581 :       N0.getOpcode() == ISD::ADD &&
    2999       34245 :       DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
    3000         191 :       isMulAddWithConstProfitable(N, N0, N1))
    3001         184 :       return DAG.getNode(ISD::ADD, SDLoc(N), VT,
    3002          92 :                          DAG.getNode(ISD::MUL, SDLoc(N0), VT,
    3003          92 :                                      N0.getOperand(0), N1),
    3004          92 :                          DAG.getNode(ISD::MUL, SDLoc(N1), VT,
    3005         368 :                                      N0.getOperand(1), N1));
    3006             : 
    3007             :   // reassociate mul
    3008       41538 :   if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
    3009         542 :     return RMUL;
    3010             : 
    3011       32418 :   return SDValue();
    3012             : }
    3013             : 
    3014             : /// Return true if divmod libcall is available.
    3015        1431 : static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
    3016             :                                      const TargetLowering &TLI) {
    3017             :   RTLIB::Libcall LC;
    3018        1431 :   EVT NodeType = Node->getValueType(0);
    3019        1431 :   if (!NodeType.isSimple())
    3020             :     return false;
    3021        1431 :   switch (NodeType.getSimpleVT().SimpleTy) {
    3022             :   default: return false; // No libcall for vector types.
    3023           0 :   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
    3024           4 :   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
    3025        1232 :   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
    3026         188 :   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
    3027           7 :   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
    3028             :   }
    3029             : 
    3030        1431 :   return TLI.getLibcallName(LC) != nullptr;
    3031             : }
    3032             : 
    3033             : /// Issue divrem if both quotient and remainder are needed.
    3034        5726 : SDValue DAGCombiner::useDivRem(SDNode *Node) {
    3035        5726 :   if (Node->use_empty())
    3036           0 :     return SDValue(); // This is a dead node, leave it alone.
    3037             : 
    3038        5726 :   unsigned Opcode = Node->getOpcode();
    3039        5726 :   bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
    3040        5726 :   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
    3041             : 
    3042             :   // DivMod lib calls can still work on non-legal types if using lib-calls.
    3043       11452 :   EVT VT = Node->getValueType(0);
    3044        5726 :   if (VT.isVector() || !VT.isInteger())
    3045         499 :     return SDValue();
    3046             : 
    3047        6059 :   if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
    3048         801 :     return SDValue();
    3049             : 
    3050             :   // If DIVREM is going to get expanded into a libcall,
    3051             :   // but there is no libcall available, then don't combine.
    3052        4426 :   if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
    3053        1431 :       !isDivRemLibcallAvailable(Node, isSigned, TLI))
    3054        1309 :     return SDValue();
    3055             : 
    3056             :   // If div is legal, it's better to do the normal expansion
    3057             :   unsigned OtherOpcode = 0;
    3058        3117 :   if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
    3059        1634 :     OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
    3060        1634 :     if (TLI.isOperationLegalOrCustom(Opcode, VT))
    3061         211 :       return SDValue();
    3062             :   } else {
    3063        1483 :     OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    3064        1483 :     if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
    3065         176 :       return SDValue();
    3066             :   }
    3067             : 
    3068        2730 :   SDValue Op0 = Node->getOperand(0);
    3069        2730 :   SDValue Op1 = Node->getOperand(1);
    3070             :   SDValue combined;
    3071        2730 :   for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
    3072        6389 :          UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
    3073             :     SDNode *User = *UI;
    3074        3659 :     if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
    3075         929 :         User->use_empty())
    3076             :       continue;
    3077             :     // Convert the other matching node(s), too;
    3078             :     // otherwise, the DIVREM may get target-legalized into something
    3079             :     // target-specific that we won't be able to recognize.
    3080             :     unsigned UserOpc = User->getOpcode();
    3081         410 :     if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
    3082         948 :         User->getOperand(0) == Op0 &&
    3083         384 :         User->getOperand(1) == Op1) {
    3084         168 :       if (!combined) {
    3085         168 :         if (UserOpc == OtherOpcode) {
    3086         168 :           SDVTList VTs = DAG.getVTList(VT, VT);
    3087         182 :           combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
    3088           0 :         } else if (UserOpc == DivRemOpc) {
    3089             :           combined = SDValue(User, 0);
    3090             :         } else {
    3091             :           assert(UserOpc == Opcode);
    3092             :           continue;
    3093             :         }
    3094             :       }
    3095         168 :       if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
    3096         138 :         CombineTo(User, combined);
    3097          30 :       else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
    3098          30 :         CombineTo(User, combined.getValue(1));
    3099             :     }
    3100             :   }
    3101        2730 :   return combined;
    3102             : }
    3103             : 
    /// Simplifications shared by the integer division and remainder combines;
    /// visitSDIV, visitUDIV and visitREM each call this on their node. SDIV and
    /// UDIV are treated as divisions (IsDiv); any other opcode is treated as a
    /// remainder. Returns the replacement value, or an empty SDValue if none of
    /// the patterns below match.
    3104       15164 : static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
    3105       15164 :   SDValue N0 = N->getOperand(0);
    3106       15164 :   SDValue N1 = N->getOperand(1);
    3107       30328 :   EVT VT = N->getValueType(0);
    3108             :   SDLoc DL(N);
    3109             : 
    3110       15164 :   unsigned Opc = N->getOpcode();
    3111       15164 :   bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
    3112       15164 :   ConstantSDNode *N1C = isConstOrConstSplat(N1);
    3113             : 
    3114             :   // X / undef -> undef
    3115             :   // X % undef -> undef
    3116             :   // X / 0 -> undef
    3117             :   // X % 0 -> undef
    3118             :   // NOTE: This includes vectors where any divisor element is zero/undef.
    3119       30328 :   if (DAG.isUndef(Opc, {N0, N1}))
    3120           1 :     return DAG.getUNDEF(VT);
    3121             : 
    3122             :   // undef / X -> 0
    3123             :   // undef % X -> 0
    3124       15163 :   if (N0.isUndef())
    3125           0 :     return DAG.getConstant(0, DL, VT);
    3126             : 
    3127             :   // TODO: 0 / X -> 0
    3128             :   // TODO: 0 % X -> 0
    3129             : 
    3130             :   // X / X -> 1
    3131             :   // X % X -> 0
    3132             :   if (N0 == N1)
    3133          62 :     return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
    3134             : 
    3135             :   // X / 1 -> X
    3136             :   // X % 1 -> 0
    3137       24497 :   if (N1C && N1C->isOne())
    3138          48 :     return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
    3139             :   // If this is a boolean op (single-bit element type), we can't have
    3140             :   // division-by-zero or remainder-by-zero, so assume the divisor is 1.
    3141             :   // Similarly, if we're zero-extending a boolean divisor, then assume it's a 1.
                         // NOTE(review): no code in this function implements the two boolean cases
                         // described above; only a divisor for which isConstOrConstSplat yields a
                         // constant equal to one is folded. They read as a TODO - confirm.
    3142             : 
    3143       15071 :   return SDValue();
    3144             : }
    3145             : 
    /// Combine an SDIV node. The folds are tried in the order written: vector
    /// binop simplification, constant folding, the -1 and MIN_SIGNED divisor
    /// special cases, simplifyDivRem, foldBinOpIntoSelect, strength reduction to
    /// UDIV, visitSDIVLike and finally useDivRem. Returns the replacement value,
    /// or an empty SDValue when nothing applies.
    3146        5883 : SDValue DAGCombiner::visitSDIV(SDNode *N) {
    3147        5883 :   SDValue N0 = N->getOperand(0);
    3148        5883 :   SDValue N1 = N->getOperand(1);
    3149        5883 :   EVT VT = N->getValueType(0);
    3150        5883 :   EVT CCVT = getSetCCResultType(VT);
    3151             : 
    3152             :   // fold vector ops
    3153        5883 :   if (VT.isVector())
    3154         425 :     if (SDValue FoldedVOp = SimplifyVBinOp(N))
    3155           1 :       return FoldedVOp;
    3156             : 
    3157             :   SDLoc DL(N);
    3158             : 
    3159             :   // fold (sdiv c1, c2) -> c1/c2
                         // NOTE(review): unlike visitUDIV and visitREM, the FoldConstantArithmetic
                         // result is returned here without first checking that it is non-empty;
                         // confirm that an empty result is meant to end the combine.
    3160        5882 :   ConstantSDNode *N0C = isConstOrConstSplat(N0);
    3161        5882 :   ConstantSDNode *N1C = isConstOrConstSplat(N1);
    3162        5882 :   if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
    3163           0 :     return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
    3164             :   // fold (sdiv X, -1) -> 0-X
    3165       10084 :   if (N1C && N1C->isAllOnesValue())
    3166          28 :     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
    3167             :   // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
    3168        5868 :   if (N1C && N1C->getAPIntValue().isMinSignedValue())
    3169          28 :     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
    3170          14 :                          DAG.getConstant(1, DL, VT),
    3171          14 :                          DAG.getConstant(0, DL, VT));
    3172             : 
    3173        5854 :   if (SDValue V = simplifyDivRem(N, DAG))
    3174          29 :     return V;
    3175             : 
    3176        5825 :   if (SDValue NewSel = foldBinOpIntoSelect(N))
    3177           7 :     return NewSel;
    3178             : 
    3179             :   // If we know the sign bits of both operands are zero, strength reduce to a
    3180             :   // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
    3181        5818 :   if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
    3182          50 :     return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
    3183             : 
    3184        5793 :   if (SDValue V = visitSDIVLike(N0, N1, N))
    3185        4190 :     return V;
    3186             : 
    3187             :   // sdiv, srem -> sdivrem
    3188             :   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
    3189             :   // true.  Otherwise, we break the simplification logic in visitREM().
    3190        1603 :   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
    3191        1603 :   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    3192        1531 :     if (SDValue DivRem = useDivRem(N))
    3193          14 :         return DivRem;
    3194             : 
    3195        1589 :   return SDValue();
    3196             : }
    3197             : 
    /// Try to replace a signed division of N0 by N1 with other nodes. N supplies
    /// the result type, the flags and the debug location; visitREM also calls
    /// this with a remainder node as N. Returns the replacement value, or an
    /// empty SDValue if none is built.
    3198        6155 : SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
    3199             :   SDLoc DL(N);
    3200        6155 :   EVT VT = N->getValueType(0);
    3201        6155 :   EVT CCVT = getSetCCResultType(VT);
    3202             :   unsigned BitWidth = VT.getScalarSizeInBits();
    3203             : 
    3204             :   // Helper for determining whether a value is a power-2 constant scalar or a
    3205             :   // vector of such elements.
                         // Zero and opaque constants are rejected; a constant whose negation is a
                         // power of two is accepted as well.
    3206             :   auto IsPowerOfTwo = [](ConstantSDNode *C) {
    3207             :     if (C->isNullValue() || C->isOpaque())
    3208             :       return false;
    3209             :     if (C->getAPIntValue().isPowerOf2())
    3210             :       return true;
    3211             :     if ((-C->getAPIntValue()).isPowerOf2())
    3212             :       return true;
    3213             :     return false;
    3214             :   };
    3215             : 
    3216             :   // fold (sdiv X, pow2) -> simple ops after legalize
    3217             :   // FIXME: We check for the exact bit here because the generic lowering gives
    3218             :   // better results in that case. The target-specific lowering should learn how
    3219             :   // to handle exact sdivs efficiently.
    3220        8822 :   if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
    3221             :     // Target-specific implementation of sdiv x, pow2.
    3222         595 :     if (SDValue Res = BuildSDIVPow2(N))
    3223          32 :       return Res;
    3224             : 
    3225             :     // Create constants that are functions of the shift amount value.
                           // C1 = cttz(N1) and Inexact = BitWidth - C1, both in the shift-amount
                           // type. Give up if Inexact did not come back as a constant (vector).
    3226        1126 :     EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
    3227         563 :     SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
    3228        1126 :     SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
    3229         563 :     C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
    3230        1126 :     SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
    3231         563 :     if (!isConstantOrConstantVector(Inexact))
    3232           0 :       return SDValue();
    3233             : 
    3234             :     // Splat the sign bit into the register
    3235         563 :     SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
    3236         563 :                                DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
    3237         563 :     AddToWorklist(Sign.getNode());
    3238             : 
    3239             :     // Add (N0 < 0) ? abs2 - 1 : 0;
                           // Sign is either 0 or all-ones, so the logical shift right by
                           // BitWidth - cttz(N1) leaves the low cttz(N1) bits set when N0 is
                           // negative and 0 otherwise; that value is added to N0 before the
                           // arithmetic shift by cttz(N1).
    3240        1126 :     SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
    3241         563 :     AddToWorklist(Srl.getNode());
    3242        1126 :     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
    3243         563 :     AddToWorklist(Add.getNode());
    3244        1126 :     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
    3245         563 :     AddToWorklist(Sra.getNode());
    3246             : 
    3247             :     // Special case: (sdiv X, 1) -> X
    3248             :     // Special Case: (sdiv X, -1) -> 0-X
                           // For both 1 and -1 this select picks N0 itself; the negation needed
                           // for -1 comes from the IsNeg select at the end of this block.
    3249         563 :     SDValue One = DAG.getConstant(1, DL, VT);
    3250         563 :     SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
    3251         563 :     SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
    3252         563 :     SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
    3253        1126 :     SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
    3254         563 :     Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
    3255             : 
    3256             :     // If dividing by a positive value, we're done. Otherwise, the result must
    3257             :     // be negated.
    3258         563 :     SDValue Zero = DAG.getConstant(0, DL, VT);
    3259        1126 :     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
    3260             : 
    3261             :     // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
    3262         563 :     SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
    3263         563 :     SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
    3264         563 :     return Res;
    3265             :   }
    3266             : 
    3267             :   // If integer divide is expensive and we satisfy the requirements, emit an
    3268             :   // alternate sequence.  Targets may check function attributes for size/speed
    3269             :   // trade-offs.
    3270        5560 :   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
    3271        9629 :   if (isConstantOrConstantVector(N1) &&
    3272        8138 :       !TLI.isIntDivCheap(N->getValueType(0), Attr))
    3273        4025 :     if (SDValue Op = BuildSDIV(N))
    3274        3895 :       return Op;
    3275             : 
    3276        1665 :   return SDValue();
    3277             : }
    3278             : 
    /// Combine a UDIV node. The folds are tried in the order written: vector
    /// binop simplification, constant folding, the all-ones divisor special
    /// case, simplifyDivRem, foldBinOpIntoSelect, visitUDIVLike and finally
    /// useDivRem. Returns the replacement value, or an empty SDValue when
    /// nothing applies.
    3279        4531 : SDValue DAGCombiner::visitUDIV(SDNode *N) {
    3280        4531 :   SDValue N0 = N->getOperand(0);
    3281        4531 :   SDValue N1 = N->getOperand(1);
    3282        4531 :   EVT VT = N->getValueType(0);
    3283        4531 :   EVT CCVT = getSetCCResultType(VT);
    3284             : 
    3285             :   // fold vector ops
    3286        4531 :   if (VT.isVector())
    3287         268 :     if (SDValue FoldedVOp = SimplifyVBinOp(N))
    3288           0 :       return FoldedVOp;
    3289             : 
    3290             :   SDLoc DL(N);
    3291             : 
    3292             :   // fold (udiv c1, c2) -> c1/c2
    3293        4531 :   ConstantSDNode *N0C = isConstOrConstSplat(N0);
    3294        4531 :   ConstantSDNode *N1C = isConstOrConstSplat(N1);
    3295        4531 :   if (N0C && N1C)
    3296           0 :     if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
    3297           0 :                                                     N0C, N1C))
    3298           0 :       return Folded;
    3299             :   // fold (udiv X, -1) -> select(X == -1, 1, 0)
    3300        7414 :   if (N1C && N1C->getAPIntValue().isAllOnesValue())
    3301          22 :     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
    3302          11 :                          DAG.getConstant(1, DL, VT),
    3303          11 :                          DAG.getConstant(0, DL, VT));
    3304             : 
    3305        4520 :   if (SDValue V = simplifyDivRem(N, DAG))
    3306          21 :     return V;
    3307             : 
    3308        4499 :   if (SDValue NewSel = foldBinOpIntoSelect(N))
    3309           7 :     return NewSel;
    3310             : 
    3311        4492 :   if (SDValue V = visitUDIVLike(N0, N1, N))
    3312        2805 :     return V;
    3313             : 
    3314             :   // div, rem -> divrem via useDivRem (N is a UDIV here, so presumably udivrem)
    3315             :   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
    3316             :   // true.  Otherwise, we break the simplification logic in visitREM().
    3317        1687 :   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
    3318        1687 :   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    3319        1623 :     if (SDValue DivRem = useDivRem(N))
    3320          16 :         return DivRem;
    3321             : 
    3322        1671 :   return SDValue();
    3323             : }
    3324             : 
    /// Try to replace an unsigned division of N0 by N1 with other nodes. N
    /// supplies the result type and the debug location; visitREM also calls this
    /// with a remainder node as N. Returns the replacement value, or an empty
    /// SDValue if none is built.
    3325        5033 : SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
    3326             :   SDLoc DL(N);
    3327        5033 :   EVT VT = N->getValueType(0);
    3328             : 
    3329             :   // fold (udiv x, (1 << c)) -> x >>u c
    3330        8471 :   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
    3331        3438 :       DAG.isKnownToBeAPowerOfTwo(N1)) {
    3332        2438 :     SDValue LogBase2 = BuildLogBase2(N1, DL);
    3333        2438 :     AddToWorklist(LogBase2.getNode());
    3334             : 
                           // The shift amount is converted to the shift-amount type of N0's type.
    3335        4876 :     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    3336        2438 :     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    3337        2438 :     AddToWorklist(Trunc.getNode());
    3338        4876 :     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
    3339             :   }
    3340             : 
    3341             :   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
    3342        2595 :   if (N1.getOpcode() == ISD::SHL) {
    3343          15 :     SDValue N10 = N1.getOperand(0);
    3344          30 :     if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
    3345          15 :         DAG.isKnownToBeAPowerOfTwo(N10)) {
    3346          15 :       SDValue LogBase2 = BuildLogBase2(N10, DL);
    3347          15 :       AddToWorklist(LogBase2.getNode());
    3348             : 
                             // The addition is done in the type of the existing shift amount y.
    3349          15 :       EVT ADDVT = N1.getOperand(1).getValueType();
    3350          15 :       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
    3351          15 :       AddToWorklist(Trunc.getNode());
    3352          30 :       SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
    3353          15 :       AddToWorklist(Add.getNode());
    3354          30 :       return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
    3355             :     }
    3356             :   }
    3357             : 
    3358             :   // fold (udiv x, c) -> alternate
                         // Only attempted when the divisor is a constant (vector) and the target
                         // reports integer division as not cheap for this type.
    3359        2580 :   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
    3360        3580 :   if (isConstantOrConstantVector(N1) &&
    3361        2000 :       !TLI.isIntDivCheap(N->getValueType(0), Attr))
    3362         957 :     if (SDValue Op = BuildUDIV(N))
    3363         812 :       return Op;
    3364             : 
    3365        1768 :   return SDValue();
    3366             : }
    3367             : 
    3368             : // handles ISD::SREM and ISD::UREM
                       // The opcode of N selects the signed or unsigned path (isSigned). Returns
                       // the replacement value, or an empty SDValue when no fold applies.
    3369        4801 : SDValue DAGCombiner::visitREM(SDNode *N) {
    3370        4801 :   unsigned Opcode = N->getOpcode();
    3371        4801 :   SDValue N0 = N->getOperand(0);
    3372        4801 :   SDValue N1 = N->getOperand(1);
    3373        4801 :   EVT VT = N->getValueType(0);
    3374        4801 :   EVT CCVT = getSetCCResultType(VT);
    3375             : 
    3376             :   bool isSigned = (Opcode == ISD::SREM);
    3377             :   SDLoc DL(N);
    3378             : 
    3379             :   // fold (rem c1, c2) -> c1%c2
    3380        4801 :   ConstantSDNode *N0C = isConstOrConstSplat(N0);
    3381        4801 :   ConstantSDNode *N1C = isConstOrConstSplat(N1);
    3382        4801 :   if (N0C && N1C)
    3383           2 :     if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
    3384           2 :       return Folded;
    3385             :   // fold (urem X, -1) -> select(X == -1, 0, x)
    3386        6732 :   if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
    3387          18 :     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
    3388           9 :                          DAG.getConstant(0, DL, VT), N0);
    3389             : 
    3390        4790 :   if (SDValue V = simplifyDivRem(N, DAG))
    3391          43 :     return V;
    3392             : 
    3393        4747 :   if (SDValue NewSel = foldBinOpIntoSelect(N))
    3394          14 :     return NewSel;
    3395             : 
    3396        4733 :   if (isSigned) {
    3397             :     // If we know the sign bits of both operands are zero, strength reduce to a
    3398             :     // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
    3399        1577 :     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
    3400          40 :       return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
    3401             :   } else {
    3402        3156 :     SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
    3403        3156 :     if (DAG.isKnownToBeAPowerOfTwo(N1)) {
    3404             :       // fold (urem x, pow2) -> (and x, pow2-1)
    3405        2750 :       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
    3406        1375 :       AddToWorklist(Add.getNode());
    3407        2750 :       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    3408             :     }
    3409        1789 :     if (N1.getOpcode() == ISD::SHL &&
    3410          16 :         DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
    3411             :       // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
    3412          12 :       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
    3413           6 :       AddToWorklist(Add.getNode());
    3414          12 :       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    3415             :     }
    3416             :   }
    3417             : 
    3418        3332 :   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
    3419             : 
    3420             :   // If X/C can be simplified by the division-by-constant logic, lower
    3421             :   // X%C to the equivalent of X-X/C*C.
    3422             :   // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
    3423             :   // speculative DIV must not cause a DIVREM conversion.  We guard against this
    3424             :   // by skipping the simplification if isIntDivCheap().  When div is not cheap,
    3425             :   // combine will not return a DIVREM.  Regardless, checking cheapness here
    3426             :   // makes sense since the simplification results in fatter code.
    3427        3332 :   if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
    3428             :     SDValue OptimizedDiv =
    3429         903 :         isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
    3430         903 :     if (OptimizedDiv.getNode()) {
    3431        1520 :       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
    3432        1520 :       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
    3433         760 :       AddToWorklist(OptimizedDiv.getNode());
    3434         760 :       AddToWorklist(Mul.getNode());
    3435         760 :       return Sub;
    3436             :     }
    3437             :   }
    3438             : 
    3439             :   // sdiv, srem -> sdivrem
                         // This path is reached for both SREM and UREM. Value 1 of the node
                         // returned by useDivRem is what replaces the remainder.
    3440        2572 :   if (SDValue DivRem = useDivRem(N))
    3441         138 :     return DivRem.getValue(1);
    3442             : 
    3443        2434 :   return SDValue();
    3444             : }
    3445             : 
    /// Combine a MULHS node: fold zero, one and undef operands, then, for a
    /// scalar simple type whose double-width MUL is legal, sign-extend both
    /// operands, multiply, shift right by the original width and truncate.
    /// Returns the replacement value, or an empty SDValue when nothing applies.
    3446        1513 : SDValue DAGCombiner::visitMULHS(SDNode *N) {
    3447        1513 :   SDValue N0 = N->getOperand(0);
    3448        1513 :   SDValue N1 = N->getOperand(1);
    3449        3026 :   EVT VT = N->getValueType(0);
    3450             :   SDLoc DL(N);
    3451             : 
    3452        1513 :   if (VT.isVector()) {
    3453             :     // fold (mulhs x, 0) -> 0
    3454         808 :     if (ISD::isBuildVectorAllZeros(N1.getNode()))
    3455           0 :       return N1;
                           // Same fold with the all-zeros vector as the first operand.
    3456         808 :     if (ISD::isBuildVectorAllZeros(N0.getNode()))
    3457           2 :       return N0;
    3458             :   }
    3459             : 
    3460             :   // fold (mulhs x, 0) -> 0
    3461        1511 :   if (isNullConstant(N1))
    3462           0 :     return N1;
    3463             :   // fold (mulhs x, 1) -> (sra x, size(x)-1)
    3464        1511 :   if (isOneConstant(N1))
    3465           0 :     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
    3466           0 :                        DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
    3467           0 :                                        getShiftAmountTy(N0.getValueType())));
    3468             : 
    3469             :   // fold (mulhs x, undef) -> 0
    3470        3022 :   if (N0.isUndef() || N1.isUndef())
    3471           0 :     return DAG.getConstant(0, DL, VT);
    3472             : 
    3473             :   // If the type twice as wide is legal, transform the mulhs to a wider multiply
    3474             :   // plus a shift.
    3475        1511 :   if (VT.isSimple() && !VT.isVector()) {
    3476         705 :     MVT Simple = VT.getSimpleVT();
    3477         705 :     unsigned SimpleSize = Simple.getSizeInBits();
    3478         705 :     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    3479         705 :     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
                             // N0 and N1 are reused as temporaries for the widened values.
    3480         138 :       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
    3481         138 :       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
    3482         138 :       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
    3483          69 :       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
    3484             :             DAG.getConstant(SimpleSize, DL,
    3485          69 :                             getShiftAmountTy(N1.getValueType())));
    3486         138 :       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    3487             :     }
    3488             :   }
    3489             : 
    3490        1442 :   return SDValue();
    3491             : }
    3492             : 
    /// Combine a MULHU node: fold zero, one and undef operands, turn a multiply
    /// by a known power of two into a logical shift right, then, for a scalar
    /// simple type whose double-width MUL is legal, zero-extend both operands,
    /// multiply, shift right by the original width and truncate. Returns the
    /// replacement value, or an empty SDValue when nothing applies.
    3493        6357 : SDValue DAGCombiner::visitMULHU(SDNode *N) {
    3494        6357 :   SDValue N0 = N->getOperand(0);
    3495        6357 :   SDValue N1 = N->getOperand(1);
    3496       12714 :   EVT VT = N->getValueType(0);
    3497             :   SDLoc DL(N);
    3498             : 
    3499        6357 :   if (VT.isVector()) {
    3500             :     // fold (mulhu x, 0) -> 0
    3501        1177 :     if (ISD::isBuildVectorAllZeros(N1.getNode()))
    3502           0 :       return N1;
                           // Same fold with the all-zeros vector as the first operand.
    3503        1177 :     if (ISD::isBuildVectorAllZeros(N0.getNode()))
    3504           0 :       return N0;
    3505             :   }
    3506             : 
    3507             :   // fold (mulhu x, 0) -> 0
    3508        6357 :   if (isNullConstant(N1))
    3509           0 :     return N1;
    3510             :   // fold (mulhu x, 1) -> 0
    3511        6357 :   if (isOneConstant(N1))
    3512           0 :     return DAG.getConstant(0, DL, N0.getValueType());
    3513             :   // fold (mulhu x, undef) -> 0
    3514        6357 :   if (N0.isUndef() || N1.isUndef())
    3515           0 :     return DAG.getConstant(0, DL, VT);
    3516             : 
    3517             :   // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
    3518        8279 :   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
    3519        6495 :       DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
                           // NOTE(review): this DL shadows the function-level DL declared above;
                           // both are constructed from N.
    3520             :     SDLoc DL(N);
    3521             :     unsigned NumEltBits = VT.getScalarSizeInBits();
    3522          79 :     SDValue LogBase2 = BuildLogBase2(N1, DL);
    3523          79 :     SDValue SRLAmt = DAG.getNode(
    3524          79 :         ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
    3525          79 :     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    3526          79 :     SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
    3527         158 :     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
    3528             :   }
    3529             : 
    3530             :   // If the type twice as wide is legal, transform the mulhu to a wider multiply
    3531             :   // plus a shift.
    3532        6278 :   if (VT.isSimple() && !VT.isVector()) {
    3533        5180 :     MVT Simple = VT.getSimpleVT();
    3534        5180 :     unsigned SimpleSize = Simple.getSizeInBits();
    3535        5180 :     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    3536        5180 :     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
                             // N0 and N1 are reused as temporaries for the widened values.
    3537         484 :       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
    3538         484 :       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
    3539         484 :       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
    3540         242 :       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
    3541             :             DAG.getConstant(SimpleSize, DL,
    3542         242 :                             getShiftAmountTy(N1.getValueType())));
    3543         484 :       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    3544             :     }
    3545             :   }
    3546             : 
    3547        6036 :   return SDValue();
    3548             : }
    3549             : 
    3550             : /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
    3551             : /// give the opcodes for the two computations that are being performed. Return
    3552             : /// the CombineTo result if a simplification was made, else an empty SDValue.
    3553        6336 : SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
    3554             :                                                 unsigned HiOp) {
    3555             :   // If the high half is not needed, just compute the low half.
                         // NOTE(review): this check accepts isOperationLegalOrCustom for LoOp,
                         // while the matching check for HiOp below requires isOperationLegal;
                         // confirm the asymmetry is intended.
    3556        6336 :   bool HiExists = N->hasAnyUseOfValue(1);
    3557        6336 :   if (!HiExists &&
    3558          36 :       (!LegalOperations ||
    3559          20 :        TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
    3560          48 :     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    3561             :     return CombineTo(N, Res, Res);
    3562             :   }
    3563             : 
    3564             :   // If the low half is not needed, just compute the high half.
    3565        6320 :   bool LoExists = N->hasAnyUseOfValue(0);
    3566        6320 :   if (!LoExists &&
    3567        1411 :       (!LegalOperations ||
    3568         716 :        TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
    3569        2276 :     SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    3570             :     return CombineTo(N, Res, Res);
    3571             :   }
    3572             : 
    3573             :   // If both halves are used, return as it is.
    3574        5625 :   if (LoExists && HiExists)
    3575        4899 :     return SDValue();
    3576             : 
    3577             :   // If the two computed results can be simplified separately, separate them.
                         // At most one half is used from here on. The single-result node is built
                         // and run through combine(); N is replaced only if that produced a
                         // different value that also passes the legality check.
    3578         726 :   if (LoExists) {
    3579          20 :     SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    3580          10 :     AddToWorklist(Lo.getNode());
    3581          10 :     SDValue LoOpt = combine(Lo.getNode());
    3582          10 :     if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
    3583           0 :         (!LegalOperations ||
    3584           0 :          TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
    3585           0 :       return CombineTo(N, LoOpt, LoOpt);
    3586             :   }
    3587             : 
    3588         726 :   if (HiExists) {
    3589        1580 :     SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    3590         716 :     AddToWorklist(Hi.getNode());
    3591         716 :     SDValue HiOpt = combine(Hi.getNode());
    3592         716 :     if (HiOpt.getNode() && HiOpt != Hi &&
    3593           0 :         (!LegalOperations ||
    3594           0 :          TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
    3595           0 :       return CombineTo(N, HiOpt, HiOpt);
    3596             :   }
    3597             : 
    3598         726 :   return SDValue();
    3599             : }
    3600             : 
    /// Combine an SMUL_LOHI node. First tries SimplifyNodeWithTwoResults with
    /// MUL and MULHS; otherwise, for a scalar simple type whose double-width MUL
    /// is legal, computes both halves from one sign-extended wide multiply.
    /// Returns the CombineTo result, or an empty SDValue when nothing applies.
    3601         723 : SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
    3602         723 :   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
    3603         179 :     return Res;
    3604             : 
    3605        1088 :   EVT VT = N->getValueType(0);
    3606             :   SDLoc DL(N);
    3607             : 
    3608             :   // If the type twice as wide is legal, transform the smul_lohi into a wider
    3609             :   // multiply plus a shift.
    3610         544 :   if (VT.isSimple() && !VT.isVector()) {
    3611         544 :     MVT Simple = VT.getSimpleVT();
    3612         544 :     unsigned SimpleSize = Simple.getSizeInBits();
    3613         544 :     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    3614         544 :     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
                             // Lo and Hi first hold the two sign-extended operands; after the
                             // MUL, Lo holds the full double-width product.
    3615           0 :       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
    3616           0 :       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
    3617           0 :       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
    3618             :       // Compute the high part (passed to CombineTo as the second value).
    3619           0 :       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
    3620             :             DAG.getConstant(SimpleSize, DL,
    3621           0 :                             getShiftAmountTy(Lo.getValueType())));
    3622           0 :       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
    3623             :       // Compute the low part (passed to CombineTo as the first value).
    3624           0 :       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
    3625             :       return CombineTo(N, Lo, Hi);
    3626             :     }
    3627             :   }
    3628             : 
    3629         544 :   return SDValue();
    3630             : }
    3631             : 
    /// Combine a UMUL_LOHI node. First tries SimplifyNodeWithTwoResults with
    /// MUL and MULHU; otherwise, for a scalar simple type whose double-width MUL
    /// is legal, computes both halves from one zero-extended wide multiply.
    /// Returns the CombineTo result, or an empty SDValue when nothing applies.
    3632        5613 : SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
    3633        5613 :   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
    3634         532 :     return Res;
    3635             : 
    3636       10162 :   EVT VT = N->getValueType(0);
    3637             :   SDLoc DL(N);
    3638             : 
    3639             :   // If the type twice as wide is legal, transform the umul_lohi into a wider
    3640             :   // multiply plus a shift.
    3641        5081 :   if (VT.isSimple() && !VT.isVector()) {
    3642        5081 :     MVT Simple = VT.getSimpleVT();
    3643        5081 :     unsigned SimpleSize = Simple.getSizeInBits();
    3644        5081 :     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    3645        5081 :     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
                             // Lo and Hi first hold the two zero-extended operands; after the
                             // MUL, Lo holds the full double-width product.
    3646           0 :       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
    3647           0 :       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
    3648           0 :       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
    3649             :       // Compute the high part (passed to CombineTo as the second value).
    3650           0 :       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
    3651             :             DAG.getConstant(SimpleSize, DL,
    3652           0 :                             getShiftAmountTy(Lo.getValueType())));
    3653           0 :       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
    3654             :       // Compute the low part (passed to CombineTo as the first value).
    3655           0 :       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
    3656             :       return CombineTo(N, Lo, Hi);
    3657             :     }
    3658             :   }
    3659             : 
    3660        5081 :   return SDValue();
    3661             : }
    3662             : 
    /// Combine an SMULO node. The only fold: when operand 1 is a ConstantSDNode
    /// equal to 2, rebuild the node as SADDO of operand 0 with itself, keeping
    /// N's value-type list. Returns an empty SDValue otherwise.
    3663           0 : SDValue DAGCombiner::visitSMULO(SDNode *N) {
    3664             :   // (smulo x, 2) -> (saddo x, x)
    3665           0 :   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
    3666           0 :     if (C2->getAPIntValue() == 2)
    3667           0 :       return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
    3668           0 :                          N->getOperand(0), N->getOperand(0));
    3669             : 
    3670           0 :   return SDValue();
    3671             : }
    3672             : 
    /// Combine a UMULO node. The only fold: when operand 1 is a ConstantSDNode
    /// equal to 2, rebuild the node as UADDO of operand 0 with itself, keeping
    /// N's value-type list. Returns an empty SDValue otherwise.
    3673           0 : SDValue DAGCombiner::visitUMULO(SDNode *N) {
    3674             :   // (umulo x, 2) -> (uaddo x, x)
    3675           0 :   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
    3676           0 :     if (C2->getAPIntValue() == 2)
    3677           0 :       return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
    3678           0 :                          N->getOperand(0), N->getOperand(0));
    3679             : 
    3680           0 :   return SDValue();
    3681             : }
    3682             : 
    3683       25437 : SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
    3684       25437 :   SDValue N0 = N->getOperand(0);
    3685       25437 :   SDValue N1 = N->getOperand(1);
    3686       25437 :   EVT VT = N0.getValueType();
    3687             : 
    3688             :   // fold vector ops
    3689       25437 :   if (VT.isVector())
    3690       20293 :     if (SDValue FoldedVOp = SimplifyVBinOp(N))
    3691         392 :       return FoldedVOp;
    3692             : 
    3693             :   // fold operation with constant operands.
    3694             :   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
    3695             :   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
    3696       25045 :   if (N0C && N1C)
    3697           0 :     return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
    3698             : 
    3699             :   // canonicalize constant to RHS
    3700       25055 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
    3701          10 :      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    3702          20 :     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
    3703             : 
    3704             :   // If sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
    3705             :   // Only do this if the current op isn't legal and the flipped is.
    3706       25035 :   unsigned Opcode = N->getOpcode();
    3707       25035 :   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    3708        5455 :   if (!TLI.isOperationLegal(Opcode, VT) &&
    3709        5455 :       (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
    3710          52 :       (N1.isUndef() || DAG.SignBitIsZero(N1))) {
    3711             :     unsigned AltOpcode;
    3712             :     switch (Opcode) {
    3713             :     case ISD::SMIN: AltOpcode = ISD::UMIN; break;
    3714             :     case ISD::SMAX: AltOpcode = ISD::UMAX; break;
    3715             :     case ISD::UMIN: AltOpcode = ISD::SMIN; break;
    3716             :     case ISD::UMAX: AltOpcode = ISD::SMAX; break;
    3717           0 :     default: llvm_unreachable("Unknown MINMAX opcode");
    3718             :     }
    3719             :     if (TLI.isOperationLegal(AltOpcode, VT))
    3720          10 :       return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
    3721             :   }
    3722             : 
    3723       25030 :   return SDValue();
    3724             : }
    3725             : 
    3726             : /// If this is a binary operator with two operands of the same opcode, try to
    3727             : /// simplify it.
    3728       66791 : SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
    3729       66791 :   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
    3730       66791 :   EVT VT = N0.getValueType();
    3731             :   assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
    3732             : 
    3733             :   // Bail early if none of these transforms apply.
    3734       66791 :   if (N0.getNumOperands() == 0) return SDValue();
    3735             : 
    3736             :   // For each of OP in AND/OR/XOR:
    3737             :   // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
    3738             :   // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
    3739             :   // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
    3740             :   // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
    3741             :   // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
    3742             :   //
    3743             :   // do not sink logical op inside of a vector extend, since it may combine
    3744             :   // into a vsetcc.
    3745      133546 :   EVT Op0VT = N0.getOperand(0).getValueType();
    3746       66611 :   if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
    3747       66530 :        N0.getOpcode() == ISD::SIGN_EXTEND ||
    3748       66530 :        N0.getOpcode() == ISD::BSWAP ||
    3749             :        // Avoid infinite looping with PromoteIntBinOp.
    3750        1255 :        (N0.getOpcode() == ISD::ANY_EXTEND &&
    3751       66530 :         (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
    3752        1093 :        (N0.getOpcode() == ISD::TRUNCATE &&
    3753        1374 :         (!TLI.isZExtFree(VT, Op0VT) ||
    3754         281 :          !TLI.isTruncateFree(Op0VT, VT)) &&
    3755        1859 :         TLI.isTypeLegal(Op0VT))) &&
    3756           9 :       !VT.isVector() &&
    3757       67629 :       Op0VT == N1.getOperand(0).getValueType() &&
    3758         834 :       (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
    3759         834 :     SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
    3760             :                                  N0.getOperand(0).getValueType(),
    3761         852 :                                  N0.getOperand(0), N1.getOperand(0));
    3762         834 :     AddToWorklist(ORNode.getNode());
    3763        1681 :     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
    3764             :   }
    3765             : 
    3766             :   // For each of OP in SHL/SRL/SRA/AND...
    3767             :   //   fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
    3768             :   //   fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
    3769             :   //   fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
    3770       65690 :   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
    3771      131344 :        N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
    3772        8717 :       N0.getOperand(1) == N1.getOperand(1)) {
    3773          99 :     SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
    3774             :                                  N0.getOperand(0).getValueType(),
    3775         103 :                                  N0.getOperand(0), N1.getOperand(0));
    3776          99 :     AddToWorklist(ORNode.getNode());
    3777          99 :     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
    3778         198 :                        ORNode, N0.getOperand(1));
    3779             :   }
    3780             : 
    3781             :   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
    3782             :   // Only perform this optimization up until type legalization, before
    3783             :   // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
    3784             :   // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
    3785             :   // we don't want to undo this promotion.
    3786             :   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
    3787             :   // on scalars.
    3788       37630 :   if ((N0.getOpcode() == ISD::BITCAST ||
    3789       65840 :        N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
    3790       28215 :        Level <= AfterLegalizeTypes) {
    3791         697 :     SDValue In0 = N0.getOperand(0);
    3792         697 :     SDValue In1 = N1.getOperand(0);
    3793         697 :     EVT In0Ty = In0.getValueType();
    3794         697 :     EVT In1Ty = In1.getValueType();
    3795             :     SDLoc DL(N);
    3796             :     // If both incoming values are integers, and the original types are the
    3797             :     // same.
    3798         727 :     if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
    3799         354 :       SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
    3800         236 :       SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
    3801         118 :       AddToWorklist(Op.getNode());
    3802         118 :       return BC;
    3803             :     }
    3804             :   }
    3805             : 
    3806             :   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
    3807             :   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
    3808             :   // If both shuffles use the same mask, and both shuffle within a single
    3809             :   // vector, then it is worthwhile to move the swizzle after the operation.
    3810             :   // The type-legalizer generates this pattern when loading illegal
    3811             :   // vector types from memory. In many cases this allows additional shuffle
    3812             :   // optimizations.
    3813             :   // There are other cases where moving the shuffle after the xor/and/or
    3814             :   // is profitable even if shuffles don't perform a swizzle.
    3815             :   // If both shuffles use the same mask, and both shuffles have the same first
    3816             :   // or second operand, then it might still be profitable to move the shuffle
    3817             :   // after the xor/and/or operation.
    3818       65722 :   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
    3819             :     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
    3820             :     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
    3821             : 
    3822             :     assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
    3823             :            "Inputs to shuffles are not the same type");
    3824             : 
    3825             :     // Check that both shuffles use the same mask. The masks are known to be of
    3826             :     // the same length because the result vector type is the same.
    3827             :     // Check also that shuffles have only one use to avoid introducing extra
    3828             :     // instructions.
    3829        1065 :     if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
    3830        1124 :         SVN0->getMask().equals(SVN1->getMask())) {
    3831         166 :       SDValue ShOp = N0->getOperand(1);
    3832             : 
    3833             :       // Don't try to fold this node if it requires introducing a
    3834             :       // build vector of all zeros that might be illegal at this stage.
    3835         166 :       if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
    3836          26 :         if (!LegalTypes)
    3837          24 :           ShOp = DAG.getConstant(0, SDLoc(N), VT);
    3838             :         else
    3839           2 :           ShOp = SDValue();
    3840             :       }
    3841             : 
    3842             :       // (AND (shuf (A, C), shuf (B, C))) -> shuf (AND (A, B), C)
    3843             :       // (OR  (shuf (A, C), shuf (B, C))) -> shuf (OR  (A, B), C)
    3844             :       // (XOR (shuf (A, C), shuf (B, C))) -> shuf (XOR (A, B), V_0)
    3845         240 :       if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
    3846          74 :         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
    3847         148 :                                       N0->getOperand(0), N1->getOperand(0));
    3848          74 :         AddToWorklist(NewNode.getNode());
    3849          74 :         return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
    3850         148 :                                     SVN0->getMask());
    3851             :       }
    3852             : 
    3853             :       // Don't try to fold this node if it requires introducing a
    3854             :       // build vector of all zeros that might be illegal at this stage.
    3855          92 :       ShOp = N0->getOperand(0);
    3856          92 :       if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
    3857          14 :         if (!LegalTypes)
    3858          12 :           ShOp = DAG.getConstant(0, SDLoc(N), VT);
    3859             :         else
    3860           2 :           ShOp = SDValue();
    3861             :       }
    3862             : 
    3863             :       // (AND (shuf (C, A), shuf (C, B))) -> shuf (C, AND (A, B))
    3864             :       // (OR  (shuf (C, A), shuf (C, B))) -> shuf (C, OR  (A, B))
    3865             :       // (XOR (shuf (C, A), shuf (C, B))) -> shuf (V_0, XOR (A, B))
    3866         128 :       if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
    3867          36 :         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
    3868          72 :                                       N0->getOperand(1), N1->getOperand(1));
    3869          36 :         AddToWorklist(NewNode.getNode());
    3870          36 :         return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
    3871          72 :                                     SVN0->getMask());
    3872             :       }
    3873             :     }
    3874             :   }
    3875             : 
    3876       65612 :   return SDValue();
    3877             : }
    3878             : 
    3879             : /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
    3880      521342 : SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
    3881             :                                        const SDLoc &DL) {
    3882      521342 :   SDValue LL, LR, RL, RR, N0CC, N1CC;
    3883      609702 :   if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
    3884       88360 :       !isSetCCEquivalent(N1, RL, RR, N1CC))
    3885      518381 :     return SDValue();
    3886             : 
    3887             :   assert(N0.getValueType() == N1.getValueType() &&
    3888             :          "Unexpected operand types for bitwise logic op");
    3889             :   assert(LL.getValueType() == LR.getValueType() &&
    3890             :          RL.getValueType() == RR.getValueType() &&
    3891             :          "Unexpected operand types for setcc");
    3892             : 
    3893             :   // If we're here post-legalization or the logic op type is not i1, the logic
    3894             :   // op type must match a setcc result type. Also, all folds require new
    3895             :   // operations on the left and right operands, so those types must match.
    3896        2961 :   EVT VT = N0.getValueType();
    3897        2961 :   EVT OpVT = LL.getValueType();
    3898        2961 :   if (LegalOperations || VT.getScalarType() != MVT::i1)
    3899        1500 :     if (VT != getSetCCResultType(OpVT))
    3900          24 :       return SDValue();
    3901        5882 :   if (OpVT != RL.getValueType())
    3902         109 :     return SDValue();
    3903             : 
    3904        2828 :   ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
    3905        2828 :   ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
    3906        2828 :   bool IsInteger = OpVT.isInteger();
    3907        3337 :   if (LR == RR && CC0 == CC1 && IsInteger) {
    3908         271 :     bool IsZero = isNullConstantOrNullSplatConstant(LR);
    3909         271 :     bool IsNeg1 = isAllOnesConstantOrAllOnesSplatConstant(LR);
    3910             : 
    3911             :     // All bits clear?
    3912         271 :     bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
    3913             :     // All sign bits clear?
    3914         271 :     bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
    3915             :     // Any bits set?
    3916         271 :     bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
    3917             :     // Any sign bits set?
    3918         271 :     bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
    3919             : 
    3920             :     // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
    3921             :     // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
    3922             :     // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
    3923             :     // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
    3924         271 :     if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
    3925          72 :       SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
    3926          69 :       AddToWorklist(Or.getNode());
    3927          69 :       return DAG.getSetCC(DL, VT, Or, LR, CC1);
    3928             :     }
    3929             : 
    3930             :     // All bits set?
    3931         202 :     bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
    3932             :     // All sign bits set?
    3933         202 :     bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
    3934             :     // Any bits clear?
    3935         202 :     bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
    3936             :     // Any sign bits clear?
    3937         202 :     bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
    3938             : 
    3939             :     // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
    3940             :     // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
    3941             :     // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
    3942             :     // (or  (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
    3943         202 :     if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
    3944          22 :       SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
    3945          22 :       AddToWorklist(And.getNode());
    3946          22 :       return DAG.getSetCC(DL, VT, And, LR, CC1);
    3947             :     }
    3948             :   }
    3949             : 
    3950             :   // TODO: What is the 'or' equivalent of this fold?
    3951             :   // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
    3952        2115 :   if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
    3953        2824 :       IsInteger && CC0 == ISD::SETNE &&
    3954          70 :       ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
    3955          38 :        (isAllOnesConstant(LR) && isNullConstant(RR)))) {
    3956           3 :     SDValue One = DAG.getConstant(1, DL, OpVT);
    3957           3 :     SDValue Two = DAG.getConstant(2, DL, OpVT);
    3958           3 :     SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
    3959           3 :     AddToWorklist(Add.getNode());
    3960           3 :     return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
    3961             :   }
    3962             : 
    3963             :   // Try more general transforms if the predicates match and the only user of
    3964             :   // the compares is the 'and' or 'or'.
    3965        2252 :   if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
    3966        2966 :       N0.hasOneUse() && N1.hasOneUse()) {
    3967             :     // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
    3968             :     // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
    3969         232 :     if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
    3970          81 :       SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
    3971          81 :       SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
    3972         158 :       SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
    3973          79 :       SDValue Zero = DAG.getConstant(0, DL, OpVT);
    3974          79 :       return DAG.getSetCC(DL, VT, Or, Zero, CC1);
    3975             :     }
    3976             :   }
    3977             : 
    3978             :   // Canonicalize equivalent operands to LL == RL.
    3979        2695 :   if (LL == RR && LR == RL) {
    3980           0 :     CC1 = ISD::getSetCCSwappedOperands(CC1);
    3981             :     std::swap(RL, RR);
    3982             :   }
    3983             : 
    3984             :   // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
    3985             :   // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
    3986        3292 :   if (LL == RL && LR == RR) {
    3987          86 :     ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
    3988          43 :                                 : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
    3989          86 :     if (NewCC != ISD::SETCC_INVALID &&
    3990          71 :         (!LegalOperations ||
    3991          39 :          (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
    3992             :           TLI.isOperationLegal(ISD::SETCC, OpVT))))
    3993          32 :       return DAG.getSetCC(DL, VT, LL, LR, NewCC);
    3994             :   }
    3995             : 
    3996        2623 :   return SDValue();
    3997             : }
    3998             : 
    3999             : /// This contains all DAGCombine rules which reduce two values combined by
    4000             : /// an And operation to a single value. This makes them reusable in the context
    4001             : /// of visitSELECT(). Rules involving constants are not included as
    4002             : /// visitSELECT() already handles those cases.
    4003      377699 : SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
    4004      755398 :   EVT VT = N1.getValueType();
    4005             :   SDLoc DL(N);
    4006             : 
    4007             :   // fold (and x, undef) -> 0
    4008      377699 :   if (N0.isUndef() || N1.isUndef())
    4009           2 :     return DAG.getConstant(0, DL, VT);
    4010             : 
    4011      377697 :   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
    4012          93 :     return V;
    4013             : 
    4014      377604 :   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
    4015         280 :       VT.getSizeInBits() <= 64) {
    4016             :     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
    4017             :       if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
    4018             :         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
    4019             :         // immediate for an add, but it is legal if its top c2 bits are set,
    4020             :         // transform the ADD so the immediate doesn't need to be materialized
    4021             :         // in a register.
    4022          62 :         APInt ADDC = ADDI->getAPIntValue();
    4023          62 :         APInt SRLC = SRLI->getAPIntValue();
    4024         124 :         if (ADDC.getMinSignedBits() <= 64 &&
    4025         123 :             SRLC.ult(VT.getSizeInBits()) &&
    4026         122 :             !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
    4027             :           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
    4028          13 :                                              SRLC.getZExtValue());
    4029          26 :           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
    4030             :             ADDC |= Mask;
    4031          26 :             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
    4032             :               SDLoc DL0(N0);
    4033             :               SDValue NewAdd =
    4034          13 :                 DAG.getNode(ISD::ADD, DL0, VT,
    4035          13 :                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
    4036          13 :               CombineTo(N0.getNode(), NewAdd);
    4037             :               // Return N so it doesn't get rechecked!
    4038          13 :               return SDValue(N, 0);
    4039             :             }
    4040             :           }
    4041             :         }
    4042             :       }
    4043             :     }
    4044             :   }
    4045             : 
    4046             :   // Reduce bit extract of low half of an integer to the narrower type.
    4047             :   // (and (srl i64:x, K), KMask) ->
    4048             :   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
    4049      405727 :   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    4050             :     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
    4051             :       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
    4052       19572 :         unsigned Size = VT.getSizeInBits();
    4053       19572 :         const APInt &AndMask = CAnd->getAPIntValue();
    4054       19572 :         unsigned ShiftBits = CShift->getZExtValue();
    4055             : 
    4056             :         // Bail out, this node will probably disappear anyway.
    4057       19572 :         if (ShiftBits == 0)
    4058           2 :           return SDValue();
    4059             : 
    4060             :         unsigned MaskBits = AndMask.countTrailingOnes();
    4061       19570 :         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
    4062             : 
    4063       36135 :         if (AndMask.isMask() &&
    4064             :             // Required bits must not span the two halves of the integer and
    4065             :             // must fit in the half size type.
    4066       25745 :             (ShiftBits + MaskBits <= Size / 2) &&
    4067        9303 :             TLI.isNarrowingProfitable(VT, HalfVT) &&
    4068         246 :             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
    4069         246 :             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
    4070       19816 :             TLI.isTruncateFree(VT, HalfVT) &&
    4071         123 :             TLI.isZExtFree(HalfVT, VT)) {
    4072             :           // The isNarrowingProfitable is to avoid regressions on PPC and
    4073             :           // AArch64 which match a few 64-bit bit insert / bit extract patterns
    4074             :           // on downstream users of this. Those patterns could probably be
    4075             :           // extended to handle extensions mixed in.
    4076             : 
    4077             :           SDValue SL(N0);
    4078             :           assert(MaskBits <= Size);
    4079             : 
    4080             :           // Extracting the highest bit of the low half.
    4081         116 :           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
    4082         116 :           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
    4083         116 :                                       N0.getOperand(0));
    4084             : 
    4085         278 :           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
    4086         162 :           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
    4087         162 :           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
    4088         162 :           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
    4089         278 :           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
    4090             :         }
    4091             :       }
    4092             :     }
    4093             :   }
    4094             : 
    4095      377473 :   return SDValue();
    4096             : }
    4097             : 
    4098           0 : bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
    4099             :                                    EVT LoadResultTy, EVT &ExtVT) {
    4100           0 :   if (!AndC->getAPIntValue().isMask())
    4101           0 :     return false;
    4102             : 
    4103             :   unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
    4104             : 
    4105           0 :   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
    4106           0 :   EVT LoadedVT = LoadN->getMemoryVT();
    4107             : 
    4108           0 :   if (ExtVT == LoadedVT &&
    4109           0 :       (!LegalOperations ||
    4110           0 :        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
    4111             :     // ZEXTLOAD will match without needing to change the size of the value being
    4112             :     // loaded.
    4113           0 :     return true;
    4114             :   }
    4115             : 
    4116             :   // Do not change the width of a volatile load.
    4117           0 :   if (LoadN->isVolatile())
    4118           0 :     return false;
    4119             : 
    4120             :   // Do not generate loads of non-round integer types since these can
    4121             :   // be expensive (and would be wrong if the type is not byte sized).
    4122           0 :   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
    4123           0 :     return false;
    4124             : 
    4125           0 :   if (LegalOperations &&
    4126           0 :       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
    4127           0 :     return false;
    4128             : 
    4129           0 :   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
    4130           0 :     return false;
    4131             : 
    4132             :   return true;
    4133             : }
    4134             : 
    4135      149739 : bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
    4136             :                                     ISD::LoadExtType ExtType, EVT &MemVT,
    4137             :                                     unsigned ShAmt) {
    4138      149739 :   if (!LDST)
    4139             :     return false;
    4140             :   // Only allow byte offsets.
    4141      149739 :   if (ShAmt % 8)
    4142             :     return false;
    4143             : 
    4144             :   // Do not generate loads of non-round integer types since these can
    4145             :   // be expensive (and would be wrong if the type is not byte sized).
    4146             :   if (!MemVT.isRound())
    4147             :     return false;
    4148             : 
    4149             :   // Don't change the width of a volatile load.
    4150       54685 :   if (LDST->isVolatile())
    4151             :     return false;
    4152             : 
    4153             :   // Verify that we are actually reducing a load width here.
    4154       53804 :   if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
    4155             :     return false;
    4156             : 
    4157             :   // Ensure that this isn't going to produce an unsupported unaligned access.
    4158       65082 :   if (ShAmt &&
    4159       11741 :       !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
    4160             :                               LDST->getAddressSpace(), ShAmt / 8))
    4161             :     return false;
    4162             : 
    4163             :   // It's not possible to generate a constant of extended or untyped type.
    4164       53335 :   EVT PtrType = LDST->getBasePtr().getValueType();
    4165       53335 :   if (PtrType == MVT::Untyped || PtrType.isExtended())
    4166             :     return false;
    4167             : 
    4168       53335 :   if (isa<LoadSDNode>(LDST)) {
    4169             :     LoadSDNode *Load = cast<LoadSDNode>(LDST);
    4170             :     // Don't transform one with multiple uses, this would require adding a new
    4171             :     // load.
    4172       53335 :     if (!SDValue(Load, 0).hasOneUse())
    4173             :       return false;
    4174             : 
    4175       22761 :     if (LegalOperations &&
    4176       18448 :         !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
    4177             :       return false;
    4178             : 
    4179             :     // For the transform to be legal, the load must produce only two values
    4180             :     // (the value loaded and the chain).  Don't transform a pre-increment
    4181             :     // load, for example, which produces an extra value.  Otherwise the
    4182             :     // transformation is not equivalent, and the downstream logic to replace
    4183             :     // uses gets things wrong.
    4184        5154 :     if (Load->getNumValues() > 2)
    4185             :       return false;
    4186             : 
    4187             :     // If the load that we're shrinking is an extload and we're not just
    4188             :     // discarding the extension we can't simply shrink the load. Bail.
    4189             :     // TODO: It would be possible to merge the extensions in some cases.
    4190        5154 :     if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
    4191         848 :         Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
    4192           1 :       return false;
    4193             : 
    4194        5153 :     if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
    4195        1874 :       return false;
    4196             :   } else {
    4197             :     assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
    4198             :     StoreSDNode *Store = cast<StoreSDNode>(LDST);
    4199             :     // Can't write outside the original store
    4200           0 :     if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
    4201             :       return false;
    4202             : 
    4203           0 :     if (LegalOperations &&
    4204           0 :         !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
    4205           0 :       return false;
    4206             :   }
    4207             :   return true;
    4208             : }
    4209             : 
    4210      200374 : bool DAGCombiner::SearchForAndLoads(SDNode *N,
    4211             :                                     SmallPtrSetImpl<LoadSDNode*> &Loads,
    4212             :                                     SmallPtrSetImpl<SDNode*> &NodesWithConsts,
    4213             :                                     ConstantSDNode *Mask,
    4214             :                                     SDNode *&NodeToMask) {
    4215             :   // Recursively search for the operands, looking for loads which can be
    4216             :   // narrowed.
    4217      553516 :   for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
    4218      754452 :     SDValue Op = N->getOperand(i);
    4219             : 
    4220      754452 :     if (Op.getValueType().isVector())
    4221             :       return false;
    4222             : 
    4223             :     // Some constants may need fixing up later if they are too large.
    4224             :     if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
    4225      353170 :       if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
    4226      179238 :           (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
    4227          89 :         NodesWithConsts.insert(N);
    4228      176206 :       continue;
    4229             :     }
    4230             : 
    4231      201020 :     if (!Op.hasOneUse())
    4232             :       return false;
    4233             : 
    4234      182599 :     switch(Op.getOpcode()) {
    4235             :     case ISD::LOAD: {
    4236             :       auto *Load = cast<LoadSDNode>(Op);
    4237        1278 :       EVT ExtVT;
    4238        2726 :       if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
    4239         170 :           isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
    4240             : 
    4241             :         // ZEXTLOAD is already small enough.
    4242         159 :         if (Load->getExtensionType() == ISD::ZEXTLOAD &&
    4243          13 :             ExtVT.bitsGE(Load->getMemoryVT()))
    4244         159 :           continue;
    4245             : 
    4246             :         // Use LE to convert equal sized loads to zext.
    4247         150 :         if (ExtVT.bitsLE(Load->getMemoryVT()))
    4248         150 :           Loads.insert(Load);
    4249             : 
    4250         150 :         continue;
    4251             :       }
    4252        1119 :       return false;
    4253             :     }
    4254        1730 :     case ISD::ZERO_EXTEND:
    4255             :     case ISD::AssertZext: {
    4256        1730 :       unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
    4257        1730 :       EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
    4258             :       EVT VT = Op.getOpcode() == ISD::AssertZext ?
    4259        1168 :         cast<VTSDNode>(Op.getOperand(1))->getVT() :
    4260        1730 :         Op.getOperand(0).getValueType();
    4261             : 
    4262             :       // We can accept extending nodes if the mask is wider or an equal
    4263             :       // width to the original type.
    4264        1730 :       if (ExtVT.bitsGE(VT))
    4265        1162 :         continue;
    4266         568 :       break;
    4267             :     }
    4268        4418 :     case ISD::OR:
    4269             :     case ISD::XOR:
    4270             :     case ISD::AND:
    4271        4418 :       if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
    4272             :                              NodeToMask))
    4273             :         return false;
    4274             :       continue;
    4275             :     }
    4276             : 
    4277             :     // Allow one node which will masked along with any loads found.
    4278      175741 :     if (NodeToMask)
    4279             :       return false;
    4280             : 
    4281             :     // Also ensure that the node to be masked only produces one data result.
    4282      175345 :     NodeToMask = Op.getNode();
    4283      350690 :     if (NodeToMask->getNumValues() > 1) {
    4284             :       bool HasValue = false;
    4285       20927 :       for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
    4286             :         MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
    4287       14747 :         if (VT != MVT::Glue && VT != MVT::Other) {
    4288        7734 :           if (HasValue) {
    4289         777 :             NodeToMask = nullptr;
    4290             :             return false;
    4291             :           }
    4292             :           HasValue = true;
    4293             :         }
    4294             :       }
    4295             :       assert(HasValue && "Node to be masked has no data result?");
    4296             :     }
    4297             :   }
    4298             :   return true;
    4299             : }
    4300             : 
    4301      308037 : bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
    4302      308037 :   auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
    4303             :   if (!Mask)
    4304             :     return false;
    4305             : 
    4306      502786 :   if (!Mask->getAPIntValue().isMask())
    4307             :     return false;
    4308             : 
    4309             :   // No need to do anything if the and directly uses a load.
    4310      228221 :   if (isa<LoadSDNode>(N->getOperand(0)))
    4311             :     return false;
    4312             : 
    4313             :   SmallPtrSet<LoadSDNode*, 8> Loads;
    4314             :   SmallPtrSet<SDNode*, 2> NodesWithConsts;
    4315      195956 :   SDNode *FixupNode = nullptr;
    4316      195956 :   if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
    4317      175243 :     if (Loads.size() == 0)
    4318             :       return false;
    4319             : 
    4320             :     LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
    4321          88 :     SDValue MaskOp = N->getOperand(1);
    4322             : 
    4323             :     // If it exists, fixup the single node we allow in the tree that needs
    4324             :     // masking.
    4325          88 :     if (FixupNode) {
    4326             :       LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
    4327          26 :       SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
    4328             :                                 FixupNode->getValueType(0),
    4329          26 :                                 SDValue(FixupNode, 0), MaskOp);
    4330          52 :       DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
    4331          26 :       if (And.getOpcode() == ISD ::AND)
    4332          50 :         DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
    4333             :     }
    4334             : 
    4335             :     // Narrow any constants that need it.
    4336         102 :     for (auto *LogicN : NodesWithConsts) {
    4337          14 :       SDValue Op0 = LogicN->getOperand(0);
    4338          14 :       SDValue Op1 = LogicN->getOperand(1);
    4339             : 
    4340             :       if (isa<ConstantSDNode>(Op0))
    4341             :           std::swap(Op0, Op1);
    4342             : 
    4343          14 :       SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
    4344          14 :                                 Op1, MaskOp);
    4345             : 
    4346          14 :       DAG.UpdateNodeOperands(LogicN, Op0, And);
    4347             :     }
    4348             : 
    4349             :     // Create narrow loads.
    4350         233 :     for (auto *Load : Loads) {
    4351             :       LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
    4352         145 :       SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
    4353         145 :                                 SDValue(Load, 0), MaskOp);
    4354         145 :       DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
    4355         145 :       if (And.getOpcode() == ISD ::AND)
    4356         145 :         And = SDValue(
    4357             :             DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
    4358         145 :       SDValue NewLoad = ReduceLoadWidth(And.getNode());
    4359             :       assert(NewLoad &&
    4360             :              "Shouldn't be masking the load if it can't be narrowed");
    4361             :       CombineTo(Load, NewLoad, NewLoad.getValue(1));
    4362             :     }
    4363          88 :     DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
    4364          88 :     return true;
    4365             :   }
    4366             :   return false;
    4367             : }
    4368             : 
    4369             : // Unfold
    4370             : //    x &  (-1 'logical shift' y)
    4371             : // To
    4372             : //    (x 'opposite logical shift' y) 'logical shift' y
    4373             : // if it is better for performance.
    4374           0 : SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
    4375             :   assert(N->getOpcode() == ISD::AND);
    4376             : 
    4377           0 :   SDValue N0 = N->getOperand(0);
    4378           0 :   SDValue N1 = N->getOperand(1);
    4379             : 
    4380             :   // Do we actually prefer shifts over mask?
    4381           0 :   if (!TLI.preferShiftsToClearExtremeBits(N0))
    4382           0 :     return SDValue();
    4383             : 
    4384             :   // Try to match  (-1 '[outer] logical shift' y)
    4385             :   unsigned OuterShift;
    4386             :   unsigned InnerShift; // The opposite direction to the OuterShift.
    4387           0 :   SDValue Y;           // Shift amount.
    4388             :   auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
    4389             :     if (!M.hasOneUse())
    4390             :       return false;
    4391             :     OuterShift = M->getOpcode();
    4392             :     if (OuterShift == ISD::SHL)
    4393             :       InnerShift = ISD::SRL;
    4394             :     else if (OuterShift == ISD::SRL)
    4395             :       InnerShift = ISD::SHL;
    4396             :     else
    4397             :       return false;
    4398             :     if (!isAllOnesConstant(M->getOperand(0)))
    4399             :       return false;
    4400             :     Y = M->getOperand(1);
    4401             :     return true;
    4402           0 :   };
    4403             : 
    4404           0 :   SDValue X;
    4405           0 :   if (matchMask(N1))
    4406           0 :     X = N0;
    4407           0 :   else if (matchMask(N0))
    4408           0 :     X = N1;
    4409             :   else
    4410           0 :     return SDValue();
    4411             : 
    4412             :   SDLoc DL(N);
    4413           0 :   EVT VT = N->getValueType(0);
    4414             : 
    4415             :   //     tmp = x   'opposite logical shift' y
    4416           0 :   SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
    4417             :   //     ret = tmp 'logical shift' y
    4418           0 :   SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
    4419             : 
    4420           0 :   return T1;
    4421             : }
    4422             : 
    4423      400002 : SDValue DAGCombiner::visitAND(SDNode *N) {
    4424      400002 :   SDValue N0 = N->getOperand(0);
    4425      400002 :   SDValue N1 = N->getOperand(1);
    4426      400002 :   EVT VT = N1.getValueType();
    4427             : 
    4428             :   // x & x --> x
    4429      400002 :   if (N0 == N1)
    4430          21 :     return N0;
    4431             : 
    4432             :   // fold vector ops
    4433      399981 :   if (VT.isVector()) {
    4434       54116 :     if (SDValue FoldedVOp = SimplifyVBinOp(N))
    4435          10 :       return FoldedVOp;
    4436             : 
    4437             :     // fold (and x, 0) -> 0, vector edition
    4438       54106 :     if (ISD::isBuildVectorAllZeros(N0.getNode()))
    4439             :       // do not return N0, because undef node may exist in N0
    4440         126 :       return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
    4441         189 :                              SDLoc(N), N0.getValueType());
    4442       54043 :     if (ISD::isBuildVectorAllZeros(N1.getNode()))
    4443             :       // do not return N1, because undef node may exist in N1
    4444           4 :       return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
    4445           6 :                              SDLoc(N), N1.getValueType());
    4446             : 
    4447             :     // fold (and x, -1) -> x, vector edition
    4448       54041 :     if (ISD::isBuildVectorAllOnes(N0.getNode()))
    4449          51 :       return N1;
    4450       53990 :     if (ISD::isBuildVectorAllOnes(N1.getNode()))
    4451           7 :       return N0;
    4452             :   }
    4453             : 
    4454             :   // fold (and c1, c2) -> c1&c2
    4455      399848 :   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
    4456      399848 :   ConstantSDNode *N1C = isConstOrConstSplat(N1);
    4457      399848 :   if (N0C && N1C && !N1C->isOpaque())
    4458          44 :     return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
    4459             :   // canonicalize constant to RHS
    4460      400440 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
    4461         614 :       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    4462        1204 :     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
    4463             :   // fold (and x, -1) -> x
    4464      399224 :   if (isAllOnesConstant(N1))
    4465          11 :     return N0;
    4466             :   // if (and x, c) is known to be zero, return 0
    4467             :   unsigned BitWidth = VT.getScalarSizeInBits();
    4468      732794 :   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
    4469     1066316 :                                    APInt::getAllOnesValue(BitWidth)))
    4470         118 :     return DAG.getConstant(0, SDLoc(N), VT);
    4471             : 
    4472      399154 :   if (SDValue NewSel = foldBinOpIntoSelect(N))
    4473         190 :     return NewSel;
    4474             : 
    4475             :   // reassociate and
    4476      544076 :   if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
    4477        2044 :     return RAND;
    4478             : 
    4479             :   // Try to convert a constant mask AND into a shuffle clear mask.
    4480      396920 :   if (VT.isVector())
    4481       53297 :     if (SDValue Shuffle = XformToShuffleWithZero(N))
    4482        1089 :       return Shuffle;
    4483             : 
    4484             :   // fold (and (or x, C), D) -> D if (C & D) == D
    4485             :   auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
    4486             :     return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
    4487             :   };
    4488      802719 :   if (N0.getOpcode() == ISD::OR &&
    4489      415656 :       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
    4490        2289 :     return N1;
    4491             :   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
    4492      393542 :   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    4493       68021 :     SDValue N0Op0 = N0.getOperand(0);
    4494       68021 :     APInt Mask = ~N1C->getAPIntValue();
    4495       68021 :     Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
    4496       68021 :     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
    4497       13463 :       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
    4498       27189 :                                  N0.getValueType(), N0Op0);
    4499             : 
    4500             :       // Replace uses of the AND with uses of the Zero extend node.
    4501       13463 :       CombineTo(N, Zext);
    4502             : 
    4503             :       // We actually want to replace all uses of the any_extend with the
    4504             :       // zero_extend, to avoid duplicating things.  This will later cause this
    4505             :       // AND to be folded.
    4506       13463 :       CombineTo(N0.getNode(), Zext);
    4507       13463 :       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    4508             :     }
    4509             :   }
    4510             :   // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
    4511             :   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
    4512             :   // already be zero by virtue of the width of the base type of the load.
    4513             :   //
    4514             :   // the 'X' node here can either be nothing or an extract_vector_elt to catch
    4515             :   // more cases.
    4516      389425 :   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
    4517       17636 :        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
    4518       16580 :        N0.getOperand(0).getOpcode() == ISD::LOAD &&
    4519      389425 :        N0.getOperand(0).getResNo() == 0) ||
    4520      376341 :       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
    4521      160864 :     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
    4522             :                                          N0 : N0.getOperand(0) );
    4523             : 
    4524             :     // Get the constant (if applicable) the zero'th operand is being ANDed with.
    4525             :     // This can be a pure constant or a vector splat, in which case we treat the
    4526             :     // vector as a scalar and use the splat value.
    4527             :     APInt Constant = APInt::getNullValue(1);
    4528             :     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
    4529      148002 :       Constant = C->getAPIntValue();
    4530             :     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
    4531             :       APInt SplatValue, SplatUndef;
    4532             :       unsigned SplatBitSize;
    4533             :       bool HasAnyUndefs;
    4534        1107 :       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
    4535             :                                              SplatBitSize, HasAnyUndefs);
    4536        1107 :       if (IsSplat) {
    4537             :         // Undef bits can contribute to a possible optimisation if set, so
    4538             :         // set them.
    4539             :         SplatValue |= SplatUndef;
    4540             : 
    4541             :         // The splat value may be something like "0x00FFFFFF", which means 0 for
    4542             :         // the first vector value and FF for the rest, repeating. We need a mask
    4543             :         // that will apply equally to all members of the vector, so AND all the
    4544             :         // lanes of the constant together.
    4545        2210 :         EVT VT = Vector->getValueType(0);
    4546             :         unsigned BitWidth = VT.getScalarSizeInBits();
    4547             : 
    4548             :         // If the splat value has been compressed to a bitlength lower
    4549             :         // than the size of the vector lane, we need to re-expand it to
    4550             :         // the lane size.
    4551        1105 :         if (BitWidth > SplatBitSize)
    4552           2 :           for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
    4553           2 :                SplatBitSize < BitWidth;
    4554           1 :                SplatBitSize = SplatBitSize * 2)
    4555           2 :             SplatValue |= SplatValue.shl(SplatBitSize);
    4556             : 
    4557             :         // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
    4558             :         // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
    4559        1105 :         if (SplatBitSize % BitWidth == 0) {
    4560        1104 :           Constant = APInt::getAllOnesValue(BitWidth);
    4561        2253 :           for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
    4562        2306 :             Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
    4563             :         }
    4564             :       }
    4565             :     }
    4566             : 
    4567             :     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
    4568             :     // actually legal and isn't going to get expanded, else this is a false
    4569             :     // optimisation.
    4570       80432 :     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
    4571             :                                                     Load->getValueType(0),
    4572             :                                                     Load->getMemoryVT());
    4573             : 
    4574             :     // Resize the constant to the same size as the original memory access before
    4575             :     // extension. If it is still the AllOnesValue then this AND is completely
    4576             :     // unneeded.
    4577      160864 :     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
    4578             : 
    4579             :     bool B;
    4580       80432 :     switch (Load->getExtensionType()) {
    4581             :     default: B = false; break;
    4582             :     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
    4583             :     case ISD::ZEXTLOAD:
    4584             :     case ISD::NON_EXTLOAD: B = true; break;
    4585             :     }
    4586             : 
    4587       93092 :     if (B && Constant.isAllOnesValue()) {
    4588             :       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
    4589             :       // preserve semantics once we get rid of the AND.
    4590             :       SDValue NewLoad(Load, 0);
    4591             : 
    4592             :       // Fold the AND away. NewLoad may get replaced immediately.
    4593        4444 :       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
    4594             : 
    4595        2222 :       if (Load->getExtensionType() == ISD::EXTLOAD) {
    4596        1268 :         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
    4597        1268 :                               Load->getValueType(0), SDLoc(Load),
    4598             :                               Load->getChain(), Load->getBasePtr(),
    4599             :                               Load->getOffset(), Load->getMemoryVT(),
    4600        1276 :                               Load->getMemOperand());
    4601             :         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
    4602        1268 :         if (Load->getNumValues() == 3) {
    4603             :           // PRE/POST_INC loads have 3 values.
    4604           0 :           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
    4605           0 :                            NewLoad.getValue(2) };
    4606           0 :           CombineTo(Load, To, 3, true);
    4607             :         } else {
    4608        1268 :           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
    4609             :         }
    4610             :       }
    4611             : 
    4612        2222 :       return SDValue(N, 0); // Return N so it doesn't get rechecked!
    4613             :     }
    4614             :   }
    4615             : 
    4616             :   // fold (and (load x), 255) -> (zextload x, i8)
    4617             :   // fold (and (extload x, i16), 255) -> (zextload x, i8)
    4618             :   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
    4619      377857 :   if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
    4620       54529 :                                 (N0.getOpcode() == ISD::ANY_EXTEND &&
    4621       54529 :                                  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
    4622       80073 :     if (SDValue Res = ReduceLoadWidth(N)) {
    4623         138 :       LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
    4624         138 :         ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
    4625             : 
    4626         138 :       AddToWorklist(N);
    4627             :       CombineTo(LN0, Res, Res.getValue(1));
    4628         138 :       return SDValue(N, 0);
    4629             :     }
    4630             :   }
    4631             : 
    4632      377719 :   if (Level >= AfterLegalizeTypes) {
    4633             :     // Attempt to propagate the AND back up to the leaves which, if they're
    4634             :     // loads, can be combined to narrow loads and the AND node can be removed.
    4635             :     // Perform after legalization so that extend nodes will already be
    4636             :     // combined into the loads.
    4637      308037 :     if (BackwardsPropagateMask(N, DAG)) {
    4638          88 :       return SDValue(N, 0);
    4639             :     }
    4640             :   }
    4641             : 
    4642      377631 :   if (SDValue Combined = visitANDLike(N0, N1, N))
    4643         218 :     return Combined;
    4644             : 
    4645             :   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
    4646     1132239 :   if (N0.getOpcode() == N1.getOpcode())
    4647       27632 :     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
    4648         299 :       return Tmp;
    4649             : 
    4650             :   // Masking the negated extension of a boolean is just the zero-extended
    4651             :   // boolean:
    4652             :   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
    4653             :   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
    4654             :   //
    4655             :   // Note: the SimplifyDemandedBits fold below can make an information-losing
    4656             :   // transform, and then we have no way to find this better fold.
    4657      689439 :   if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
    4658          28 :     if (isNullConstantOrNullSplatConstant(N0.getOperand(0))) {
    4659          12 :       SDValue SubRHS = N0.getOperand(1);
    4660           9 :       if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
    4661           3 :           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
    4662           3 :         return SubRHS;
    4663           6 :       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
    4664           3 :           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
    4665           6 :         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
    4666             :     }
    4667             :   }
    4668             : 
    4669             :   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
    4670             :   // fold (and (sra)) -> (and (srl)) when possible.
    4671      377108 :   if (SimplifyDemandedBits(SDValue(N, 0)))
    4672      110614 :     return SDValue(N, 0);
    4673             : 
    4674             :   // fold (zext_inreg (extload x)) -> (zextload x)
    4675      266494 :   if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
    4676             :     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    4677       14236 :     EVT MemVT = LN0->getMemoryVT();
    4678             :     // If we zero all the possible extended bits, then we can turn this into
    4679             :     // a zextload if we are running before legalize or the operation is legal.
    4680       14236 :     unsigned BitWidth = N1.getScalarValueSizeInBits();
    4681       28472 :     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
    4682       14236 :                            BitWidth - MemVT.getScalarSizeInBits())) &&
    4683       16727 :         ((!LegalOperations && !LN0->isVolatile()) ||
    4684        2561 :          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
    4685       24544 :       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
    4686             :                                        LN0->getChain(), LN0->getBasePtr(),
    4687       12309 :                                        MemVT, LN0->getMemOperand());
    4688       12272 :       AddToWorklist(N);
    4689       12272 :       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    4690       12272 :       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    4691             :     }
    4692             :   }
    4693             :   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
    4694      254568 :   if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
    4695         346 :       N0.hasOneUse()) {
    4696             :     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    4697         301 :     EVT MemVT = LN0->getMemoryVT();
    4698             :     // If we zero all the possible extended bits, then we can turn this into
    4699             :     // a zextload if we are running before legalize or the operation is legal.
    4700         301 :     unsigned BitWidth = N1.getScalarValueSizeInBits();
    4701         602 :     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
    4702         301 :                            BitWidth - MemVT.getScalarSizeInBits())) &&
    4703         488 :         ((!LegalOperations && !LN0->isVolatile()) ||
    4704         226 :          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
    4705         492 :       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
    4706             :                                        LN0->getChain(), LN0->getBasePtr(),
    4707         246 :                                        MemVT, LN0->getMemOperand());
    4708         246 :       AddToWorklist(N);
    4709         246 :       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    4710         246 :       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    4711             :     }
    4712             :   }
    4713             :   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
    4714      253976 :   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
    4715         274 :     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
    4716         274 :                                            N0.getOperand(1), false))
    4717           1 :       return BSwap;
    4718             :   }
    4719             : 
    4720      253975 :   if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
    4721         636 :     return Shifts;
    4722             : 
    4723      253339 :   return SDValue();
    4724             : }
    4725             : 
    4726             : /// Match (a >> 8) | (a << 8) as (bswap a) >> 16; returns an empty SDValue when N (with operands N0, N1) does not match.
    4727      143554 : SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
    4728             :                                         bool DemandHighBits) {
    4729      143554 :   if (!LegalOperations) // Only attempted when LegalOperations is set.
    4730       56652 :     return SDValue();
    4731             :   // Only i16/i32/i64 results for which BSWAP is legal or custom are handled.
    4732      173804 :   EVT VT = N->getValueType(0);
    4733             :   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
    4734       21392 :     return SDValue();
    4735       65510 :   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    4736       14759 :     return SDValue();
    4737             : 
    4738             :   // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff); first reorder so the SHL-side AND is N0 and the SRL-side AND is N1.
    4739             :   bool LookPassAnd0 = false; // True once an AND mask on the N0 (shl) side has been stripped.
    4740             :   bool LookPassAnd1 = false; // True once an AND mask on the N1 (srl) side has been stripped.
    4741       50751 :   if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
    4742             :       std::swap(N0, N1);
    4743       50751 :   if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
    4744             :       std::swap(N0, N1);
    4745       50751 :   if (N0.getOpcode() == ISD::AND) {
    4746             :     if (!N0.getNode()->hasOneUse())
    4747         205 :       return SDValue();
    4748             :     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    4749             :     // Also handle 0xffff since the LHS is guaranteed to have zeros there.
    4750             :     // This is needed for X86.
    4751       12808 :     if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
    4752             :                   N01C->getZExtValue() != 0xFFFF))
    4753        5599 :       return SDValue();
    4754         999 :     N0 = N0.getOperand(0);
    4755             :     LookPassAnd0 = true;
    4756             :   }
    4757             : 
    4758       44947 :   if (N1.getOpcode() == ISD::AND) {
    4759             :     if (!N1.getNode()->hasOneUse())
    4760          24 :       return SDValue();
    4761             :     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    4762        2288 :     if (!N11C || N11C->getZExtValue() != 0xFF)
    4763        1052 :       return SDValue();
    4764         107 :     N1 = N1.getOperand(0);
    4765             :     LookPassAnd1 = true;
    4766             :   }
    4767             :   // With outer masks stripped, require N0 = (shl x, 8) and N1 = (srl y, 8), each with a single use.
    4768       43871 :   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
    4769             :     std::swap(N0, N1);
    4770       43871 :   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
    4771       42824 :     return SDValue();
    4772             :   if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
    4773          14 :     return SDValue();
    4774             : 
    4775             :   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    4776             :   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    4777        1033 :   if (!N01C || !N11C)
    4778         239 :     return SDValue();
    4779        1625 :   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
    4780         774 :     return SDValue();
    4781             : 
    4782             :   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8) when the mask was not already found outside the shift.
    4783          20 :   SDValue N00 = N0->getOperand(0);
    4784          20 :   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
    4785             :     if (!N00.getNode()->hasOneUse())
    4786           0 :       return SDValue();
    4787             :     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
    4788          12 :     if (!N001C || N001C->getZExtValue() != 0xFF)
    4789           2 :       return SDValue();
    4790           4 :     N00 = N00.getOperand(0);
    4791             :     LookPassAnd0 = true;
    4792             :   }
    4793             : 
    4794          18 :   SDValue N10 = N1->getOperand(0);
    4795          18 :   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
    4796             :     if (!N10.getNode()->hasOneUse())
    4797           0 :       return SDValue();
    4798             :     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
    4799             :     // Also allow 0xFFFF since the bits will be shifted out. This is needed
    4800             :     // for X86.
    4801          10 :     if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
    4802             :                    N101C->getZExtValue() != 0xFFFF))
    4803           0 :       return SDValue();
    4804           5 :     N10 = N10.getOperand(0);
    4805             :     LookPassAnd1 = true;
    4806             :   }
    4807             :   // Both shifts must operate on the same source value.
    4808             :   if (N00 != N10)
    4809           0 :     return SDValue();
    4810             : 
    4811             :   // Make sure everything beyond the low halfword gets set to zero since the SRL
    4812             :   // 16 will clear the top bits.
    4813          18 :   unsigned OpSizeInBits = VT.getSizeInBits();
    4814          18 :   if (DemandHighBits && OpSizeInBits > 16) {
    4815             :     // If the left-shift isn't masked out then the only way this is a bswap is
    4816             :     // if all bits beyond the low 8 are 0. In that case the entire pattern
    4817             :     // reduces to a left shift anyway: leave it for other parts of the combiner.
    4818           9 :     if (!LookPassAnd0)
    4819           2 :       return SDValue();
    4820             : 
    4821             :     // However, if the right shift isn't masked out then it might be because
    4822             :     // it's not needed. See if we can spot that too.
    4823           9 :     if (!LookPassAnd1 &&
    4824           2 :         !DAG.MaskedValueIsZero(
    4825          11 :             N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
    4826           0 :       return SDValue();
    4827             :   }
    4828             :   // Emit the bswap; for types wider than 16 bits, shift the swapped halfword down by (OpSizeInBits - 16).
    4829          16 :   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
    4830          16 :   if (OpSizeInBits > 16) {
    4831             :     SDLoc DL(N);
    4832          16 :     Res = DAG.getNode(ISD::SRL, DL, VT, Res,
    4833          16 :                       DAG.getConstant(OpSizeInBits - 16, DL,
    4834          16 :                                       getShiftAmountTy(VT)));
    4835             :   }
    4836          16 :   return Res;
    4837             : }
    4838             : 
    4839             : /// Return true if the specified node is an element that makes up a 32-bit
    4840             : /// packed halfword byteswap, i.e. one of the four terms of:
    4841             : /// ((x & 0x000000ff) << 8) |
    4842             : /// ((x & 0x0000ff00) >> 8) |
    4843             : /// ((x & 0x00ff0000) << 8) |
    4844             : /// ((x & 0xff000000) >> 8). On success the node for x is stored in Parts[i], i being the byte index of the matched mask; an already-filled slot makes the match fail.
    4845           0 : static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
    4846             :   if (!N.getNode()->hasOneUse())
    4847           0 :     return false;
    4848             :   // Both N and its first operand must be an AND, SHL or SRL.
    4849             :   unsigned Opc = N.getOpcode();
    4850           0 :   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
    4851           0 :     return false;
    4852             : 
    4853           0 :   SDValue N0 = N.getOperand(0);
    4854             :   unsigned Opc0 = N0.getOpcode();
    4855           0 :   if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
    4856           0 :     return false;
    4857             : 
    4858             :   ConstantSDNode *N1C = nullptr;
    4859             :   // The mask constant is N's own operand when N is an AND; for SHL or SRL look upstream for the AND mask operand.
    4860           0 :   if (Opc == ISD::AND)
    4861             :     N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    4862           0 :   else if (Opc0 == ISD::AND)
    4863             :     N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    4864           0 :   if (!N1C)
    4865           0 :     return false;
    4866             : 
    4867             :   unsigned MaskByteOffset; // Byte position (0 = least significant) selected by the mask constant.
    4868           0 :   switch (N1C->getZExtValue()) {
    4869             :   default:
    4870             :     return false;
    4871             :   case 0xFF:       MaskByteOffset = 0; break;
    4872           0 :   case 0xFF00:     MaskByteOffset = 1; break;
    4873           0 :   case 0xFFFF:
    4874             :     // In case demanded bits didn't clear the bits that will be shifted out.
    4875             :     // This is needed for X86.
    4876           0 :     if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
    4877             :       MaskByteOffset = 1;
    4878             :       break;
    4879             :     }
    4880             :     return false;
    4881           0 :   case 0xFF0000:   MaskByteOffset = 2; break;
    4882           0 :   case 0xFF000000: MaskByteOffset = 3; break;
    4883             :   }
    4884             : 
    4885             :   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
    4886           0 :   if (Opc == ISD::AND) {
    4887           0 :     if (MaskByteOffset == 0 || MaskByteOffset == 2) {
    4888             :       // (x >> 8) & 0xff
    4889             :       // (x >> 8) & 0xff0000
    4890           0 :       if (Opc0 != ISD::SRL)
    4891           0 :         return false;
    4892             :       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    4893           0 :       if (!C || C->getZExtValue() != 8)
    4894           0 :         return false;
    4895             :     } else {
    4896             :       // (x << 8) & 0xff00
    4897             :       // (x << 8) & 0xff000000
    4898           0 :       if (Opc0 != ISD::SHL)
    4899           0 :         return false;
    4900             :       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    4901           0 :       if (!C || C->getZExtValue() != 8)
    4902           0 :         return false;
    4903             :     }
    4904           0 :   } else if (Opc == ISD::SHL) {
    4905             :     // (x & 0xff) << 8
    4906             :     // (x & 0xff0000) << 8
    4907           0 :     if (MaskByteOffset != 0 && MaskByteOffset != 2)
    4908           0 :       return false;
    4909             :     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    4910           0 :     if (!C || C->getZExtValue() != 8)
    4911           0 :       return false;
    4912             :   } else { // Opc == ISD::SRL
    4913             :     // (x & 0xff00) >> 8
    4914             :     // (x & 0xff000000) >> 8
    4915           0 :     if (MaskByteOffset != 1 && MaskByteOffset != 3)
    4916           0 :       return false;
    4917             :     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    4918           0 :     if (!C || C->getZExtValue() != 8)
    4919           0 :       return false;
    4920             :   }
    4921             :   // Each byte slot may be claimed only once.
    4922           0 :   if (Parts[MaskByteOffset])
    4923           0 :     return false;
    4924             :   // NOTE(review): the slot is the mask's byte position, i.e. the source byte for shift-of-and forms but the result byte for and-of-shift forms -- confirm that mixing both forms cannot be mis-accepted.
    4925           0 :   Parts[MaskByteOffset] = N0.getOperand(0).getNode();
    4926           0 :   return true;
    4927             : }
    4928             : 
    4929             : /// Match a 32-bit packed halfword bswap. That is
    4930             : /// ((x & 0x000000ff) << 8) |
    4931             : /// ((x & 0x0000ff00) >> 8) |
    4932             : /// ((x & 0x00ff0000) << 8) |
    4933             : /// ((x & 0xff000000) >> 8)
    4934             : /// => (rotl (bswap x), 16), or the rotr / (or (shl), (srl)) equivalent. Returns an empty SDValue when N (operands N0, N1) does not match.
    4935           0 : SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
    4936           0 :   if (!LegalOperations)
    4937           0 :     return SDValue();
    4938             :   // Restricted to i32 results for which BSWAP is legal or custom.
    4939           0 :   EVT VT = N->getValueType(0);
    4940           0 :   if (VT != MVT::i32)
    4941           0 :     return SDValue();
    4942           0 :   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    4943           0 :     return SDValue();
    4944             : 
    4945             :   // Look for either
    4946             :   // (or (or (and), (and)), (or (and), (and)))
    4947             :   // (or (or (or (and), (and)), (and)), (and))
    4948           0 :   if (N0.getOpcode() != ISD::OR)
    4949           0 :     return SDValue();
    4950           0 :   SDValue N00 = N0.getOperand(0);
    4951           0 :   SDValue N01 = N0.getOperand(1);
    4952           0 :   SDNode *Parts[4] = {}; // One slot per byte, filled in by isBSwapHWordElement.
    4953             : 
    4954           0 :   if (N1.getOpcode() == ISD::OR &&
    4955           0 :       N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
    4956             :     // (or (or (and), (and)), (or (and), (and)))
    4957           0 :     if (!isBSwapHWordElement(N00, Parts))
    4958           0 :       return SDValue();
    4959             : 
    4960           0 :     if (!isBSwapHWordElement(N01, Parts))
    4961           0 :       return SDValue();
    4962           0 :     SDValue N10 = N1.getOperand(0);
    4963           0 :     if (!isBSwapHWordElement(N10, Parts))
    4964           0 :       return SDValue();
    4965           0 :     SDValue N11 = N1.getOperand(1);
    4966           0 :     if (!isBSwapHWordElement(N11, Parts))
    4967           0 :       return SDValue();
    4968             :   } else {
    4969             :     // (or (or (or (and), (and)), (and)), (and))
    4970           0 :     if (!isBSwapHWordElement(N1, Parts))
    4971           0 :       return SDValue();
    4972           0 :     if (!isBSwapHWordElement(N01, Parts))
    4973           0 :       return SDValue();
    4974           0 :     if (N00.getOpcode() != ISD::OR)
    4975           0 :       return SDValue();
    4976           0 :     SDValue N000 = N00.getOperand(0);
    4977           0 :     if (!isBSwapHWordElement(N000, Parts))
    4978           0 :       return SDValue();
    4979           0 :     SDValue N001 = N00.getOperand(1);
    4980           0 :     if (!isBSwapHWordElement(N001, Parts))
    4981           0 :       return SDValue();
    4982             :   }
    4983             : 
    4984             :   // Make sure the parts are all coming from the same node.
    4985           0 :   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
    4986           0 :     return SDValue();
    4987             : 
    4988             :   SDLoc DL(N);
    4989           0 :   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
    4990           0 :                               SDValue(Parts[0], 0));
    4991             : 
    4992             :   // Result of the bswap should be rotated by 16 (ROTL is tried first, then ROTR). If neither is legal or custom, then
    4993             :   // do  (x << 16) | (x >> 16).
    4994           0 :   SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
    4995           0 :   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
    4996           0 :     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
    4997           0 :   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
    4998           0 :     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
    4999           0 :   return DAG.getNode(ISD::OR, DL, VT,
    5000           0 :                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
    5001           0 :                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
    5002             : }
    5003             : 
    5004             : /// This contains all DAGCombine rules which reduce two values combined by
    5005             : /// an Or operation to a single value \see visitANDLike(). Returns an empty SDValue when no rule applies.
    5006      143660 : SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
    5007      287320 :   EVT VT = N1.getValueType();
    5008             :   SDLoc DL(N);
    5009             : 
    5010             :   // fold (or x, undef) -> -1 (only while LegalOperations is not set)
    5011      143660 :   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
    5012          15 :     return DAG.getAllOnesConstant(DL, VT);
    5013             :   // Delegate to foldLogicOfSetCCs (first argument false; presumably selects the OR form -- confirm).
    5014      143645 :   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
    5015         112 :     return V;
    5016             : 
    5017             :   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
    5018      143533 :   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
    5019             :       // Don't increase # computations.
    5020             :       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
    5021             :     // We can only do this xform if we know that bits from X that are set in C2
    5022             :     // but not in C1 are already zero.  Likewise for Y.
    5023             :     if (const ConstantSDNode *N0O1C =
    5024        8167 :         getAsNonOpaqueConstant(N0.getOperand(1))) {
    5025             :       if (const ConstantSDNode *N1O1C =
    5026        2980 :           getAsNonOpaqueConstant(N1.getOperand(1))) {
    5027             :         // X must be known zero in the bits C2 has beyond C1 (RHSMask & ~LHSMask), and
    5028             :         // Y in the bits C1 has beyond C2, so the result can be masked with C1 | C2.
    5029        2905 :         const APInt &LHSMask = N0O1C->getAPIntValue();
    5030        2905 :         const APInt &RHSMask = N1O1C->getAPIntValue();
    5031             : 
    5032        8954 :         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
    5033        4100 :             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
    5034          21 :           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
    5035          21 :                                   N0.getOperand(0), N1.getOperand(0));
    5036          21 :           return DAG.getNode(ISD::AND, DL, VT, X,
    5037          42 :                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
    5038             :         }
    5039             :       }
    5040             :     }
    5041             :   }
    5042             : 
    5043             :   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
    5044       25558 :   if (N0.getOpcode() == ISD::AND &&
    5045             :       N1.getOpcode() == ISD::AND &&
    5046      151670 :       N0.getOperand(0) == N1.getOperand(0) &&
    5047             :       // Don't increase # computations.
    5048             :       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
    5049         188 :     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
    5050         188 :                             N0.getOperand(1), N1.getOperand(1));
    5051         376 :     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
    5052             :   }
    5053             : 
    5054      143324 :   return SDValue();
    5055             : }
    5056             : /// Try each OR combine on N in turn; returns the folded value, SDValue(N, 0) when SimplifyDemandedBits succeeds, or an empty SDValue if no combine applies.
    5057      146434 : SDValue DAGCombiner::visitOR(SDNode *N) {
    5058      146434 :   SDValue N0 = N->getOperand(0);
    5059      146434 :   SDValue N1 = N->getOperand(1);
    5060      146434 :   EVT VT = N1.getValueType();
    5061             : 
    5062             :   // x | x --> x
    5063             :   if (N0 == N1)
    5064          17 :     return N0;
    5065             : 
    5066             :   // fold vector ops
    5067      146417 :   if (VT.isVector()) {
    5068       24910 :     if (SDValue FoldedVOp = SimplifyVBinOp(N))
    5069           4 :       return FoldedVOp;
    5070             : 
    5071             :     // fold (or x, 0) -> x, vector edition
    5072       24906 :     if (ISD::isBuildVectorAllZeros(N0.getNode()))
    5073          22 :       return N1;
    5074       24884 :     if (ISD::isBuildVectorAllZeros(N1.getNode()))
    5075         127 :       return N0;
    5076             : 
    5077             :     // fold (or x, -1) -> -1, vector edition
    5078       24757 :     if (ISD::isBuildVectorAllOnes(N0.getNode()))
    5079             :       // do not return N0, because undef node may exist in N0
    5080          10 :       return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
    5081       24752 :     if (ISD::isBuildVectorAllOnes(N1.getNode()))
    5082             :       // do not return N1, because undef node may exist in N1
    5083           2 :       return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
    5084             : 
    5085             :     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
    5086             :     // Do this only if the resulting shuffle is legal.
    5087         170 :     if (isa<ShuffleVectorSDNode>(N0) &&
    5088       24751 :         isa<ShuffleVectorSDNode>(N1) &&
    5089             :         // Avoid folding a node with illegal type.
    5090         143 :         TLI.isTypeLegal(VT)) {
    5091         142 :       bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
    5092         142 :       bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
    5093         142 :       bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
    5094         142 :       bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
    5095             :       // Ensure both shuffles have a zero input.
    5096         142 :       if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
    5097             :         assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
    5098             :         assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
    5099             :         const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
    5100             :         const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
    5101             :         bool CanFold = true;
    5102          66 :         int NumElts = VT.getVectorNumElements();
    5103          66 :         SmallVector<int, 4> Mask(NumElts);
    5104             : 
    5105         300 :         for (int i = 0; i != NumElts; ++i) {
    5106         239 :           int M0 = SV0->getMaskElt(i);
    5107         239 :           int M1 = SV1->getMaskElt(i);
    5108             : 
    5109             :           // Determine if either index is pointing to a zero vector.
    5110         239 :           bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
    5111         239 :           bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
    5112             : 
    5113             :           // If one element is zero and the otherside is undef, keep undef.
    5114             :           // This also handles the case that both are undef.
    5115         239 :           if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
    5116           1 :             Mask[i] = -1;
    5117           1 :             continue;
    5118             :           }
    5119             : 
    5120             :           // Make sure only one of the elements is zero.
    5121         238 :           if (M0Zero == M1Zero) {
    5122             :             CanFold = false;
    5123             :             break;
    5124             :           }
    5125             : 
    5126             :           assert((M0 >= 0 || M1 >= 0) && "Undef index!");
    5127             : 
    5128             :           // We have a zero and non-zero element. If the non-zero came from
    5129             :           // SV0 make the index a LHS index. If it came from SV1, make it
    5130             :           // a RHS index. We need to mod by NumElts because we don't care
    5131             :           // which operand it came from in the original shuffles.
    5132         233 :           Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
    5133             :         }
    5134             : 
    5135          66 :         if (CanFold) {
    5136          61 :           SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
    5137          61 :           SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
    5138             :           // If the mask is not legal as built, retry once with the operands (and mask) commuted.
    5139         122 :           bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
    5140          61 :           if (!LegalMask) {
    5141             :             std::swap(NewLHS, NewRHS);
    5142             :             ShuffleVectorSDNode::commuteMask(Mask);
    5143           0 :             LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
    5144             :           }
    5145             : 
    5146          61 :           if (LegalMask)
    5147         122 :             return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
    5148             :         }
    5149             :       }
    5150             :     }
    5151             :   }
    5152             : 
    5153             :   // fold (or c1, c2) -> c1|c2
    5154             :   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
    5155             :   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
    5156      146197 :   if (N0C && N1C && !N1C->isOpaque())
    5157         182 :     return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
    5158             :   // canonicalize constant to RHS
    5159      147686 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
    5160        1580 :      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    5161        3149 :     return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
    5162             :   // fold (or x, 0) -> x
    5163      144532 :   if (isNullConstant(N1))
    5164         831 :     return N0;
    5165             :   // fold (or x, -1) -> -1
    5166      143701 :   if (isAllOnesConstant(N1))
    5167          85 :     return N1;
    5168             : 
    5169      143616 :   if (SDValue NewSel = foldBinOpIntoSelect(N))
    5170          54 :     return NewSel;
    5171             : 
    5172             :   // fold (or x, c) -> c iff (x & ~c) == 0
    5173      402740 :   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
    5174           2 :     return N1;
    5175             :   // Apply the two-operand OR rules from visitORLike.
    5176      143560 :   if (SDValue Combined = visitORLike(N0, N1, N))
    5177         330 :     return Combined;
    5178             : 
    5179             :   // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + srl)
    5180      143230 :   if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
    5181           8 :     return BSwap;
    5182      143222 :   if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
    5183           7 :     return BSwap;
    5184             : 
    5185             :   // reassociate or
    5186      181815 :   if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
    5187          67 :     return ROR;
    5188             : 
    5189             :   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
    5190             :   // iff (c1 & c2) != 0.
    5191             :   auto MatchIntersect = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
    5192             :     return LHS->getAPIntValue().intersects(RHS->getAPIntValue());
    5193             :   };
    5194      168198 :   if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
    5195      193223 :       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect)) {
    5196          25 :     if (SDValue COR = DAG.FoldConstantArithmetic(
    5197          50 :             ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
    5198          25 :       SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
    5199          25 :       AddToWorklist(IOR.getNode());
    5200          50 :       return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
    5201             :     }
    5202             :   }
    5203             : 
    5204             :   // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
    5205      143123 :   if (N0.getOpcode() == N1.getOpcode())
    5206       24813 :     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
    5207         424 :       return Tmp;
    5208             : 
    5209             :   // See if this is some rotate idiom.
    5210      285398 :   if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
    5211        1112 :     return SDValue(Rot, 0);
    5212             : 
    5213      141587 :   if (SDValue Load = MatchLoadCombine(N))
    5214         184 :     return Load;
    5215             : 
    5216             :   // Simplify the operands using demanded-bits information.
    5217      141403 :   if (SimplifyDemandedBits(SDValue(N, 0)))
    5218        6353 :     return SDValue(N, 0);
    5219             : 
    5220      135050 :   return SDValue();
    5221             : }
    5222             : /// If Op is (and X, C) where C satisfies isConstantIntBuildVectorOrConstantInt, store C in Mask and return X; otherwise return Op and leave Mask untouched.
    5223      265514 : static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
    5224      297777 :   if (Op.getOpcode() == ISD::AND &&
    5225       32263 :       DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
    5226       27712 :     Mask = Op.getOperand(1);
    5227       27712 :     return Op.getOperand(0);
    5228             :   }
    5229      237802 :   return Op;
    5230             : }
    5231             : 
    5232             : /// Match "(X shl/srl V1) & V2" where V2 may not be present. On a match returns true with Shift set to the shl/srl node; Mask receives V2 whenever a constant mask was stripped.
    5233      226104 : static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
    5234             :                             SDValue &Mask) {
    5235      226104 :   Op = stripConstantMask(DAG, Op, Mask); // Look through an optional constant AND mask.
    5236      226104 :   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
    5237       39402 :     Shift = Op;
    5238       39402 :     return true;
    5239             :   }
    5240             :   return false;
    5241             : }
    5242             : 
    5243             : /// Helper function for visitOR to extract the needed side of a rotate idiom
    5244             : /// from a shl/srl/mul/udiv.  This is meant to handle cases where
    5245             : /// InstCombine merged some outside op with one of the shifts from
    5246             : /// the rotate pattern.
    5247             : /// \returns An empty \c SDValue if the needed shift couldn't be extracted.
    5248             : /// Otherwise, returns an expansion of \p ExtractFrom based on the following
    5249             : /// patterns:
    5250             : ///
    5251             : ///   (or (mul v c0) (shrl (mul v c1) c2)):
    5252             : ///     expands (mul v c0) -> (shl (mul v c1) c3)
    5253             : ///
    5254             : ///   (or (udiv v c0) (shl (udiv v c1) c2)):
    5255             : ///     expands (udiv v c0) -> (shrl (udiv v c1) c3)
    5256             : ///
    5257             : ///   (or (shl v c0) (shrl (shl v c1) c2)):
    5258             : ///     expands (shl v c0) -> (shl (shl v c1) c3)
    5259             : ///
    5260             : ///   (or (shrl v c0) (shl (shrl v c1) c2)):
    5261             : ///     expands (shrl v c0) -> (shrl (shrl v c1) c3)
    5262             : ///
    5263             : /// Such that in all cases, c3+c2==bitwidth(op v c1).
                       ///
                       /// \param OppShift    The shl/srl already matched as one rotate half; in the
                       ///                    patterns above it is the (shiftl/r (op v c1) c2) operand.
                       /// \param ExtractFrom The other OR operand, i.e. (op v c0), possibly wrapped
                       ///                    in a constant AND mask.
                       /// \param Mask        Receives the constant AND mask stripped from
                       ///                    \p ExtractFrom, if there was one.
                       /// \param DL          Debug location used for the nodes that are created.
    5264           0 : static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
    5265             :                                      SDValue ExtractFrom, SDValue &Mask,
    5266             :                                      const SDLoc &DL) {
    5267             :   assert(OppShift && ExtractFrom && "Empty SDValue");
    5268             :   assert(
    5269             :       (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
    5270             :       "Existing shift must be valid as a rotate half");
    5271             : 
    5272           0 :   ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
    5273             :   // Preconditions:
    5274             :   //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
    5275             :   //
    5276             :   // Find opcode of the needed shift to be extracted from (op0 v c0).
    5277           0 :   unsigned Opcode = ISD::DELETED_NODE;
    5278           0 :   bool IsMulOrDiv = false;
    5279             :   // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
    5280             :   // opcode or its arithmetic (mul or udiv) variant.
    5281             :   auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
    5282           0 :     IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
    5283           0 :     if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
    5284             :       return false;
    5285           0 :     Opcode = NeededShift;
    5286             :     return true;
    5287             :   };
    5288             :   // op0 must be either the needed shift opcode or the mul/udiv equivalent
    5289             :   // that the needed shift can be extracted from.
                         // An existing SRL needs a SHL (or MUL) partner; an existing SHL needs a
                         // SRL (or UDIV) partner.
    5290           0 :   if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
    5291             :       (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
    5292           0 :     return SDValue();
    5293             : 
    5294             :   // op0 must be the same opcode on both sides, have the same LHS argument,
    5295             :   // and produce the same value type.
    5296           0 :   SDValue OppShiftLHS = OppShift.getOperand(0);
    5297           0 :   EVT ShiftedVT = OppShiftLHS.getValueType();
    5298           0 :   if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
    5299           0 :       OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
    5300           0 :       ShiftedVT != ExtractFrom.getValueType())
    5301           0 :     return SDValue();
    5302             : 
    5303             :   // Amount of the existing shift.
    5304           0 :   ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
    5305             :   // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
    5306           0 :   ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
    5307             :   // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
    5308             :   ConstantSDNode *ExtractFromCst =
    5309           0 :       isConstOrConstSplat(ExtractFrom.getOperand(1));
    5310             :   // TODO: We should be able to handle non-uniform constant vectors for these values
    5311             :   // Check that we have constant values, and that none of them is zero
                         // (operator! on an APInt tests for a zero value).
    5312           0 :   if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
    5313           0 :       !OppLHSCst || !OppLHSCst->getAPIntValue() ||
    5314           0 :       !ExtractFromCst || !ExtractFromCst->getAPIntValue())
    5315           0 :     return SDValue();
    5316             : 
    5317             :   // Compute the shift amount we need to extract to complete the rotate.
                         // This is c3 = bitwidth - c2; bail out first if c2 exceeds the bitwidth so
                         // the subtraction cannot wrap.
    5318             :   const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
    5319           0 :   if (OppShiftCst->getAPIntValue().ugt(VTWidth))
    5320           0 :     return SDValue();
    5321           0 :   APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
    5322             :   // Normalize the bitwidth of the two mul/udiv/shift constant operands.
    5323           0 :   APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
    5324           0 :   APInt OppLHSAmt = OppLHSCst->getAPIntValue();
    5325           0 :   zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
    5326             : 
    5327             :   // Now try extract the needed shift from the ExtractFrom op and see if the
    5328             :   // result matches up with the existing shift's LHS op.
    5329           0 :   if (IsMulOrDiv) {
    5330             :     // Op to extract from is a mul or udiv by a constant.
    5331             :     // Check (with c0 = ExtractFromAmt, c1 = OppLHSAmt, c2 = existing shift):
    5332             :     //     c0 / (1 << (bitwidth(op0 v c1) - c2)) == c1
    5333             :     //     c0 % (1 << (bitwidth(op0 v c1) - c2)) == 0
    5334             :     const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
    5335           0 :                                                  NeededShiftAmt.getZExtValue());
    5336             :     APInt ResultAmt;
    5337             :     APInt Rem;
    5338           0 :     APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
    5339           0 :     if (Rem != 0 || ResultAmt != OppLHSAmt)
    5340           0 :       return SDValue();
    5341             :   } else {
    5342             :     // Op to extract from is a shift by a constant.
    5343             :     // Check (with c0 = ExtractFromAmt, c1 = OppLHSAmt, c2 = existing shift):
    5344             :     //      c0 - (bitwidth(op0 v c1) - c2) == c1
    5345           0 :     if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
    5346             :                                           ExtractFromAmt.getBitWidth()))
    5347           0 :       return SDValue();
    5348             :   }
    5349             : 
    5350             :   // Return the expanded shift op that should allow a rotate to be formed.
                         // The new node is (Opcode (op0 v c1), c3), reusing the existing shift's
                         // LHS so both rotate halves shift the same value.
    5351           0 :   EVT ShiftVT = OppShift.getOperand(1).getValueType();
    5352           0 :   EVT ResVT = ExtractFrom.getValueType();
    5353           0 :   SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
    5354           0 :   return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
    5355             : }
    5356             : 
    5357             : // Return true if we can prove that, whenever Neg and Pos are both in the
    5358             : // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
    5359             : // for two opposing shifts shift1 and shift2 and a value X with EltSize bits:
    5360             : //
    5361             : //     (or (shift1 X, Neg), (shift2 X, Pos))
    5362             : //
    5363             : // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
    5364             : // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
    5365             : // to consider shift amounts with defined behavior.
    5366         546 : static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
    5367             :                            SelectionDAG &DAG) {
    5368             :   // If EltSize is a power of 2 then:
    5369             :   //
    5370             :   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
    5371             :   //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
    5372             :   //
    5373             :   // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
    5374             :   // for the stronger condition:
    5375             :   //
    5376             :   //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
    5377             :   //
    5378             :   // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
    5379             :   // we can just replace Neg with Neg' for the rest of the function.
    5380             :   //
    5381             :   // In other cases we check for the even stronger condition:
    5382             :   //
    5383             :   //     Neg == EltSize - Pos                                    [B]
    5384             :   //
    5385             :   // for all Neg and Pos.  Note that the (or ...) then invokes undefined
    5386             :   // behavior if Pos == 0 (and consequently Neg == EltSize).
    5387             :   //
    5388             :   // We could actually use [A] whenever EltSize is a power of 2, but the
    5389             :   // only extra cases that it would match are those uninteresting ones
    5390             :   // where Neg and Pos are never in range at the same time.  E.g. for
    5391             :   // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
    5392             :   // as well as (sub 32, Pos), but:
    5393             :   //
    5394             :   //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
    5395             :   //
    5396             :   // always invokes undefined behavior for 32-bit X.
    5397             :   //
    5398             :   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
                         // The code represents Mask by MaskLoBits, the number of low bits it keeps:
                         // log2(EltSize) when using [A], and 0 to mean "no masking" when using [B].
    5399             :   unsigned MaskLoBits = 0;
    5400         546 :   if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
    5401          83 :     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
    5402          83 :       KnownBits Known;
    5403          83 :       DAG.computeKnownBits(Neg.getOperand(0), Known);
    5404             :       unsigned Bits = Log2_64(EltSize);
                             // Accept the AND only if its constant has no bits set at or above
                             // position Bits, and every one of the low Bits bits is either set in
                             // the constant or already known to be zero in the masked operand.
    5405         246 :       if (NegC->getAPIntValue().getActiveBits() <= Bits &&
    5406         243 :           ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
    5407          78 :         Neg = Neg.getOperand(0);
    5408             :         MaskLoBits = Bits;
    5409             :       }
    5410             :     }
    5411             :   }
    5412             : 
    5413             :   // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
    5414         546 :   if (Neg.getOpcode() != ISD::SUB)
    5415             :     return false;
    5416         400 :   ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
    5417         400 :   if (!NegC)
    5418             :     return false;
    5419         400 :   SDValue NegOp1 = Neg.getOperand(1);
    5420             : 
    5421             :   // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
    5422             :   // Pos'.  The truncation is redundant for the purpose of the equality.
    5423         400 :   if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
    5424          54 :     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
    5425          54 :       KnownBits Known;
    5426          54 :       DAG.computeKnownBits(Pos.getOperand(0), Known);
    5427         162 :       if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
    5428         162 :           ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
    5429             :            MaskLoBits))
    5430          42 :         Pos = Pos.getOperand(0);
    5431             :     }
    5432             :   }
    5433             : 
    5434             :   // The condition we need is now:
    5435             :   //
    5436             :   //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
    5437             :   //
    5438             :   // If NegOp1 == Pos then we need:
    5439             :   //
    5440             :   //              EltSize & Mask == NegC & Mask
    5441             :   //
    5442             :   // (because "x & Mask" is a truncation and distributes through subtraction).
                         // Width holds the constant that must equal EltSize (modulo Mask).
    5443             :   APInt Width;
    5444             :   if (Pos == NegOp1)
    5445         780 :     Width = NegC->getAPIntValue();
    5446             : 
    5447             :   // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
    5448             :   // Then the condition we want to prove becomes:
    5449             :   //
    5450             :   //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
    5451             :   //
    5452             :   // which, again because "x & Mask" is a truncation, becomes:
    5453             :   //
    5454             :   //                NegC & Mask == (EltSize - PosC) & Mask
    5455             :   //             EltSize & Mask == (NegC + PosC) & Mask
    5456          10 :   else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
    5457           3 :     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
    5458           9 :       Width = PosC->getAPIntValue() + NegC->getAPIntValue();
    5459             :     else
    5460             :       return false;
    5461             :   } else
    5462             :     return false;
    5463             : 
    5464             :   // Now we just need to check that EltSize & Mask == Width & Mask.
    5465         393 :   if (MaskLoBits)
    5466             :     // EltSize & Mask is 0 since Mask is EltSize - 1.
    5467          60 :     return Width.getLoBits(MaskLoBits) == 0;
    5468         333 :   return Width == EltSize;
    5469             : }
    5470             : 
    5471             : // A subroutine of MatchRotate used once we have found an OR of two opposite
    5472             : // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
    5473             : // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
    5474             : // former being preferred if supported.  InnerPos and InnerNeg are Pos and
    5475             : // Neg with outer conversions stripped away.
                       //
                       // Returns the newly created rotate node, or nullptr when matchRotateSub
                       // cannot prove the required relationship between InnerPos and InnerNeg.
    5476           0 : SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
    5477             :                                        SDValue Neg, SDValue InnerPos,
    5478             :                                        SDValue InnerNeg, unsigned PosOpcode,
    5479             :                                        unsigned NegOpcode, const SDLoc &DL) {
    5480             :   // fold (or (shl x, (*ext y)),
    5481             :   //          (srl x, (*ext (sub 32, y)))) ->
    5482             :   //   (rotl x, y) or (rotr x, (sub 32, y))
    5483             :   //
    5484             :   // fold (or (shl x, (*ext (sub 32, y))),
    5485             :   //          (srl x, (*ext y))) ->
    5486             :   //   (rotr x, y) or (rotl x, (sub 32, y))
    5487           0 :   EVT VT = Shifted.getValueType();
    5488           0 :   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
                           // Use PosOpcode with Pos when the target reports it legal or custom for
                           // VT; otherwise emit the opposite rotate using Neg as the amount.
    5489           0 :     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
    5490           0 :     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
    5491           0 :                        HasPos ? Pos : Neg).getNode();
    5492             :   }
    5493             : 
    5494             :   return nullptr;
    5495             : }
    5496             : 
    5497             : // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
    5498             : // idioms for rotate, and if the target supports rotation instructions, generate
    5499             : // a rot[lr].
                       //
                       // LHS and RHS are the two operands of the OR; DL is the location used for
                       // new nodes.  Returns the node that replaces the OR (a rotate, possibly
                       // wrapped in an AND or TRUNCATE), or nullptr if no rotate idiom was matched.
    5500      142856 : SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
    5501             :   // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
    5502      142856 :   EVT VT = LHS.getValueType();
    5503      142856 :   if (!TLI.isTypeLegal(VT)) return nullptr;
    5504             : 
    5505             :   // The target must have at least one rotate flavor.
    5506      140206 :   bool HasROTL = hasOperation(ISD::ROTL, VT);
    5507      140206 :   bool HasROTR = hasOperation(ISD::ROTR, VT);
    5508      140206 :   if (!HasROTL && !HasROTR) return nullptr;
    5509             : 
    5510             :   // Check for truncated rotate.
                         // If both operands are truncates from the same source type, try to match
                         // a rotate on the untruncated operands and truncate its result instead.
    5511      113053 :   if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
    5512         314 :       LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
    5513             :     assert(LHS.getValueType() == RHS.getValueType());
    5514         157 :     if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
    5515           1 :       return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(),
    5516           1 :                          SDValue(Rot, 0)).getNode();
    5517             :     }
    5518             :   }
    5519             : 
    5520             :   // Match "(X shl/srl V1) & V2" where V2 may not be present.
    5521      113052 :   SDValue LHSShift;   // The shift.
    5522      113052 :   SDValue LHSMask;    // AND value if any.
    5523      113052 :   matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
    5524             : 
    5525      113052 :   SDValue RHSShift;   // The shift.
    5526      113052 :   SDValue RHSMask;    // AND value if any.
    5527      113052 :   matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
    5528             : 
    5529             :   // If neither side matched a rotate half, bail
    5530      113052 :   if (!LHSShift && !RHSShift)
    5531             :     return nullptr;
    5532             : 
    5533             :   // InstCombine may have combined a constant shl, srl, mul, or udiv with one
    5534             :   // side of the rotate, so try to handle that here. In all cases we need to
    5535             :   // pass the matched shift from the opposite side to compute the opcode and
    5536             :   // needed shift amount to extract.  We still want to do this if both sides
    5537             :   // matched a rotate half because one half may be a potential overshift that
    5538             :   // can be broken down (ie if InstCombine merged two shl or srl ops into a
    5539             :   // single one).
    5540             : 
    5541             :   // Have LHS side of the rotate, try to extract the needed shift from the RHS.
    5542       33445 :   if (LHSShift)
    5543       10248 :     if (SDValue NewRHSShift =
    5544       10248 :             extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
    5545          12 :       RHSShift = NewRHSShift;
    5546             :   // Have RHS side of the rotate, try to extract the needed shift from the LHS.
    5547       33445 :   if (RHSShift)
    5548       29162 :     if (SDValue NewLHSShift =
    5549       29162 :             extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
    5550          11 :       LHSShift = NewLHSShift;
    5551             : 
    5552             :   // If a side is still missing, nothing else we can do.
    5553       33445 :   if (!RHSShift || !LHSShift)
    5554             :     return nullptr;
    5555             : 
    5556             :   // At this point we've matched or extracted a shift op on each side.
    5557             : 
    5558             :   if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
    5559             :     return nullptr;   // Not shifting the same value.
    5560             : 
    5561        1365 :   if (LHSShift.getOpcode() == RHSShift.getOpcode())
    5562             :     return nullptr;   // Shifts must disagree.
    5563             : 
    5564             :   // Canonicalize shl to left side in a shl/srl pair.
    5565        1186 :   if (RHSShift.getOpcode() == ISD::SHL) {
    5566             :     std::swap(LHS, RHS);
    5567             :     std::swap(LHSShift, RHSShift);
    5568             :     std::swap(LHSMask, RHSMask);
    5569             :   }
    5570             : 
                         // From here on LHSShift is the shl and RHSShift is the srl.
    5571             :   unsigned EltSizeInBits = VT.getScalarSizeInBits();
    5572        1186 :   SDValue LHSShiftArg = LHSShift.getOperand(0);
    5573        1186 :   SDValue LHSShiftAmt = LHSShift.getOperand(1);
    5574        1186 :   SDValue RHSShiftArg = RHSShift.getOperand(0);
    5575        1186 :   SDValue RHSShiftAmt = RHSShift.getOperand(1);
    5576             : 
    5577             :   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
    5578             :   // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
                         // Both folds require C1 + C2 == EltSizeInBits; ROTL is used when the
                         // target has it, otherwise ROTR.
    5579             :   auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
    5580             :                                         ConstantSDNode *RHS) {
    5581             :     return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
    5582             :   };
    5583        2372 :   if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
    5584         723 :     SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
    5585        1033 :                               LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
    5586             : 
    5587             :     // If there is an AND of either shifted operand, apply it to the result.
                           // Each side's mask only constrains the bits that side contributes, so
                           // before combining, OR in the bits supplied by the opposite shift
                           // (all-ones shifted the same way) to keep those bits unmasked.
    5588         723 :     if (LHSMask.getNode() || RHSMask.getNode()) {
    5589         104 :       SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
    5590         104 :       SDValue Mask = AllOnes;
    5591             : 
    5592         104 :       if (LHSMask.getNode()) {
    5593         208 :         SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
    5594         104 :         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
    5595         104 :                            DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
    5596             :       }
    5597         104 :       if (RHSMask.getNode()) {
    5598         174 :         SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
    5599          87 :         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
    5600          87 :                            DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
    5601             :       }
    5602             : 
    5603         208 :       Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
    5604             :     }
    5605             : 
    5606             :     return Rot.getNode();
    5607             :   }
    5608             : 
    5609             :   // If there is a mask here, and we have a variable shift, we can't be sure
    5610             :   // that we're masking out the right stuff.
    5611         463 :   if (LHSMask.getNode() || RHSMask.getNode())
    5612             :     return nullptr;
    5613             : 
    5614             :   // If both shift amounts are sign/zero/any-extended or truncated, peel the
                         // conversion off each of them.  Nothing is peeled unless both sides are
                         // wrapped in one of these conversions.
    5615         424 :   SDValue LExtOp0 = LHSShiftAmt;
    5616         424 :   SDValue RExtOp0 = RHSShiftAmt;
    5617         424 :   if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
    5618         413 :        LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
    5619         413 :        LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
    5620         511 :        LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
    5621          87 :       (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
    5622          76 :        RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
    5623          76 :        RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
    5624             :        RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    5625          79 :     LExtOp0 = LHSShiftAmt.getOperand(0);
    5626          79 :     RExtOp0 = RHSShiftAmt.getOperand(0);
    5627             :   }
    5628             : 
                         // First treat the shl amount as Pos (rotate left by it), then retry with
                         // the roles swapped so the srl amount is Pos (rotate right by it).
    5629         424 :   SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
    5630             :                                    LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
    5631         424 :   if (TryL)
    5632             :     return TryL;
    5633             : 
    5634         122 :   SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
    5635             :                                    RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
    5636         122 :   if (TryR)
    5637          87 :     return TryR;
    5638             : 
    5639             :   return nullptr;
    5640             : }
    5641             : 
    5642             : namespace {
    5643             : 
    5644             : /// Represents known origin of an individual byte in load combine pattern. The
    5645             : /// value of the byte is either constant zero or comes from memory.
    5646             : struct ByteProvider {
    5647             :   // For constant zero providers Load is set to nullptr. For memory providers
    5648             :   // Load represents the node which loads the byte from memory.
    5649             :   // ByteOffset is the offset of the byte in the value produced by the load.
    5650             :   LoadSDNode *Load = nullptr;
    5651             :   unsigned ByteOffset = 0;
    5652             : 
                         // A default-constructed provider has a null Load and therefore reads as
                         // a constant-zero provider.
    5653             :   ByteProvider() = default;
    5654             : 
                         // Build a provider for the byte at ByteOffset of the value loaded by Load.
    5655             :   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
    5656             :     return ByteProvider(Load, ByteOffset);
    5657             :   }
    5658             : 
                         // Build a provider for a byte whose value is the constant zero.
    5659             :   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
    5660             : 
                         // The two kinds are distinguished solely by whether Load is null.
    5661           0 :   bool isConstantZero() const { return !Load; }
    5662           0 :   bool isMemory() const { return Load; }
    5663             : 
                         // Providers are equal when they name the same load node and byte offset.
    5664             :   bool operator==(const ByteProvider &Other) const {
    5665             :     return Other.Load == Load && Other.ByteOffset == ByteOffset;
    5666             :   }
    5667             : 
    5668             : private:
                         // Private so that providers are only created through the named factories
                         // above (or default construction).
    5669             :   ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
    5670             :       : Load(Load), ByteOffset(ByteOffset) {}
    5671             : };
    5672             : 
    5673             : } // end anonymous namespace
    5674             : 
    5675             : /// Recursively traverses the expression calculating the origin of the requested
    5676             : /// byte of the given value. Returns None if the provider can't be calculated.
    5677             : ///
    5678             : /// For all the values except the root of the expression verifies that the value
    5679             : /// has exactly one use and if it's not true return None. This way if the origin
    5680             : /// of the byte is returned it's guaranteed that the values which contribute to
    5681             : /// the byte are not used outside of this expression.
    5682             : ///
    5683             : /// Because the parts of the expression are not allowed to have more than one
    5684             : /// use this function iterates over trees, not DAGs. So it never visits the same
    5685             : /// node more than once.
                       ///
                       /// \p Index selects the byte of \p Op being queried and is asserted to be
                       /// below the byte width of \p Op; lower indices are the less significant
                       /// bytes (a left shift by whole bytes makes the low-indexed bytes zero).
                       /// \p Depth is the current recursion depth; the search gives up when it
                       /// reaches 10.  \p Root marks the root of the expression, which is exempt
                       /// from the single-use requirement.
    5686             : static const Optional<ByteProvider>
    5687      293077 : calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
    5688             :                       bool Root = false) {
    5689             :   // Typical i64 by i8 pattern requires recursion up to 8 calls depth
    5690      293077 :   if (Depth == 10)
    5691             :     return None;
    5692             : 
    5693      289963 :   if (!Root && !Op.hasOneUse())
    5694             :     return None;
    5695             : 
    5696             :   assert(Op.getValueType().isScalarInteger() && "can't handle other types");
                         // Only values made of a whole number of bytes can be analyzed.
    5697      243935 :   unsigned BitWidth = Op.getValueSizeInBits();
    5698      243935 :   if (BitWidth % 8 != 0)
    5699             :     return None;
    5700      243935 :   unsigned ByteWidth = BitWidth / 8;
    5701             :   assert(Index < ByteWidth && "invalid index requested");
    5702             :   (void) ByteWidth;
    5703             : 
    5704      487870 :   switch (Op.getOpcode()) {
                         // OR: the byte is known only when one operand contributes constant zero
                         // for it, in which case the other operand's provider is the answer.
    5705      154519 :   case ISD::OR: {
    5706      309038 :     auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
    5707      154519 :     if (!LHS)
    5708             :       return None;
    5709       32604 :     auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
    5710       16302 :     if (!RHS)
    5711             :       return None;
    5712             : 
    5713       12346 :     if (LHS->isConstantZero())
    5714             :       return RHS;
    5715        6219 :     if (RHS->isConstantZero())
    5716             :       return LHS;
    5717             :     return None;
    5718             :   }
                         // SHL: handled only for a constant shift by a whole number of bytes.
                         // Bytes below the shift are zero; the rest come from the shifted operand
                         // at a correspondingly lower index.
    5719       15857 :   case ISD::SHL: {
    5720       15857 :     auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
    5721             :     if (!ShiftOp)
    5722             :       return None;
    5723             : 
    5724       15360 :     uint64_t BitShift = ShiftOp->getZExtValue();
    5725       15360 :     if (BitShift % 8 != 0)
    5726             :       return None;
    5727       12520 :     uint64_t ByteShift = BitShift / 8;
    5728             : 
    5729       12520 :     return Index < ByteShift
    5730             :                ? ByteProvider::getConstantZero()
    5731             :                : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
    5732       12520 :                                        Depth + 1);
    5733             :   }
                         // Extensions: bytes inside the narrow operand are looked up in it.  Bytes
                         // beyond it are constant zero for ZERO_EXTEND and unknown for the sign
                         // and any-extend forms.
    5734       12218 :   case ISD::ANY_EXTEND:
    5735             :   case ISD::SIGN_EXTEND:
    5736             :   case ISD::ZERO_EXTEND: {
    5737       12218 :     SDValue NarrowOp = Op->getOperand(0);
    5738       12218 :     unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
    5739       12218 :     if (NarrowBitWidth % 8 != 0)
    5740             :       return None;
    5741       12148 :     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
    5742             : 
    5743       12148 :     if (Index >= NarrowByteWidth)
    5744        1565 :       return Op.getOpcode() == ISD::ZERO_EXTEND
    5745             :                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
    5746        1565 :                  : None;
    5747       10583 :     return calculateByteProvider(NarrowOp, Index, Depth + 1);
    5748             :   }
                         // BSWAP: byte Index of the result is the mirrored byte of the operand.
    5749          42 :   case ISD::BSWAP:
    5750          84 :     return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
    5751          84 :                                  Depth + 1);
                         // LOAD: volatile and indexed loads are rejected.  Bytes past the memory
                         // width are constant zero only for a zero-extending load; otherwise the
                         // byte comes from this load at offset Index.
    5752       16760 :   case ISD::LOAD: {
    5753             :     auto L = cast<LoadSDNode>(Op.getNode());
    5754       16760 :     if (L->isVolatile() || L->isIndexed())
    5755             :       return None;
    5756             : 
    5757       16674 :     unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
    5758       16674 :     if (NarrowBitWidth % 8 != 0)
    5759             :       return None;
    5760       16674 :     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
    5761             : 
    5762       16674 :     if (Index >= NarrowByteWidth)
    5763             :       return L->getExtensionType() == ISD::ZEXTLOAD
    5764             :                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
    5765        4496 :                  : None;
    5766             :     return ByteProvider::getMemory(L, Index);
    5767             :   }
    5768             :   }
    5769             : 
                         // Any other opcode is not understood by this analysis.
    5770             :   return None;
    5771             : }
    5772             : 
    5773             : /// Match a pattern where a wide type scalar value is loaded by several narrow
    5774             : /// loads and combined by shifts and ors. Fold it into a single load or a load
    5775             : /// and a BSWAP if the target supports it.
    5776             : ///
    5777             : /// Assuming little endian target:
    5778             : ///  i8 *a = ...
    5779             : ///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
    5780             : /// =>
    5781             : ///  i32 val = *((i32 *)a)
    5782             : ///
    5783             : ///  i8 *a = ...
    5784             : ///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
    5785             : /// =>
    5786             : ///  i32 val = BSWAP(*((i32 *)a))
    5787             : ///
    5788             : /// TODO: This rule matches complex patterns with OR node roots and doesn't
    5789             : /// interact well with the worklist mechanism. When a part of the pattern is
    5790             : /// updated (e.g. one of the loads) its direct users are put into the worklist,
    5791             : /// but the root node of the pattern which triggers the load combine is not
    5792             : /// necessarily a direct user of the changed node. For example, once the address
    5793             : /// of t28 load is reassociated load combine won't be triggered:
    5794             : ///             t25: i32 = add t4, Constant:i32<2>
    5795             : ///           t26: i64 = sign_extend t25
    5796             : ///        t27: i64 = add t2, t26
    5797             : ///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
    5798             : ///     t29: i32 = zero_extend t28
    5799             : ///   t32: i32 = shl t29, Constant:i8<8>
    5800             : /// t33: i32 = or t23, t32
    5801             : /// As a possible fix visitLoad can check if the load can be a part of a load
    5802             : /// combine pattern and add corresponding OR roots to the worklist.
    5803           0 : SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { // Returns the combined load (wrapped in a BSWAP if needed), or an empty SDValue when the pattern does not match.
    5804             :   assert(N->getOpcode() == ISD::OR &&
    5805             :          "Can only match load combining against OR nodes");
    5806             : 
    5807             :   // Handles simple types only: the result type must be i16, i32 or i64.
    5808           0 :   EVT VT = N->getValueType(0);
    5809           0 :   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
    5810           0 :     return SDValue();
    5811           0 :   unsigned ByteWidth = VT.getSizeInBits() / 8; // Number of bytes in the OR result.
    5812             : 
    5813           0 :   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    5814             :   // Before legalize we can introduce too wide illegal loads which will be later
    5815             :   // split into legal sized loads. This enables us to combine i64 load by i8
    5816             :   // patterns to a couple of i32 loads on 32 bit targets.
    5817           0 :   if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
    5818           0 :     return SDValue();
    5819             : 
    5820             :   std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = [](
    5821             :     unsigned BW, unsigned i) { return i; }; // Little endian: byte i of the value is at memory offset i.
    5822             :   std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = [](
    5823        4274 :     unsigned BW, unsigned i) { return BW - i - 1; }; // Big endian: byte i of a BW-byte value is at memory offset BW - i - 1.
    5824             : 
    5825           0 :   bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
    5826             :   auto MemoryByteOffset = [&] (ByteProvider P) { // Maps the byte index P.ByteOffset to an offset within P.Load using the target endianness.
    5827             :     assert(P.isMemory() && "Must be a memory byte provider");
    5828             :     unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
    5829             :     assert(LoadBitWidth % 8 == 0 &&
    5830             :            "can only analyze providers for individual bytes not bit");
    5831             :     unsigned LoadByteWidth = LoadBitWidth / 8;
    5832             :     return IsBigEndianTarget
    5833             :             ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
    5834             :             : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
    5835           0 :   };
    5836             : 
    5837             :   Optional<BaseIndexOffset> Base; // Base address of the first load seen; all other loads must match it.
    5838             :   SDValue Chain; // Chain of the first load seen; all other loads must use the same chain.
    5839             : 
    5840             :   SmallPtrSet<LoadSDNode *, 8> Loads; // Every load that provides at least one byte of the result.
    5841             :   Optional<ByteProvider> FirstByteProvider; // Provider of the byte with the smallest offset from Base.
    5842             :   int64_t FirstOffset = INT64_MAX; // Smallest byte offset seen so far; INT64_MAX means not set yet.
    5843             : 
    5844             :   // Check if all the bytes of the OR we are looking at are loaded from the same
    5845             :   // base address. Collect byte offsets from the Base address in ByteOffsets.
    5846           0 :   SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
    5847           0 :   for (unsigned i = 0; i < ByteWidth; i++) {
    5848           0 :     auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
    5849           0 :     if (!P || !P->isMemory()) // All the bytes must be loaded from memory
    5850           0 :       return SDValue();
    5851             : 
    5852             :     LoadSDNode *L = P->Load;
    5853             :     assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
    5854             :            "Must be enforced by calculateByteProvider");
    5855             :     assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
    5856             : 
    5857             :     // All loads must share the same chain
    5858           0 :     SDValue LChain = L->getChain();
    5859           0 :     if (!Chain)
    5860           0 :       Chain = LChain;
    5861             :     else if (Chain != LChain)
    5862           0 :       return SDValue();
    5863             : 
    5864             :     // Loads must share the same base address
    5865           0 :     BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
    5866           0 :     int64_t ByteOffsetFromBase = 0; // Stays 0 for the first load, which defines Base.
    5867           0 :     if (!Base)
    5868             :       Base = Ptr;
    5869           0 :     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
    5870           0 :       return SDValue();
    5871             : 
    5872             :     // Calculate the offset of the current byte from the base address
    5873           0 :     ByteOffsetFromBase += MemoryByteOffset(*P);
    5874           0 :     ByteOffsets[i] = ByteOffsetFromBase;
    5875             : 
    5876             :     // Remember the first byte load, i.e. the one with the smallest offset
    5877           0 :     if (ByteOffsetFromBase < FirstOffset) {
    5878             :       FirstByteProvider = P;
    5879             :       FirstOffset = ByteOffsetFromBase;
    5880             :     }
    5881             : 
    5882           0 :     Loads.insert(L);
    5883             :   }
    5884             :   assert(!Loads.empty() && "All the bytes of the value must be loaded from "
    5885             :          "memory, so there must be at least one load which produces the value");
    5886             :   assert(Base && "Base address of the accessed memory location must be set");
    5887             :   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
    5888             : 
    5889             :   // Check if the bytes of the OR we are looking at match with either big or
    5890             :   // little endian value load
    5891             :   bool BigEndian = true, LittleEndian = true; // Each flag is cleared at the first byte that breaks that layout.
    5892           0 :   for (unsigned i = 0; i < ByteWidth; i++) {
    5893           0 :     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
    5894           0 :     LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i);
    5895           0 :     BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i);
    5896           0 :     if (!BigEndian && !LittleEndian)
    5897           0 :       return SDValue();
    5898             :   }
    5899             :   assert((BigEndian != LittleEndian) && "should be either or");
    5900             :   assert(FirstByteProvider && "must be set");
    5901             : 
    5902             :   // Ensure that the first byte is loaded from zero offset of the first load.
    5903             :   // So the combined value can be loaded from the first load address.
    5904           0 :   if (MemoryByteOffset(*FirstByteProvider) != 0)
    5905           0 :     return SDValue();
    5906             :   LoadSDNode *FirstLoad = FirstByteProvider->Load;
    5907             : 
    5908             :   // The node we are looking at matches with the pattern, check if we can
    5909             :   // replace it with a single load and bswap if needed.
    5910             : 
    5911             :   // If the load needs byte swap check if the target supports it
    5912           0 :   bool NeedsBswap = IsBigEndianTarget != BigEndian; // Swap when the matched byte order differs from the target byte order.
    5913             : 
    5914             :   // Before legalize we can introduce illegal bswaps which will be later
    5915             :   // converted to an explicit bswap sequence. This way we end up with a single
    5916             :   // load and byte shuffling instead of several loads and byte shuffling.
    5917           0 :   if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
    5918           0 :     return SDValue();
    5919             : 
    5920             :   // Check that a load of the wide type is both allowed and fast on the target
    5921           0 :   bool Fast = false;
    5922           0 :   bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
    5923             :                                         VT, FirstLoad->getAddressSpace(),
    5924             :                                         FirstLoad->getAlignment(), &Fast);
    5925           0 :   if (!Allowed || !Fast)
    5926           0 :     return SDValue();
    5927             : 
    5928             :   SDValue NewLoad = // Wide load that reuses the chain, pointer, pointer info and alignment of FirstLoad.
    5929           0 :       DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
    5930           0 :                   FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
    5931             : 
    5932             :   // Transfer chain users from old loads to the new load.
    5933           0 :   for (LoadSDNode *L : Loads)
    5934           0 :     DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
    5935             : 
    5936           0 :   return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
    5937             : }
    5938             : 
    5939             : // If the target has andn, bsl, or a similar bit-select instruction,
    5940             : // we want to unfold masked merge, with canonical pattern of:
    5941             : //   |        A  |  |B|
    5942             : //   ((x ^ y) & m) ^ y
    5943             : //    |  D  |
    5944             : // Into:
    5945             : //   (x & m) | (y & ~m)
    5946             : // If y is a constant, and the 'andn' does not work with immediates,
    5947             : // we unfold into a different pattern:
    5948             : //   ~(~x & m) & (m | y)
    5949             : // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
    5950             : //       the very least that breaks andnpd / andnps patterns, and because those
    5951             : //       patterns are simplified in IR and shouldn't be created in the DAG.
    5952           0 : SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) { // Returns the unfolded value, or an empty SDValue when no transform is done.
    5953             :   assert(N->getOpcode() == ISD::XOR);
    5954             : 
    5955             :   // Don't touch 'not' (i.e. where y = -1).
    5956           0 :   if (isAllOnesConstantOrAllOnesSplatConstant(N->getOperand(1)))
    5957           0 :     return SDValue();
    5958             : 
    5959           0 :   EVT VT = N->getValueType(0);
    5960             : 
    5961             :   // There are 3 commutable operators in the pattern,
    5962             :   // so we have to deal with 8 possible variants of the basic pattern.
    5963           0 :   SDValue X, Y, M; // Set by matchAndXor on a successful match.
    5964             :   auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) { // Matches And as ((x ^ y) & m) with the xor at operand XorIdx and Other as y.
    5965             :     if (And.getOpcode() != ISD::AND || !And.hasOneUse())
    5966             :       return false;
    5967             :     SDValue Xor = And.getOperand(XorIdx);
    5968             :     if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
    5969             :       return false;
    5970             :     SDValue Xor0 = Xor.getOperand(0);
    5971             :     SDValue Xor1 = Xor.getOperand(1);
    5972             :     // Don't touch 'not' (i.e. where y = -1).
    5973             :     if (isAllOnesConstantOrAllOnesSplatConstant(Xor1))
    5974             :       return false;
    5975             :     if (Other == Xor0)
    5976             :       std::swap(Xor0, Xor1); // Normalize so that a matching y ends up in Xor1.
    5977             :     if (Other != Xor1)
    5978             :       return false;
    5979             :     X = Xor0;
    5980             :     Y = Xor1;
    5981             :     M = And.getOperand(XorIdx ? 0 : 1); // The mask is the AND operand that is not the xor.
    5982             :     return true;
    5983           0 :   };
    5984             : 
    5985           0 :   SDValue N0 = N->getOperand(0);
    5986           0 :   SDValue N1 = N->getOperand(1);
    5987           0 :   if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
    5988           0 :       !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
    5989           0 :     return SDValue();
    5990             : 
    5991             :   // Don't do anything if the mask is constant. This should not be reachable.
    5992             :   // InstCombine should have already unfolded this pattern, and DAGCombiner
    5993             :   // probably shouldn't produce it either.
    5994           0 :   if (isa<ConstantSDNode>(M.getNode()))
    5995           0 :     return SDValue();
    5996             : 
    5997             :   // We can only transform if the target has AndNot for the mask.
    5998           0 :   if (!TLI.hasAndNot(M))
    5999           0 :     return SDValue();
    6000             : 
    6001             :   SDLoc DL(N);
    6002             : 
    6003             :   // If Y is a constant, check that 'andn' works with immediates.
    6004           0 :   if (!TLI.hasAndNot(Y)) {
    6005             :     assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
    6006             :     // If not, we need to do a bit more work to make sure andn is still used.
    6007           0 :     SDValue NotX = DAG.getNOT(DL, X, VT);
    6008           0 :     SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
    6009           0 :     SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
    6010           0 :     SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
    6011           0 :     return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS); // ~(~x & m) & (m | y)
    6012             :   }
    6013             : 
    6014           0 :   SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
    6015           0 :   SDValue NotM = DAG.getNOT(DL, M, VT);
    6016           0 :   SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
    6017             : 
    6018           0 :   return DAG.getNode(ISD::OR, DL, VT, LHS, RHS); // (x & m) | (y & ~m)
    6019             : }
    6020             : 
    6021      117833 : SDValue DAGCombiner::visitXOR(SDNode *N) { // Tries each XOR fold below in order; returns the folded value, or an empty SDValue when none applies.
    6022      117833 :   SDValue N0 = N->getOperand(0);
    6023      117833 :   SDValue N1 = N->getOperand(1);
    6024      235666 :   EVT VT = N0.getValueType();
    6025             : 
    6026             :   // fold vector ops
    6027      117833 :   if (VT.isVector()) {
    6028       24850 :     if (SDValue FoldedVOp = SimplifyVBinOp(N))
    6029           0 :       return FoldedVOp;
    6030             : 
    6031             :     // fold (xor x, 0) -> x, vector edition
    6032       24850 :     if (ISD::isBuildVectorAllZeros(N0.getNode()))
    6033           0 :       return N1;
    6034       24850 :     if (ISD::isBuildVectorAllZeros(N1.getNode()))
    6035         110 :       return N0;
    6036             :   }
    6037             : 
    6038             :   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
    6039      117723 :   if (N0.isUndef() && N1.isUndef())
    6040           0 :     return DAG.getConstant(0, SDLoc(N), VT);
    6041             :   // fold (xor x, undef) -> undef and (xor undef, x) -> undef
    6042      117723 :   if (N0.isUndef())
    6043          14 :     return N0;
    6044      235418 :   if (N1.isUndef())
    6045           0 :     return N1;
    6046             :   // fold (xor c1, c2) -> c1^c2
    6047             :   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
    6048             :   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
    6049      117709 :   if (N0C && N1C)
    6050        1808 :     return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
    6051             :   // canonicalize constant to RHS
    6052      117331 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
    6053         526 :      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    6054        1046 :     return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
    6055             :   // fold (xor x, 0) -> x
    6056      116282 :   if (isNullConstant(N1))
    6057           0 :     return N0;
    6058             : 
    6059      116282 :   if (SDValue NewSel = foldBinOpIntoSelect(N))
    6060           6 :     return NewSel;
    6061             : 
    6062             :   // reassociate xor
    6063      165686 :   if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1, N->getFlags()))
    6064         288 :     return RXOR;
    6065             : 
    6066             :   // fold !(x cc y) -> (x !cc y)
    6067      115988 :   SDValue LHS, RHS, CC; // Passed to isSetCCEquivalent below and read after it succeeds.
    6068      115988 :   if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
    6069       22739 :     bool isInt = LHS.getValueType().isInteger();
    6070       45478 :     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
    6071             :                                                isInt);
    6072             : 
    6073       22739 :     if (!LegalOperations ||
    6074         121 :         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
    6075       22625 :       switch (N0.getOpcode()) {
    6076           0 :       default:
    6077           0 :         llvm_unreachable("Unhandled SetCC Equivalent!");
    6078       22625 :       case ISD::SETCC:
    6079       58896 :         return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
    6080           0 :       case ISD::SELECT_CC:
    6081           0 :         return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
    6082           0 :                                N0.getOperand(3), NotCC);
    6083             :       }
    6084             :     }
    6085             :   }
    6086             : 
    6087             :   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
    6088      135505 :   if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
    6089       93370 :       N0.getNode()->hasOneUse() &&
    6090           7 :       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
    6091           3 :     SDValue V = N0.getOperand(0);
    6092             :     SDLoc DL(N0);
    6093           3 :     V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
    6094           3 :                     DAG.getConstant(1, DL, V.getValueType()));
    6095           3 :     AddToWorklist(V.getNode());
    6096           6 :     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
    6097             :   }
    6098             : 
    6099             :   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc; likewise (not (and x, y)) -> (or (not x), (not y))
    6100      112996 :   if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
    6101       19310 :       (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    6102          82 :     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    6103          82 :     if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
    6104          53 :       unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
    6105          55 :       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
    6106          55 :       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
    6107          53 :       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
    6108         108 :       return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    6109             :     }
    6110             :   }
    6111             :   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants; likewise (not (and x, y)) -> (or (not x), (not y))
    6112       93307 :   if (isAllOnesConstant(N1) && N0.hasOneUse() &&
    6113       46654 :       (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    6114         612 :     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    6115             :     if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
    6116          64 :       unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
    6117          65 :       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
    6118          64 :       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
    6119          64 :       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
    6120         129 :       return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    6121             :     }
    6122             :   }
    6123             :   // fold (xor (and x, y), y) -> (and (not x), y)
    6124      186486 :   if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
    6125        1890 :       N0->getOperand(1) == N1) {
    6126          73 :     SDValue X = N0->getOperand(0);
    6127          88 :     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
    6128          73 :     AddToWorklist(NotX.getNode());
    6129         161 :     return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
    6130             :   }
    6131             : 
    6132             :   // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
    6133       93170 :   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
    6134       19117 :     SDValue A = N0.getOpcode() == ISD::ADD ? N0 : N1; // Candidate ADD operand.
    6135       38134 :     SDValue S = N0.getOpcode() == ISD::SRA ? N0 : N1; // Candidate SRA operand.
    6136       19117 :     if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
    6137         119 :       SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
    6138         119 :       SDValue S0 = S.getOperand(0);
    6139             :       if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
    6140             :         unsigned OpSizeInBits = VT.getScalarSizeInBits();
    6141         113 :         if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
    6142         222 :           if (C->getAPIntValue() == (OpSizeInBits - 1))
    6143         225 :             return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
    6144             :       }
    6145             :     }
    6146             :   }
    6147             : 
    6148             :   // fold (xor x, x) -> 0
    6149       93059 :   if (N0 == N1)
    6150          38 :     return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
    6151             : 
    6152             :   // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
    6153             :   // Here is a concrete example of this equivalence:
    6154             :   // i16   x ==  14
    6155             :   // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
    6156             :   // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
    6157             :   //
    6158             :   // =>
    6159             :   //
    6160             :   // i16     ~1      == 0b1111111111111110
    6161             :   // i16 rol(~1, 14) == 0b1011111111111111
    6162             :   //
    6163             :   // Some additional tips to help conceptualize this transform:
    6164             :   // - Try to see the operation as placing a single zero in a value of all ones.
    6165             :   // - There exists no value for x which would allow the result to contain zero.
    6166             :   // - Values of x larger than the bitwidth are undefined and do not require a
    6167             :   //   consistent result.
    6168             :   // - Pushing the zero left requires shifting one bits in from the right.
    6169             :   // A rotate left of ~1 is a nice way of achieving the desired result.
    6170      143541 :   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
    6171         632 :       && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
    6172             :     SDLoc DL(N);
    6173          79 :     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
    6174          79 :                        N0.getOperand(1));
    6175             :   }
    6176             : 
    6177             :   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
    6178       92961 :   if (N0.getOpcode() == N1.getOpcode())
    6179       14346 :     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
    6180         438 :       return Tmp;
    6181             : 
    6182             :   // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
    6183       92523 :   if (SDValue MM = unfoldMaskedMerge(N))
    6184         114 :     return MM;
    6185             : 
    6186             :   // Simplify the expression using non-local knowledge.
    6187       92409 :   if (SimplifyDemandedBits(SDValue(N, 0)))
    6188         228 :     return SDValue(N, 0);
    6189             : 
    6190       92181 :   return SDValue();
    6191             : }
    6192             : 
    6193             : /// Handle transforms common to the three shifts, when the shift amount is a
    6194             : /// constant: (shift (binop x, c1), c2) -> (binop (shift x, c2), (shift c1, c2)).
    6195           0 : SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { // NOTE(review): Amt is not referenced in this body; the shift amount is read from N->getOperand(1).
    6196           0 :   SDNode *LHS = N->getOperand(0).getNode();
    6197           0 :   if (!LHS->hasOneUse()) return SDValue();
    6198             : 
    6199             :   // We want to pull some binops through shifts, so that we have (and (shift))
    6200             :   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
    6201             :   // thing happens with address calculations, so it's important to canonicalize
    6202             :   // it.
    6203             :   bool HighBitSet = false;  // Can we transform this if the high bit is set?
    6204             : 
    6205           0 :   switch (LHS->getOpcode()) {
    6206           0 :   default: return SDValue();
    6207             :   case ISD::OR:
    6208             :   case ISD::XOR:
    6209             :     HighBitSet = false; // We can only transform sra if the high bit is clear.
    6210             :     break;
    6211           0 :   case ISD::AND:
    6212             :     HighBitSet = true;  // We can only transform sra if the high bit is set.
    6213           0 :     break;
    6214           0 :   case ISD::ADD:
    6215           0 :     if (N->getOpcode() != ISD::SHL)
    6216           0 :       return SDValue(); // only shl(add) not sr[al](add).
    6217             :     HighBitSet = false; // We can only transform sra if the high bit is clear.
    6218             :     break;
    6219             :   }
    6220             : 
    6221             :   // We require the RHS of the binop to be a constant and not opaque as well.
    6222           0 :   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
    6223           0 :   if (!BinOpCst) return SDValue();
    6224             : 
    6225             :   // FIXME: disable this unless the input to the binop is a shift by a constant
    6226             :   // or is copy/select. Enable this in other cases once we figure out when it is profitable.
    6227           0 :   SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
    6228           0 :   bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
    6229           0 :                  BinOpLHSVal->getOpcode() == ISD::SRA ||
    6230             :                  BinOpLHSVal->getOpcode() == ISD::SRL;
    6231           0 :   bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
    6232             :                         BinOpLHSVal->getOpcode() == ISD::SELECT;
    6233             : 
    6234           0 :   if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
    6235             :       !isCopyOrSelect)
    6236           0 :     return SDValue();
    6237             : 
    6238           0 :   if (isCopyOrSelect && N->hasOneUse()) // For copy/select inputs the transform is only done when N has more than one use.
    6239           0 :     return SDValue();
    6240             : 
    6241           0 :   EVT VT = N->getValueType(0);
    6242             : 
    6243             :   // If this is a signed shift right, and the high bit is modified by the
    6244             :   // logical operation, do not perform the transformation. The HighBitSet
    6245             :   // boolean indicates the value of the high bit of the constant which would
    6246             :   // cause it to be modified for this operation.
    6247           0 :   if (N->getOpcode() == ISD::SRA) {
    6248           0 :     bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
    6249           0 :     if (BinOpRHSSignSet != HighBitSet)
    6250           0 :       return SDValue();
    6251             :   }
    6252             : 
    6253           0 :   if (!TLI.isDesirableToCommuteWithShift(N, Level))
    6254           0 :     return SDValue();
    6255             : 
    6256             :   // Fold the constants, shifting the binop RHS by the shift amount.
    6257           0 :   SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
    6258             :                                N->getValueType(0),
    6259           0 :                                LHS->getOperand(1), N->getOperand(1));
    6260             :   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
    6261             : 
    6262             :   // Create the new shift of the binop LHS by the same amount.
    6263           0 :   SDValue NewShift = DAG.getNode(N->getOpcode(),
    6264           0 :                                  SDLoc(LHS->getOperand(0)),
    6265           0 :                                  VT, LHS->getOperand(0), N->getOperand(1));
    6266             : 
    6267             :   // Create the new binop.
    6268           0 :   return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
    6269             : }
    6270             : 
    6271         844 : SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { // Moves a truncate of an AND with a constant operand onto the AND operands; returns an empty SDValue when not applicable.
    6272             :   assert(N->getOpcode() == ISD::TRUNCATE);
    6273             :   assert(N->getOperand(0).getOpcode() == ISD::AND);
    6274             : 
    6275             :   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
    6276         827 :   if (N->hasOneUse() && N->getOperand(0).hasOneUse()) { // Only when both the truncate and the AND have a single use.
    6277         587 :     SDValue N01 = N->getOperand(0).getOperand(1);
    6278         587 :     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
    6279             :       SDLoc DL(N);
    6280         584 :       EVT TruncVT = N->getValueType(0);
    6281         584 :       SDValue N00 = N->getOperand(0).getOperand(0);
    6282        1168 :       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
    6283        1168 :       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
    6284         584 :       AddToWorklist(Trunc00.getNode()); // Queue both new truncates so the combiner revisits them.
    6285         584 :       AddToWorklist(Trunc01.getNode());
    6286        1168 :       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
    6287             :     }
    6288             :   }
    6289             : 
    6290         260 :   return SDValue();
    6291             : }
    6292             : 
    6293        2760 : SDValue DAGCombiner::visitRotate(SDNode *N) {
    6294             :   SDLoc dl(N);
    6295        2760 :   SDValue N0 = N->getOperand(0);
    6296        2760 :   SDValue N1 = N->getOperand(1);
    6297        5520 :   EVT VT = N->getValueType(0);
    6298             :   unsigned Bitsize = VT.getScalarSizeInBits();
    6299             :   // N0 is the rotated value and N1 the rotate amount; an empty SDValue is returned when no fold applies.
    6300             :   // fold (rot x, 0) -> x
    6301        2760 :   if (isNullConstantOrNullSplatConstant(N1))
    6302           4 :     return N0;
    6303             : 
    6304             :   // fold (rot x, c) -> (rot x, c % Bitsize), only when c >= Bitsize
    6305        2756 :   if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
    6306        3303 :     if (Cst->getAPIntValue().uge(Bitsize)) {
    6307          10 :       uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
    6308          10 :       return DAG.getNode(N->getOpcode(), dl, VT, N0,
    6309          10 :                          DAG.getConstant(RotAmt, dl, N1.getValueType()));
    6310             :     }
    6311             :   }
    6312             : 
    6313             :   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
    6314        2746 :   if (N1.getOpcode() == ISD::TRUNCATE &&
    6315         257 :       N1.getOperand(0).getOpcode() == ISD::AND) {
    6316          53 :     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
    6317         159 :       return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
    6318             :   }
    6319             :   // Rotate of a rotate: amounts are added when both rotate the same way and subtracted otherwise (see CombineOp).
    6320             :   unsigned NextOp = N0.getOpcode();
    6321             :   // fold (rot* (rot* x, c2), c1) -> (rot* x, (c1 +- c2) % bitsize)
    6322        2693 :   if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
    6323          17 :     SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
    6324          34 :     SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
    6325          17 :     if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
    6326          17 :       EVT ShiftVT = C1->getValueType(0);
    6327          17 :       bool SameSide = (N->getOpcode() == NextOp);
    6328          17 :       unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
    6329          17 :       if (SDValue CombinedShift =
    6330          17 :               DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
    6331          17 :         SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
    6332          17 :         SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
    6333             :             ISD::SREM, dl, ShiftVT, CombinedShift.getNode(), // NOTE(review): signed remainder; a negative c1 - c2 presumably stays negative -- confirm intended
    6334          17 :             BitsizeC.getNode()); // NOTE(review): unlike CombinedShift, this fold result is not checked for null
    6335          34 :         return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
    6336          34 :                            CombinedShiftNorm);
    6337             :       }
    6338             :     }
    6339             :   }
    6340        2676 :   return SDValue();
    6341             : }
    6342             : 
    6343      123493 : SDValue DAGCombiner::visitSHL(SDNode *N) {
    6344      123493 :   SDValue N0 = N->getOperand(0);
    6345      123493 :   SDValue N1 = N->getOperand(1);
    6346      123493 :   EVT VT = N0.getValueType();
    6347             :   unsigned OpSizeInBits = VT.getScalarSizeInBits();
    6348             :   // Folds for (shl N0, N1). Returns the replacement, N itself after a successful SimplifyDemandedBits, or an empty SDValue.
    6349             :   // fold vector ops
    6350      123493 :   if (VT.isVector()) {
    6351        3968 :     if (SDValue FoldedVOp = SimplifyVBinOp(N))
    6352           6 :       return FoldedVOp;
    6353             : 
    6354             :     BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
    6355             :     // If setcc produces all-one true value then:
    6356             :     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
    6357        2476 :     if (N1CV && N1CV->isConstant()) {
    6358        2350 :       if (N0.getOpcode() == ISD::AND) {
    6359          20 :         SDValue N00 = N0->getOperand(0);
    6360          20 :         SDValue N01 = N0->getOperand(1);
    6361             :         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
    6362             : 
    6363          19 :         if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
    6364          12 :             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
    6365             :                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
    6366           4 :           if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
    6367           8 :                                                      N01CV, N1CV))
    6368           8 :             return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
    6369             :         }
    6370             :       }
    6371             :     }
    6372             :   }
    6373             : 
    6374      123483 :   ConstantSDNode *N1C = isConstOrConstSplat(N1);
    6375             : 
    6376             :   // fold (shl c1, c2) -> c1<<c2
    6377             :   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
    6378      123483 :   if (N0C && N1C && !N1C->isOpaque())
    6379        7530 :     return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
    6380             :   // fold (shl 0, x) -> 0
    6381      119718 :   if (isNullConstantOrNullSplatConstant(N0))
    6382          82 :     return N0;
    6383             :   // fold (shl x, c >= size(x)) -> undef
    6384             :   // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
    6385             :   auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
    6386      205922 :     return Val->getAPIntValue().uge(OpSizeInBits);
    6387             :   };
    6388      239272 :   if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
    6389          49 :     return DAG.getUNDEF(VT);
    6390             :   // fold (shl x, 0) -> x
    6391      221801 :   if (N1C && N1C->isNullValue())
    6392         443 :     return N0;
    6393             :   // fold (shl undef, x) -> 0
    6394      119144 :   if (N0.isUndef())
    6395          22 :     return DAG.getConstant(0, SDLoc(N), VT);
    6396             : 
    6397      119133 :   if (SDValue NewSel = foldBinOpIntoSelect(N))
    6398          12 :     return NewSel;
    6399             : 
    6400             :   // if (shl x, c) is known to be zero, return 0
    6401      119121 :   if (DAG.MaskedValueIsZero(SDValue(N, 0),
    6402      238242 :                             APInt::getAllOnesValue(OpSizeInBits)))
    6403        4238 :     return DAG.getConstant(0, SDLoc(N), VT);
    6404             :   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
    6405      117002 :   if (N1.getOpcode() == ISD::TRUNCATE &&
    6406        5738 :       N1.getOperand(0).getOpcode() == ISD::AND) {
    6407         725 :     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
    6408        1022 :       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
    6409             :   }
    6410             :   // Constant (or splat) shift amount: try SimplifyDemandedBits on the result; on success return N itself.
    6411      116518 :   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    6412        6762 :     return SDValue(N, 0);
    6413             : 
    6414             :   // fold (shl (shl x, c1), c2) -> 0 if c1 + c2 >= bit width, else (shl x, (add c1, c2))
    6415      109756 :   if (N0.getOpcode() == ISD::SHL) {
    6416             :     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
    6417             :                                           ConstantSDNode *RHS) {
    6418             :       APInt c1 = LHS->getAPIntValue();
    6419             :       APInt c2 = RHS->getAPIntValue();
    6420             :       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
    6421             :       return (c1 + c2).uge(OpSizeInBits);
    6422             :     };
    6423         746 :     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
    6424          12 :       return DAG.getConstant(0, SDLoc(N), VT);
    6425             : 
    6426             :     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
    6427             :                                        ConstantSDNode *RHS) {
    6428             :       APInt c1 = LHS->getAPIntValue();
    6429             :       APInt c2 = RHS->getAPIntValue();
    6430             :       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
    6431             :       return (c1 + c2).ult(OpSizeInBits);
    6432             :     };
    6433         734 :     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
    6434             :       SDLoc DL(N);
    6435          90 :       EVT ShiftVT = N1.getValueType();
    6436         180 :       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
    6437         180 :       return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
    6438             :     }
    6439             :   }
    6440             : 
    6441             :   // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
    6442             :   // For this to be valid, the second form must not preserve any of the bits
    6443             :   // that are shifted out by the inner shift in the first form.  This means
    6444             :   // the outer shift size must be >= the number of bits added by the ext.
    6445             :   // As a corollary, we don't care what kind of ext it is.
    6446       92780 :   if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
    6447       78960 :               N0.getOpcode() == ISD::ANY_EXTEND ||
    6448      126183 :               N0.getOpcode() == ISD::SIGN_EXTEND) &&
    6449       16523 :       N0.getOperand(0).getOpcode() == ISD::SHL) {
    6450          47 :     SDValue N0Op0 = N0.getOperand(0);
    6451          47 :     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
    6452          39 :       APInt c1 = N0Op0C1->getAPIntValue();
    6453          39 :       APInt c2 = N1C->getAPIntValue();
    6454          39 :       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
    6455             : 
    6456          39 :       EVT InnerShiftVT = N0Op0.getValueType();
    6457          39 :       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
    6458          78 :       if (c2.uge(OpSizeInBits - InnerShiftSize)) {
    6459             :         SDLoc DL(N0);
    6460          15 :         APInt Sum = c1 + c2;
    6461          15 :         if (Sum.uge(OpSizeInBits))
    6462           4 :           return DAG.getConstant(0, DL, VT);
    6463             : 
    6464          11 :         return DAG.getNode(
    6465             :             ISD::SHL, DL, VT,
    6466          22 :             DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
    6467          11 :             DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
    6468             :       }
    6469             :     }
    6470             :   }
    6471             : 
    6472             :   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
    6473             :   // Only fold this if the inner zext has no other uses to avoid increasing
    6474             :   // the total number of instructions.
    6475      119083 :   if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
    6476        8486 :       N0.getOperand(0).getOpcode() == ISD::SRL) {
    6477             :     SDValue N0Op0 = N0.getOperand(0);
    6478         103 :     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
    6479         103 :       if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
    6480         103 :         uint64_t c1 = N0Op0C1->getZExtValue();
    6481         103 :         uint64_t c2 = N1C->getZExtValue();
    6482         103 :         if (c1 == c2) {
    6483          14 :           SDValue NewOp0 = N0.getOperand(0);
    6484          28 :           EVT CountVT = NewOp0.getOperand(1).getValueType();
    6485             :           SDLoc DL(N);
    6486          14 :           SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
    6487             :                                        NewOp0,
    6488          14 :                                        DAG.getConstant(c2, DL, CountVT));
    6489          14 :           AddToWorklist(NewSHL.getNode());
    6490          28 :           return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
    6491             :         }
    6492             :       }
    6493             :     }
    6494             :   }
    6495             : 
    6496             :   // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
    6497             :   // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C1-C2)) if C1  > C2
    6498      109631 :   if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
    6499        5236 :       N0->getFlags().hasExact()) {
    6500        1864 :     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
    6501        1864 :       uint64_t C1 = N0C1->getZExtValue();
    6502        1864 :       uint64_t C2 = N1C->getZExtValue();
    6503             :       SDLoc DL(N);
    6504        1864 :       if (C1 <= C2)
    6505        1834 :         return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
    6506        1834 :                            DAG.getConstant(C2 - C1, DL, N1.getValueType()));
    6507          30 :       return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
    6508          30 :                          DAG.getConstant(C1 - C2, DL, N1.getValueType()));
    6509             :     }
    6510             :   }
    6511             : 
    6512             :   // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) if c2 > c1, or
    6513             :   //                               (and (srl x, (sub c1, c2)), MASK) otherwise
    6514             :   // Only fold this if the inner shift has no other uses -- if it does, folding
    6515             :   // this will increase the total number of instructions.
    6516      110328 :   if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    6517        1820 :     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
    6518        1654 :       uint64_t c1 = N0C1->getZExtValue();
    6519        1654 :       if (c1 < OpSizeInBits) {
    6520        1654 :         uint64_t c2 = N1C->getZExtValue();
    6521        1654 :         APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
    6522        1654 :         SDValue Shift;
    6523        1654 :         if (c2 > c1) {
    6524         124 :           Mask <<= c2 - c1;
    6525             :           SDLoc DL(N);
    6526         124 :           Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
    6527         124 :                               DAG.getConstant(c2 - c1, DL, N1.getValueType()));
    6528             :         } else {
    6529        1530 :           Mask.lshrInPlace(c1 - c2);
    6530             :           SDLoc DL(N);
    6531        1530 :           Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
    6532        1530 :                               DAG.getConstant(c1 - c2, DL, N1.getValueType()));
    6533             :         }
    6534             :         SDLoc DL(N0);
    6535        1654 :         return DAG.getNode(ISD::AND, DL, VT, Shift,
    6536        1654 :                            DAG.getConstant(Mask, DL, VT));
    6537             :       }
    6538             :     }
    6539             :   }
    6540             : 
    6541             :   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
    6542      106184 :   if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
    6543          71 :       isConstantOrConstantVector(N1, /* No Opaques */ true)) {
    6544             :     SDLoc DL(N);
    6545          71 :     SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
    6546         142 :     SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
    6547         142 :     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
    6548             :   }
    6549             : 
    6550             :   // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
    6551             :   // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
    6552             :   // Variant of version done on multiply, except mul by a power of 2 is turned
    6553             :   // into a shift.
    6554      102902 :   if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
    6555        3085 :       N0.getNode()->hasOneUse() &&
    6556        6148 :       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
    6557      110241 :       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
    6558        1136 :       TLI.isDesirableToCommuteWithShift(N, Level)) {
    6559        1231 :     SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
    6560        1062 :     SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    6561        1062 :     AddToWorklist(Shl0.getNode());
    6562        1062 :     AddToWorklist(Shl1.getNode());
    6563        2293 :     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
    6564             :   }
    6565             : 
    6566             :   // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2), only if c1 << c2 folds to a constant
    6567        1419 :   if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
    6568      107814 :       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
    6569        1415 :       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
    6570        1287 :     SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    6571        1287 :     if (isConstantOrConstantVector(Shl))
    6572        2576 :       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
    6573             :   }
    6574             :   // Last resort for a non-opaque constant shift amount: the shared visitShiftByConstant combine.
    6575      103693 :   if (N1C && !N1C->isOpaque())
    6576       86829 :     if (SDValue NewSHL = visitShiftByConstant(N, N1C))
    6577         636 :       return NewSHL;
    6578             : 
    6579      103057 :   return SDValue();
    6580             : }
    6581             : 
    6582       30600 : SDValue DAGCombiner::visitSRA(SDNode *N) {
    6583       30600 :   SDValue N0 = N->getOperand(0);
    6584       30600 :   SDValue N1 = N->getOperand(1);
    6585       30600 :   EVT VT = N0.getValueType();
    6586       30600 :   unsigned OpSizeInBits = VT.getScalarSizeInBits();
    6587             :   // Folds for (sra N0, N1). Returns the replacement, N itself after a successful SimplifyDemandedBits, or an empty SDValue.
    6588             :   // Arithmetic shifting an all-sign-bit value is a no-op.
    6589             :   // fold (sra 0, x) -> 0
    6590             :   // fold (sra -1, x) -> -1
    6591       30600 :   if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
    6592         455 :     return N0;
    6593             : 
    6594             :   // fold vector ops
    6595       30145 :   if (VT.isVector())
    6596        3183 :     if (SDValue FoldedVOp = SimplifyVBinOp(N))
    6597           3 :       return FoldedVOp;
    6598             : 
    6599       30142 :   ConstantSDNode *N1C = isConstOrConstSplat(N1);
    6600             : 
    6601             :   // fold (sra c1, c2) -> c1 >>s c2
    6602             :   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
    6603       30142 :   if (N0C && N1C && !N1C->isOpaque())
    6604           0 :     return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
    6605             :   // fold (sra x, c >= size(x)) -> undef
    6606             :   // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
    6607             :   auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
    6608       55274 :     return Val->getAPIntValue().uge(OpSizeInBits);
    6609       30142 :   };
    6610       60284 :   if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
    6611          23 :     return DAG.getUNDEF(VT);
    6612             :   // fold (sra x, 0) -> x
    6613       57255 :   if (N1C && N1C->isNullValue())
    6614          15 :     return N0;
    6615             : 
    6616       30104 :   if (SDValue NewSel = foldBinOpIntoSelect(N))
    6617           3 :     return NewSel;
    6618             : 
    6619             :   // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
    6620             :   // sext_inreg.
    6621       30101 :   if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
    6622        3133 :     unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
    6623        3133 :     EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
    6624        3133 :     if (VT.isVector())
    6625         183 :       ExtVT = EVT::getVectorVT(*DAG.getContext(),
    6626         183 :                                ExtVT, VT.getVectorNumElements());
    6627        3133 :     if ((!LegalOperations ||
    6628        2433 :          TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
    6629         733 :       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
    6630        2199 :                          N0.getOperand(0), DAG.getValueType(ExtVT));
    6631             :   }
    6632             : 
    6633             :   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
    6634             :   // clamp (add c1, c2) to the max shift amount, OpSizeInBits - 1.
    6635       29368 :   if (N0.getOpcode() == ISD::SRA) {
    6636             :     SDLoc DL(N);
    6637         332 :     EVT ShiftVT = N1.getValueType();
    6638         332 :     EVT ShiftSVT = ShiftVT.getScalarType();
    6639             :     SmallVector<SDValue, 16> ShiftValues;
    6640             : 
    6641             :     auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
    6642             :       APInt c1 = LHS->getAPIntValue();
    6643             :       APInt c2 = RHS->getAPIntValue();
    6644             :       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
    6645             :       APInt Sum = c1 + c2;
    6646             :       unsigned ShiftSum =
    6647             :           Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
    6648             :       ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
    6649             :       return true;
    6650             :     };
    6651         664 :     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
    6652         256 :       SDValue ShiftValue;
    6653         256 :       if (VT.isVector())
    6654          30 :         ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
    6655             :       else
    6656         241 :         ShiftValue = ShiftValues[0];
    6657         512 :       return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
    6658             :     }
    6659             :   }
    6660             : 
    6661             :   // fold (sra (shl X, m), (sub result_size, n))
    6662             :   // -> (sign_extend (trunc (srl X, (sub (sub result_size, n), m)))) for
    6663             :   // result_size - n > m.
    6664             :   // If truncate is free for the target sext(shl) is likely to result in better
    6665             :   // code.
    6666       29112 :   if (N0.getOpcode() == ISD::SHL && N1C) {
    6667             :     // Get the two constants of the shifts: N01C = m, N1C = result_size - n.
    6668        4332 :     const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
    6669        4332 :     if (N01C) {
    6670        4332 :       LLVMContext &Ctx = *DAG.getContext();
    6671             :       // Determine what the truncate's result bitsize and type would be.
    6672        8664 :       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
    6673             : 
    6674        4332 :       if (VT.isVector())
    6675          87 :         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
    6676             : 
    6677             :       // Determine the residual right-shift amount.
    6678        8664 :       int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
    6679             : 
    6680             :       // If the shift is not a no-op (in which case this should be just a sign
    6681             :       // extend already), the truncated to type is legal, sign_extend is legal
    6682             :       // on that type, and the truncate to that type is both legal and free,
    6683             :       // perform the transform.
    6684             :       if ((ShiftAmt > 0) &&
    6685        1886 :           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
    6686        4433 :           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
    6687         101 :           TLI.isTruncateFree(VT, TruncVT)) {
    6688             :         SDLoc DL(N);
    6689          13 :         SDValue Amt = DAG.getConstant(ShiftAmt, DL,
    6690          26 :             getShiftAmountTy(N0.getOperand(0).getValueType()));
    6691          13 :         SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
    6692          13 :                                     N0.getOperand(0), Amt);
    6693          13 :         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
    6694          13 :                                     Shift);
    6695          13 :         return DAG.getNode(ISD::SIGN_EXTEND, DL,
    6696          26 :                            N->getValueType(0), Trunc);
    6697             :       }
    6698             :     }
    6699             :   }
    6700             : 
    6701             :   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
    6702       29099 :   if (N1.getOpcode() == ISD::TRUNCATE &&
    6703         320 :       N1.getOperand(0).getOpcode() == ISD::AND) {
    6704          18 :     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
    6705          30 :       return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
    6706             :   }
    6707             : 
    6708             :   // fold (sra (trunc (sr[la] x, c1)), c2) -> (trunc (sra x, c1 + c2))
    6709             :   //      if c1 is equal to the number of bits the trunc removes
    6710        4783 :   if (N0.getOpcode() == ISD::TRUNCATE &&
    6711        4783 :       (N0.getOperand(0).getOpcode() == ISD::SRL ||
    6712        2304 :        N0.getOperand(0).getOpcode() == ISD::SRA) &&
    6713        4074 :       N0.getOperand(0).hasOneUse() &&
    6714       32736 :       N0.getOperand(0).getOperand(1).hasOneUse() &&
    6715             :       N1C) {
    6716         106 :     SDValue N0Op0 = N0.getOperand(0);
    6717         106 :     if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
    6718         212 :       unsigned LargeShiftVal = LargeShift->getZExtValue();
    6719         106 :       EVT LargeVT = N0Op0.getValueType();
    6720             : 
    6721         106 :       if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
    6722             :         SDLoc DL(N);
    6723             :         SDValue Amt =
    6724         100 :           DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
    6725         300 :                           getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
    6726         100 :         SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
    6727         100 :                                   N0Op0.getOperand(0), Amt);
    6728         200 :         return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
    6729             :       }
    6730             :     }
    6731             :   }
    6732             : 
    6733             :   // Simplify, based on bits shifted out of the LHS.
    6734       28984 :   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    6735        1502 :     return SDValue(N, 0);
    6736             : 
    6737             :   // If the sign bit is known to be zero, switch this to a SRL.
    6738       27482 :   if (DAG.SignBitIsZero(N0))
    6739         238 :     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
    6740             :   // Last resort for a non-opaque constant shift amount: the shared visitShiftByConstant combine.
    6741       27363 :   if (N1C && !N1C->isOpaque())
    6742       24524 :     if (SDValue NewSRA = visitShiftByConstant(N, N1C))
    6743           0 :       return NewSRA;
    6744             : 
    6745       27363 :   return SDValue();
    6746             : }
    6747             : 
    6748      164810 : SDValue DAGCombiner::visitSRL(SDNode *N) {
    6749      164810 :   SDValue N0 = N->getOperand(0);
    6750      164810 :   SDValue N1 = N->getOperand(1);
    6751      164810 :   EVT VT = N0.getValueType();
    6752             :   unsigned OpSizeInBits = VT.getScalarSizeInBits();
    6753             : 
    6754             :   // fold vector ops
    6755      164810 :   if (VT.isVector())
    6756        4830 :     if (SDValue FoldedVOp = SimplifyVBinOp(N))
    6757           4 :       return FoldedVOp;
    6758             : 
    6759      164806 :   ConstantSDNode *N1C = isConstOrConstSplat(N1);
    6760             : 
    6761             :   // fold (srl c1, c2) -> c1 >>u c2
    6762             :   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
    6763      164806 :   if (N0C && N1C && !N1C->isOpaque())
    6764        5270 :     return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
    6765             :   // fold (srl 0, x) -> 0
    6766      162171 :   if (isNullConstantOrNullSplatConstant(N0))
    6767          59 :     return N0;
    6768             :   // fold (srl x, c >= size(x)) -> undef
    6769             :   // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
    6770             :   auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
    6771      299900 :     return Val->getAPIntValue().uge(OpSizeInBits);
    6772             :   };
    6773      324224 :   if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
    6774          27 :     return DAG.getUNDEF(VT);
    6775             :   // fold (srl x, 0) -> x
    6776      311245 :   if (N1C && N1C->isNullValue())
    6777        2218 :     return N0;
    6778             : 
    6779      159867 :   if (SDValue NewSel = foldBinOpIntoSelect(N))
    6780           4 :     return NewSel;
    6781             : 
    6782             :   // if (srl x, c) is known to be zero, return 0
    6783      306802 :   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
    6784      453695 :                                    APInt::getAllOnesValue(OpSizeInBits)))
    6785          92 :     return DAG.getConstant(0, SDLoc(N), VT);
    6786             : 
    6787             :   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
    6788      159817 :   if (N0.getOpcode() == ISD::SRL) {
    6789             :     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
    6790             :                                           ConstantSDNode *RHS) {
    6791             :       APInt c1 = LHS->getAPIntValue();
    6792             :       APInt c2 = RHS->getAPIntValue();
    6793             :       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
    6794             :       return (c1 + c2).uge(OpSizeInBits);
    6795             :     };
    6796       10598 :     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
    6797          10 :       return DAG.getConstant(0, SDLoc(N), VT);
    6798             : 
    6799             :     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
    6800             :                                        ConstantSDNode *RHS) {
    6801             :       APInt c1 = LHS->getAPIntValue();
    6802             :       APInt c2 = RHS->getAPIntValue();
    6803             :       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
    6804             :       return (c1 + c2).ult(OpSizeInBits);
    6805             :     };
    6806       10588 :     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
    6807             :       SDLoc DL(N);
    6808        4426 :       EVT ShiftVT = N1.getValueType();
    6809        8852 :       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
    6810        8852 :       return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
    6811             :     }
    6812             :   }
    6813             : 
    6814             :   // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
    6815      155386 :   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
    6816       14494 :       N0.getOperand(0).getOpcode() == ISD::SRL) {
    6817        4673 :     if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
    6818        4673 :       uint64_t c1 = N001C->getZExtValue();
    6819        4673 :       uint64_t c2 = N1C->getZExtValue();
    6820        9346 :       EVT InnerShiftVT = N0.getOperand(0).getValueType();
    6821        9346 :       EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
    6822        4673 :       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
    6823             :       // This is only valid if the OpSizeInBits + c1 = size of inner shift.
    6824        4673 :       if (c1 + OpSizeInBits == InnerShiftSize) {
    6825             :         SDLoc DL(N0);
    6826        3217 :         if (c1 + c2 >= InnerShiftSize)
    6827           0 :           return DAG.getConstant(0, DL, VT);
    6828        3217 :         return DAG.getNode(ISD::TRUNCATE, DL, VT,
    6829             :                            DAG.getNode(ISD::SRL, DL, InnerShiftVT,
    6830        3217 :                                        N0.getOperand(0).getOperand(0),
    6831             :                                        DAG.getConstant(c1 + c2, DL,
    6832        3217 :                                                        ShiftCountVT)));
    6833             :       }
    6834             :     }
    6835             :   }
    6836             : 
    6837             :   // fold (srl (shl x, c), c) -> (and x, cst2)
    6838      153501 :   if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
    6839        1332 :       isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
    6840             :     SDLoc DL(N);
    6841             :     SDValue Mask =
    6842         410 :         DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
    6843         205 :     AddToWorklist(Mask.getNode());
    6844         410 :     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
    6845             :   }
    6846             : 
    6847             :   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
    6848      151964 :   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    6849             :     // Shifting in all undef bits?
    6850         770 :     EVT SmallVT = N0.getOperand(0).getValueType();
    6851             :     unsigned BitSize = SmallVT.getScalarSizeInBits();
    6852         770 :     if (N1C->getZExtValue() >= BitSize)
    6853         171 :       return DAG.getUNDEF(VT);
    6854             : 
    6855         385 :     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
    6856         171 :       uint64_t ShiftAmt = N1C->getZExtValue();
    6857             :       SDLoc DL0(N0);
    6858         171 :       SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
    6859             :                                        N0.getOperand(0),
    6860             :                           DAG.getConstant(ShiftAmt, DL0,
    6861         171 :                                           getShiftAmountTy(SmallVT)));
    6862         171 :       AddToWorklist(SmallShift.getNode());
    6863         171 :       APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
    6864             :       SDLoc DL(N);
    6865         171 :       return DAG.getNode(ISD::AND, DL, VT,
    6866         171 :                          DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
    6867         171 :                          DAG.getConstant(Mask, DL, VT));
    6868             :     }
    6869             :   }
    6870             : 
    6871             :   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
    6872             :   // bit, which is unmodified by sra.
    6873      290672 :   if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
    6874        4372 :     if (N0.getOpcode() == ISD::SRA)
    6875         826 :       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
    6876             :   }
    6877             : 
    6878             :   // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
    6879      151401 :   if (N1C && N0.getOpcode() == ISD::CTLZ &&
    6880         348 :       N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
    6881         171 :     KnownBits Known;
    6882         348 :     DAG.computeKnownBits(N0.getOperand(0), Known);
    6883             : 
    6884             :     // If any of the input bits are KnownOne, then the input couldn't be all
    6885             :     // zeros, thus the result of the srl will always be zero.
    6886         174 :     if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
    6887             : 
    6888             :     // If all of the bits input to the ctlz node are known to be zero, then
    6889             :     // the result of the ctlz is "32" and the result of the shift is one.
    6890         174 :     APInt UnknownBits = ~Known.Zero;
    6891         174 :     if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
    6892             : 
    6893             :     // Otherwise, check to see if there is exactly one bit input to the ctlz.
    6894         174 :     if (UnknownBits.isPowerOf2()) {
    6895             :       // Okay, we know that only the single bit specified by UnknownBits
    6896             :       // could be set on input to the CTLZ node. If this bit is set, the SRL
    6897             :       // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
    6898             :       // to an SRL/XOR pair, which is likely to simplify more.
    6899           3 :       unsigned ShAmt = UnknownBits.countTrailingZeros();
    6900           3 :       SDValue Op = N0.getOperand(0);
    6901             : 
    6902           3 :       if (ShAmt) {
    6903             :         SDLoc DL(N0);
    6904           3 :         Op = DAG.getNode(ISD::SRL, DL, VT, Op,
    6905             :                   DAG.getConstant(ShAmt, DL,
    6906           3 :                                   getShiftAmountTy(Op.getValueType())));
    6907           3 :         AddToWorklist(Op.getNode());
    6908             :       }
    6909             : 
    6910             :       SDLoc DL(N);
    6911           3 :       return DAG.getNode(ISD::XOR, DL, VT,
    6912           3 :                          Op, DAG.getConstant(1, DL, VT));
    6913             :     }
    6914             :   }
    6915             : 
    6916             :   // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
    6917      151398 :   if (N1.getOpcode() == ISD::TRUNCATE &&
    6918        3190 :       N1.getOperand(0).getOpcode() == ISD::AND) {
    6919          48 :     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
    6920          65 :       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
    6921             :   }
    6922             : 
    6923             :   // fold operands of srl based on knowledge that the low bits are not
    6924             :   // demanded.
    6925      151366 :   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    6926        7862 :     return SDValue(N, 0);
    6927             : 
    6928      143504 :   if (N1C && !N1C->isOpaque())
    6929      130607 :     if (SDValue NewSRL = visitShiftByConstant(N, N1C))
    6930         600 :       return NewSRL;
    6931             : 
    6932             :   // Attempt to convert a srl of a load into a narrower zero-extending load.
    6933      142904 :   if (SDValue NarrowLoad = ReduceLoadWidth(N))
    6934         120 :     return NarrowLoad;
    6935             : 
    6936             :   // Here is a common situation. We want to optimize:
    6937             :   //
    6938             :   //   %a = ...
    6939             :   //   %b = and i32 %a, 2
    6940             :   //   %c = srl i32 %b, 1
    6941             :   //   brcond i32 %c ...
    6942             :   //
    6943             :   // into
    6944             :   //
    6945             :   //   %a = ...
    6946             :   //   %b = and %a, 2
    6947             :   //   %c = setcc eq %b, 0
    6948             :   //   brcond %c ...
    6949             :   //
    6950             :   // However, after the source operand of SRL is optimized into AND, the SRL
    6951             :   // itself may not be optimized further. Look for it and add the BRCOND into
    6952             :   // the worklist.
    6953             :   if (N->hasOneUse()) {
    6954             :     SDNode *Use = *N->use_begin();
    6955      271004 :     if (Use->getOpcode() == ISD::BRCOND)
    6956           4 :       AddToWorklist(Use);
    6957      135498 :     else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
    6958             :       // Also look past the truncate.
    6959             :       Use = *Use->use_begin();
    6960       30099 :       if (Use->getOpcode() == ISD::BRCOND)
    6961           2 :         AddToWorklist(Use);
    6962             :     }
    6963             :   }
    6964             : 
    6965      142784 :   return SDValue();
    6966             : }
    6967             : 
    6968           0 : SDValue DAGCombiner::visitABS(SDNode *N) {
    6969           0 :   SDValue N0 = N->getOperand(0);
    6970           0 :   EVT VT = N->getValueType(0);
    6971             : 
    6972             :   // fold (abs c1) -> c2
    6973           0 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    6974           0 :     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
    6975             :   // fold (abs (abs x)) -> (abs x)
    6976           0 :   if (N0.getOpcode() == ISD::ABS)
    6977           0 :     return N0;
    6978             :   // fold (abs x) -> x iff not-negative
    6979           0 :   if (DAG.SignBitIsZero(N0))
    6980           0 :     return N0;
    6981           0 :   return SDValue();
    6982             : }
    6983             : 
    6984           0 : SDValue DAGCombiner::visitBSWAP(SDNode *N) {
    6985           0 :   SDValue N0 = N->getOperand(0);
    6986           0 :   EVT VT = N->getValueType(0);
    6987             : 
    6988             :   // fold (bswap c1) -> c2
    6989           0 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    6990           0 :     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
    6991             :   // fold (bswap (bswap x)) -> x
    6992           0 :   if (N0.getOpcode() == ISD::BSWAP)
    6993           0 :     return N0->getOperand(0);
    6994           0 :   return SDValue();
    6995             : }
    6996             : 
    6997           0 : SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
    6998           0 :   SDValue N0 = N->getOperand(0);
    6999           0 :   EVT VT = N->getValueType(0);
    7000             : 
    7001             :   // fold (bitreverse c1) -> c2
    7002           0 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    7003           0 :     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
    7004             :   // fold (bitreverse (bitreverse x)) -> x
    7005           0 :   if (N0.getOpcode() == ISD::BITREVERSE)
    7006           0 :     return N0.getOperand(0);
    7007           0 :   return SDValue();
    7008             : }
    7009             : 
    7010        1221 : SDValue DAGCombiner::visitCTLZ(SDNode *N) {
    7011        1221 :   SDValue N0 = N->getOperand(0);
    7012        1221 :   EVT VT = N->getValueType(0);
    7013             : 
    7014             :   // fold (ctlz c1) -> c2
    7015        1221 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    7016           4 :     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
    7017             : 
    7018             :   // If the value is known never to be zero, switch to the undef version.
    7019        1219 :   if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
    7020         400 :     if (DAG.isKnownNeverZero(N0))
    7021          14 :       return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
    7022             :   }
    7023             : 
    7024        1212 :   return SDValue();
    7025             : }
    7026             : 
    7027           0 : SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
    7028           0 :   SDValue N0 = N->getOperand(0);
    7029           0 :   EVT VT = N->getValueType(0);
    7030             : 
    7031             :   // fold (ctlz_zero_undef c1) -> c2
    7032           0 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    7033           0 :     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
    7034           0 :   return SDValue();
    7035             : }
    7036             : 
    7037         500 : SDValue DAGCombiner::visitCTTZ(SDNode *N) {
    7038         500 :   SDValue N0 = N->getOperand(0);
    7039         500 :   EVT VT = N->getValueType(0);
    7040             : 
    7041             :   // fold (cttz c1) -> c2
    7042         500 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    7043           0 :     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
    7044             : 
    7045             :   // If the value is known never to be zero, switch to the undef version.
    7046         500 :   if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
    7047         322 :     if (DAG.isKnownNeverZero(N0))
    7048          34 :       return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
    7049             :   }
    7050             : 
    7051         483 :   return SDValue();
    7052             : }
    7053             : 
    7054           0 : SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
    7055           0 :   SDValue N0 = N->getOperand(0);
    7056           0 :   EVT VT = N->getValueType(0);
    7057             : 
    7058             :   // fold (cttz_zero_undef c1) -> c2
    7059           0 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    7060           0 :     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
    7061           0 :   return SDValue();
    7062             : }
    7063             : 
    7064           0 : SDValue DAGCombiner::visitCTPOP(SDNode *N) {
    7065           0 :   SDValue N0 = N->getOperand(0);
    7066           0 :   EVT VT = N->getValueType(0);
    7067             : 
    7068             :   // fold (ctpop c1) -> c2
    7069           0 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    7070           0 :     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
    7071           0 :   return SDValue();
    7072             : }
    7073             : 
    7074             : // FIXME: This should be checking for no signed zeros on individual operands, as
    7075             : // well as no nans.
    7076       28878 : static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS) {
    7077       28878 :   const TargetOptions &Options = DAG.getTarget().Options;
    7078       28878 :   EVT VT = LHS.getValueType();
    7079             : 
    7080         808 :   return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
    7081       29477 :          DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
    7082             : }
    7083             : 
    7084             : /// Generate Min/Max node
    7085           0 : static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
    7086             :                                    SDValue RHS, SDValue True, SDValue False,
    7087             :                                    ISD::CondCode CC, const TargetLowering &TLI,
    7088             :                                    SelectionDAG &DAG) {
    7089             :   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
    7090           0 :     return SDValue();
    7091             : 
    7092           0 :   EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
    7093           0 :   switch (CC) {
    7094           0 :   case ISD::SETOLT:
    7095             :   case ISD::SETOLE:
    7096             :   case ISD::SETLT:
    7097             :   case ISD::SETLE:
    7098             :   case ISD::SETULT:
    7099             :   case ISD::SETULE: {
    7100             :     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
    7101           0 :     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
    7102           0 :       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
    7103           0 :     return SDValue();
    7104             :   }
    7105           0 :   case ISD::SETOGT:
    7106             :   case ISD::SETOGE:
    7107             :   case ISD::SETGT:
    7108             :   case ISD::SETGE:
    7109             :   case ISD::SETUGT:
    7110             :   case ISD::SETUGE: {
    7111             :     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
    7112           0 :     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
    7113           0 :       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
    7114           0 :     return SDValue();
    7115             :   }
    7116           0 :   default:
    7117           0 :     return SDValue();
    7118             :   }
    7119             : }
    7120             : 
    7121       31935 : SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
    7122       31935 :   SDValue Cond = N->getOperand(0);
    7123       31935 :   SDValue N1 = N->getOperand(1);
    7124       31935 :   SDValue N2 = N->getOperand(2);
    7125       63870 :   EVT VT = N->getValueType(0);
    7126       31935 :   EVT CondVT = Cond.getValueType();
    7127             :   SDLoc DL(N);
    7128             : 
    7129       31935 :   if (!VT.isInteger())
    7130        7164 :     return SDValue();
    7131             : 
    7132             :   auto *C1 = dyn_cast<ConstantSDNode>(N1);
    7133             :   auto *C2 = dyn_cast<ConstantSDNode>(N2);
    7134       24771 :   if (!C1 || !C2)
    7135       20499 :     return SDValue();
    7136             : 
    7137             :   // Only do this before legalization to avoid conflicting with target-specific
    7138             :   // transforms in the other direction (create a select from a zext/sext). There
    7139             :   // is also a target-independent combine here in DAGCombiner in the other
    7140             :   // direction for (select Cond, -1, 0) when the condition is not i1.
    7141        2865 :   if (CondVT == MVT::i1 && !LegalOperations) {
    7142        2849 :     if (C1->isNullValue() && C2->isOne()) {
    7143             :       // select Cond, 0, 1 --> zext (!Cond)
    7144          66 :       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
    7145             :       if (VT != MVT::i1)
    7146          66 :         NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
    7147          33 :       return NotCond;
    7148             :     }
    7149        1438 :     if (C1->isNullValue() && C2->isAllOnesValue()) {
    7150             :       // select Cond, 0, -1 --> sext (!Cond)
    7151          52 :       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
    7152             :       if (VT != MVT::i1)
    7153          52 :         NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
    7154          26 :       return NotCond;
    7155             :     }
    7156        1446 :     if (C1->isOne() && C2->isNullValue()) {
    7157             :       // select Cond, 1, 0 --> zext (Cond)
    7158             :       if (VT != MVT::i1)
    7159         156 :         Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
    7160          78 :       return Cond;
    7161             :     }
    7162        1329 :     if (C1->isAllOnesValue() && C2->isNullValue()) {
    7163             :       // select Cond, -1, 0 --> sext (Cond)
    7164             :       if (VT != MVT::i1)
    7165         114 :         Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
    7166          57 :       return Cond;
    7167             :     }
    7168             : 
    7169             :     // For any constants that differ by 1, we can transform the select into an
    7170             :     // extend and add. Use a target hook because some targets may prefer to
    7171             :     // transform in the other direction.
    7172        1151 :     if (TLI.convertSelectOfConstantsToMath(VT)) {
    7173        1800 :       if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
    7174             :         // select Cond, C1, C1-1 --> add (zext Cond), C1-1
    7175             :         if (VT != MVT::i1)
    7176          66 :           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
    7177          66 :         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
    7178             :       }
    7179        1701 :       if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
    7180             :         // select Cond, C1, C1+1 --> add (sext Cond), C1+1
    7181             :         if (VT != MVT::i1)
    7182         170 :           Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
    7183         170 :         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
    7184             :       }
    7185             :     }
    7186             : 
    7187        1033 :     return SDValue();
    7188             :   }
    7189             : 
    7190             :   // fold (select Cond, 0, 1) -> (xor Cond, 1)
    7191             :   // We can't do this reliably if integer based booleans have different contents
    7192             :   // to floating point based booleans. This is because we can't tell whether we
    7193             :   // have an integer-based boolean or a floating-point-based boolean unless we
    7194             :   // can find the SETCC that produced it and inspect its operands. This is
    7195             :   // fairly easy if C is the SETCC node, but it can potentially be
    7196             :   // undiscoverable (or not reasonably discoverable). For example, it could be
    7197             :   // in another basic block or it could require searching a complicated
    7198             :   // expression.
    7199        2927 :   if (CondVT.isInteger() &&
    7200        2927 :       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
    7201        1273 :           TargetLowering::ZeroOrOneBooleanContent &&
    7202             :       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
    7203        1273 :           TargetLowering::ZeroOrOneBooleanContent &&
    7204        4942 :       C1->isNullValue() && C2->isOne()) {
    7205             :     SDValue NotCond =
    7206           0 :         DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
    7207           0 :     if (VT.bitsEq(CondVT))
    7208           0 :       return NotCond;
    7209           0 :     return DAG.getZExtOrTrunc(NotCond, DL, VT);
    7210             :   }
    7211             : 
    7212        2927 :   return SDValue();
    7213             : }
    7214             : 
    7215       32115 : SDValue DAGCombiner::visitSELECT(SDNode *N) {
    7216       32115 :   SDValue N0 = N->getOperand(0);
    7217       32115 :   SDValue N1 = N->getOperand(1);
    7218       32115 :   SDValue N2 = N->getOperand(2);
    7219       64230 :   EVT VT = N->getValueType(0);
    7220       32115 :   EVT VT0 = N0.getValueType();
    7221             :   SDLoc DL(N);
    7222             : 
    7223             :   // fold (select C, X, X) -> X
    7224       32115 :   if (N1 == N2)
    7225          70 :     return N1;
    7226             : 
    7227             :   if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
    7228             :     // fold (select true, X, Y) -> X
    7229             :     // fold (select false, X, Y) -> Y
    7230         230 :     return !N0C->isNullValue() ? N1 : N2;
    7231             :   }
    7232             : 
    7233             :   // fold (select X, X, Y) -> (or X, Y)
    7234             :   // fold (select X, 1, Y) -> (or X, Y)
    7235         102 :   if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
    7236          54 :     return DAG.getNode(ISD::OR, DL, VT, N0, N2);
    7237             : 
    7238       31935 :   if (SDValue V = foldSelectOfConstants(N))
    7239         312 :     return V;
    7240             : 
    7241             :   // fold (select C, 0, X) -> (and (not C), X)
    7242          81 :   if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
    7243           7 :     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    7244           7 :     AddToWorklist(NOTNode.getNode());
    7245          14 :     return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
    7246             :   }
    7247             :   // fold (select C, X, 1) -> (or (not C), X)
    7248          74 :   if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
    7249           2 :     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    7250           2 :     AddToWorklist(NOTNode.getNode());
    7251           4 :     return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
    7252             :   }
    7253             :   // fold (select X, Y, X) -> (and X, Y)
    7254             :   // fold (select X, Y, 0) -> (and X, Y)
    7255          70 :   if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
    7256          30 :     return DAG.getNode(ISD::AND, DL, VT, N0, N1);
    7257             : 
    7258             :   // If we can fold this based on the true/false value, do so.
    7259       31599 :   if (SimplifySelectOps(N, N1, N2))
    7260         108 :     return SDValue(N, 0); // Don't revisit N.
    7261             : 
    7262             :   if (VT0 == MVT::i1) {
    7263             :     // The code in this block deals with the following 2 equivalences:
    7264             :     //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
    7265             :     //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
    7266             :     // The target can specify its preferred form with the
    7267             :     // shouldNormalizeToSelectSequence() callback. However we always transform
    7268             :     // to the right if we find the inner select exists in the DAG anyway
    7269             :     // and we always transform to the left side if we know that we can further
    7270             :     // optimize the combination of the conditions.
    7271             :     bool normalizeToSequence =
    7272       21913 :         TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
    7273             :     // select (and Cond0, Cond1), X, Y
    7274             :     //   -> select Cond0, (select Cond1, X, Y), Y
    7275       21913 :     if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
    7276         688 :       SDValue Cond0 = N0->getOperand(0);
    7277         688 :       SDValue Cond1 = N0->getOperand(1);
    7278             :       SDValue InnerSelect =
    7279        2064 :           DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
    7280         688 :       if (normalizeToSequence || !InnerSelect.use_empty())
    7281          38 :         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
    7282          38 :                            InnerSelect, N2);
    7283             :     }
    7284             :     // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
    7285       21875 :     if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
    7286         177 :       SDValue Cond0 = N0->getOperand(0);
    7287         177 :       SDValue Cond1 = N0->getOperand(1);
    7288             :       SDValue InnerSelect =
    7289         531 :           DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
    7290         177 :       if (normalizeToSequence || !InnerSelect.use_empty())
    7291          54 :         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
    7292          54 :                            InnerSelect);
    7293             :     }
    7294             : 
    7295             :     // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
    7296       21821 :     if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
    7297        1425 :       SDValue N1_0 = N1->getOperand(0);
    7298        1425 :       SDValue N1_1 = N1->getOperand(1);
    7299        1425 :       SDValue N1_2 = N1->getOperand(2);
    7300        1425 :       if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
    7301             :         // Create the actual and node if we can generate good code for it.
    7302         214 :         if (!normalizeToSequence) {
    7303         292 :           SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
    7304         438 :           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2);
    7305             :         }
    7306             :         // Otherwise see if we can optimize the "and" to a better pattern.
    7307          68 :         if (SDValue Combined = visitANDLike(N0, N1_0, N))
    7308           6 :           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
    7309          12 :                              N2);
    7310             :       }
    7311             :     }
    7312             :     // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
    7313       21669 :     if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
    7314         833 :       SDValue N2_0 = N2->getOperand(0);
    7315         833 :       SDValue N2_1 = N2->getOperand(1);
    7316         833 :       SDValue N2_2 = N2->getOperand(2);
    7317         833 :       if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
    7318             :         // Create the actual or node if we can generate good code for it.
    7319         117 :         if (!normalizeToSequence) {
    7320          34 :           SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
    7321          34 :           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2);
    7322             :         }
    7323             :         // Otherwise see if we can optimize to a better pattern.
    7324         100 :         if (SDValue Combined = visitORLike(N0, N2_0, N))
    7325           6 :           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
    7326           6 :                              N2_2);
    7327             :       }
    7328             :     }
    7329             :   }
    7330             : 
    7331             :   if (VT0 == MVT::i1) {
    7332             :     // select (not Cond), N1, N2 -> select Cond, N2, N1
    7333       21646 :     if (isBitwiseNot(N0))
    7334         564 :       return DAG.getNode(ISD::SELECT, DL, VT, N0->getOperand(0), N2, N1);
    7335             :   }
    7336             : 
    7337             :   // Fold selects based on a setcc into other things, such as min/max/abs.
    7338       31036 :   if (N0.getOpcode() == ISD::SETCC) {
    7339       25213 :     SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
    7340       25213 :     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    7341             : 
    7342             :     // select (fcmp lt x, y), x, y -> fminnum x, y
    7343             :     // select (fcmp gt x, y), x, y -> fmaxnum x, y
    7344             :     //
    7345             :     // This is OK if we don't care what happens if either operand is a NaN.
    7346       25213 :     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2))
    7347         136 :       if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
    7348         136 :                                                 CC, TLI, DAG))
    7349         103 :         return FMinMax;
    7350             : 
    7351             :     // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
    7352             :     // This is conservatively limited to pre-legal-operations to give targets
    7353             :     // a chance to reverse the transform if they want to do that. Also, it is
    7354             :     // unlikely that the pattern would be formed late, so it's probably not
    7355             :     // worth going through the other checks.
    7356       25036 :     if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
    7357         703 :         CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
    7358       25147 :         N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
    7359             :       auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
    7360             :       auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
    7361          68 :       if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
    7362             :         // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
    7363             :         // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
    7364             :         //
    7365             :         // The IR equivalent of this transform would have this form:
    7366             :         //   %a = add %x, C
    7367             :         //   %c = icmp ugt %x, ~C
    7368             :         //   %r = select %c, -1, %a
    7369             :         //   =>
    7370             :         //   %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
    7371             :         //   %u0 = extractvalue %u, 0
    7372             :         //   %u1 = extractvalue %u, 1
    7373             :         //   %r = select %u1, -1, %u0
    7374          10 :         SDVTList VTs = DAG.getVTList(VT, VT0);
    7375          20 :         SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
    7376          10 :         return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
    7377             :       }
    7378             :     }
    7379             : 
    7380       25100 :     if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
    7381       24739 :         (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)))
    7382        3598 :       return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2,
    7383        3598 :                          N0.getOperand(2));
    7384             : 
    7385       21502 :     return SimplifySelect(DL, N0, N1, N2);
    7386             :   }
    7387             : 
    7388        5823 :   return SDValue();
    7389             : }
    7390             : 
    7391             : static // Split the three-operand node N into a low-half and a high-half node with the same opcode.
    7392          19 : std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
    7393             :   SDLoc DL(N);
    7394             :   EVT LoVT, HiVT;
    7395          38 :   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
    7396             : 
    7397             :   // Split the two inputs (operands 0 and 1) into their low and high parts.
    7398             :   SDValue Lo, Hi, LL, LH, RL, RH;
    7399          19 :   std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
    7400          19 :   std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
    7401             :   // Rebuild each half with N's opcode; operand 2 is passed through unchanged to both halves.
    7402          57 :   Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
    7403          57 :   Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
    7404             : 
    7405          19 :   return std::make_pair(Lo, Hi);
    7406             : }
    7407             : 
    7408             : // This function assumes all the vselect's arguments are CONCAT_VECTORS
    7409             : // nodes and that the condition is a BV of ConstantSDNodes (or undefs). It returns an empty SDValue when no fold is made.
    7410          43 : static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
    7411             :   SDLoc DL(N);
    7412          43 :   SDValue Cond = N->getOperand(0);
    7413          43 :   SDValue LHS = N->getOperand(1);
    7414          43 :   SDValue RHS = N->getOperand(2);
    7415          86 :   EVT VT = N->getValueType(0);
    7416          43 :   int NumElems = VT.getVectorNumElements();
    7417             :   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
    7418             :          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
    7419             :          Cond.getOpcode() == ISD::BUILD_VECTOR);
    7420             : 
    7421             :   // CONCAT_VECTORS can take an arbitrary number of arguments. We only care about
    7422             :   // binary ones here.
    7423          43 :   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
    7424          11 :     return SDValue();
    7425             : 
    7426             :   // We're sure we have an even number of elements due to the
    7427             :   // concat_vectors we have as arguments to vselect.
    7428             :   // Skip BV elements until we find one that's not an UNDEF.
    7429             :   // After we find a non-UNDEF element, keep looping until we get to half the
    7430             :   // length of the BV and see if all the non-undef nodes are the same (compared by node identity).
    7431             :   ConstantSDNode *BottomHalf = nullptr;
    7432          98 :   for (int i = 0; i < NumElems / 2; ++i) {
    7433         170 :     if (Cond->getOperand(i)->isUndef())
    7434             :       continue;
    7435             : 
    7436          85 :     if (BottomHalf == nullptr)
    7437             :       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
    7438          53 :     else if (Cond->getOperand(i).getNode() != BottomHalf)
    7439          19 :       return SDValue();
    7440             :   }
    7441             : 
    7442             :   // Do the same for the second half of the BuildVector.
    7443             :   ConstantSDNode *TopHalf = nullptr;
    7444          48 :   for (int i = NumElems / 2; i < NumElems; ++i) {
    7445          76 :     if (Cond->getOperand(i)->isUndef())
    7446             :       continue;
    7447             : 
    7448          38 :     if (TopHalf == nullptr)
    7449             :       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
    7450          25 :     else if (Cond->getOperand(i).getNode() != TopHalf)
    7451           3 :       return SDValue();
    7452             :   }
    7453             :   // For each half, a zero selector constant picks the RHS sub-vector; any other constant picks the LHS one.
    7454             :   assert(TopHalf && BottomHalf &&
    7455             :          "One half of the selector was all UNDEFs and the other was all the "
    7456             :          "same value. This should have been addressed before this function.");
    7457             :   return DAG.getNode(
    7458             :       ISD::CONCAT_VECTORS, DL, VT,
    7459          20 :       BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
    7460          30 :       TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
    7461             : }
    7462             : 
    7463         287 : SDValue DAGCombiner::visitMSCATTER(SDNode *N) { // Split a masked scatter whose mask is a SETCC and whose data type must be split.
    7464         287 :   if (Level >= AfterLegalizeTypes)
    7465         166 :     return SDValue();
    7466             :   // From here on the combine level is below AfterLegalizeTypes.
    7467             :   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
    7468         121 :   SDValue Mask = MSC->getMask();
    7469         121 :   SDValue Data  = MSC->getValue();
    7470             :   SDLoc DL(N);
    7471             : 
    7472             :   // If the MSCATTER data type requires splitting and the mask is provided by a
    7473             :   // SETCC, then split both nodes and their operands before legalization. This
    7474             :   // prevents the type legalizer from unrolling SETCC into scalar comparisons
    7475             :   // and enables future optimizations (e.g. min/max pattern matching on X86).
    7476         121 :   if (Mask.getOpcode() != ISD::SETCC)
    7477          96 :     return SDValue();
    7478             : 
    7479             :   // Check if any splitting is required; bail out unless the data type's action is TypeSplitVector.
    7480          50 :   if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
    7481             :       TargetLowering::TypeSplitVector)
    7482          20 :     return SDValue();
    7483             :   SDValue MaskLo, MaskHi;
    7484           5 :   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
    7485             : 
    7486             :   EVT LoVT, HiVT;
    7487          10 :   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
    7488             :   // NOTE(review): LoVT/HiVT (above) and HiMemVT (below) are not read again in this function.
    7489           5 :   SDValue Chain = MSC->getChain();
    7490             : 
    7491           5 :   EVT MemoryVT = MSC->getMemoryVT();
    7492           5 :   unsigned Alignment = MSC->getOriginalAlignment();
    7493             : 
    7494           5 :   EVT LoMemVT, HiMemVT;
    7495           5 :   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
    7496             : 
    7497             :   SDValue DataLo, DataHi;
    7498           5 :   std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
    7499             :   // Scale and base pointer are shared by both halves; only the index vector is split.
    7500           5 :   SDValue Scale = MSC->getScale();
    7501           5 :   SDValue BasePtr = MSC->getBasePtr();
    7502             :   SDValue IndexLo, IndexHi;
    7503          10 :   std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
    7504             :   // NOTE(review): one memory operand, sized for the low half, is passed to both scatters below.
    7505           5 :   MachineMemOperand *MMO = DAG.getMachineFunction().
    7506          15 :     getMachineMemOperand(MSC->getPointerInfo(),
    7507             :                           MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
    7508           5 :                           Alignment, MSC->getAAInfo(), MSC->getRanges());
    7509             : 
    7510           5 :   SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale };
    7511           5 :   SDValue Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
    7512          10 :                                     DataLo.getValueType(), DL, OpsLo, MMO);
    7513             : 
    7514             :   // The order of the Scatter operation after split is well defined. The "Hi"
    7515             :   // part comes after the "Lo". So these two operations should be chained one
    7516             :   // after another: the "Lo" scatter is used as the chain operand of the "Hi" scatter.
    7517           5 :   SDValue OpsHi[] = { Lo, DataHi, MaskHi, BasePtr, IndexHi, Scale };
    7518           5 :   return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
    7519          10 :                               DL, OpsHi, MMO);
    7520             : }
    7521             : 
    7522         761 : SDValue DAGCombiner::visitMSTORE(SDNode *N) { // Split a masked store whose mask is a SETCC and whose data type must be split.
    7523         761 :   if (Level >= AfterLegalizeTypes)
    7524         448 :     return SDValue();
    7525             :   // From here on the combine level is below AfterLegalizeTypes.
    7526             :   MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N); // N is dereferenced unconditionally, so use cast<> (asserts) rather than dyn_cast<> (may yield null).
    7527         313 :   SDValue Mask = MST->getMask();
    7528         313 :   SDValue Data  = MST->getValue();
    7529         626 :   EVT VT = Data.getValueType();
    7530             :   SDLoc DL(N);
    7531             : 
    7532             :   // If the MSTORE data type requires splitting and the mask is provided by a
    7533             :   // SETCC, then split both nodes and their operands before legalization. This
    7534             :   // prevents the type legalizer from unrolling SETCC into scalar comparisons
    7535             :   // and enables future optimizations (e.g. min/max pattern matching on X86).
    7536         313 :   if (Mask.getOpcode() == ISD::SETCC) {
    7537             :     // Check if any splitting is required; bail out unless the data type's action is TypeSplitVector.
    7538          36 :     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
    7539             :         TargetLowering::TypeSplitVector)
    7540          34 :       return SDValue();
    7541             : 
    7542             :     SDValue MaskLo, MaskHi, Lo, Hi;
    7543           2 :     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
    7544             : 
    7545           2 :     SDValue Chain = MST->getChain();
    7546           2 :     SDValue Ptr   = MST->getBasePtr();
    7547             : 
    7548           2 :     EVT MemoryVT = MST->getMemoryVT();
    7549           2 :     unsigned Alignment = MST->getOriginalAlignment();
    7550             : 
    7551             :     // If Alignment is equal to the vector size in bytes,
    7552             :     // take half of it for the second part; otherwise keep it unchanged.
    7553             :     unsigned SecondHalfAlignment =
    7554           2 :       (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;
    7555             : 
    7556           2 :     EVT LoMemVT, HiMemVT;
    7557           2 :     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
    7558             : 
    7559             :     SDValue DataLo, DataHi;
    7560           2 :     std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
    7561             :     // Memory operand for the low half: original pointer info and alignment, low-half store size.
    7562           2 :     MachineMemOperand *MMO = DAG.getMachineFunction().
    7563           6 :       getMachineMemOperand(MST->getPointerInfo(),
    7564             :                            MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
    7565           2 :                            Alignment, MST->getAAInfo(), MST->getRanges());
    7566             : 
    7567           2 :     Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
    7568           2 :                             MST->isTruncatingStore(),
    7569           4 :                             MST->isCompressingStore());
    7570             :     // Advance the pointer past the low half before building the high-half store.
    7571           2 :     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
    7572           2 :                                      MST->isCompressingStore());
    7573             :     unsigned HiOffset = LoMemVT.getStoreSize();
    7574             :     // Memory operand for the high half: pointer info offset by the low-half store size.
    7575           6 :     MMO = DAG.getMachineFunction().getMachineMemOperand(
    7576             :         MST->getPointerInfo().getWithOffset(HiOffset),
    7577             :         MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment,
    7578           2 :         MST->getAAInfo(), MST->getRanges());
    7579             : 
    7580           2 :     Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
    7581           2 :                             MST->isTruncatingStore(),
    7582           4 :                             MST->isCompressingStore());
    7583             : 
    7584           2 :     AddToWorklist(Lo.getNode());
    7585           2 :     AddToWorklist(Hi.getNode());
    7586             :     // Both half stores use the original chain; a TokenFactor joins them into the returned chain.
    7587           4 :     return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
    7588             :   }
    7589         277 :   return SDValue();
    7590             : }
    7591             : 
    7592         994 : SDValue DAGCombiner::visitMGATHER(SDNode *N) { // Split a masked gather whose mask is a SETCC and whose result type must be split.
    7593         994 :   if (Level >= AfterLegalizeTypes)
    7594         508 :     return SDValue();
    7595             :   // From here on the combine level is below AfterLegalizeTypes.
    7596             :   MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
    7597         486 :   SDValue Mask = MGT->getMask();
    7598             :   SDLoc DL(N);
    7599             : 
    7600             :   // If the MGATHER result requires splitting and the mask is provided by a
    7601             :   // SETCC, then split both nodes and their operands before legalization. This
    7602             :   // prevents the type legalizer from unrolling SETCC into scalar comparisons
    7603             :   // and enables future optimizations (e.g. min/max pattern matching on X86).
    7604             : 
    7605         486 :   if (Mask.getOpcode() != ISD::SETCC)
    7606         456 :     return SDValue();
    7607             : 
    7608          30 :   EVT VT = N->getValueType(0);
    7609             : 
    7610             :   // Check if any splitting is required; bail out unless the result type's action is TypeSplitVector.
    7611          30 :   if (TLI.getTypeAction(*DAG.getContext(), VT) !=
    7612             :       TargetLowering::TypeSplitVector)
    7613          24 :     return SDValue();
    7614             : 
    7615             :   SDValue MaskLo, MaskHi, Lo, Hi;
    7616           6 :   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
    7617             : 
    7618           6 :   SDValue PassThru = MGT->getPassThru();
    7619             :   SDValue PassThruLo, PassThruHi;
    7620           6 :   std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
    7621             : 
    7622             :   EVT LoVT, HiVT;
    7623           6 :   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
    7624             : 
    7625           6 :   SDValue Chain = MGT->getChain();
    7626           6 :   EVT MemoryVT = MGT->getMemoryVT();
    7627           6 :   unsigned Alignment = MGT->getOriginalAlignment();
    7628             : 
    7629           6 :   EVT LoMemVT, HiMemVT;
    7630           6 :   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
    7631             :   // Scale and base pointer are shared by both halves; only the index vector is split.
    7632           6 :   SDValue Scale = MGT->getScale();
    7633           6 :   SDValue BasePtr = MGT->getBasePtr();
    7634           6 :   SDValue Index = MGT->getIndex();
    7635             :   SDValue IndexLo, IndexHi;
    7636           6 :   std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
    7637             :   // NOTE(review): one memory operand, sized for the low half, is passed to both gathers below.
    7638           6 :   MachineMemOperand *MMO = DAG.getMachineFunction().
    7639          18 :     getMachineMemOperand(MGT->getPointerInfo(),
    7640             :                           MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
    7641           6 :                           Alignment, MGT->getAAInfo(), MGT->getRanges());
    7642             : 
    7643           6 :   SDValue OpsLo[] = { Chain, PassThruLo, MaskLo, BasePtr, IndexLo, Scale };
    7644           6 :   Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
    7645           6 :                            MMO);
    7646             :   // The high half uses the original chain as well, not the low gather's output chain.
    7647           6 :   SDValue OpsHi[] = { Chain, PassThruHi, MaskHi, BasePtr, IndexHi, Scale };
    7648           6 :   Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
    7649           6 :                            MMO);
    7650             : 
    7651           6 :   AddToWorklist(Lo.getNode());
    7652           6 :   AddToWorklist(Hi.getNode());
    7653             : 
    7654             :   // Build a factor node to remember that this load is independent of the
    7655             :   // other one.
    7656           6 :   Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
    7657           6 :                       Hi.getValue(1));
    7658             : 
    7659             :   // Legalized the chain result - switch anything that used the old chain to
    7660             :   // use the new one.
    7661          12 :   DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
    7662             :   // Reassemble the full-width result from the two halves.
    7663          12 :   SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
    7664             : 
    7665           6 :   SDValue RetOps[] = { GatherRes, Chain };
    7666          12 :   return DAG.getMergeValues(RetOps, DL);
    7667             : }
    7668             : 
    7669        1291 : SDValue DAGCombiner::visitMLOAD(SDNode *N) { // Split a masked load whose mask is a SETCC and whose result type must be split.
    7670        1291 :   if (Level >= AfterLegalizeTypes)
    7671         722 :     return SDValue();
    7672             :   // From here on the combine level is below AfterLegalizeTypes.
    7673             :   MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N); // N is dereferenced unconditionally, so use cast<> (asserts) rather than dyn_cast<> (may yield null).
    7674         569 :   SDValue Mask = MLD->getMask();
    7675             :   SDLoc DL(N);
    7676             : 
    7677             :   // If the MLOAD result requires splitting and the mask is provided by a
    7678             :   // SETCC, then split both nodes and their operands before legalization. This
    7679             :   // prevents the type legalizer from unrolling SETCC into scalar comparisons
    7680             :   // and enables future optimizations (e.g. min/max pattern matching on X86).
    7681         569 :   if (Mask.getOpcode() == ISD::SETCC) {
    7682          76 :     EVT VT = N->getValueType(0);
    7683             : 
    7684             :     // Check if any splitting is required; bail out unless the result type's action is TypeSplitVector.
    7685          76 :     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
    7686             :         TargetLowering::TypeSplitVector)
    7687          70 :       return SDValue();
    7688             : 
    7689             :     SDValue MaskLo, MaskHi, Lo, Hi;
    7690           6 :     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
    7691             : 
    7692           6 :     SDValue PassThru = MLD->getPassThru();
    7693             :     SDValue PassThruLo, PassThruHi;
    7694           6 :     std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
    7695             : 
    7696             :     EVT LoVT, HiVT;
    7697          12 :     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
    7698             : 
    7699           6 :     SDValue Chain = MLD->getChain();
    7700           6 :     SDValue Ptr   = MLD->getBasePtr();
    7701           6 :     EVT MemoryVT = MLD->getMemoryVT();
    7702           6 :     unsigned Alignment = MLD->getOriginalAlignment();
    7703             : 
    7704             :     // If Alignment is equal to the vector size in bytes,
    7705             :     // take half of it for the second part; otherwise keep it unchanged.
    7706             :     unsigned SecondHalfAlignment =
    7707          12 :       (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
    7708             :          Alignment/2 : Alignment;
    7709             : 
    7710           6 :     EVT LoMemVT, HiMemVT;
    7711           6 :     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
    7712             :     // Memory operand for the low half: original pointer info and alignment, low-half store size.
    7713           6 :     MachineMemOperand *MMO = DAG.getMachineFunction().
    7714          18 :     getMachineMemOperand(MLD->getPointerInfo(),
    7715             :                          MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
    7716           6 :                          Alignment, MLD->getAAInfo(), MLD->getRanges());
    7717             :     // NOTE(review): both halves are built as ISD::NON_EXTLOAD regardless of MLD's own extension type - confirm that is intended.
    7718           6 :     Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, PassThruLo, LoMemVT,
    7719           6 :                            MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
    7720             :     // Advance the pointer past the low half before building the high-half load.
    7721           6 :     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
    7722           6 :                                      MLD->isExpandingLoad());
    7723             :     unsigned HiOffset = LoMemVT.getStoreSize();
    7724             :     // Memory operand for the high half: pointer info offset by the low-half store size.
    7725          18 :     MMO = DAG.getMachineFunction().getMachineMemOperand(
    7726             :         MLD->getPointerInfo().getWithOffset(HiOffset),
    7727             :         MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment,
    7728           6 :         MLD->getAAInfo(), MLD->getRanges());
    7729             : 
    7730           6 :     Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT,
    7731           6 :                            MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
    7732             : 
    7733           6 :     AddToWorklist(Lo.getNode());
    7734           6 :     AddToWorklist(Hi.getNode());
    7735             : 
    7736             :     // Build a factor node to remember that this load is independent of the
    7737             :     // other one.
    7738           6 :     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
    7739           6 :                         Hi.getValue(1));
    7740             : 
    7741             :     // Legalized the chain result - switch anything that used the old chain to
    7742             :     // use the new one.
    7743          12 :     DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
    7744             :     // Reassemble the full-width result from the two halves.
    7745          12 :     SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
    7746             : 
    7747           6 :     SDValue RetOps[] = { LoadRes, Chain };
    7748          12 :     return DAG.getMergeValues(RetOps, DL);
    7749             :   }
    7750         493 :   return SDValue();
    7751             : }
    7752             : 
    7753             : /// A vector select of 2 constant vectors can be simplified to math/logic to
    7754             : /// avoid a variable select instruction and possibly avoid constant loads. Returns an empty SDValue when no fold is made.
    7755           0 : SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
    7756           0 :   SDValue Cond = N->getOperand(0);
    7757           0 :   SDValue N1 = N->getOperand(1);
    7758           0 :   SDValue N2 = N->getOperand(2);
    7759           0 :   EVT VT = N->getValueType(0);
    7760           0 :   if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
    7761           0 :       !TLI.convertSelectOfConstantsToMath(VT) ||
    7762           0 :       !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
    7763           0 :       !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
    7764           0 :     return SDValue();
    7765             : 
    7766             :   // Check if we can use the condition value to increment/decrement a single
    7767             :   // constant value. This simplifies a select to an add and removes a constant
    7768             :   // load/materialization from the general case. Element pairs containing an undef are skipped.
    7769             :   bool AllAddOne = true;
    7770             :   bool AllSubOne = true;
    7771             :   unsigned Elts = VT.getVectorNumElements();
    7772           0 :   for (unsigned i = 0; i != Elts; ++i) {
    7773           0 :     SDValue N1Elt = N1.getOperand(i);
    7774           0 :     SDValue N2Elt = N2.getOperand(i);
    7775           0 :     if (N1Elt.isUndef() || N2Elt.isUndef())
    7776           0 :       continue;
    7777             : 
    7778           0 :     const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
    7779           0 :     const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
    7780           0 :     if (C1 != C2 + 1)
    7781             :       AllAddOne = false;
    7782           0 :     if (C1 != C2 - 1)
    7783             :       AllSubOne = false;
    7784             :   }
    7785             : 
    7786             :   // Further simplifications for the extra-special cases where the constants are
    7787             :   // all 0 or all -1 should be implemented as folds of these patterns.
    7788             :   SDLoc DL(N);
    7789           0 :   if (AllAddOne || AllSubOne) {
    7790             :     // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
    7791             :     // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
    7792           0 :     auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
    7793           0 :     SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
    7794           0 :     return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
    7795             :   }
    7796             : 
    7797             :   // The general case for select-of-constants:
    7798             :   // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
    7799             :   // ...but that only makes sense if a vselect is slower than 2 logic ops, so
    7800             :   // leave that to a machine-specific pass.
    7801           0 :   return SDValue();
    7802             : }
    7803             : 
    7804       39265 : SDValue DAGCombiner::visitVSELECT(SDNode *N) {
    7805       39265 :   SDValue N0 = N->getOperand(0);
    7806       39265 :   SDValue N1 = N->getOperand(1);
    7807       39265 :   SDValue N2 = N->getOperand(2);
    7808             :   SDLoc DL(N);
    7809             : 
    7810             :   // fold (vselect C, X, X) -> X
    7811             :   if (N1 == N2)
    7812           4 :     return N1;
    7813             : 
    7814             :   // Canonicalize integer abs.
    7815             :   // vselect (setg[te] X,  0),  X, -X ->
    7816             :   // vselect (setgt    X, -1),  X, -X ->
    7817             :   // vselect (setl[te] X,  0), -X,  X ->
    7818             :   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
    7819       39261 :   if (N0.getOpcode() == ISD::SETCC) {
    7820       12694 :     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    7821       12694 :     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    7822             :     bool isAbs = false;
    7823       12694 :     bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
    7824             : 
    7825       20386 :     if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
    7826       12564 :          (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
    7827       13062 :         N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
    7828         356 :       isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
    7829        7722 :     else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
    7830       12425 :              N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
    7831          87 :       isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
    7832             : 
    7833         443 :     if (isAbs) {
    7834         443 :       EVT VT = LHS.getValueType();
    7835         443 :       if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
    7836         712 :         return DAG.getNode(ISD::ABS, DL, VT, LHS);
    7837             : 
    7838          87 :       SDValue Shift = DAG.getNode(
    7839             :           ISD::SRA, DL, VT, LHS,
    7840          87 :           DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
    7841         174 :       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
    7842          87 :       AddToWorklist(Shift.getNode());
    7843          87 :       AddToWorklist(Add.getNode());
    7844         174 :       return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
    7845             :     }
    7846             : 
    7847             :     // vselect (fcmp lt x, y), x, y -> fminnum x, y
    7848             :     // vselect (fcmp gt x, y), x, y -> fmaxnum x, y
    7849             :     //
    7850             :     // This is OK if we don't care about what happens if either operand is a
    7851             :     // NaN.
    7852             :     //
    7853       24502 :     EVT VT = N->getValueType(0);
    7854       12251 :     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0), N0.getOperand(1))) {
    7855          35 :       ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    7856          35 :       if (SDValue FMinMax = combineMinNumMaxNum(
    7857          70 :             DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
    7858          10 :         return FMinMax;
    7859             :     }
    7860             : 
    7861             :     // If this select has a condition (setcc) with narrower operands than the
    7862             :     // select, try to widen the compare to match the select width.
    7863             :     // TODO: This should be extended to handle any constant.
    7864             :     // TODO: This could be extended to handle non-loading patterns, but that
    7865             :     //       requires thorough testing to avoid regressions.
    7866       12241 :     if (isNullConstantOrNullSplatConstant(RHS)) {
    7867             :       EVT NarrowVT = LHS.getValueType();
    7868        4772 :       EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
    7869        4772 :       EVT SetCCVT = getSetCCResultType(LHS.getValueType());
    7870             :       unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
    7871             :       unsigned WideWidth = WideVT.getScalarSizeInBits();
    7872             :       bool IsSigned = isSignedIntSetCC(CC);
    7873        4772 :       auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
    7874          53 :       if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
    7875          51 :           SetCCWidth != 1 && SetCCWidth < WideWidth &&
    7876        4790 :           TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
    7877             :           TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
    7878             :         // Both compare operands can be widened for free. The LHS can use an
    7879             :         // extended load, and the RHS is a constant:
    7880             :         //   vselect (ext (setcc load(X), C)), N1, N2 -->
    7881             :         //   vselect (setcc extload(X), C'), N1, N2
    7882          10 :         auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    7883          20 :         SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
    7884          20 :         SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
    7885          10 :         EVT WideSetCCVT = getSetCCResultType(WideVT);
    7886          10 :         SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
    7887          20 :         return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
    7888             :       }
    7889             :     }
    7890             :   }
    7891             : 
    7892       38798 :   if (SimplifySelectOps(N, N1, N2))
    7893           6 :     return SDValue(N, 0);  // Don't revisit N.
    7894             : 
    7895             :   // Fold (vselect (build_vector all_ones), N1, N2) -> N1
    7896       38792 :   if (ISD::isBuildVectorAllOnes(N0.getNode()))
    7897          69 :     return N1;
    7898             :   // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
    7899       38723 :   if (ISD::isBuildVectorAllZeros(N0.getNode()))
    7900         365 :     return N2;
    7901             : 
    7902             :   // The ConvertSelectToConcatVector function assumes that both of the above
    7903             :   // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
    7904             :   // and addressed.
    7905         639 :   if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
    7906       38544 :       N2.getOpcode() == ISD::CONCAT_VECTORS &&
    7907         186 :       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
    7908          43 :     if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
    7909          10 :       return CV;
    7910             :   }
    7911             : 
    7912       38348 :   if (SDValue V = foldVSelectOfConstants(N))
    7913          40 :     return V;
    7914             : 
    7915       38308 :   return SDValue();
    7916             : }
    7917             : 
    7918       16226 : SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
    7919       16226 :   SDValue N0 = N->getOperand(0);
    7920       16226 :   SDValue N1 = N->getOperand(1);
    7921       16226 :   SDValue N2 = N->getOperand(2);
    7922       16226 :   SDValue N3 = N->getOperand(3);
    7923       16226 :   SDValue N4 = N->getOperand(4);
    7924       16226 :   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
    7925             : 
    7926             :   // fold select_cc lhs, rhs, x, x, cc -> x
    7927             :   if (N2 == N3)
    7928          27 :     return N2;
    7929             : 
    7930             :   // Determine if the condition we're dealing with is constant
    7931       16199 :   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
    7932       32406 :                                   CC, SDLoc(N), false)) {
    7933         325 :     AddToWorklist(SCC.getNode());
    7934             : 
    7935             :     if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
    7936          54 :       if (!SCCC->isNullValue())
    7937          17 :         return N2;    // cond always true -> true val
    7938             :       else
    7939          10 :         return N3;    // cond always false -> false val
    7940         298 :     } else if (SCC->isUndef()) {
    7941             :       // When the condition is UNDEF, just return the first operand. This is
    7942             :       // consistent with DAG creation: no setcc node is created in this case.
    7943           0 :       return N2;
    7944         298 :     } else if (SCC.getOpcode() == ISD::SETCC) {
    7945             :       // Fold to a simpler select_cc
    7946         556 :       return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
    7947             :                          SCC.getOperand(0), SCC.getOperand(1), N2, N3,
    7948         278 :                          SCC.getOperand(2));
    7949             :     }
    7950             :   }
    7951             : 
    7952             :   // If we can fold this based on the true/false value, do so.
    7953       15894 :   if (SimplifySelectOps(N, N2, N3))
    7954           0 :     return SDValue(N, 0);  // Don't revisit N.
    7955             : 
    7956             :   // fold select_cc into other things, such as min/max/abs
    7957       31788 :   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
    7958             : }
    7959             : 
    7960      298267 : SDValue DAGCombiner::visitSETCC(SDNode *N) {
    7961             :   // setcc is very commonly used as an argument to brcond. This pattern
    7962             :   // also lends itself to numerous combines and, as a result, it is desirable
    7963             :   // to keep the argument to a brcond as a setcc as much as possible.
    7964             :   bool PreferSetCC =
    7965      292820 :       N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
    7966             : 
    7967             :   SDValue Combined = SimplifySetCC(
    7968             :       N->getValueType(0), N->getOperand(0), N->getOperand(1),
    7969      573879 :       cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
    7970             : 
    7971      298267 :   if (!Combined)
    7972      281078 :     return SDValue();
    7973             : 
    7974             :   // If we prefer to have a setcc, and we don't, we'll try our best to
    7975             :   // recreate one using rebuildSetCC.
    7976       17189 :   if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
    7977         685 :     SDValue NewSetCC = rebuildSetCC(Combined);
    7978             : 
    7979             :     // We don't have anything interesting to combine to.
    7980         685 :     if (NewSetCC.getNode() == N)
    7981         115 :       return SDValue();
    7982             : 
    7983         570 :     if (NewSetCC)
    7984           0 :       return NewSetCC;
    7985             :   }
    7986             : 
    7987       17074 :   return Combined;
    7988             : }
    7989             : 
    7990           0 : SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
    7991           0 :   SDValue LHS = N->getOperand(0);
    7992           0 :   SDValue RHS = N->getOperand(1);
    7993           0 :   SDValue Carry = N->getOperand(2);
    7994           0 :   SDValue Cond = N->getOperand(3);
    7995             : 
    7996             :   // If Carry is false, fold to a regular SETCC.
    7997           0 :   if (isNullConstant(Carry))
    7998           0 :     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
    7999             : 
    8000           0 :   return SDValue();
    8001             : }
    8002             : 
    8003             : /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
    8004             : /// a build_vector of constants.
    8005             : /// This function is called by the DAGCombiner when visiting sext/zext/aext
    8006             : /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
    8007             : /// Vector extends are not folded if operations are legal; this is to
    8008             : /// avoid introducing illegal build_vector dag nodes.
    8009      290803 : static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
    8010             :                                          SelectionDAG &DAG, bool LegalTypes,
    8011             :                                          bool LegalOperations) {
    8012      290803 :   unsigned Opcode = N->getOpcode();
    8013      290803 :   SDValue N0 = N->getOperand(0);
    8014      581606 :   EVT VT = N->getValueType(0);
    8015             : 
    8016             :   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
    8017             :          Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
    8018             :          Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
    8019             :          && "Expected EXTEND dag node in input!");
    8020             : 
    8021             :   // fold (sext c1) -> c1
    8022             :   // fold (zext c1) -> c1
    8023             :   // fold (aext c1) -> c1
    8024             :   if (isa<ConstantSDNode>(N0))
    8025         567 :     return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();
    8026             : 
    8027             :   // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
    8028             :   // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
    8029             :   // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
    8030      290236 :   EVT SVT = VT.getScalarType();
    8031      311556 :   if (!(VT.isVector() &&
    8032        9658 :       (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
    8033       21320 :       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
    8034      290158 :     return nullptr;
    8035             : 
    8036             :   // We can fold this node into a build_vector.
    8037          78 :   unsigned VTBits = SVT.getSizeInBits();
    8038         156 :   unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
    8039             :   SmallVector<SDValue, 8> Elts;
    8040             :   unsigned NumElts = VT.getVectorNumElements();
    8041             :   SDLoc DL(N);
    8042             : 
    8043         486 :   for (unsigned i=0; i != NumElts; ++i) {
    8044         408 :     SDValue Op = N0->getOperand(i);
    8045         408 :     if (Op->isUndef()) {
    8046          82 :       Elts.push_back(DAG.getUNDEF(SVT));
    8047          82 :       continue;
    8048             :     }
    8049             : 
    8050             :     SDLoc DL(Op);
    8051             :     // Get the constant value and if needed trunc it to the size of the type.
    8052             :     // Nodes like build_vector might have constants wider than the scalar type.
    8053         652 :     APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
    8054         326 :     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
    8055         112 :       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
    8056             :     else
    8057         214 :       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
    8058             :   }
    8059             : 
    8060          78 :   return DAG.getBuildVector(VT, DL, Elts).getNode();
    8061             : }
    8062             : 
    8063             : // ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable this:
    8064             : // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
    8065             : // transformation. Returns true if the extensions are possible and the
    8066             : // above-mentioned transformation is profitable.
    8067           0 : static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
    8068             :                                     unsigned ExtOpc,
    8069             :                                     SmallVectorImpl<SDNode *> &ExtendNodes,
    8070             :                                     const TargetLowering &TLI) {
    8071             :   bool HasCopyToRegUses = false;
    8072           0 :   bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
    8073           0 :   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
    8074             :                             UE = N0.getNode()->use_end();
    8075           0 :        UI != UE; ++UI) {
    8076           0 :     SDNode *User = *UI;
    8077           0 :     if (User == N)
    8078           0 :       continue;
    8079           0 :     if (UI.getUse().getResNo() != N0.getResNo())
    8080           0 :       continue;
    8081             :     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
    8082           0 :     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
    8083           0 :       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
    8084           0 :       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
    8085             :         // Sign bits will be lost after a zext.
    8086           0 :         return false;
    8087             :       bool Add = false;
    8088           0 :       for (unsigned i = 0; i != 2; ++i) {
    8089           0 :         SDValue UseOp = User->getOperand(i);
    8090           0 :         if (UseOp == N0)
    8091           0 :           continue;
    8092             :         if (!isa<ConstantSDNode>(UseOp))
    8093           0 :           return false;
    8094             :         Add = true;
    8095             :       }
    8096           0 :       if (Add)
    8097           0 :         ExtendNodes.push_back(User);
    8098           0 :       continue;
    8099             :     }
    8100             :     // If truncates aren't free and there are users we can't
    8101             :     // extend, it isn't worthwhile.
    8102           0 :     if (!isTruncFree)
    8103           0 :       return false;
    8104             :     // Remember if this value is live-out.
    8105           0 :     if (User->getOpcode() == ISD::CopyToReg)
    8106             :       HasCopyToRegUses = true;
    8107             :   }
    8108             : 
    8109           0 :   if (HasCopyToRegUses) {
    8110             :     bool BothLiveOut = false;
    8111           0 :     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
    8112           0 :          UI != UE; ++UI) {
    8113             :       SDUse &Use = UI.getUse();
    8114           0 :       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
    8115             :         BothLiveOut = true;
    8116             :         break;
    8117             :       }
    8118             :     }
    8119           0 :     if (BothLiveOut)
    8120             :       // Both unextended and extended values are live out. There had better be
    8121             :       // a good reason for the transformation.
    8122           0 :       return ExtendNodes.size();
    8123             :   }
    8124             :   return true;
    8125             : }
    8126             : 
    8127           0 : void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
    8128             :                                   SDValue OrigLoad, SDValue ExtLoad,
    8129             :                                   ISD::NodeType ExtType) {
    8130             :   // Extend SetCC uses if necessary.
    8131           0 :   SDLoc DL(ExtLoad);
    8132           0 :   for (SDNode *SetCC : SetCCs) {
    8133             :     SmallVector<SDValue, 4> Ops;
    8134             : 
    8135           0 :     for (unsigned j = 0; j != 2; ++j) {
    8136           0 :       SDValue SOp = SetCC->getOperand(j);
    8137           0 :       if (SOp == OrigLoad)
    8138           0 :         Ops.push_back(ExtLoad);
    8139             :       else
    8140           0 :         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
    8141             :     }
    8142             : 
    8143           0 :     Ops.push_back(SetCC->getOperand(2));
    8144           0 :     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
    8145             :   }
    8146           0 : }
    8147             : 
    8148             : // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
    8149      131440 : SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
    8150      131440 :   SDValue N0 = N->getOperand(0);
    8151      262880 :   EVT DstVT = N->getValueType(0);
    8152             :   EVT SrcVT = N0.getValueType();
    8153             : 
    8154             :   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
    8155             :           N->getOpcode() == ISD::ZERO_EXTEND) &&
    8156             :          "Unexpected node type (not an extend)!");
    8157             : 
    8158             :   // fold (sext (load x)) to multiple smaller sextloads; same for zext.
    8159             :   // For example, on a target with legal v4i32, but illegal v8i32, turn:
    8160             :   //   (v8i32 (sext (v8i16 (load x))))
    8161             :   // into:
    8162             :   //   (v8i32 (concat_vectors (v4i32 (sextload x)),
    8163             :   //                          (v4i32 (sextload (x + 8)))))
    8164             :   // Where uses of the original load, i.e.:
    8165             :   //   (v8i16 (load x))
    8166             :   // are replaced with:
    8167             :   //   (v8i16 (truncate
    8168             :   //     (v8i32 (concat_vectors (v4i32 (sextload x)),
    8169             :   //                            (v4i32 (sextload (x + 8)))))))
    8170             :   //
    8171             :   // This combine is only applicable to illegal, but splittable, vectors.
    8172             :   // All legal types, and illegal non-vector types, are handled elsewhere.
    8173             :   // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
    8174             :   //
    8175      131440 :   if (N0->getOpcode() != ISD::LOAD)
    8176      127356 :     return SDValue();
    8177             : 
    8178             :   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    8179             : 
    8180        3827 :   if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
    8181        6313 :       !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
    8182        2943 :       !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
    8183        3668 :     return SDValue();
    8184             : 
    8185             :   SmallVector<SDNode *, 4> SetCCs;
    8186         832 :   if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
    8187           0 :     return SDValue();
    8188             : 
    8189             :   ISD::LoadExtType ExtType =
    8190         416 :       N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
    8191             : 
    8192             :   // Try to split the vector types to get down to legal types.
    8193         416 :   EVT SplitSrcVT = SrcVT;
    8194         416 :   EVT SplitDstVT = DstVT;
    8195        1702 :   while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
    8196             :          SplitSrcVT.getVectorNumElements() > 1) {
    8197         541 :     SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
    8198         541 :     SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
    8199             :   }
    8200             : 
    8201             :   if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
    8202         204 :     return SDValue();
    8203             : 
    8204             :   SDLoc DL(N);
    8205             :   const unsigned NumSplits =
    8206         212 :       DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
    8207             :   const unsigned Stride = SplitSrcVT.getStoreSize();
    8208             :   SmallVector<SDValue, 4> Loads;
    8209             :   SmallVector<SDValue, 4> Chains;
    8210             : 
    8211         212 :   SDValue BasePtr = LN0->getBasePtr();
    8212         645 :   for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
    8213         433 :     const unsigned Offset = Idx * Stride;
    8214         433 :     const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
    8215             : 
    8216         433 :     SDValue SplitLoad = DAG.getExtLoad(
    8217         433 :         ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
    8218             :         LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
    8219        1732 :         LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
    8220             : 
    8221         433 :     BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
    8222         433 :                           DAG.getConstant(Stride, DL, BasePtr.getValueType()));
    8223             : 
    8224         433 :     Loads.push_back(SplitLoad.getValue(0));
    8225         433 :     Chains.push_back(SplitLoad.getValue(1));
    8226             :   }
    8227             : 
    8228         424 :   SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
    8229         424 :   SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
    8230             : 
    8231             :   // Simplify TF.
    8232         212 :   AddToWorklist(NewChain.getNode());
    8233             : 
    8234         212 :   CombineTo(N, NewValue);
    8235             : 
    8236             :   // Replace uses of the original load (before extension)
    8237             :   // with a truncate of the concatenated sextloaded vectors.
    8238             :   SDValue Trunc =
    8239         218 :       DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
    8240         424 :   ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
    8241             :   CombineTo(N0.getNode(), Trunc, NewChain);
    8242         212 :   return SDValue(N, 0); // Return N so it doesn't get rechecked!
    8243             : }
    8244             : 
    8245             : // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
    8246             : //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
    8247      100653 : SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
    8248             :   assert(N->getOpcode() == ISD::ZERO_EXTEND);
    8249      100653 :   EVT VT = N->getValueType(0);
    8250             : 
    8251             :   // and/or/xor
    8252      100653 :   SDValue N0 = N->getOperand(0);
    8253       98358 :   if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
    8254       15638 :         N0.getOpcode() == ISD::XOR) ||
    8255      113996 :       N0.getOperand(1).getOpcode() != ISD::Constant ||
    8256       13358 :       (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
    8257       87295 :     return SDValue();
    8258             : 
    8259             :   // shl/shr
    8260       13358 :   SDValue N1 = N0->getOperand(0);
    8261       13358 :   if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
    8262       14388 :       N1.getOperand(1).getOpcode() != ISD::Constant ||
    8263         332 :       (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
    8264       12313 :     return SDValue();
    8265             : 
    8266             :   // load
    8267        1045 :   if (!isa<LoadSDNode>(N1.getOperand(0)))
    8268         991 :     return SDValue();
    8269             :   LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
    8270             :   EVT MemVT = Load->getMemoryVT();
    8271         104 :   if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
    8272         103 :       Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
    8273           5 :     return SDValue();
    8274             : 
    8275             : 
    8276             :   // If the shift op is SHL, the logic op must be AND, otherwise the result
    8277             :   // will be wrong.
    8278          49 :   if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
    8279           2 :     return SDValue();
    8280             : 
    8281          82 :   if (!N0.hasOneUse() || !N1.hasOneUse())
    8282          12 :     return SDValue();
    8283             : 
    8284             :   SmallVector<SDNode*, 4> SetCCs;
    8285          70 :   if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
    8286             :                                ISD::ZERO_EXTEND, SetCCs, TLI))
    8287           9 :     return SDValue();
    8288             : 
    8289             :   // Actually do the transformation.
    8290          26 :   SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
    8291             :                                    Load->getChain(), Load->getBasePtr(),
    8292          34 :                                    Load->getMemoryVT(), Load->getMemOperand());
    8293             : 
    8294             :   SDLoc DL1(N1);
    8295          26 :   SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
    8296          26 :                               N1.getOperand(1));
    8297             : 
    8298          26 :   APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    8299          52 :   Mask = Mask.zext(VT.getSizeInBits());
    8300             :   SDLoc DL0(N0);
    8301          26 :   SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
    8302          26 :                             DAG.getConstant(Mask, DL0, VT));
    8303             : 
    8304          26 :   ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
    8305          26 :   CombineTo(N, And);
    8306          26 :   if (SDValue(Load, 0).hasOneUse()) {
    8307          50 :     DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
    8308             :   } else {
    8309           1 :     SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
    8310           2 :                                 Load->getValueType(0), ExtLoad);
    8311             :     CombineTo(Load, Trunc, ExtLoad.getValue(1));
    8312             :   }
    8313          26 :   return SDValue(N,0); // Return N so it doesn't get rechecked!
    8314             : }
    8315             : 
    8316             : /// If we're narrowing or widening the result of a vector select and the final
    8317             : /// size is the same size as a setcc (compare) feeding the select, then try to
    8318             : /// apply the cast operation to the select's operands because matching vector
    8319             : /// sizes for a select condition and other operands should be more efficient.
    8320      949740 : SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) { // Returns an empty SDValue when nothing was changed.
    8321      949740 :   unsigned CastOpcode = Cast->getOpcode();
    8322             :   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
    8323             :           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
    8324             :           CastOpcode == ISD::FP_ROUND) &&
    8325             :          "Unexpected opcode for vector select narrowing/widening");
    8326             : 
    8327             :   // We only do this transform before legal ops because the pattern may be
    8328             :   // obfuscated by target-specific operations after legalization. Do not create
    8329             :   // an illegal select op, however, because that may be difficult to lower.
    8330      949740 :   EVT VT = Cast->getValueType(0);
    8331      949740 :   if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
    8332      477945 :     return SDValue();
    8333             : 
    8334      471795 :   SDValue VSel = Cast->getOperand(0);
    8335      471829 :   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() || // The cast must be the only user of a vselect ...
    8336          34 :       VSel.getOperand(0).getOpcode() != ISD::SETCC) // ... whose condition operand is a setcc.
    8337      471761 :     return SDValue();
    8338             : 
    8339             :   // Does the setcc have the same vector size as the casted select? Compare the
    8340          34 :   SDValue SetCC = VSel.getOperand(0); // total bit width of the setcc result type for the compared operands
    8341          68 :   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType()); // against the width of the cast's result type VT.
    8342          34 :   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
    8343          18 :     return SDValue();
    8344             : 
    8345             :   // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
    8346          16 :   SDValue A = VSel.getOperand(1);
    8347          16 :   SDValue B = VSel.getOperand(2);
    8348          16 :   SDValue CastA, CastB;
    8349             :   SDLoc DL(Cast);
    8350          16 :   if (CastOpcode == ISD::FP_ROUND) {
    8351             :     // FP_ROUND (fptrunc) has an extra flag operand to pass along.
    8352          12 :     CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
    8353          12 :     CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
    8354             :   } else {
    8355          24 :     CastA = DAG.getNode(CastOpcode, DL, VT, A);
    8356          24 :     CastB = DAG.getNode(CastOpcode, DL, VT, B);
    8357             :   }
    8358          32 :   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB); // Same setcc condition, both arms now of type VT.
    8359             : }
    8360             : 
    8361             : // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
    8362             : // fold ([s|z]ext (     extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
    8363      131172 : static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, // Returns SDValue(N, 0) on success, an empty SDValue otherwise.
    8364             :                                      const TargetLowering &TLI, EVT VT,
    8365             :                                      bool LegalOperations, SDNode *N,
    8366             :                                      SDValue N0, ISD::LoadExtType ExtLoadType) {
    8367             :   SDNode *N0Node = N0.getNode();
    8368      131172 :   bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node) // True when N0 is an extending load of the same
    8369             :                                                    : ISD::isZEXTLoad(N0Node); // kind (sext vs. zext) as the requested ExtLoadType.
    8370             :   if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) || // Accept a matching-kind extload or one satisfying ISD::isEXTLoad,
    8371      131379 :       !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse()) // and only if it is unindexed and has a single use.
    8372      131083 :     return {};
    8373             : 
    8374             :   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    8375          89 :   EVT MemVT = LN0->getMemoryVT();
    8376         170 :   if ((LegalOperations || LN0->isVolatile()) && // Once operations must be legal, or for a volatile load, the
    8377          81 :       !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT)) // target has to support this extload from MemVT to VT.
    8378          68 :     return {};
    8379             : 
    8380             :   SDValue ExtLoad = // New load producing VT directly; chain, base pointer, memory type and memory operand are reused.
    8381          21 :       DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
    8382          23 :                      LN0->getBasePtr(), MemVT, LN0->getMemOperand());
    8383          21 :   Combiner.CombineTo(N, ExtLoad);
    8384          42 :   DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); // Move users of the old load's chain (result 1) to the new load.
    8385          21 :   return SDValue(N, 0); // Return N so it doesn't get rechecked!
    8386             : }
    8387             : 
    8388             : // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
    8389             : // Only generate vector extloads when 1) they're legal, and 2) they are
    8390             : // deemed desirable by the target.
    8391      151772 : static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, // Returns SDValue(N, 0) on success, an empty SDValue otherwise.
    8392             :                                   const TargetLowering &TLI, EVT VT,
    8393             :                                   bool LegalOperations, SDNode *N, SDValue N0,
    8394             :                                   ISD::LoadExtType ExtLoadType,
    8395             :                                   ISD::NodeType ExtOpc) {
    8396             :   if (!ISD::isNON_EXTLoad(N0.getNode()) || // N0 must be a non-extending,
    8397       24159 :       !ISD::isUNINDEXEDLoad(N0.getNode()) || // unindexed load.
    8398       22628 :       ((LegalOperations || VT.isVector() || // Extload legality is only enforced when operations must be legal,
    8399       24121 :         cast<LoadSDNode>(N0)->isVolatile()) && // VT is a vector, or the load is volatile.
    8400        5628 :        !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
    8401      129514 :     return {};
    8402             : 
    8403             :   bool DoXform = true;
    8404             :   SmallVector<SDNode *, 4> SetCCs;
    8405       22258 :   if (!N0.hasOneUse()) // The load has other users: let ExtendUsesToFormExtLoad decide whether the fold may proceed.
    8406        2346 :     DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
    8407       22258 :   if (VT.isVector()) // Vector extloads additionally need the target's approval.
    8408        4832 :     DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
    8409       22258 :   if (!DoXform)
    8410        1926 :     return {};
    8411             : 
    8412             :   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    8413       20332 :   SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
    8414             :                                    LN0->getBasePtr(), N0.getValueType(),
    8415       22201 :                                    LN0->getMemOperand());
    8416       20332 :   Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
    8417             :   // If the load value is used only by N, replace it via CombineTo N.
    8418             :   bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); // NOTE(review): queried before CombineTo(N, ...) below, presumably because that call changes use counts.
    8419       20332 :   Combiner.CombineTo(N, ExtLoad);
    8420       20332 :   if (NoReplaceTrunc) {
    8421       19570 :     DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); // Only the chain result still needs redirecting.
    8422             :   } else {
    8423             :     SDValue Trunc = // Remaining users of the narrow value get a truncate of the extended load.
    8424        1524 :         DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
    8425             :     Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1)); // Replaces both results of the old load: value and chain.
    8426             :   }
    8427       20332 :   return SDValue(N, 0); // Return N so it doesn't get rechecked!
    8428             : }
    8429             : 
    8430      131150 : static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, // Folds sext/zext of an i1 "X > -1" test into not+shift; empty SDValue if no match.
    8431             :                                        bool LegalOperations) {
    8432             :   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
    8433             :           N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
    8434             : 
    8435      131150 :   SDValue SetCC = N->getOperand(0);
    8436      118172 :   if (LegalOperations || SetCC.getOpcode() != ISD::SETCC || // Only before operation legalization, and only for a
    8437      131150 :       !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1) // single-use setcc that produces an i1.
    8438      125939 :     return SDValue();
    8439             : 
    8440        5211 :   SDValue X = SetCC.getOperand(0);
    8441        5211 :   SDValue Ones = SetCC.getOperand(1); // Candidate all-ones (-1) constant; verified by isAllOnesConstant below.
    8442        5211 :   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
    8443       10422 :   EVT VT = N->getValueType(0);
    8444             :   EVT XVT = X.getValueType();
    8445             :   // setge X, C is canonicalized to setgt, so we do not need to match that
    8446             :   // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
    8447             :   // not require the 'not' op.
    8448        5211 :   if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) { // The extended type must equal X's type so the shift can act on X directly.
    8449             :     // Invert and smear/shift the sign bit:
    8450             :     // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
    8451             :     // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
    8452             :     SDLoc DL(N);
    8453          32 :     SDValue NotX = DAG.getNOT(DL, X, VT);
    8454          32 :     SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT); // Bit width minus one: moves the sign bit down to bit 0.
    8455          32 :     auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL; // Arithmetic shift for sext, logical shift for zext.
    8456          32 :     return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
    8457             :   }
    8458        5179 :   return SDValue();
    8459             : }
    8460             : 
    8461       51003 : SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // Tries each sign_extend fold in turn; an empty SDValue means no fold applied.
    8462       51003 :   SDValue N0 = N->getOperand(0);
    8463      102006 :   EVT VT = N->getValueType(0);
    8464             :   SDLoc DL(N);
    8465             : 
    8466       51003 :   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
    8467       51003 :                                               LegalOperations))
    8468          73 :     return SDValue(Res, 0);
    8469             : 
    8470             :   // fold (sext (sext x)) -> (sext x)
    8471             :   // fold (sext (aext x)) -> (sext x)
    8472      101860 :   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    8473          12 :     return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
    8474             : 
    8475       50924 :   if (N0.getOpcode() == ISD::TRUNCATE) {
    8476             :     // fold (sext (truncate (load x))) -> (sext (smaller load x))
    8477             :     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
    8478        6778 :     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
    8479          41 :       SDNode *oye = N0.getOperand(0).getNode(); // Node feeding the truncate; captured before CombineTo may delete the truncate.
    8480          41 :       if (NarrowLoad.getNode() != N0.getNode()) {
    8481          41 :         CombineTo(N0.getNode(), NarrowLoad);
    8482             :         // CombineTo deleted the truncate, if needed, but not what's under it.
    8483          41 :         AddToWorklist(oye);
    8484             :       }
    8485          41 :       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    8486             :     }
    8487             : 
    8488             :     // See if the value being truncated is already sign extended.  If so, just
    8489             :     // eliminate the trunc/sext pair.
    8490        6737 :     SDValue Op = N0.getOperand(0);
    8491        6737 :     unsigned OpBits   = Op.getScalarValueSizeInBits(); // Width of the value before the truncate.
    8492        6737 :     unsigned MidBits  = N0.getScalarValueSizeInBits(); // Width after the truncate.
    8493             :     unsigned DestBits = VT.getScalarSizeInBits(); // Width of this sign_extend's result.
    8494        6737 :     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
    8495             : 
    8496        6737 :     if (OpBits == DestBits) {
    8497             :       // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
    8498             :       // bits, it already equals sext(trunc(Op)), so Op can be used directly.
    8499        3880 :       if (NumSignBits > DestBits-MidBits)
    8500         900 :         return Op;
    8501        2857 :     } else if (OpBits < DestBits) {
    8502             :       // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
    8503             :       // bits, just sext from i32.
    8504        2542 :       if (NumSignBits > OpBits-MidBits)
    8505          50 :         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
    8506             :     } else {
    8507             :       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
    8508             :       // bits, just truncate to i32.
    8509         315 :       if (NumSignBits > OpBits-MidBits)
    8510          44 :         return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
    8511             :     }
    8512             : 
    8513             :     // fold (sext (truncate x)) -> (sextinreg x).
    8514        5790 :     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
    8515             :                                                  N0.getValueType())) {
    8516        5790 :       if (OpBits < DestBits) // First bring Op to the destination width (any-extend or truncate) ...
    8517        2539 :         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
    8518        3273 :       else if (OpBits > DestBits)
    8519         297 :         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
    8520        5790 :       return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op, // ... then sign-extend in place from the truncated type.
    8521       11580 :                          DAG.getValueType(N0.getValueType()));
    8522             :     }
    8523             :   }
    8524             : 
    8525             :   // Try to simplify (sext (load x)).
    8526       44146 :   if (SDValue foldedExt =
    8527       44146 :           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
    8528       44146 :                              ISD::SEXTLOAD, ISD::SIGN_EXTEND))
    8529       13459 :     return foldedExt;
    8530             : 
    8531             :   // fold (sext (load x)) to multiple smaller sextloads.
    8532             :   // Only on illegal but splittable vectors.
    8533       30687 :   if (SDValue ExtLoad = CombineExtLoad(N))
    8534         142 :     return ExtLoad;
    8535             : 
    8536             :   // Try to simplify (sext (sextload x)).
    8537       30545 :   if (SDValue foldedExt = tryToFoldExtOfExtload(
    8538       30545 :           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
    8539           4 :     return foldedExt;
    8540             : 
    8541             :   // fold (sext (and/or/xor (load x), cst)) ->
    8542             :   //      (and/or/xor (sextload x), (sext cst))
    8543       30541 :   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
    8544        1247 :        N0.getOpcode() == ISD::XOR) &&
    8545          31 :       isa<LoadSDNode>(N0.getOperand(0)) &&
    8546       30572 :       N0.getOperand(1).getOpcode() == ISD::Constant &&
    8547           1 :       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    8548             :     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
    8549             :     EVT MemVT = LN00->getMemoryVT();
    8550           1 :     if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
    8551           2 :       LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) { // An existing zextload cannot be turned into a sextload.
    8552             :       SmallVector<SDNode*, 4> SetCCs;
    8553           1 :       bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
    8554             :                                              ISD::SIGN_EXTEND, SetCCs, TLI);
    8555           1 :       if (DoXform) {
    8556           2 :         SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
    8557             :                                          LN00->getChain(), LN00->getBasePtr(),
    8558             :                                          LN00->getMemoryVT(),
    8559           2 :                                          LN00->getMemOperand());
    8560           2 :         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    8561           1 :         Mask = Mask.sext(VT.getSizeInBits()); // Sign-extend the constant operand to the new width.
    8562           1 :         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT, // Despite its name, 'And' reuses N0's opcode (and/or/xor).
    8563           1 :                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
    8564           2 :         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
    8565           1 :         bool NoReplaceTruncAnd = !N0.hasOneUse(); // NOTE: despite the name, true means N0 has other users, which ARE rewritten below.
    8566             :         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse(); // True when the load's value has one use; then only its chain is redirected.
    8567           1 :         CombineTo(N, And);
    8568             :         // If N0 has multiple uses, change other uses as well.
    8569           1 :         if (NoReplaceTruncAnd) {
    8570             :           SDValue TruncAnd =
    8571           0 :               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
    8572           0 :           CombineTo(N0.getNode(), TruncAnd);
    8573             :         }
    8574           1 :         if (NoReplaceTrunc) {
    8575           2 :           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
    8576             :         } else {
    8577           0 :           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
    8578           0 :                                       LN00->getValueType(0), ExtLoad);
    8579             :           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
    8580             :         }
    8581           1 :         return SDValue(N,0); // Return N so it doesn't get rechecked!
    8582             :       }
    8583             :     }
    8584             :   }
    8585             : 
    8586       30540 :   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
    8587          10 :     return V;
    8588             : 
    8589       61060 :   if (N0.getOpcode() == ISD::SETCC) {
    8590        5425 :     SDValue N00 = N0.getOperand(0);
    8591        5425 :     SDValue N01 = N0.getOperand(1);
    8592        5425 :     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    8593       10850 :     EVT N00VT = N0.getOperand(0).getValueType();
    8594             : 
    8595             :     // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
    8596             :     // Only do this before legalize for now.
    8597        5425 :     if (VT.isVector() && !LegalOperations &&
    8598        2921 :         TLI.getBooleanContents(N00VT) ==
    8599             :             TargetLowering::ZeroOrNegativeOneBooleanContent) {
    8600             :       // On some architectures (such as SSE/NEON/etc) the SETCC result type is
    8601             :       // of the same size as the compared operands. Only optimize sext(setcc())
    8602             :       // if this is the case.
    8603        2892 :       EVT SVT = getSetCCResultType(N00VT);
    8604             : 
    8605             :       // We know that the # elements of the results is the same as the
    8606             :       // # elements of the compare (and the # elements of the compare result
    8607             :       // for that matter).  Check to see that they are the same size.  If so,
    8608             :       // we know that the element size of the sext'd result matches the
    8609             :       // element size of the compare operands.
    8610        2892 :       if (VT.getSizeInBits() == SVT.getSizeInBits())
    8611        2653 :         return DAG.getSetCC(DL, VT, N00, N01, CC);
    8612             : 
    8613             :       // If the desired elements are smaller or larger than the source
    8614             :       // elements, we can use a matching integer vector type and then
    8615             :       // truncate/sign extend.
    8616         520 :       EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
    8617         523 :       if (SVT == MatchingVecType) {
    8618         281 :         SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
    8619         281 :         return DAG.getSExtOrTrunc(VsetCC, DL, VT);
    8620             :       }
    8621             :     }
    8622             : 
    8623             :     // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
    8624             :     // Here, T can be 1 or -1, depending on the type of the setcc and
    8625             :     // getBooleanContents().
    8626        2772 :     unsigned SetCCWidth = N0.getScalarValueSizeInBits();
    8627             : 
    8628             :     // To determine the "true" side of the select, we need to know the high bit
    8629             :     // of the value returned by the setcc if it evaluates to true.
    8630             :     // If the type of the setcc is i1, then the true case of the select is just
    8631             :     // sext(i1 1), that is, -1.
    8632             :     // If the type of the setcc is larger (say, i8) then the value of the high
    8633             :     // bit depends on getBooleanContents(), so ask TLI for a real "true" value
    8634             :     // of the appropriate width.
    8635             :     SDValue ExtTrueVal = (SetCCWidth == 1)
    8636        2771 :                              ? DAG.getAllOnesConstant(DL, VT)
    8637        2772 :                              : DAG.getBoolConstant(true, DL, VT, N00VT);
    8638        2772 :     SDValue Zero = DAG.getConstant(0, DL, VT);
    8639        2772 :     if (SDValue SCC =
    8640        2772 :             SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
    8641          22 :       return SCC;
    8642             : 
    8643        2750 :     if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
    8644         871 :       EVT SetCCVT = getSetCCResultType(N00VT);
    8645             :       // Don't do this transform for i1 because there's a select transform
    8646             :       // that would reverse it.
    8647             :       // TODO: We should not do this transform at all without a target hook
    8648             :       // because a sext is likely cheaper than a select?
    8649         871 :       if (SetCCVT.getScalarSizeInBits() != 1 &&
    8650         107 :           (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
    8651         107 :         SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
    8652         107 :         return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
    8653             :       }
    8654             :     }
    8655             :   }
    8656             : 
    8657             :   // fold (sext x) -> (zext x) if the sign bit is known zero.
    8658       54816 :   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
    8659       27068 :       DAG.SignBitIsZero(N0))
    8660         884 :     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
    8661             : 
    8662       27306 :   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    8663           4 :     return NewVSel;
    8664             : 
    8665       27302 :   return SDValue();
    8666             : }
    8667             : 
    8668             : // isTruncateOf - If N is a truncate of some other value, return true, record
    8669             : // the value being truncated in Op and which of Op's bits are zero/one in Known.
    8670             : // This function computes KnownBits to avoid a duplicated call to
    8671             : // computeKnownBits in the caller. An i1 (setne X, 0) is also accepted when
    8672           0 : static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, // every bit of X above bit 0 is known zero (see below).
    8673             :                          KnownBits &Known) {
    8674           0 :   if (N->getOpcode() == ISD::TRUNCATE) {
    8675           0 :     Op = N->getOperand(0);
    8676           0 :     DAG.computeKnownBits(Op, Known);
    8677           0 :     return true;
    8678             :   }
    8679             : 
    8680           0 :   if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 || // Otherwise only an i1-typed setcc
    8681           0 :       cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE) // with condition code SETNE is considered.
    8682           0 :     return false;
    8683             : 
    8684           0 :   SDValue Op0 = N->getOperand(0);
    8685           0 :   SDValue Op1 = N->getOperand(1);
    8686             :   assert(Op0.getValueType() == Op1.getValueType());
    8687             : 
    8688           0 :   if (isNullConstant(Op0)) // One side must be the constant zero; Op becomes the other side.
    8689           0 :     Op = Op1;
    8690           0 :   else if (isNullConstant(Op1))
    8691           0 :     Op = Op0;
    8692             :   else
    8693           0 :     return false;
    8694             : 
    8695           0 :   DAG.computeKnownBits(Op, Known);
    8696             : 
    8697           0 :   if (!(Known.Zero | 1).isAllOnesValue()) // Requires all bits except bit 0 known zero, so (Op != 0) equals Op's low bit.
    8698           0 :     return false;
    8699             : 
    8700             :   return true;
    8701             : }
    8702             : 
    8703      142725 : SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
    8704      142725 :   SDValue N0 = N->getOperand(0);
    8705      142725 :   EVT VT = N->getValueType(0);
    8706             : 
    8707      142725 :   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
    8708      142725 :                                               LegalOperations))
    8709         452 :     return SDValue(Res, 0);
    8710             : 
    8711             :   // fold (zext (zext x)) -> (zext x)
    8712             :   // fold (zext (aext x)) -> (zext x)
    8713      284546 :   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    8714         279 :     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
    8715         558 :                        N0.getOperand(0));
    8716             : 
    8717             :   // fold (zext (truncate x)) -> (zext x) or
    8718             :   //      (zext (truncate x)) -> (truncate x)
    8719             :   // This is valid when the truncated bits of x are already zero.
    8720             :   // FIXME: We should extend this to work for vectors too.
    8721      141994 :   SDValue Op;
    8722      141994 :   KnownBits Known;
    8723      141994 :   if (!VT.isVector() && isTruncateOf(DAG, N0, Op, Known)) {
    8724             :     APInt TruncatedBits =
    8725       33830 :       (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
    8726             :       APInt(Op.getValueSizeInBits(), 0) :
    8727             :       APInt::getBitsSet(Op.getValueSizeInBits(),
    8728             :                         N0.getValueSizeInBits(),
    8729       33828 :                         std::min(Op.getValueSizeInBits(),
    8730       67660 :                                  VT.getSizeInBits()));
    8731       33830 :     if (TruncatedBits.isSubsetOf(Known.Zero))
    8732       31937 :       return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
    8733             :   }
    8734             : 
    8735             :   // fold (zext (truncate x)) -> (and x, mask)
    8736      253298 :   if (N0.getOpcode() == ISD::TRUNCATE) {
    8737             :     // fold (zext (truncate (load x))) -> (zext (smaller load x))
    8738             :     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
    8739       18670 :     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
    8740          13 :       SDNode *oye = N0.getOperand(0).getNode();
    8741          13 :       if (NarrowLoad.getNode() != N0.getNode()) {
    8742          13 :         CombineTo(N0.getNode(), NarrowLoad);
    8743             :         // CombineTo deleted the truncate, if needed, but not what's under it.
    8744          13 :         AddToWorklist(oye);
    8745             :       }
    8746          13 :       return SDValue(N, 0); // Return N so it doesn't get rechecked!
    8747             :     }
    8748             : 
    8749       37314 :     EVT SrcVT = N0.getOperand(0).getValueType();
    8750       18657 :     EVT MinVT = N0.getValueType();
    8751             : 
    8752             :     // Try to mask before the extension to avoid having to generate a larger mask,
    8753             :     // possibly over several sub-vectors.
    8754       30722 :     if (SrcVT.bitsLT(VT) && VT.isVector()) {
    8755          51 :       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
    8756             :                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
    8757          51 :         SDValue Op = N0.getOperand(0);
    8758         102 :         Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
    8759          51 :         AddToWorklist(Op.getNode());
    8760          51 :         SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
    8761             :         // Transfer the debug info; the new node is equivalent to N0.
    8762          51 :         DAG.transferDbgValues(N0, ZExtOrTrunc);
    8763          51 :         return ZExtOrTrunc;
    8764             :       }
    8765             :     }
    8766             : 
    8767       18606 :     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
    8768       18720 :       SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
    8769       18606 :       AddToWorklist(Op.getNode());
    8770       37212 :       SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
    8771             :       // We may safely transfer the debug info describing the truncate node over
    8772             :       // to the equivalent and operation.
    8773       18606 :       DAG.transferDbgValues(N0, And);
    8774       18606 :       return And;
    8775             :     }
    8776             :   }
    8777             : 
    8778             :   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
    8779             :   // if either of the casts is not free.
    8780      107979 :   if (N0.getOpcode() == ISD::AND &&
    8781        2667 :       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
    8782      108834 :       N0.getOperand(1).getOpcode() == ISD::Constant &&
    8783         712 :       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
    8784         411 :                            N0.getValueType()) ||
    8785         110 :        !TLI.isZExtFree(N0.getValueType(), VT))) {
    8786         706 :     SDValue X = N0.getOperand(0).getOperand(0);
    8787         361 :     X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
    8788         706 :     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    8789         706 :     Mask = Mask.zext(VT.getSizeInBits());
    8790             :     SDLoc DL(N);
    8791         353 :     return DAG.getNode(ISD::AND, DL, VT,
    8792         353 :                        X, DAG.getConstant(Mask, DL, VT));
    8793             :   }
    8794             : 
    8795             :   // Try to simplify (zext (load x)).
    8796      107626 :   if (SDValue foldedExt =
    8797      107626 :           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
    8798      107626 :                              ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
    8799        6873 :     return foldedExt;
    8800             : 
    8801             :   // fold (zext (load x)) to multiple smaller zextloads.
    8802             :   // Only on illegal but splittable vectors.
    8803      100753 :   if (SDValue ExtLoad = CombineExtLoad(N))
    8804          70 :     return ExtLoad;
    8805             : 
    8806             :   // fold (zext (and/or/xor (load x), cst)) ->
    8807             :   //      (and/or/xor (zextload x), (zext cst))
    8808             :   // Unless (and (load x) cst) will match as a zextload already and has
    8809             :   // additional users.
    8810      100683 :   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
    8811       15668 :        N0.getOpcode() == ISD::XOR) &&
    8812         200 :       isa<LoadSDNode>(N0.getOperand(0)) &&
    8813      100883 :       N0.getOperand(1).getOpcode() == ISD::Constant &&
    8814          83 :       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    8815             :     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
    8816             :     EVT MemVT = LN00->getMemoryVT();
    8817          30 :     if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
    8818          70 :         LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
    8819             :       bool DoXform = true;
    8820             :       SmallVector<SDNode*, 4> SetCCs;
    8821          30 :       if (!N0.hasOneUse()) {
    8822          18 :         if (N0.getOpcode() == ISD::AND) {
    8823             :           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
    8824           0 :           EVT LoadResultTy = AndC->getValueType(0);
    8825           0 :           EVT ExtVT;
    8826           0 :           if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
    8827             :             DoXform = false;
    8828             :         }
    8829             :       }
    8830             :       if (DoXform)
    8831          60 :         DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
    8832             :                                           ISD::ZERO_EXTEND, SetCCs, TLI);
    8833          30 :       if (DoXform) {
    8834          60 :         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
    8835             :                                          LN00->getChain(), LN00->getBasePtr(),
    8836             :                                          LN00->getMemoryVT(),
    8837          30 :                                          LN00->getMemOperand());
    8838          60 :         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    8839          60 :         Mask = Mask.zext(VT.getSizeInBits());
    8840             :         SDLoc DL(N);
    8841          30 :         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
    8842          30 :                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
    8843          60 :         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
    8844          30 :         bool NoReplaceTruncAnd = !N0.hasOneUse();
    8845             :         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
    8846          30 :         CombineTo(N, And);
    8847             :         // If N0 has multiple uses, change other uses as well.
    8848          30 :         if (NoReplaceTruncAnd) {
    8849             :           SDValue TruncAnd =
    8850          27 :               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
    8851           9 :           CombineTo(N0.getNode(), TruncAnd);
    8852             :         }
    8853          30 :         if (NoReplaceTrunc) {
    8854          58 :           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
    8855             :         } else {
    8856           1 :           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
    8857           2 :                                       LN00->getValueType(0), ExtLoad);
    8858             :           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
    8859             :         }
    8860          30 :         return SDValue(N,0); // Return N so it doesn't get rechecked!
    8861             :       }
    8862             :     }
    8863             :   }
    8864             : 
    8865             :   // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
    8866             :   //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
    8867      100653 :   if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
    8868          26 :     return ZExtLoad;
    8869             : 
    8870             :   // Try to simplify (zext (zextload x)).
    8871      100627 :   if (SDValue foldedExt = tryToFoldExtOfExtload(
    8872      100627 :           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
    8873          17 :     return foldedExt;
    8874             : 
    8875      100610 :   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
    8876          22 :     return V;
    8877             : 
    8878      201176 :   if (N0.getOpcode() == ISD::SETCC) {
    8879             :     // Only do this before legalize for now.
    8880       54757 :     if (!LegalOperations && VT.isVector() &&
    8881       28342 :         N0.getValueType().getVectorElementType() == MVT::i1) {
    8882         678 :       EVT N00VT = N0.getOperand(0).getValueType();
    8883        1017 :       if (getSetCCResultType(N00VT) == N0.getValueType())
    8884         142 :         return SDValue();
    8885             : 
    8886             :       // We know that the # elements of the results is the same as the #
    8887             :       // elements of the compare (and the # elements of the compare result for
    8888             :       // that matter). Check to see that they are the same size. If so, we know
    8889             :       // that the element size of the sext'd result matches the element size of
    8890             :       // the compare operands.
    8891             :       SDLoc DL(N);
    8892         197 :       SDValue VecOnes = DAG.getConstant(1, DL, VT);
    8893         197 :       if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
    8894             :         // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
    8895         178 :         SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
    8896         356 :                                      N0.getOperand(1), N0.getOperand(2));
    8897         356 :         return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
    8898             :       }
    8899             : 
    8900             :       // If the desired elements are smaller or larger than the source
    8901             :       // elements we can use a matching integer vector type and then
    8902             :       // truncate/sign extend.
    8903          19 :       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
    8904             :       SDValue VsetCC =
    8905          19 :           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
    8906          38 :                       N0.getOperand(1), N0.getOperand(2));
    8907          19 :       return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
    8908          19 :                          VecOnes);
    8909             :     }
    8910             : 
    8911             :     // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    8912             :     SDLoc DL(N);
    8913       27662 :     if (SDValue SCC = SimplifySelectCC(
    8914       27662 :             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
    8915       27662 :             DAG.getConstant(0, DL, VT),
    8916       55324 :             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
    8917         137 :       return SCC;
    8918             :   }
    8919             : 
    8920             :   // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
    8921      100112 :   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
    8922        7229 :       isa<ConstantSDNode>(N0.getOperand(1)) &&
    8923      107352 :       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
    8924          11 :       N0.hasOneUse()) {
    8925          10 :     SDValue ShAmt = N0.getOperand(1);
    8926          20 :     unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
    8927          10 :     if (N0.getOpcode() == ISD::SHL) {
    8928           9 :       SDValue InnerZExt = N0.getOperand(0);
    8929             :       // If the original shl may be shifting out bits, do not perform this
    8930             :       // transformation.
    8931           9 :       unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
    8932          18 :         InnerZExt.getOperand(0).getValueSizeInBits();
    8933           9 :       if (ShAmtVal > KnownZeroBits)
    8934           0 :         return SDValue();
    8935             :     }
    8936             : 
    8937             :     SDLoc DL(N);
    8938             : 
    8939             :     // Ensure that the shift amount is wide enough for the shifted value.
    8940          10 :     if (VT.getSizeInBits() >= 256)
    8941           0 :       ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
    8942             : 
    8943          10 :     return DAG.getNode(N0.getOpcode(), DL, VT,
    8944          10 :                        DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
    8945          10 :                        ShAmt);
    8946             :   }
    8947             : 
    8948      100102 :   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    8949           2 :     return NewVSel;
    8950             : 
    8951      100100 :   return SDValue();
    8952             : }
    8953             : /// Combine an ISD::ANY_EXTEND node; the individual folds are described inline.
    8954       88162 : SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
    8955       88162 :   SDValue N0 = N->getOperand(0);
    8956       88162 :   EVT VT = N->getValueType(0);
    8957             : 
    8958       88162 :   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
    8959       88162 :                                               LegalOperations))
    8960          78 :     return SDValue(Res, 0);
    8961             : 
    8962             :   // fold (aext (aext x)) -> (aext x)
    8963             :   // fold (aext (zext x)) -> (zext x)
    8964             :   // fold (aext (sext x)) -> (sext x)
    8965       88084 :   if (N0.getOpcode() == ISD::ANY_EXTEND  ||
    8966      176151 :       N0.getOpcode() == ISD::ZERO_EXTEND ||
    8967             :       N0.getOpcode() == ISD::SIGN_EXTEND)
    8968          44 :     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
    8969             : 
    8970             :   // fold (aext (truncate (load x))) -> (aext (smaller load x))
    8971             :   // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
    8972       88062 :   if (N0.getOpcode() == ISD::TRUNCATE) {
    8973        7527 :     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
    8974          13 :       SDNode *oye = N0.getOperand(0).getNode();
    8975          13 :       if (NarrowLoad.getNode() != N0.getNode()) {
    8976          13 :         CombineTo(N0.getNode(), NarrowLoad);
    8977             :         // CombineTo deleted the truncate, if needed, but not what's under it.
    8978          13 :         AddToWorklist(oye);
    8979             :       }
    8980          13 :       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    8981             :     }
    8982             :   }
    8983             : 
    8984             :   // fold (aext (truncate x)) -> (aext-or-trunc x to VT)
    8985       88049 :   if (N0.getOpcode() == ISD::TRUNCATE)
    8986       15117 :     return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
    8987             : 
    8988             :   // Fold (aext (and (trunc x), cst)) -> (and x, cst)
    8989             :   // if the trunc is not free.
    8990        4001 :   if (N0.getOpcode() == ISD::AND &&
    8991        4001 :       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
    8992       86633 :       N0.getOperand(1).getOpcode() == ISD::Constant &&
    8993        5988 :       !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
    8994        2994 :                           N0.getValueType())) {
    8995             :     SDLoc DL(N);
    8996           8 :     SDValue X = N0.getOperand(0).getOperand(0);
    8997           8 :     X = DAG.getAnyExtOrTrunc(X, DL, VT);
    8998           8 :     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    8999           8 :     Mask = Mask.zext(VT.getSizeInBits());
    9000           8 :     return DAG.getNode(ISD::AND, DL, VT,
    9001           8 :                        X, DAG.getConstant(Mask, DL, VT));
    9002             :   }
    9003             : 
    9004             :   // fold (aext (load x)) -> (aext (truncate (extload x)))
    9005             :   // None of the supported targets knows how to perform load and any_ext
    9006             :   // on vectors in one instruction.  We only perform this transformation on
    9007             :   // scalars.
    9008       15059 :   if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
    9009       14875 :       ISD::isUNINDEXEDLoad(N0.getNode()) &&
    9010       15011 :       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    9011             :     bool DoXform = true;
    9012             :     SmallVector<SDNode*, 4> SetCCs;
    9013       14670 :     if (!N0.hasOneUse())
    9014       10994 :       DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
    9015             :                                         TLI);
    9016       10994 :     if (DoXform) {
    9017             :       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    9018       28980 :       SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
    9019             :                                        LN0->getChain(),
    9020             :                                        LN0->getBasePtr(), N0.getValueType(),
    9021       14568 :                                        LN0->getMemOperand());
    9022       14490 :       ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
    9023             :       // If the load value is used only by N, replace it via CombineTo N.
    9024             :       bool NoReplaceTrunc = N0.hasOneUse();
    9025       14490 :       CombineTo(N, ExtLoad);
    9026       14490 :       if (NoReplaceTrunc) {
    9027        7352 :         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
    9028             :       } else {
    9029       10814 :         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
    9030       21628 :                                     N0.getValueType(), ExtLoad);
    9031             :         CombineTo(LN0, Trunc, ExtLoad.getValue(1));
    9032             :       }
    9033       14490 :       return SDValue(N, 0); // Return N so it doesn't get rechecked!
    9034             :     }
    9035             :   }
    9036             : 
    9037             :   // fold (aext (zextload x)) -> (zextload x) with result type VT
    9038             :   // fold (aext (sextload x)) -> (sextload x) with result type VT
    9039             :   // fold (aext ( extload x)) -> ( extload x) with result type VT
    9040             :   if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
    9041       66472 :       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    9042             :     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    9043             :     ISD::LoadExtType ExtType = LN0->getExtensionType();
    9044         263 :     EVT MemVT = LN0->getMemoryVT();
    9045         308 :     if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
    9046         239 :       SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
    9047             :                                        VT, LN0->getChain(), LN0->getBasePtr(),
    9048         239 :                                        MemVT, LN0->getMemOperand());
    9049         239 :       CombineTo(N, ExtLoad);
    9050         478 :       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
    9051         239 :       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    9052             :     }
    9053             :   }
    9054             : 
    9055       65798 :   if (N0.getOpcode() == ISD::SETCC) {
    9056             :     // For vectors:
    9057             :     // aext(setcc) -> vsetcc
    9058             :     // aext(setcc) -> truncate(vsetcc)
    9059             :     // aext(setcc) -> aext(vsetcc)
    9060             :     // Only do this before legalize for now.
    9061        3230 :     if (VT.isVector() && !LegalOperations) {
    9062         948 :       EVT N00VT = N0.getOperand(0).getValueType();
    9063         948 :       if (getSetCCResultType(N00VT) == N0.getValueType())
    9064          89 :         return SDValue();
    9065             : 
    9066             :       // We know that the # elements of the results is the same as the
    9067             :       // # elements of the compare (and the # elements of the compare result
    9068             :       // for that matter).  Check to see that they are the same size.  If so,
    9069             :       // we know that the element size of the aext'd result matches the
    9070             :       // element size of the compare operands.
    9071         385 :       if (VT.getSizeInBits() == N00VT.getSizeInBits())
    9072         558 :         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
    9073             :                              N0.getOperand(1),
    9074         279 :                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
    9075             :       // If the desired elements are smaller or larger than the source
    9076             :       // elements we can use a matching integer vector type and then
    9077             :       // truncate/any extend
    9078             :       else {
    9079         106 :         EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
    9080             :         SDValue VsetCC =
    9081         106 :           DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
    9082             :                         N0.getOperand(1),
    9083         106 :                         cast<CondCodeSDNode>(N0.getOperand(2))->get());
    9084         212 :         return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
    9085             :       }
    9086             :     }
    9087             : 
    9088             :     // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    9089             :     SDLoc DL(N);
    9090        2756 :     if (SDValue SCC = SimplifySelectCC(
    9091        2756 :             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
    9092        2756 :             DAG.getConstant(0, DL, VT),
    9093        2756 :             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
    9094         138 :       return SCC;
    9095             :   }
    9096             : 
    9097       65186 :   return SDValue();
    9098             : }
    9099             : /// Simplify repeated or nested assert?ext nodes; operand 1 of N holds the asserted type.
    9100           0 : SDValue DAGCombiner::visitAssertExt(SDNode *N) {
    9101           0 :   unsigned Opcode = N->getOpcode();
    9102           0 :   SDValue N0 = N->getOperand(0);
    9103           0 :   SDValue N1 = N->getOperand(1);
    9104           0 :   EVT AssertVT = cast<VTSDNode>(N1)->getVT();
    9105             : 
    9106             :   // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
    9107           0 :   if (N0.getOpcode() == Opcode &&
    9108             :       AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
    9109           0 :     return N0;
    9110             : 
    9111           0 :   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
    9112           0 :       N0.getOperand(0).getOpcode() == Opcode) {
    9113             :     // We have an assert, truncate, assert sandwich. Make one stronger assert
    9114             :     // by asserting on the smallest asserted type to the larger source type.
    9115             :     // This eliminates the later assert:
    9116             :     // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
    9117             :     // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
    9118           0 :     SDValue BigA = N0.getOperand(0);
    9119           0 :     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
    9120             :     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
    9121             :            "Asserting zero/sign-extended bits to a type larger than the "
    9122             :            "truncated destination does not provide information");
    9123             : 
    9124             :     SDLoc DL(N);
    9125           0 :     EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
    9126           0 :     SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
    9127           0 :     SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
    9128           0 :                                     BigA.getOperand(0), MinAssertVTVal);
    9129           0 :     return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
    9130             :   }
    9131             : 
    9132           0 :   return SDValue();
    9133             : }
    9134             : 
    9135             : /// If the result of a wider load is shifted right by N bits and then
    9136             : /// truncated to a narrower type, and N is a multiple of the number of bits of
    9137             : /// the narrower type, transform it to a narrower load from address + N / num of
    9138             : /// bits of new type. Also narrow the load if the result is masked with an AND
    9139             : /// to effectively produce a smaller type. If the result is to be extended, also
    9140             : /// fold the extension to form an extending load.
    9141     1115566 : SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
    9142     1115566 :   unsigned Opc = N->getOpcode();
    9143             : 
    9144             :   ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
    9145     1115566 :   SDValue N0 = N->getOperand(0);
    9146     1115566 :   EVT VT = N->getValueType(0);
    9147     1115566 :   EVT ExtVT = VT;
    9148             : 
    9149             :   // This transformation isn't valid for vector loads.
    9150     1115566 :   if (VT.isVector())
    9151       16240 :     return SDValue();
    9152             : 
    9153             :   unsigned ShAmt = 0;
    9154             :   bool HasShiftedOffset = false;
    9155             :   // Special case: SIGN_EXTEND_INREG is basically a truncate to ExtVT followed
    9156             :   // by a sign extension back to VT.
    9157     1099326 :   if (Opc == ISD::SIGN_EXTEND_INREG) {
    9158             :     ExtType = ISD::SEXTLOAD;
    9159       41766 :     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
    9160     1057560 :   } else if (Opc == ISD::SRL) {
    9161             :     // Another special-case: SRL is basically zero-extending a narrower value,
    9162             :     // or it may be shifting a higher subword, half or byte into the lowest
    9163             :     // bits.
    9164             :     ExtType = ISD::ZEXTLOAD;
    9165      138786 :     N0 = SDValue(N, 0);
    9166             : 
    9167             :     auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
    9168             :     auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    9169      138786 :     if (!N01 || !LN0)
    9170      116385 :       return SDValue();
    9171             : 
    9172       22401 :     uint64_t ShiftAmt = N01->getZExtValue();
    9173       22401 :     uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
    9174       22401 :     if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
    9175       22232 :       ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
    9176             :     else
    9177         169 :       ExtVT = EVT::getIntegerVT(*DAG.getContext(),
    9178         169 :                                 VT.getSizeInBits() - ShiftAmt);
    9179      918774 :   } else if (Opc == ISD::AND) {
    9180             :     // An AND with a constant mask is the same as a truncate + zero-extend.
    9181             :     auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
    9182             :     if (!AndC)
    9183           0 :       return SDValue();
    9184             : 
    9185       80218 :     const APInt &Mask = AndC->getAPIntValue();
    9186             :     unsigned ActiveBits = 0;
    9187       80218 :     if (Mask.isMask()) {
    9188             :       ActiveBits = Mask.countTrailingOnes();
    9189        8824 :     } else if (Mask.isShiftedMask()) {
    9190        7785 :       ShAmt = Mask.countTrailingZeros();
    9191        7785 :       APInt ShiftedMask = Mask.lshr(ShAmt);
    9192             :       ActiveBits = ShiftedMask.countTrailingOnes();
    9193             :       HasShiftedOffset = true;
    9194             :     } else
    9195        1039 :       return SDValue();
    9196             : 
    9197             :     ExtType = ISD::ZEXTLOAD;
    9198       79179 :     ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
    9199             :   }
    9200             : 
    9201     1963804 :   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    9202       63686 :     SDValue SRL = N0;
    9203             :     if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
    9204       62198 :       ShAmt = ConstShift->getZExtValue();
    9205       62198 :       unsigned EVTBits = ExtVT.getSizeInBits();
    9206             :       // Is the shift amount a multiple of the size of ExtVT?
    9207       62198 :       if ((ShAmt & (EVTBits-1)) == 0) {
    9208       53583 :         N0 = N0.getOperand(0);
    9209             :         // Is the load width a multiple of the size of ExtVT?
    9210       53583 :         if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
    9211          21 :           return SDValue();
    9212             :       }
    9213             : 
    9214             :       // At this point, we must have a load or else we can't do the transform.
    9215       62177 :       if (!isa<LoadSDNode>(N0)) return SDValue();
    9216             : 
    9217             :       auto *LN0 = cast<LoadSDNode>(N0);
    9218             : 
    9219             :       // Because a SRL must be assumed to *need* to zero-extend the high bits
    9220             :       // (as opposed to anyext the high bits), we can't combine the zextload
    9221             :       // lowering of SRL and an sextload.
    9222       22980 :       if (LN0->getExtensionType() == ISD::SEXTLOAD)
    9223         188 :         return SDValue();
    9224             : 
    9225             :       // If the shift amount is larger than the input type then we're not
    9226             :       // accessing any of the loaded bytes.  If the load was a zextload/extload
    9227             :       // then the result of the shift+trunc is zero/undef (handled elsewhere).
    9228       22792 :       if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
    9229           4 :         return SDValue();
    9230             : 
    9231             :       // If the SRL is only used by a masking AND, we may be able to adjust
    9232             :       // the ExtVT to make the AND redundant.
    9233       22788 :       SDNode *Mask = *(SRL->use_begin());
    9234       22788 :       if (Mask->getOpcode() == ISD::AND &&
    9235        7819 :           isa<ConstantSDNode>(Mask->getOperand(1))) {
    9236             :         const APInt &ShiftMask =
    9237        7812 :           cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
    9238        7812 :         if (ShiftMask.isMask()) {
    9239        6141 :           EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
    9240        6141 :                                            ShiftMask.countTrailingOnes());
    9241             :           // If the mask is smaller, recompute the type.
    9242        9738 :           if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
    9243       10254 :               TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
    9244         773 :             ExtVT = MaskedVT;
    9245             :         }
    9246             :       }
    9247             :     }
    9248             :   }
    9249             : 
    9250             :   // If the load is shifted left (and the result isn't shifted back right),
    9251             :   // we can fold the truncate through the shift.
    9252             :   unsigned ShLeftAmt = 0;
    9253     1823842 :   if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
    9254      947018 :       ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
    9255         393 :     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
    9256           9 :       ShLeftAmt = N01->getZExtValue();
    9257           9 :       N0 = N0.getOperand(0);
    9258             :     }
    9259             :   }
    9260             : 
    9261             :   // If we haven't found a load, we can't narrow it.
    9262      942492 :   if (!isa<LoadSDNode>(N0))
    9263      792923 :     return SDValue();
    9264             : 
    9265      149569 :   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    9266      149569 :   if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
    9267      146449 :     return SDValue();
    9268             : 
    9269             :   auto AdjustBigEndianShift = [&](unsigned ShAmt) {
    9270             :     unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
    9271             :     unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
    9272             :     return LVTStoreBits - EVTStoreBits - ShAmt;
    9273             :   };
    9274             : 
    9275             :   // For big endian targets, we need to adjust the offset to the pointer to
    9276             :   // load the correct bytes.
    9277        3120 :   if (DAG.getDataLayout().isBigEndian())
    9278         167 :     ShAmt = AdjustBigEndianShift(ShAmt);
    9279             : 
    9280        6240 :   EVT PtrType = N0.getOperand(1).getValueType();
    9281        3120 :   uint64_t PtrOff = ShAmt / 8;
    9282        3120 :   unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
    9283        3120 :   SDLoc DL(LN0);
    9284             :   // The original load itself didn't wrap, so an offset within it doesn't.
    9285             :   SDNodeFlags Flags;
    9286             :   Flags.setNoUnsignedWrap(true);
    9287        3120 :   SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
    9288        3120 :                                PtrType, LN0->getBasePtr(),
    9289             :                                DAG.getConstant(PtrOff, DL, PtrType),
    9290        3120 :                                Flags);
    9291        3120 :   AddToWorklist(NewPtr.getNode());
    9292             : 
    9293             :   SDValue Load;
    9294        3120 :   if (ExtType == ISD::NON_EXTLOAD)
    9295        5022 :     Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
    9296             :                        LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
    9297       12555 :                        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
    9298             :   else
    9299        1218 :     Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
    9300             :                           LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
    9301         609 :                           NewAlign, LN0->getMemOperand()->getFlags(),
    9302        3045 :                           LN0->getAAInfo());
    9303             : 
    9304             :   // Replace the old load's chain with the new load's chain.
    9305             :   WorklistRemover DeadNodes(*this);
    9306        3120 :   DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
    9307             : 
    9308             :   // Shift the result left, if we've swallowed a left shift.
    9309        3120 :   SDValue Result = Load;
    9310        3120 :   if (ShLeftAmt != 0) {
    9311           9 :     EVT ShImmTy = getShiftAmountTy(Result.getValueType());
    9312           9 :     if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
    9313           0 :       ShImmTy = VT;
    9314             :     // If the shift amount is as large as the result size (but, presumably,
    9315             :     // no larger than the source) then the useful bits of the result are
    9316             :     // zero; we can't simply return the shortened shift, because the result
    9317             :     // of that operation is undefined.
    9318             :     SDLoc DL(N0);
    9319           9 :     if (ShLeftAmt >= VT.getSizeInBits())
    9320           7 :       Result = DAG.getConstant(0, DL, VT);
    9321             :     else
    9322           2 :       Result = DAG.getNode(ISD::SHL, DL, VT,
    9323           2 :                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
    9324             :   }
    9325             : 
    9326        3120 :   if (HasShiftedOffset) {
    9327             :     // Recalculate the shift amount after it has been altered to calculate
    9328             :     // the offset.
    9329          80 :     if (DAG.getDataLayout().isBigEndian())
    9330          16 :       ShAmt = AdjustBigEndianShift(ShAmt);
    9331             : 
    9332             :     // We're using a shifted mask, so the load now has an offset. This means we
    9333             :     // now need to shift right the mask to match the new load and then shift
    9334             :     // right the result of the AND.
    9335         160 :     const APInt &Mask = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
    9336          80 :     APInt ShiftedMask = Mask.lshr(ShAmt);
    9337          80 :     DAG.UpdateNodeOperands(N, Result, DAG.getConstant(ShiftedMask, DL, VT));
    9338          80 :     SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
    9339          80 :     SDValue Shifted = DAG.getNode(ISD::SHL, DL, VT, SDValue(N, 0),
    9340          80 :                                   ShiftC);
    9341         160 :     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Shifted);
    9342         160 :     DAG.UpdateNodeOperands(Shifted.getNode(), SDValue(N, 0), ShiftC);
    9343             :   }
    9344             :   // Return the new loaded value.
    9345        3120 :   return Result;
    9346             : }
    9347             : 
    9348       46974 : SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
    9349       46974 :   SDValue N0 = N->getOperand(0);
    9350       46974 :   SDValue N1 = N->getOperand(1);
    9351       46974 :   EVT VT = N->getValueType(0);
    9352       46974 :   EVT EVT = cast<VTSDNode>(N1)->getVT();
    9353             :   unsigned VTBits = VT.getScalarSizeInBits();
    9354             :   unsigned EVTBits = EVT.getScalarSizeInBits();
    9355             : 
    9356       46974 :   if (N0.isUndef())
    9357           1 :     return DAG.getUNDEF(VT);
    9358             : 
    9359             :   // fold (sext_in_reg c1) -> c1
    9360       46973 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    9361          24 :     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
    9362             : 
    9363             :   // If the input is already sign extended, just drop the extension.
    9364       46961 :   if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
    9365        1473 :     return N0;
    9366             : 
    9367             :   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
    9368       45488 :   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
    9369           4 :       EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
    9370           4 :     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
    9371           8 :                        N0.getOperand(0), N1);
    9372             : 
    9373             :   // fold (sext_in_reg (sext x)) -> (sext x)
    9374             :   // fold (sext_in_reg (aext x)) -> (sext x)
    9375             :   // if x is small enough.
    9376       45484 :   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
    9377        9486 :     SDValue N00 = N0.getOperand(0);
    9378        9486 :     if (N00.getScalarValueSizeInBits() <= EVTBits &&
    9379           4 :         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
    9380           4 :       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
    9381             :   }
    9382             : 
    9383             :   // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
    9384       45468 :   if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
    9385       45468 :        N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
    9386       45496 :        N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
    9387          14 :       N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
    9388           0 :     if (!LegalOperations ||
    9389           0 :         TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
    9390           0 :       return DAG.getSignExtendVectorInReg(N0.getOperand(0), SDLoc(N), VT);
    9391             :   }
    9392             : 
    9393             :   // fold (sext_in_reg (zext x)) -> (sext x)
    9394             :   // iff we are extending the source sign bit.
    9395       45482 :   if (N0.getOpcode() == ISD::ZERO_EXTEND) {
    9396           6 :     SDValue N00 = N0.getOperand(0);
    9397           6 :     if (N00.getScalarValueSizeInBits() == EVTBits &&
    9398           4 :         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
    9399           8 :       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
    9400             :   }
    9401             : 
    9402             :   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
    9403       90956 :   if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
    9404           6 :     return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
    9405             : 
    9406             :   // fold operands of sext_in_reg based on knowledge that the top bits are not
    9407             :   // demanded.
    9408       45476 :   if (SimplifyDemandedBits(SDValue(N, 0)))
    9409        2118 :     return SDValue(N, 0);
    9410             : 
    9411             :   // fold (sext_in_reg (load x)) -> (smaller sextload x)
    9412             :   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
    9413       43358 :   if (SDValue NarrowLoad = ReduceLoadWidth(N))
    9414         206 :     return NarrowLoad;
    9415             : 
    9416             :   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
    9417             :   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
    9418             :   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
    9419       43152 :   if (N0.getOpcode() == ISD::SRL) {
    9420             :     if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
    9421       20196 :       if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
    9422             :         // We can turn this into an SRA iff the input to the SRL is already sign
    9423             :         // extended enough.
    9424       20194 :         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
    9425       20194 :         if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
    9426        2358 :           return DAG.getNode(ISD::SRA, SDLoc(N), VT,
    9427        4716 :                              N0.getOperand(0), N0.getOperand(1));
    9428             :       }
    9429             :   }
    9430             : 
    9431             :   // fold (sext_inreg (extload x)) -> (sextload x)
    9432             :   // If sextload is not supported by target, we can only do the combine when
    9433             :   // load has one use. Doing otherwise can block folding the extload with other
    9434             :   // extends that the target does support.
    9435             :   if (ISD::isEXTLoad(N0.getNode()) &&
    9436          40 :       ISD::isUNINDEXEDLoad(N0.getNode()) &&
    9437        1700 :       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
    9438        2412 :       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() &&
    9439        1043 :         N0.hasOneUse()) ||
    9440        1061 :        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    9441             :     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    9442         727 :     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
    9443             :                                      LN0->getChain(),
    9444             :                                      LN0->getBasePtr(), EVT,
    9445         728 :                                      LN0->getMemOperand());
    9446         727 :     CombineTo(N, ExtLoad);
    9447             :     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    9448         727 :     AddToWorklist(ExtLoad.getNode());
    9449         727 :     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    9450             :   }
    9451             :   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
    9452          73 :   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
    9453           0 :       N0.hasOneUse() &&
    9454           0 :       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
    9455           0 :       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
    9456           0 :        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    9457             :     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    9458           0 :     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
    9459             :                                      LN0->getChain(),
    9460             :                                      LN0->getBasePtr(), EVT,
    9461           0 :                                      LN0->getMemOperand());
    9462           0 :     CombineTo(N, ExtLoad);
    9463             :     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    9464           0 :     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    9465             :   }
    9466             : 
    9467             :   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
    9468       40067 :   if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
    9469          58 :     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
    9470          58 :                                            N0.getOperand(1), false))
    9471           8 :       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
    9472          16 :                          BSwap, N1);
    9473             :   }
    9474             : 
    9475       40059 :   return SDValue();
    9476             : }
    9477             : 
    9478        3049 : SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
    9479        3049 :   SDValue N0 = N->getOperand(0);
    9480        6098 :   EVT VT = N->getValueType(0);
    9481             : 
    9482        3049 :   if (N0.isUndef())
    9483           0 :     return DAG.getUNDEF(VT);
    9484             : 
    9485        3049 :   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
    9486        3049 :                                               LegalOperations))
    9487          20 :     return SDValue(Res, 0);
    9488             : 
    9489        3029 :   return SDValue();
    9490             : }
    9491             : 
    9492        5864 : SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
    9493        5864 :   SDValue N0 = N->getOperand(0);
    9494       11728 :   EVT VT = N->getValueType(0);
    9495             : 
    9496        5864 :   if (N0.isUndef())
    9497           0 :     return DAG.getUNDEF(VT);
    9498             : 
    9499        5864 :   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
    9500        5864 :                                               LegalOperations))
    9501          22 :     return SDValue(Res, 0);
    9502             : 
    9503        5842 :   return SDValue();
    9504             : }
    9505             : 
    9506      858614 : SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
                         // Combine a TRUNCATE node. The folds below are tried in order and the
                         // first one that applies supplies the result: a replacement value,
                         // SDValue(N, 0) when SimplifyDemandedBits reported a change, or an
                         // empty SDValue when nothing applies.
    9507      858614 :   SDValue N0 = N->getOperand(0);
    9508      858614 :   EVT VT = N->getValueType(0);
                         // Endianness decides which narrow element holds the low bits in the
                         // extract/bitcast folds below.
    9509      858614 :   bool isLE = DAG.getDataLayout().isLittleEndian();
    9510             : 
    9511             :   // noop truncate: the operand already has the result type.
    9512     1717674 :   if (N0.getValueType() == N->getValueType(0))
    9513           0 :     return N0;
    9514             : 
    9515             :   // fold (truncate (truncate x)) -> (truncate x)
    9516      858614 :   if (N0.getOpcode() == ISD::TRUNCATE)
    9517        4587 :     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
    9518             : 
    9519             :   // fold (truncate c1) -> c1
    9520      856331 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    9521        3355 :     SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
                           // Only return the result if getNode produced something other than N.
    9522        3355 :     if (C.getNode() != N)
    9523        3332 :       return C;
    9524             :   }
    9525             : 
    9526             :   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
    9527      852999 :   if (N0.getOpcode() == ISD::ZERO_EXTEND ||
    9528     1703424 :       N0.getOpcode() == ISD::SIGN_EXTEND ||
    9529             :       N0.getOpcode() == ISD::ANY_EXTEND) {
    9530             :     // if the source is smaller than the dest, we still need an extend.
    9531       12544 :     if (N0.getOperand(0).getValueType().bitsLT(VT))
    9532        2422 :       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
    9533             :     // if the source is larger than the dest, then we just need the truncate.
    9534        5061 :     if (N0.getOperand(0).getValueType().bitsGT(VT))
    9535        2276 :       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
    9536             :     // if the source and dest are the same type, we can drop both the extend
    9537             :     // and the truncate.
    9538        3967 :     return N0.getOperand(0);
    9539             :   }
    9540             : 
    9541             :   // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
    9542      831141 :   if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
    9543        1931 :     return SDValue();
    9544             : 
    9545             :   // Fold extract-and-trunc into a narrow extract. For example
    9546             :   // (little-endian):
    9547             :   //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
    9548             :   //   i32 y = TRUNCATE(i64 x)
    9549             :   //        -- becomes --
    9550             :   //   v4i32 b = BITCAST (v2i64 val)
    9551             :   //   i32 y = EXTRACT_VECTOR_ELT(v4i32 b, i32 2)
    9552             :   // Note: We only run this optimization after type legalization (which often
    9553             :   // creates this pattern) and before operation legalization after which
    9554             :   // we need to be more careful about the vector instructions that we generate.
    9555       17435 :   if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
    9556      852112 :       LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
    9557        7316 :     EVT VecTy = N0.getOperand(0).getValueType();
    9558        7316 :     EVT ExTy = N0.getValueType();
    9559       14632 :     EVT TrTy = N->getValueType(0);
    9560             : 
    9561             :     unsigned NumElem = VecTy.getVectorNumElements();
    9562        7316 :     unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
    9563             : 
    9564        7316 :     EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
    9565             :     assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
    9566             : 
    9567        7316 :     SDValue EltNo = N0->getOperand(1);
    9568        7316 :     if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
    9569        7102 :       int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
    9570        7102 :       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
                             // Each wide element covers SizeRatio narrow elements: pick the first
                             // of them on little-endian and the last on big-endian.
    9571        7102 :       int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
    9572             : 
    9573             :       SDLoc DL(N);
    9574        7102 :       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
    9575        7102 :                          DAG.getBitcast(NVT, N0.getOperand(0)),
    9576        7102 :                          DAG.getConstant(Index, DL, IndexTy));
    9577             :     }
    9578             :   }
    9579             : 
    9580             :   // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
    9581     1675388 :   if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
    9582         124 :     EVT SrcVT = N0.getValueType();
    9583         248 :     if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
    9584         124 :         TLI.isTruncateFree(SrcVT, VT)) {
    9585             :       SDLoc SL(N0);
    9586          62 :       SDValue Cond = N0.getOperand(0);
    9587         124 :       SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
    9588         186 :       SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
    9589         126 :       return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
    9590             :     }
    9591             :   }
    9592             : 
    9593             :   // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
    9594      837632 :   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
    9595      840182 :       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
    9596        1275 :       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
    9597        1888 :     SDValue Amt = N0.getOperand(1);
    9598         442 :     KnownBits Known;
    9599         944 :     DAG.computeKnownBits(Amt, Known);
    9600             :     unsigned Size = VT.getScalarSizeInBits();
                           // Proceed only when the known-bits analysis bounds the number of
                           // significant bits in the shift amount by Log2_32(Size).
    9601        1888 :     if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
    9602             :       SDLoc SL(N);
    9603         502 :       EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
    9604             : 
    9605        1506 :       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
    9606         502 :       if (AmtVT != Amt.getValueType()) {
    9607           7 :         Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
    9608           7 :         AddToWorklist(Amt.getNode());
    9609             :       }
    9610        1004 :       return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
    9611             :     }
    9612             :   }
    9613             : 
    9614             :   // Fold a series of buildvector, bitcast, and truncate if possible.
    9615             :   // For example fold
    9616             :   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
    9617             :   //   (2xi32 (buildvector x, y)).
    9618       13549 :   if (Level == AfterLegalizeVectorOps && VT.isVector() &&
    9619        1181 :       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
    9620      837161 :       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
    9621           1 :       N0.getOperand(0).hasOneUse()) {
    9622           2 :     SDValue BuildVect = N0.getOperand(0);
    9623           1 :     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
    9624           1 :     EVT TruncVecEltTy = VT.getVectorElementType();
    9625             : 
    9626             :     // Check that the element types match.
    9627           1 :     if (BuildVectEltTy == TruncVecEltTy) {
    9628             :       // Now we only need to compute the offset of the truncated elements.
    9629             :       unsigned BuildVecNumElts =  BuildVect.getNumOperands();
    9630             :       unsigned TruncVecNumElts = VT.getVectorNumElements();
    9631           1 :       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
    9632             : 
    9633             :       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
    9634             :              "Invalid number of elements");
    9635             : 
                             // Keep every TruncEltOffset-th build_vector operand, starting at 0.
    9636             :       SmallVector<SDValue, 8> Opnds;
    9637           3 :       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
    9638           2 :         Opnds.push_back(BuildVect.getOperand(i));
    9639             : 
    9640           2 :       return DAG.getBuildVector(VT, SDLoc(N), Opnds);
    9641             :     }
    9642             :   }
    9643             : 
    9644             :   // See if we can simplify the input to this truncate through knowledge that
    9645             :   // only the low bits are being used.
    9646             :   // For example "trunc (or (shl x, 8), y)" -> "trunc y"
    9647             :   // Currently we only perform this optimization on scalars because vectors
    9648             :   // may have different active low bits.
    9649      837129 :   if (!VT.isVector()) {
    9650             :     APInt Mask =
    9651      826969 :         APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
    9652      826969 :     if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
    9653        3195 :       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
    9654             :   }
    9655             : 
    9656             :   // fold (truncate (load x)) -> (smaller load x)
    9657             :   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
    9658      835539 :   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
    9659      816111 :     if (SDValue Reduced = ReduceLoadWidth(N))
    9660        2444 :       return Reduced;
    9661             : 
    9662             :     // Handle the case where the load remains an extending load even
    9663             :     // after truncation.
    9664      813667 :     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
    9665             :       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    9666       12528 :       if (!LN0->isVolatile() &&
    9667       24941 :           LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
    9668         548 :         SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
    9669             :                                          VT, LN0->getChain(), LN0->getBasePtr(),
    9670             :                                          LN0->getMemoryVT(),
    9671         274 :                                          LN0->getMemOperand());
                               // Move users of the old load's chain result onto the new load.
    9672         274 :         DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
    9673         274 :         return NewLoad;
    9674             :       }
    9675             :     }
    9676             :   }
    9677             : 
    9678             :   // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
    9679             :   // where ... are all 'undef'.
    9680     1665642 :   if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
    9681             :     SmallVector<EVT, 8> VTs;
    9682             :     SDValue V;
    9683             :     unsigned Idx = 0;
    9684             :     unsigned NumDefs = 0;
    9685             : 
    9686         398 :     for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
    9687         776 :       SDValue X = N0.getOperand(i);
    9688         388 :       if (!X.isUndef()) {
    9689         362 :         V = X;
    9690             :         Idx = i;
    9691         362 :         NumDefs++;
    9692             :       }
    9693             :       // Stop if more than one member is non-undef.
    9694         388 :       if (NumDefs > 1)
    9695             :         break;
    9696         212 :       VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
    9697             :                                      VT.getVectorElementType(),
    9698         636 :                                      X.getValueType().getVectorNumElements()));
    9699             :     }
    9700             : 
    9701         186 :     if (NumDefs == 0)
    9702           0 :       return DAG.getUNDEF(VT);
    9703             : 
    9704         186 :     if (NumDefs == 1) {
    9705             :       assert(V.getNode() && "The single defined operand is empty!");
    9706             :       SmallVector<SDValue, 8> Opnds;
    9707          46 :       for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
    9708          36 :         if (i != Idx) {
    9709          52 :           Opnds.push_back(DAG.getUNDEF(VTs[i]));
    9710          26 :           continue;
    9711             :         }
    9712          20 :         SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
    9713          10 :         AddToWorklist(NV.getNode());
    9714          10 :         Opnds.push_back(NV);
    9715             :       }
    9716          20 :       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
    9717             :     }
    9718             :   }
    9719             : 
    9720             :   // Fold truncate of a bitcast of a vector to an extract of the low vector
    9721             :   // element.
    9722             :   //
    9723             :   // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
    9724     1682853 :   if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
    9725       17014 :     SDValue VecSrc = N0.getOperand(0);
    9726       17014 :     EVT SrcVT = VecSrc.getValueType();
    9727       28694 :     if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
    9728       11680 :         (!LegalOperations ||
    9729       10767 :          TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
    9730             :       SDLoc SL(N);
    9731             : 
    9732       10483 :       EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
                             // Element 0 on little-endian, the last element on big-endian.
    9733       10486 :       unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
    9734       10483 :       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
    9735       10483 :                          VecSrc, DAG.getConstant(Idx, SL, IdxVT));
    9736             :     }
    9737             :   }
    9738             : 
    9739             :   // Simplify the operands using demanded-bits information.
    9740     1634507 :   if (!VT.isVector() &&
    9741      812179 :       SimplifyDemandedBits(SDValue(N, 0)))
    9742       12314 :     return SDValue(N, 0);
    9743             : 
    9744             :   // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
    9745             :   // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
    9746             :   // When the adde's carry is not used.
    9747      810037 :   if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
    9748      810058 :       N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
    9749          21 :       (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) {
    9750             :     SDLoc SL(N);
    9751          60 :     auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
    9752          60 :     auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
    9753          40 :     auto VTs = DAG.getVTList(VT, N0->getValueType(1));
    9754          40 :     return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
    9755             :   }
    9756             : 
    9757             :   // fold (truncate (extract_subvector(ext x))) ->
    9758             :   //      (extract_subvector x)
    9759             :   // TODO: This can be generalized to cover cases where the truncate and extract
    9760             :   // do not fully cancel each other out.
    9761      809994 :   if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
    9762         153 :     SDValue N00 = N0.getOperand(0);
    9763          25 :     if (N00.getOpcode() == ISD::SIGN_EXTEND ||
    9764         170 :         N00.getOpcode() == ISD::ZERO_EXTEND ||
    9765             :         N00.getOpcode() == ISD::ANY_EXTEND) {
    9766         272 :       if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
    9767             :           VT.getVectorElementType())
    9768         136 :         return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
    9769         272 :                            N00.getOperand(0), N0.getOperand(1));
    9770             :     }
    9771             :   }
    9772             : 
    9773      809858 :   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    9774           4 :     return NewVSel;
    9775             : 
    9776      809854 :   return SDValue();
    9777             : }
    9778             : 
    9779             : static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
    9780       58415 :   SDValue Elt = N->getOperand(i);
    9781      116830 :   if (Elt.getOpcode() != ISD::MERGE_VALUES)
    9782             :     return Elt.getNode();
    9783         513 :   return Elt.getOperand(Elt.getResNo()).getNode();
    9784             : }
    9785             : 
    9786             : /// build_pair (load, load) -> load
    9787             : /// if load locations are consecutive.
    9788       58415 : SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
    9789             :   assert(N->getOpcode() == ISD::BUILD_PAIR);
    9790             : 
    9791             :   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
    9792             :   LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
    9793             : 
    9794             :   // A BUILD_PAIR is always having the least significant part in elt 0 and the
    9795             :   // most significant part in elt 1. So when combining into one large load, we
    9796             :   // need to consider the endianness.
    9797       58415 :   if (DAG.getDataLayout().isBigEndian())
    9798             :     std::swap(LD1, LD2);
    9799             : 
    9800       61036 :   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
    9801             :       LD1->getAddressSpace() != LD2->getAddressSpace())
    9802       55794 :     return SDValue();
    9803        5242 :   EVT LD1VT = LD1->getValueType(0);
    9804             :   unsigned LD1Bytes = LD1VT.getStoreSize();
    9805        2621 :   if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
    9806        2621 :       DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
    9807        2483 :     unsigned Align = LD1->getAlignment();
    9808        4966 :     unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
    9809        2483 :         VT.getTypeForEVT(*DAG.getContext()));
    9810             : 
    9811        2483 :     if (NewAlign <= Align &&
    9812        2256 :         (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
    9813        2256 :       return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
    9814        4512 :                          LD1->getPointerInfo(), Align);
    9815             :   }
    9816             : 
    9817         365 :   return SDValue();
    9818             : }
    9819             : 
    9820             : static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
    9821             :   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
    9822             :   // and Lo parts; on big-endian machines it doesn't.
    9823          10 :   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
    9824             : }
    9825             : // Try to turn (bitcast-to-FP (and/xor/or (bitcast X), sign-mask constant)) into fabs/fneg/fneg(fabs) of X; returns an empty SDValue when no fold applies.
    9826      679701 : static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
    9827             :                                     const TargetLowering &TLI) {
    9828             :   // If this is not a bitcast to an FP type or if the target doesn't have
    9829             :   // IEEE754-compliant FP logic, we're done.
    9830      679701 :   EVT VT = N->getValueType(0);
    9831      679701 :   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
    9832      625359 :     return SDValue();
    9833             : 
    9834             :   // TODO: Handle cases where the integer constant is a different scalar
    9835             :   // bitwidth to the FP.
    9836       54342 :   SDValue N0 = N->getOperand(0);
    9837       54342 :   EVT SourceVT = N0.getValueType();
    9838       54342 :   if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
    9839       19441 :     return SDValue();
    9840             : 
    9841             :   unsigned FPOpcode;
    9842             :   APInt SignMask;
    9843       34901 :   switch (N0.getOpcode()) {
    9844        2099 :   case ISD::AND:
    9845             :     FPOpcode = ISD::FABS;
    9846        2099 :     SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
    9847        2099 :     break;
    9848         689 :   case ISD::XOR:
    9849             :     FPOpcode = ISD::FNEG;
    9850         689 :     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
    9851         689 :     break;
    9852         971 :   case ISD::OR:
    9853             :     FPOpcode = ISD::FABS; // The FNEG wrapper for the OR case is added after the FABS node is built below.
    9854         971 :     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
    9855         971 :     break;
    9856       31142 :   default:
    9857       31142 :     return SDValue();
    9858             :   }
    9859             : 
    9860             :   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
    9861             :   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
    9862             :   // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
    9863             :   //   fneg (fabs X)
    9864        3759 :   SDValue LogicOp0 = N0.getOperand(0);
    9865        3759 :   ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
    9866         650 :   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
    9867        3759 :       LogicOp0.getOpcode() == ISD::BITCAST &&
    9868          98 :       LogicOp0.getOperand(0).getValueType() == VT) {
    9869          98 :     SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
    9870             :     NumFPLogicOpsConv++;
    9871          49 :     if (N0.getOpcode() == ISD::OR)
    9872          20 :       return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
    9873          39 :     return FPOp;
    9874             :   }
    9875             : 
    9876        3710 :   return SDValue();
    9877             : }
    9878             : // Visit a BITCAST node and try each bitcast fold below in turn; returns the replacement value, or an empty SDValue if nothing applies.
    9879      821241 : SDValue DAGCombiner::visitBITCAST(SDNode *N) {
    9880      821241 :   SDValue N0 = N->getOperand(0);
    9881     1642482 :   EVT VT = N->getValueType(0);
    9882             : // A bitcast of undef is undef of the result type.
    9883      821241 :   if (N0.isUndef())
    9884         126 :     return DAG.getUNDEF(VT);
    9885             : 
    9886             :   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
    9887             :   // Only do this before legalize types, since we might create an illegal
    9888             :   // scalar type. Even if we knew we wouldn't create an illegal scalar type
    9889             :   // we can only do this before legalize ops, since the target may be
    9890             :   // depending on the bitcast.
    9891             :   // First check to see if this is all constant.
    9892      151854 :   if (!LegalTypes &&
    9893       90104 :       N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
    9894      910331 :       VT.isVector() && cast<BuildVectorSDNode>(N0)->isConstant())
    9895             :     return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
    9896       88248 :                                              VT.getVectorElementType());
    9897             : 
    9898             :   // If the input is a constant, let getNode fold it.
    9899             :   if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
    9900             :     // If we can't allow illegal operations, we need to check that this is just
    9901             :     // a scalar fp -> int or int -> fp conversion and that the resulting
    9902             :     // constant will be legal.
    9903         941 :     if (!LegalOperations ||
    9904         740 :         (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
    9905         949 :          TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
    9906         100 :         (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
    9907          27 :          TLI.isOperationLegal(ISD::Constant, VT))) {
    9908         256 :       SDValue C = DAG.getBitcast(VT, N0);
    9909         256 :       if (C.getNode() != N)
    9910          98 :         return C;
    9911             :     }
    9912             :   }
    9913             : 
    9914             :   // (conv (conv x, t1), t2) -> (conv x, t2)
    9915      732769 :   if (N0.getOpcode() == ISD::BITCAST)
    9916       91980 :     return DAG.getBitcast(VT, N0.getOperand(0));
    9917             : 
    9918             :   // fold (conv (load x)) -> (load (conv*)x)
    9919             :   // If the resultant load doesn't need a higher alignment than the original!
    9920      196110 :   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
    9921             :       // Do not remove the cast if the types differ in endian layout.
    9922       96318 :       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
    9923       96318 :           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
    9924             :       // If the load is volatile, we only want to change the load type if the
    9925             :       // resulting load is legal. Otherwise we might increase the number of
    9926             :       // memory accesses. We don't care if the original type was legal or not
    9927             :       // as we assume software couldn't rely on the number of accesses of an
    9928             :       // illegal type.
    9929       96316 :       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
    9930       99152 :        TLI.isOperationLegal(ISD::LOAD, VT)) &&
    9931       31850 :       TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
    9932             :     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    9933        7177 :     unsigned OrigAlign = LN0->getAlignment();
    9934             : // Only rewrite when the target reports a VT access at the original alignment as both allowed and fast.
    9935        7177 :     bool Fast = false;
    9936        7177 :     if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
    9937        7177 :                                LN0->getAddressSpace(), OrigAlign, &Fast) &&
    9938             :         Fast) {
    9939             :       SDValue Load =
    9940       14156 :           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
    9941             :                       LN0->getPointerInfo(), OrigAlign,
    9942       14168 :                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
    9943        7078 :       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
    9944        7078 :       return Load;
    9945             :     }
    9946             :   }
    9947             : 
    9948      679701 :   if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
    9949          49 :     return V;
    9950             : 
    9951             :   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
    9952             :   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
    9953             :   //
    9954             :   // For ppc_fp128:
    9955             :   // fold (bitcast (fneg x)) ->
    9956             :   //     flipbit = signbit
    9957             :   //     (xor (bitcast x) (build_pair flipbit, flipbit))
    9958             :   //
    9959             :   // fold (bitcast (fabs x)) ->
    9960             :   //     flipbit = (and (extract_element (bitcast x), 0), signbit)
    9961             :   //     (xor (bitcast x) (build_pair flipbit, flipbit))
    9962             :   // This often reduces constant pool loads.
    9963      680134 :   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
    9964         336 :        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
    9965         347 :       N0.getNode()->hasOneUse() && VT.isInteger() &&
    9966     1359404 :       !VT.isVector() && !N0.getValueType().isVector()) {
    9967          96 :     SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
    9968          48 :     AddToWorklist(NewConv.getNode());
    9969             : 
    9970             :     SDLoc DL(N);
    9971          10 :     if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
    9972             :       assert(VT.getSizeInBits() == 128);
    9973          10 :       SDValue SignBit = DAG.getConstant(
    9974          20 :           APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
    9975          10 :       SDValue FlipBit;
    9976          10 :       if (N0.getOpcode() == ISD::FNEG) {
    9977           5 :         FlipBit = SignBit;
    9978           5 :         AddToWorklist(FlipBit.getNode());
    9979             :       } else {
    9980             :         assert(N0.getOpcode() == ISD::FABS);
    9981             :         SDValue Hi =
    9982           5 :             DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
    9983           5 :                         DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
    9984          15 :                                               SDLoc(NewConv)));
    9985           5 :         AddToWorklist(Hi.getNode());
    9986           5 :         FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
    9987           5 :         AddToWorklist(FlipBit.getNode());
    9988             :       }
    9989             :       SDValue FlipBits =
    9990          10 :           DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
    9991          10 :       AddToWorklist(FlipBits.getNode());
    9992          20 :       return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
    9993             :     }
    9994          38 :     APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
    9995          38 :     if (N0.getOpcode() == ISD::FNEG)
    9996          28 :       return DAG.getNode(ISD::XOR, DL, VT,
    9997          28 :                          NewConv, DAG.getConstant(SignBit, DL, VT));
    9998             :     assert(N0.getOpcode() == ISD::FABS);
    9999          10 :     return DAG.getNode(ISD::AND, DL, VT,
   10000          20 :                        NewConv, DAG.getConstant(~SignBit, DL, VT));
   10001             :   }
   10002             : 
   10003             :   // fold (bitconvert (fcopysign cst, x)) ->
   10004             :   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
   10005             :   // Note that we don't handle (copysign x, cst) because this can always be
   10006             :   // folded to an fneg or fabs.
   10007             :   //
   10008             :   // For ppc_fp128:
   10009             :   // fold (bitcast (fcopysign cst, x)) ->
   10010             :   //     flipbit = (and (extract_element
   10011             :   //                     (xor (bitcast cst), (bitcast x)), 0),
   10012             :   //                    signbit)
   10013             :   //     (xor (bitcast cst) (build_pair flipbit, flipbit))
   10014             :   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
   10015           6 :       isa<ConstantFPSDNode>(N0.getOperand(0)) &&
   10016      679616 :       VT.isInteger() && !VT.isVector()) {
   10017           6 :     unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
   10018           6 :     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
   10019           6 :     if (isTypeLegal(IntXVT)) {
   10020          12 :       SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
   10021           6 :       AddToWorklist(X.getNode());
   10022             : 
   10023             :       // If X has a different width than the result/lhs, sext it or truncate it.
   10024           6 :       unsigned VTWidth = VT.getSizeInBits();
   10025           6 :       if (OrigXWidth < VTWidth) {
   10026           0 :         X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
   10027           0 :         AddToWorklist(X.getNode());
   10028           6 :       } else if (OrigXWidth > VTWidth) {
   10029             :         // To get the sign bit in the right place, we have to shift it right
   10030             :         // before truncating.
   10031             :         SDLoc DL(X);
   10032           0 :         X = DAG.getNode(ISD::SRL, DL,
   10033             :                         X.getValueType(), X,
   10034           0 :                         DAG.getConstant(OrigXWidth-VTWidth, DL,
   10035           0 :                                         X.getValueType()));
   10036           0 :         AddToWorklist(X.getNode());
   10037           0 :         X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
   10038           0 :         AddToWorklist(X.getNode());
   10039             :       }
   10040             : 
   10041           5 :       if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
   10042           5 :         APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
   10043          10 :         SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
   10044           5 :         AddToWorklist(Cst.getNode());
   10045          10 :         SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
   10046           5 :         AddToWorklist(X.getNode());
   10047           5 :         SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
   10048           5 :         AddToWorklist(XorResult.getNode());
   10049           5 :         SDValue XorResult64 = DAG.getNode(
   10050           5 :             ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
   10051           5 :             DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
   10052          10 :                                   SDLoc(XorResult)));
   10053           5 :         AddToWorklist(XorResult64.getNode());
   10054             :         SDValue FlipBit =
   10055           5 :             DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
   10056          15 :                         DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
   10057           5 :         AddToWorklist(FlipBit.getNode());
   10058             :         SDValue FlipBits =
   10059           5 :             DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
   10060           5 :         AddToWorklist(FlipBits.getNode());
   10061          10 :         return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
   10062             :       }
   10063           1 :       APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
   10064           2 :       X = DAG.getNode(ISD::AND, SDLoc(X), VT,
   10065           2 :                       X, DAG.getConstant(SignBit, SDLoc(X), VT));
   10066           1 :       AddToWorklist(X.getNode());
   10067             : 
   10068           2 :       SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
   10069           2 :       Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
   10070           3 :                         Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
   10071           1 :       AddToWorklist(Cst.getNode());
   10072             : 
   10073           2 :       return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
   10074             :     }
   10075             :   }
   10076             : 
   10077             :   // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
   10078      679598 :   if (N0.getOpcode() == ISD::BUILD_PAIR)
   10079        7396 :     if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
   10080          38 :       return CombineLD;
   10081             : 
   10082             :   // Remove double bitcasts from shuffles - this is often a legacy of
   10083             :   // XformToShuffleWithZero being used to combine bitmaskings (of
   10084             :   // float vectors bitcast to integer vectors) into shuffles.
   10085             :   // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
   10086      333566 :   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
   10087      146810 :       N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
   10088      694106 :       VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
   10089      680375 :       !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
   10090             :     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
   10091             : 
   10092             :     // If operands are a bitcast, peek through if it casts the original VT.
   10093             :     // If operands are a constant, just bitcast back to original VT.
   10094             :     auto PeekThroughBitcast = [&](SDValue Op) {
   10095             :       if (Op.getOpcode() == ISD::BITCAST &&
   10096             :           Op.getOperand(0).getValueType() == VT)
   10097             :         return SDValue(Op.getOperand(0));
   10098             :       if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
   10099             :           ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
   10100             :         return DAG.getBitcast(VT, Op);
   10101             :       return SDValue();
   10102         815 :     };
   10103             : 
   10104             :     // FIXME: If either input vector is bitcast, try to convert the shuffle to
   10105             :     // the result type of this bitcast. This would eliminate at least one
   10106             :     // bitcast. See the transform in InstCombine.
   10107        1630 :     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
   10108        1630 :     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
   10109         815 :     if (!(SV0 && SV1))
   10110         594 :       return SDValue();
   10111             : // Each original mask entry expands to MaskScale consecutive entries for VT; negative (undef) entries stay -1.
   10112             :     int MaskScale =
   10113         442 :         VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
   10114             :     SmallVector<int, 8> NewMask;
   10115        1109 :     for (int M : SVN->getMask())
   10116        3506 :       for (int i = 0; i != MaskScale; ++i)
   10117        2618 :         NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
   10118             : // If the scaled mask is not legal as-is, retry once with the operands swapped and the mask commuted.
   10119         442 :     bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
   10120         221 :     if (!LegalMask) {
   10121             :       std::swap(SV0, SV1);
   10122             :       ShuffleVectorSDNode::commuteMask(NewMask);
   10123          10 :       LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
   10124             :     }
   10125             : 
   10126         221 :     if (LegalMask)
   10127         432 :       return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
   10128             :   }
   10129             : 
   10130      678750 :   return SDValue();
   10131             : }
   10132             : // Visit a BUILD_PAIR node: the only combine attempted is CombineConsecutiveLoads on the node with its own result type.
   10133             : SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
   10134       51019 :   EVT VT = N->getValueType(0);
   10135       51019 :   return CombineConsecutiveLoads(N, VT);
   10136             : }
   10137             : /// Constant-fold a bitcast of a constant BUILD_VECTOR into a build vector whose elements have type DstEltVT.
   10138             : /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
   10139             : /// operands. DstEltVT indicates the destination element value type.
   10140       88394 : SDValue DAGCombiner::
   10141             : ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
   10142      176788 :   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
   10143             : 
   10144             :   // If this is already the right type, we're done.
   10145       88394 :   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
   10146             : 
   10147       88394 :   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
   10148       88394 :   unsigned DstBitSize = DstEltVT.getSizeInBits();
   10149             : 
   10150             :   // If this is a conversion of N elements of one type to N elements of another
   10151             :   // type, convert each element.  This handles FP<->INT cases.
   10152       88394 :   if (SrcBitSize == DstBitSize) {
   10153             :     SmallVector<SDValue, 8> Ops;
   10154        1045 :     for (SDValue Op : BV->op_values()) {
   10155             :       // If the vector element type is not legal, the BUILD_VECTOR operands
   10156             :       // are promoted and implicitly truncated.  Make that explicit here.
   10157         797 :       if (Op.getValueType() != SrcEltVT)
   10158           0 :         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
   10159         797 :       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
   10160         797 :       AddToWorklist(Ops.back().getNode());
   10161             :     }
   10162         248 :     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
   10163         744 :                               BV->getValueType(0).getVectorNumElements());
   10164         496 :     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
   10165             :   }
   10166             : 
   10167             :   // Otherwise, we're growing or shrinking the elements.  To avoid having to
   10168             :   // handle annoying details of growing/shrinking FP values, we convert them to
   10169             :   // int first.
   10170       88146 :   if (SrcEltVT.isFloatingPoint()) {
   10171             :     // Convert the input float vector to an int vector where the elements are
   10172             :     // the same sizes.
   10173          64 :     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
   10174          64 :     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
   10175          64 :     SrcEltVT = IntVT;
   10176             :   }
   10177             : 
   10178             :   // Now we know the input is an integer vector.  If the output is a FP type,
   10179             :   // convert to integer first, then to FP of the right size.
   10180       88146 :   if (DstEltVT.isFloatingPoint()) {
   10181          41 :     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
   10182          41 :     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
   10183             : 
   10184             :     // Next, convert to FP elements of the same size.
   10185          41 :     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
   10186             :   }
   10187             : 
   10188             :   SDLoc DL(BV);
   10189             : 
   10190             :   // Okay, we know the src/dst types are both integers of differing types.
   10191             :   // Handle growing first.
   10192             :   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
   10193       88105 :   if (SrcBitSize < DstBitSize) {
   10194       87691 :     unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
   10195             : 
   10196             :     SmallVector<SDValue, 8> Ops;
   10197      263243 :     for (unsigned i = 0, e = BV->getNumOperands(); i != e;
   10198      175552 :          i += NumInputsPerOutput) {
   10199      175552 :       bool isLE = DAG.getDataLayout().isLittleEndian();
   10200             :       APInt NewBits = APInt(DstBitSize, 0);
   10201             :       bool EltIsUndef = true;
   10202      527104 :       for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
   10203             :         // Shift the previously computed bits over.
   10204      351552 :         NewBits <<= SrcBitSize;
   10205      351552 :         SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
   10206      351552 :         if (Op.isUndef()) continue;
   10207             :         EltIsUndef = false;
   10208             : 
   10209      351398 :         NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
   10210      702796 :                    zextOrTrunc(SrcBitSize).zext(DstBitSize);
   10211             :       }
   10212             : // The output element is undef only if every input element feeding it was undef; otherwise undef inputs contribute zero bits.
   10213      175552 :       if (EltIsUndef)
   10214          16 :         Ops.push_back(DAG.getUNDEF(DstEltVT));
   10215             :       else
   10216      175536 :         Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
   10217             :     }
   10218             : 
   10219       87691 :     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
   10220      175382 :     return DAG.getBuildVector(VT, DL, Ops);
   10221             :   }
   10222             : 
   10223             :   // Finally, this must be the case where we are shrinking elements: each input
   10224             :   // turns into multiple outputs.
   10225         414 :   unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
   10226         414 :   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
   10227         828 :                             NumOutputsPerInput*BV->getNumOperands());
   10228             :   SmallVector<SDValue, 8> Ops;
   10229             : 
   10230        2316 :   for (const SDValue &Op : BV->op_values()) {
   10231        3804 :     if (Op.isUndef()) {
   10232          13 :       Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
   10233          13 :       continue;
   10234             :     }
   10235             : 
   10236             :     APInt OpVal = cast<ConstantSDNode>(Op)->
   10237        3778 :                   getAPIntValue().zextOrTrunc(SrcBitSize);
   10238             : // Peel off DstBitSize bits at a time, lowest bits first.
   10239        8221 :     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
   10240        6332 :       APInt ThisVal = OpVal.trunc(DstBitSize);
   10241        6332 :       Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
   10242             :       OpVal.lshrInPlace(DstBitSize);
   10243             :     }
   10244             : 
   10245             :     // For big endian targets, swap the order of the pieces of each element.
   10246        1889 :     if (DAG.getDataLayout().isBigEndian())
   10247          76 :       std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
   10248             :   }
   10249             : 
   10250         828 :   return DAG.getBuildVector(VT, DL, Ops);
   10251             : }
   10252             : // Returns true if N's node flags allow contraction or allow reassociation.
   10253             : static bool isContractable(SDNode *N) {
   10254       37613 :   SDNodeFlags F = N->getFlags();
   10255       37613 :   return F.hasAllowContract() || F.hasAllowReassociation();
   10256             : }
   10257             : 
   10258             : /// Try to perform FMA combining on a given FADD node.
   10259       43058 : SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
   10260       43058 :   SDValue N0 = N->getOperand(0);
   10261       43058 :   SDValue N1 = N->getOperand(1);
   10262       86116 :   EVT VT = N->getValueType(0);
   10263             :   SDLoc SL(N);
   10264             : 
   10265       43058 :   const TargetOptions &Options = DAG.getTarget().Options;
   10266             : 
   10267             :   // Floating-point multiply-add with intermediate rounding.
   10268       43058 :   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
   10269             : 
   10270             :   // Floating-point multiply-add without intermediate rounding.
   10271             :   bool HasFMA =
   10272       43058 :       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
   10273       11596 :       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
   10274             : 
   10275             :   // No valid opcode, do not combine.
   10276       43058 :   if (!HasFMAD && !HasFMA)
   10277       25817 :     return SDValue();
   10278             : 
   10279       17241 :   SDNodeFlags Flags = N->getFlags();
   10280       17241 :   bool CanFuse = Options.UnsafeFPMath || isContractable(N);
   10281       17241 :   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
   10282       17241 :                               CanFuse || HasFMAD);
   10283             :   // If the addition is not contractable, do not combine.
   10284             :   if (!AllowFusionGlobally && !isContractable(N))
   10285        9443 :     return SDValue();
   10286             : 
   10287       15596 :   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
   10288        7798 :   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
   10289          56 :     return SDValue();
   10290             : 
   10291             :   // Always prefer FMAD to FMA for precision.
   10292        7742 :   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
   10293        7742 :   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
   10294             : 
   10295             :   // Is the node an FMUL and contractable either due to global flags or
   10296             :   // SDNodeFlags.
   10297             :   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
   10298        9453 :     if (N.getOpcode() != ISD::FMUL)
   10299             :       return false;
   10300        4005 :     return AllowFusionGlobally || isContractable(N.getNode());
   10301             :   };
   10302             :   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
   10303             :   // prefer to fold the multiply with fewer uses.
   10304        7742 :   if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
   10305         224 :     if (N0.getNode()->use_size() > N1.getNode()->use_size())
   10306             :       std::swap(N0, N1);
   10307             :   }
   10308             : 
   10309             :   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
   10310        2012 :   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
   10311        1993 :     return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10312        1993 :                        N0.getOperand(0), N0.getOperand(1), N1, Flags);
   10313             :   }
   10314             : 
   10315             :   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
   10316             :   // Note: Commutes FADD operands.
   10317         326 :   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
   10318         299 :     return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10319         299 :                        N1.getOperand(0), N1.getOperand(1), N0, Flags);
   10320             :   }
   10321             : 
   10322             :   // Look through FP_EXTEND nodes to do more combining.
   10323             : 
   10324             :   // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
   10325        5450 :   if (N0.getOpcode() == ISD::FP_EXTEND) {
   10326          17 :     SDValue N00 = N0.getOperand(0);
   10327           4 :     if (isContractableFMUL(N00) &&
   10328           8 :         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
   10329           3 :       return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10330           3 :                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10331             :                                      N00.getOperand(0)),
   10332             :                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10333           3 :                                      N00.getOperand(1)), N1, Flags);
   10334             :     }
   10335             :   }
   10336             : 
   10337             :   // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
   10338             :   // Note: Commutes FADD operands.
   10339        5447 :   if (N1.getOpcode() == ISD::FP_EXTEND) {
   10340          16 :     SDValue N10 = N1.getOperand(0);
   10341           4 :     if (isContractableFMUL(N10) &&
   10342           8 :         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
   10343           3 :       return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10344           3 :                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10345             :                                      N10.getOperand(0)),
   10346             :                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10347           3 :                                      N10.getOperand(1)), N0, Flags);
   10348             :     }
   10349             :   }
   10350             : 
   10351             :   // More folding opportunities when target permits.
   10352        5444 :   if (Aggressive) {
   10353             :     // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
   10354         322 :     if (CanFuse &&
   10355         322 :         N0.getOpcode() == PreferredFusedOpcode &&
   10356          20 :         N0.getOperand(2).getOpcode() == ISD::FMUL &&
   10357        2476 :         N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
   10358           7 :       return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10359             :                          N0.getOperand(0), N0.getOperand(1),
   10360             :                          DAG.getNode(PreferredFusedOpcode, SL, VT,
   10361             :                                      N0.getOperand(2).getOperand(0),
   10362             :                                      N0.getOperand(2).getOperand(1),
   10363          28 :                                      N1, Flags), Flags);
   10364             :     }
   10365             : 
   10366             :     // fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x))
   10367         315 :     if (CanFuse &&
   10368         630 :         N1->getOpcode() == PreferredFusedOpcode &&
   10369          20 :         N1.getOperand(2).getOpcode() == ISD::FMUL &&
   10370        2469 :         N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
   10371           2 :       return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10372             :                          N1.getOperand(0), N1.getOperand(1),
   10373             :                          DAG.getNode(PreferredFusedOpcode, SL, VT,
   10374             :                                      N1.getOperand(2).getOperand(0),
   10375             :                                      N1.getOperand(2).getOperand(1),
   10376           2 :                                      N0, Flags), Flags);
   10377             :     }
   10378             : 
   10379             : 
   10380             :     // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
   10381             :     //   -> (fma x, y, (fma (fpext u), (fpext v), z))
   10382             :     auto FoldFAddFMAFPExtFMul = [&] (
   10383             :       SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
   10384             :       SDNodeFlags Flags) {
   10385             :       return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
   10386             :                          DAG.getNode(PreferredFusedOpcode, SL, VT,
   10387             :                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
   10388             :                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
   10389             :                                      Z, Flags), Flags);
   10390        2467 :     };
   10391        2467 :     if (N0.getOpcode() == PreferredFusedOpcode) {
   10392          86 :       SDValue N02 = N0.getOperand(2);
   10393          86 :       if (N02.getOpcode() == ISD::FP_EXTEND) {
   10394           8 :         SDValue N020 = N02.getOperand(0);
   10395           8 :         if (isContractableFMUL(N020) &&
   10396          16 :             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
   10397             :           return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
   10398             :                                       N020.getOperand(0), N020.getOperand(1),
   10399           6 :                                       N1, Flags);
   10400             :         }
   10401             :       }
   10402             :     }
   10403             : 
   10404             :     // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
   10405             :     //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
   10406             :     // FIXME: This turns two single-precision and one double-precision
   10407             :     // operation into two double-precision operations, which might not be
   10408             :     // interesting for all targets, especially GPUs.
   10409             :     auto FoldFAddFPExtFMAFMul = [&] (
   10410             :       SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
   10411             :       SDNodeFlags Flags) {
   10412             :       return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10413             :                          DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
   10414             :                          DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
   10415             :                          DAG.getNode(PreferredFusedOpcode, SL, VT,
   10416             :                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
   10417             :                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
   10418             :                                      Z, Flags), Flags);
   10419        2461 :     };
   10420        2461 :     if (N0.getOpcode() == ISD::FP_EXTEND) {
   10421          14 :       SDValue N00 = N0.getOperand(0);
   10422          14 :       if (N00.getOpcode() == PreferredFusedOpcode) {
   10423           4 :         SDValue N002 = N00.getOperand(2);
   10424           4 :         if (isContractableFMUL(N002) &&
   10425           8 :             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
   10426             :           return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
   10427             :                                       N002.getOperand(0), N002.getOperand(1),
   10428           4 :                                       N1, Flags);
   10429             :         }
   10430             :       }
   10431             :     }
   10432             : 
   10433             :     // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
   10434             :     //   -> (fma y, z, (fma (fpext u), (fpext v), x))
   10435        2457 :     if (N1.getOpcode() == PreferredFusedOpcode) {
   10436          62 :       SDValue N12 = N1.getOperand(2);
   10437          62 :       if (N12.getOpcode() == ISD::FP_EXTEND) {
   10438           6 :         SDValue N120 = N12.getOperand(0);
   10439           6 :         if (isContractableFMUL(N120) &&
   10440          12 :             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
   10441             :           return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
   10442             :                                       N120.getOperand(0), N120.getOperand(1),
   10443           5 :                                       N0, Flags);
   10444             :         }
   10445             :       }
   10446             :     }
   10447             : 
   10448             :     // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
   10449             :     //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
   10450             :     // FIXME: This turns two single-precision and one double-precision
   10451             :     // operation into two double-precision operations, which might not be
   10452             :     // interesting for all targets, especially GPUs.
   10453        2452 :     if (N1.getOpcode() == ISD::FP_EXTEND) {
   10454          13 :       SDValue N10 = N1.getOperand(0);
   10455          13 :       if (N10.getOpcode() == PreferredFusedOpcode) {
   10456           4 :         SDValue N102 = N10.getOperand(2);
   10457           4 :         if (isContractableFMUL(N102) &&
   10458           8 :             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
   10459             :           return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
   10460             :                                       N102.getOperand(0), N102.getOperand(1),
   10461           4 :                                       N0, Flags);
   10462             :         }
   10463             :       }
   10464             :     }
   10465             :   }
   10466             : 
   10467        5416 :   return SDValue();
   10468             : }
   10469             : 
   10470             : /// Try to perform FMA combining on a given FSUB node.
   10471       12480 : SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
   10472       12480 :   SDValue N0 = N->getOperand(0);
   10473       12480 :   SDValue N1 = N->getOperand(1);
   10474       24960 :   EVT VT = N->getValueType(0);
   10475             :   SDLoc SL(N);
   10476             : 
   10477       12480 :   const TargetOptions &Options = DAG.getTarget().Options;
   10478             :   // Floating-point multiply-add with intermediate rounding.
   10479       12480 :   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
   10480             : 
   10481             :   // Floating-point multiply-add without intermediate rounding.
   10482             :   bool HasFMA =
   10483       12480 :       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
   10484        2945 :       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
   10485             : 
   10486             :   // No valid opcode, do not combine.
   10487       12480 :   if (!HasFMAD && !HasFMA)
   10488        8831 :     return SDValue();
   10489             : 
   10490        3649 :   const SDNodeFlags Flags = N->getFlags();
   10491        3649 :   bool CanFuse = Options.UnsafeFPMath || isContractable(N);
   10492        3649 :   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
   10493        3649 :                               CanFuse || HasFMAD);
   10494             : 
   10495             :   // If the subtraction is not contractable, do not combine.
   10496             :   if (!AllowFusionGlobally && !isContractable(N))
   10497        1553 :     return SDValue();
   10498             : 
   10499        4192 :   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
   10500        2096 :   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
   10501          58 :     return SDValue();
   10502             : 
   10503             :   // Always prefer FMAD to FMA for precision.
   10504        2038 :   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
   10505        2038 :   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
   10506             : 
   10507             :   // Is the node an FMUL and contractable either due to global flags or
   10508             :   // SDNodeFlags.
   10509             :   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
   10510        2249 :     if (N.getOpcode() != ISD::FMUL)
   10511             :       return false;
   10512        1019 :     return AllowFusionGlobally || isContractable(N.getNode());
   10513             :   };
   10514             : 
   10515             :   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
   10516         306 :   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
   10517         298 :     return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10518             :                        N0.getOperand(0), N0.getOperand(1),
   10519         298 :                        DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
   10520             :   }
   10521             : 
   10522             :   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
   10523             :   // Note: Commutes FSUB operands.
   10524         569 :   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
   10525         539 :     return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10526             :                        DAG.getNode(ISD::FNEG, SL, VT,
   10527             :                                    N1.getOperand(0)),
   10528         539 :                        N1.getOperand(1), N0, Flags);
   10529             :   }
   10530             : 
   10531             :   // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
   10532        1288 :   if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
   10533          66 :       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
   10534          87 :     SDValue N00 = N0.getOperand(0).getOperand(0);
   10535          87 :     SDValue N01 = N0.getOperand(0).getOperand(1);
   10536          87 :     return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10537          87 :                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
   10538          87 :                        DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
   10539             :   }
   10540             : 
   10541             :   // Look through FP_EXTEND nodes to do more combining.
   10542             : 
   10543             :   // fold (fsub (fpext (fmul x, y)), z)
   10544             :   //   -> (fma (fpext x), (fpext y), (fneg z))
   10545        1114 :   if (N0.getOpcode() == ISD::FP_EXTEND) {
   10546          23 :     SDValue N00 = N0.getOperand(0);
   10547           8 :     if (isContractableFMUL(N00) &&
   10548          16 :         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
   10549           5 :       return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10550           5 :                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10551             :                                      N00.getOperand(0)),
   10552           5 :                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10553             :                                      N00.getOperand(1)),
   10554           5 :                          DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
   10555             :     }
   10556             :   }
   10557             : 
   10558             :   // fold (fsub x, (fpext (fmul y, z)))
   10559             :   //   -> (fma (fneg (fpext y)), (fpext z), x)
   10560             :   // Note: Commutes FSUB operands.
   10561        1109 :   if (N1.getOpcode() == ISD::FP_EXTEND) {
   10562          16 :     SDValue N10 = N1.getOperand(0);
   10563           6 :     if (isContractableFMUL(N10) &&
   10564          12 :         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
   10565           3 :       return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10566           3 :                          DAG.getNode(ISD::FNEG, SL, VT,
   10567             :                                      DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10568             :                                                  N10.getOperand(0))),
   10569             :                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10570             :                                      N10.getOperand(1)),
   10571           3 :                          N0, Flags);
   10572             :     }
   10573             :   }
   10574             : 
   10575             :   // fold (fsub (fpext (fneg (fmul x, y))), z)
   10576             :   //   -> (fneg (fma (fpext x), (fpext y), z))
   10577             :   // Note: This could be removed with appropriate canonicalization of the
   10578             :   // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
   10579             :   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
   10580             :   // from implementing the canonicalization in visitFSUB.
   10581        1106 :   if (N0.getOpcode() == ISD::FP_EXTEND) {
   10582          18 :     SDValue N00 = N0.getOperand(0);
   10583          18 :     if (N00.getOpcode() == ISD::FNEG) {
   10584           2 :       SDValue N000 = N00.getOperand(0);
   10585           2 :       if (isContractableFMUL(N000) &&
   10586           4 :           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
   10587           2 :         return DAG.getNode(ISD::FNEG, SL, VT,
   10588             :                            DAG.getNode(PreferredFusedOpcode, SL, VT,
   10589           2 :                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10590             :                                                    N000.getOperand(0)),
   10591             :                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10592             :                                                    N000.getOperand(1)),
   10593           2 :                                        N1, Flags));
   10594             :       }
   10595             :     }
   10596             :   }
   10597             : 
   10598             :   // fold (fsub (fneg (fpext (fmul x, y))), z)
   10599             :   //   -> (fneg (fma (fpext x), (fpext y), z))
   10600             :   // Note: This could be removed with appropriate canonicalization of the
   10601             :   // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
   10602             :   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
   10603             :   // from implementing the canonicalization in visitFSUB.
   10604        1104 :   if (N0.getOpcode() == ISD::FNEG) {
   10605          30 :     SDValue N00 = N0.getOperand(0);
   10606          30 :     if (N00.getOpcode() == ISD::FP_EXTEND) {
   10607           2 :       SDValue N000 = N00.getOperand(0);
   10608           2 :       if (isContractableFMUL(N000) &&
   10609           4 :           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
   10610           2 :         return DAG.getNode(ISD::FNEG, SL, VT,
   10611             :                            DAG.getNode(PreferredFusedOpcode, SL, VT,
   10612           2 :                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10613             :                                                    N000.getOperand(0)),
   10614             :                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10615             :                                                    N000.getOperand(1)),
   10616           2 :                                        N1, Flags));
   10617             :       }
   10618             :     }
   10619             :   }
   10620             : 
   10621             :   // More folding opportunities when target permits.
   10622        1102 :   if (Aggressive) {
   10623             :     // fold (fsub (fma x, y, (fmul u, v)), z)
   10624             :     //   -> (fma x, y, (fma u, v, (fneg z)))
   10625          87 :     if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
   10626         596 :         isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
   10627          11 :         N0.getOperand(2)->hasOneUse()) {
   10628           7 :       return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10629             :                          N0.getOperand(0), N0.getOperand(1),
   10630             :                          DAG.getNode(PreferredFusedOpcode, SL, VT,
   10631             :                                      N0.getOperand(2).getOperand(0),
   10632           7 :                                      N0.getOperand(2).getOperand(1),
   10633             :                                      DAG.getNode(ISD::FNEG, SL, VT,
   10634           7 :                                                  N1), Flags), Flags);
   10635             :     }
   10636             : 
   10637             :     // fold (fsub x, (fma y, z, (fmul u, v)))
   10638             :     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
   10639         572 :     if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
   10640           6 :         isContractableFMUL(N1.getOperand(2))) {
   10641           4 :       SDValue N20 = N1.getOperand(2).getOperand(0);
   10642           4 :       SDValue N21 = N1.getOperand(2).getOperand(1);
   10643           4 :       return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10644           4 :                          DAG.getNode(ISD::FNEG, SL, VT,
   10645             :                                      N1.getOperand(0)),
   10646             :                          N1.getOperand(1),
   10647             :                          DAG.getNode(PreferredFusedOpcode, SL, VT,
   10648             :                                      DAG.getNode(ISD::FNEG, SL, VT, N20),
   10649           4 :                                      N21, N0, Flags), Flags);
   10650             :     }
   10651             : 
   10652             : 
   10653             :     // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
   10654             :     //   -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
   10655         568 :     if (N0.getOpcode() == PreferredFusedOpcode) {
   10656          30 :       SDValue N02 = N0.getOperand(2);
   10657          30 :       if (N02.getOpcode() == ISD::FP_EXTEND) {
   10658           6 :         SDValue N020 = N02.getOperand(0);
   10659           6 :         if (isContractableFMUL(N020) &&
   10660          12 :             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
   10661           5 :           return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10662             :                              N0.getOperand(0), N0.getOperand(1),
   10663             :                              DAG.getNode(PreferredFusedOpcode, SL, VT,
   10664           5 :                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10665             :                                                      N020.getOperand(0)),
   10666           5 :                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10667             :                                                      N020.getOperand(1)),
   10668             :                                          DAG.getNode(ISD::FNEG, SL, VT,
   10669           5 :                                                      N1), Flags), Flags);
   10670             :         }
   10671             :       }
   10672             :     }
   10673             : 
   10674             :     // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
   10675             :     //   -> (fma (fpext x), (fpext y),
   10676             :     //           (fma (fpext u), (fpext v), (fneg z)))
   10677             :     // FIXME: This turns two single-precision and one double-precision
   10678             :     // operation into two double-precision operations, which might not be
   10679             :     // interesting for all targets, especially GPUs.
   10680         563 :     if (N0.getOpcode() == ISD::FP_EXTEND) {
   10681          16 :       SDValue N00 = N0.getOperand(0);
   10682          16 :       if (N00.getOpcode() == PreferredFusedOpcode) {
   10683           4 :         SDValue N002 = N00.getOperand(2);
   10684           4 :         if (isContractableFMUL(N002) &&
   10685           8 :             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
   10686           4 :           return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10687           4 :                              DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10688             :                                          N00.getOperand(0)),
   10689           4 :                              DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10690             :                                          N00.getOperand(1)),
   10691             :                              DAG.getNode(PreferredFusedOpcode, SL, VT,
   10692           4 :                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10693             :                                                      N002.getOperand(0)),
   10694           4 :                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10695             :                                                      N002.getOperand(1)),
   10696             :                                          DAG.getNode(ISD::FNEG, SL, VT,
   10697           4 :                                                      N1), Flags), Flags);
   10698             :         }
   10699             :       }
   10700             :     }
   10701             : 
   10702             :     // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
   10703             :     //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
   10704         559 :     if (N1.getOpcode() == PreferredFusedOpcode &&
   10705          20 :         N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
   10706           6 :       SDValue N120 = N1.getOperand(2).getOperand(0);
   10707           6 :       if (isContractableFMUL(N120) &&
   10708          12 :           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
   10709           5 :         SDValue N1200 = N120.getOperand(0);
   10710           5 :         SDValue N1201 = N120.getOperand(1);
   10711           5 :         return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10712           5 :                            DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
   10713             :                            N1.getOperand(1),
   10714             :                            DAG.getNode(PreferredFusedOpcode, SL, VT,
   10715           5 :                                        DAG.getNode(ISD::FNEG, SL, VT,
   10716             :                                                    DAG.getNode(ISD::FP_EXTEND, SL,
   10717             :                                                                VT, N1200)),
   10718             :                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10719             :                                                    N1201),
   10720           5 :                                        N0, Flags), Flags);
   10721             :       }
   10722             :     }
   10723             : 
   10724             :     // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
   10725             :     //   -> (fma (fneg (fpext y)), (fpext z),
   10726             :     //           (fma (fneg (fpext u)), (fpext v), x))
   10727             :     // FIXME: This turns two single-precision and one double-precision
   10728             :     // operation into two double-precision operations, which might not be
   10729             :     // interesting for all targets, especially GPUs.
   10730         554 :     if (N1.getOpcode() == ISD::FP_EXTEND &&
   10731          13 :         N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
   10732           4 :       SDValue CvtSrc = N1.getOperand(0);
   10733           4 :       SDValue N100 = CvtSrc.getOperand(0);
   10734           4 :       SDValue N101 = CvtSrc.getOperand(1);
   10735           4 :       SDValue N102 = CvtSrc.getOperand(2);
   10736           4 :       if (isContractableFMUL(N102) &&
   10737           8 :           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
   10738           4 :         SDValue N1020 = N102.getOperand(0);
   10739           4 :         SDValue N1021 = N102.getOperand(1);
   10740           4 :         return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10741           4 :                            DAG.getNode(ISD::FNEG, SL, VT,
   10742             :                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10743             :                                                    N100)),
   10744           4 :                            DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
   10745             :                            DAG.getNode(PreferredFusedOpcode, SL, VT,
   10746           4 :                                        DAG.getNode(ISD::FNEG, SL, VT,
   10747             :                                                    DAG.getNode(ISD::FP_EXTEND, SL,
   10748             :                                                                VT, N1020)),
   10749             :                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10750             :                                                    N1021),
   10751           4 :                                        N0, Flags), Flags);
   10752             :       }
   10753             :     }
   10754             :   }
   10755             : 
   10756        1073 :   return SDValue();
   10757             : }
   10758             : 
   10759             : /// Try to perform FMA combining on a given FMUL node based on the distributive
   10760             : /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
   10761             : /// subtraction instead of addition).
   10762       26193 : SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
   10763       26193 :   SDValue N0 = N->getOperand(0);
   10764       26193 :   SDValue N1 = N->getOperand(1);
   10765       52386 :   EVT VT = N->getValueType(0);
   10766             :   SDLoc SL(N);
   10767       26193 :   const SDNodeFlags Flags = N->getFlags();
   10768             : 
   10769             :   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
   10770             : 
   10771       26193 :   const TargetOptions &Options = DAG.getTarget().Options;
   10772             : 
   10773             :   // The transforms below are incorrect when x == 0 and y == inf, because the
   10774             :   // intermediate multiplication produces a nan.
   10775       26193 :   if (!Options.NoInfsFPMath)
   10776       25719 :     return SDValue();
   10777             : 
   10778             :   // Floating-point multiply-add without intermediate rounding.
   10779             :   bool HasFMA =
   10780         699 :       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
   10781         699 :       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
   10782         349 :       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
   10783             : 
   10784             :   // Floating-point multiply-add with intermediate rounding. This can result
   10785             :   // in a less precise result due to the changed rounding order.
   10786         796 :   bool HasFMAD = Options.UnsafeFPMath &&
   10787         474 :                  (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
   10788             : 
   10789             :   // No valid opcode, do not combine.
   10790         474 :   if (!HasFMAD && !HasFMA)
   10791         125 :     return SDValue();
   10792             : 
   10793             :   // Always prefer FMAD to FMA for precision.
   10794         349 :   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
   10795         349 :   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
   10796             : 
   10797             :   // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
   10798             :   // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
   10799             :   auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
   10800             :     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
   10801             :       if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
   10802             :         if (C->isExactlyValue(+1.0))
   10803             :           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
   10804             :                              Y, Flags);
   10805             :         if (C->isExactlyValue(-1.0))
   10806             :           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
   10807             :                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
   10808             :       }
   10809             :     }
   10810             :     return SDValue();
   10811         349 :   };
   10812             : 
   10813         349 :   if (SDValue FMA = FuseFADD(N0, N1, Flags))
   10814          18 :     return FMA;
   10815         331 :   if (SDValue FMA = FuseFADD(N1, N0, Flags))
   10816          26 :     return FMA;
   10817             : 
   10818             :   // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
   10819             :   // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
   10820             :   // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
   10821             :   // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
   10822             :   auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
   10823             :     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
   10824             :       if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
   10825             :         if (C0->isExactlyValue(+1.0))
   10826             :           return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10827             :                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
   10828             :                              Y, Flags);
   10829             :         if (C0->isExactlyValue(-1.0))
   10830             :           return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10831             :                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
   10832             :                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
   10833             :       }
   10834             :       if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
   10835             :         if (C1->isExactlyValue(+1.0))
   10836             :           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
   10837             :                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
   10838             :         if (C1->isExactlyValue(-1.0))
   10839             :           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
   10840             :                              Y, Flags);
   10841             :       }
   10842             :     }
   10843             :     return SDValue();
   10844         305 :   };
   10845             : 
   10846         305 :   if (SDValue FMA = FuseFSUB(N0, N1, Flags))
   10847          36 :     return FMA;
   10848         269 :   if (SDValue FMA = FuseFSUB(N1, N0, Flags))
   10849          86 :     return FMA;
   10850             : 
   10851         183 :   return SDValue();
   10852             : }
   10853             : // Combines for FADD node N; returns an empty SDValue when no fold applies.
   10854       43573 : SDValue DAGCombiner::visitFADD(SDNode *N) {
   10855       43573 :   SDValue N0 = N->getOperand(0);
   10856       43573 :   SDValue N1 = N->getOperand(1);
   10857       43573 :   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
   10858       43573 :   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
   10859       87146 :   EVT VT = N->getValueType(0);
   10860             :   SDLoc DL(N);
   10861       43573 :   const TargetOptions &Options = DAG.getTarget().Options;
   10862       43573 :   const SDNodeFlags Flags = N->getFlags();
   10863             : 
   10864             :   // fold vector ops
   10865       43573 :   if (VT.isVector())
   10866       14088 :     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   10867           6 :       return FoldedVOp;
   10868             : 
   10869             :   // fold (fadd c1, c2) -> c1 + c2
   10870       43567 :   if (N0CFP && N1CFP)
   10871           3 :     return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
   10872             : 
   10873             :   // canonicalize constant to RHS
   10874       43564 :   if (N0CFP && !N1CFP)
   10875          84 :     return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
   10876             : 
   10877             :   // N0 + -0.0 --> N0 (+0.0 too, but only with UnsafeFPMath or nsz)
   10878       43480 :   ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
   10879       48522 :   if (N1C && N1C->isZero())
   10880         718 :     if (N1C->isNegative() || Options.UnsafeFPMath || Flags.hasNoSignedZeros())
   10881          27 :       return N0;
   10882             : 
   10883       43453 :   if (SDValue NewSel = foldBinOpIntoSelect(N))
   10884           5 :     return NewSel;
   10885             :   // NOTE(review): '== 2' below presumably means cheaper when negated; confirm.
   10886             :   // fold (fadd A, (fneg B)) -> (fsub A, B)
   10887       83971 :   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
   10888       40523 :       isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
   10889         144 :     return DAG.getNode(ISD::FSUB, DL, VT, N0,
   10890         144 :                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
   10891             : 
   10892             :   // fold (fadd (fneg A), B) -> (fsub B, A)
   10893       83683 :   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
   10894       40379 :       isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
   10895         178 :     return DAG.getNode(ISD::FSUB, DL, VT, N1,
   10896         178 :                        GetNegatedExpression(N0, DAG, LegalOperations), Flags);
   10897             :   // Matches a single-use FMUL whose operand 1 is the FP constant -2.0.
   10898             :   auto isFMulNegTwo = [](SDValue FMul) {
   10899             :     if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
   10900             :       return false;
   10901             :     auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
   10902             :     return C && C->isExactlyValue(-2.0);
   10903             :   };
   10904             : 
   10905             :   // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
   10906       43126 :   if (isFMulNegTwo(N0)) {
   10907          15 :     SDValue B = N0.getOperand(0);
   10908          15 :     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
   10909          15 :     return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags);
   10910             :   }
   10911             :   // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
   10912       43111 :   if (isFMulNegTwo(N1)) {
   10913          15 :     SDValue B = N1.getOperand(0);
   10914          15 :     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
   10915          15 :     return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags);
   10916             :   }
   10917             : 
   10918             :   // No FP constant should be created after legalization as Instruction
   10919             :   // Selection pass has a hard time dealing with FP constants.
   10920       43096 :   bool AllowNewConst = (Level < AfterLegalizeDAG);
   10921             : 
   10922             :   // With 'unsafe math' or nnan (and new constants allowed), -x + x is 0.0.
   10923       43096 :   if ((Options.UnsafeFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
   10924             :     // If allowed, fold (fadd (fneg x), x) -> 0.0
   10925        2714 :     if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
   10926           0 :       return DAG.getConstantFP(0.0, DL, VT);
   10927             : 
   10928             :     // If allowed, fold (fadd x, (fneg x)) -> 0.0
   10929        2714 :     if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
   10930           0 :       return DAG.getConstantFP(0.0, DL, VT);
   10931             :   }
   10932             : 
   10933             :   // If 'unsafe math' or reassoc and nsz, fold lots of things.
   10934             :   // TODO: break out portions of the transformations below for which Unsafe is
   10935             :   //       considered and which do not require both nsz and reassoc
   10936       40605 :   if ((Options.UnsafeFPMath ||
   10937       44423 :        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
   10938             :       AllowNewConst) {
   10939             :     // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
   10940        2476 :     if (N1CFP && N0.getOpcode() == ISD::FADD &&
   10941           7 :         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
   10942          14 :       SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
   10943          14 :       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
   10944             :     }
   10945             : 
   10946             :     // We can fold chains of FADD's of the same value into multiplications.
   10947             :     // This transform is not safe in general because we are reducing the number
   10948             :     // of rounding steps.
   10949        4628 :     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
   10950        2031 :       if (N0.getOpcode() == ISD::FMUL) {
   10951         405 :         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
   10952         405 :         bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
   10953             : 
   10954             :         // (fadd (fmul x, c), x) -> (fmul x, c+1)
   10955         405 :         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
   10956           7 :           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
   10957           7 :                                        DAG.getConstantFP(1.0, DL, VT), Flags);
   10958           7 :           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
   10959             :         }
   10960             : 
   10961             :         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
   10962          52 :         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
   10963         402 :             N1.getOperand(0) == N1.getOperand(1) &&
   10964           4 :             N0.getOperand(0) == N1.getOperand(0)) {
   10965           4 :           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
   10966           4 :                                        DAG.getConstantFP(2.0, DL, VT), Flags);
   10967           8 :           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
   10968             :         }
   10969             :       }
   10970             : 
   10971        2020 :       if (N1.getOpcode() == ISD::FMUL) {
   10972         541 :         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
   10973         541 :         bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
   10974             : 
   10975             :         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
   10976         541 :         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
   10977           4 :           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
   10978           4 :                                        DAG.getConstantFP(1.0, DL, VT), Flags);
   10979           4 :           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
   10980             :         }
   10981             : 
   10982             :         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
   10983          15 :         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
   10984         541 :             N0.getOperand(0) == N0.getOperand(1) &&
   10985           4 :             N1.getOperand(0) == N0.getOperand(0)) {
   10986           4 :           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
   10987           4 :                                        DAG.getConstantFP(2.0, DL, VT), Flags);
   10988           8 :           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
   10989             :         }
   10990             :       }
   10991             : 
   10992        2012 :       if (N0.getOpcode() == ISD::FADD) {
   10993         512 :         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
   10994             :         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
   10995         512 :         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
   10996             :             (N0.getOperand(0) == N1)) {
   10997           5 :           return DAG.getNode(ISD::FMUL, DL, VT,
   10998           5 :                              N1, DAG.getConstantFP(3.0, DL, VT), Flags);
   10999             :         }
   11000             :       }
   11001             : 
   11002        2007 :       if (N1.getOpcode() == ISD::FADD) {
   11003         260 :         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
   11004             :         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
   11005         260 :         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
   11006             :             N1.getOperand(0) == N0) {
   11007           4 :           return DAG.getNode(ISD::FMUL, DL, VT,
   11008           4 :                              N0, DAG.getConstantFP(3.0, DL, VT), Flags);
   11009             :         }
   11010             :       }
   11011             : 
   11012             :       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
   11013         507 :       if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
   11014          58 :           N0.getOperand(0) == N0.getOperand(1) &&
   11015        2006 :           N1.getOperand(0) == N1.getOperand(1) &&
   11016             :           N0.getOperand(0) == N1.getOperand(0)) {
   11017           3 :         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
   11018           3 :                            DAG.getConstantFP(4.0, DL, VT), Flags);
   11019             :       }
   11020             :     }
   11021             :   } // (UnsafeFPMath || (reassoc && nsz)) && AllowNewConst
   11022             : 
   11023             :   // FADD -> FMA combines:
   11024       43058 :   if (SDValue Fused = visitFADDForFMACombine(N)) {
   11025        2326 :     AddToWorklist(Fused.getNode());
   11026        2326 :     return Fused;
   11027             :   }
   11028       40732 :   return SDValue();
   11029             : }
   11030             : // Combines for FSUB node N; returns an empty SDValue when no fold applies.
   11031       12783 : SDValue DAGCombiner::visitFSUB(SDNode *N) {
   11032       12783 :   SDValue N0 = N->getOperand(0);
   11033       12783 :   SDValue N1 = N->getOperand(1);
   11034       12783 :   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
   11035       12783 :   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
   11036       25566 :   EVT VT = N->getValueType(0);
   11037             :   SDLoc DL(N);
   11038       12783 :   const TargetOptions &Options = DAG.getTarget().Options;
   11039       12783 :   const SDNodeFlags Flags = N->getFlags();
   11040             : 
   11041             :   // fold vector ops
   11042       12783 :   if (VT.isVector())
   11043        3276 :     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   11044           1 :       return FoldedVOp;
   11045             : 
   11046             :   // fold (fsub c1, c2) -> c1-c2
   11047       12782 :   if (N0CFP && N1CFP)
   11048           0 :     return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
   11049             : 
   11050       12782 :   if (SDValue NewSel = foldBinOpIntoSelect(N))
   11051          10 :     return NewSel;
   11052             : 
   11053             :   // (fsub A, +0.0) -> A; (fsub A, -0.0) -> A only with UnsafeFPMath or nsz
   11054       13102 :   if (N1CFP && N1CFP->isZero()) {
   11055          29 :     if (!N1CFP->isNegative() || Options.UnsafeFPMath ||
   11056             :         Flags.hasNoSignedZeros()) {
   11057          28 :       return N0;
   11058             :     }
   11059             :   }
   11060             : 
   11061             :   if (N0 == N1) {
   11062             :     // (fsub x, x) -> 0.0
   11063         157 :     if (Options.UnsafeFPMath || Flags.hasNoNaNs())
   11064           9 :       return DAG.getConstantFP(0.0f, DL, VT);
   11065             :   }
   11066             : 
   11067             :   // (fsub -0.0, N1) -> -N1 (+0.0 too with NoSignedZerosFPMath or nsz)
   11068       13992 :   if (N0CFP && N0CFP->isZero()) {
   11069         402 :     if (N0CFP->isNegative() ||
   11070         394 :         (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
   11071          48 :       if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
   11072           3 :         return GetNegatedExpression(N1, DAG, LegalOperations);
   11073          45 :       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
   11074          41 :         return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
   11075             :     }
   11076             :   }
   11077             :   // With UnsafeFPMath, or reassoc + nsz, cancel X against an FADD operand.
   11078       11520 :   if ((Options.UnsafeFPMath ||
   11079          51 :       (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))
   11080       12742 :       && N1.getOpcode() == ISD::FADD) {
   11081             :     // X - (X + Y) -> -Y
   11082          20 :     if (N0 == N1->getOperand(0))
   11083          12 :       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
   11084             :     // X - (Y + X) -> -Y
   11085          14 :     if (N0 == N1->getOperand(1))
   11086          12 :       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags);
   11087             :   }
   11088             : 
   11089             :   // fold (fsub A, (fneg B)) -> (fadd A, B)
   11090       12679 :   if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
   11091         199 :     return DAG.getNode(ISD::FADD, DL, VT, N0,
   11092         199 :                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
   11093             : 
   11094             :   // FSUB -> FMA combines:
   11095       12480 :   if (SDValue Fused = visitFSUBForFMACombine(N)) {
   11096         965 :     AddToWorklist(Fused.getNode());
   11097         965 :     return Fused;
   11098             :   }
   11099             : 
   11100       11515 :   return SDValue();
   11101             : }
   11102             : // Combines for FMUL node N; returns an empty SDValue when no fold applies.
   11103       27386 : SDValue DAGCombiner::visitFMUL(SDNode *N) {
   11104       27386 :   SDValue N0 = N->getOperand(0);
   11105       27386 :   SDValue N1 = N->getOperand(1);
   11106       27386 :   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
   11107       27386 :   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
   11108       54772 :   EVT VT = N->getValueType(0);
   11109             :   SDLoc DL(N);
   11110       27386 :   const TargetOptions &Options = DAG.getTarget().Options;
   11111       27386 :   const SDNodeFlags Flags = N->getFlags();
   11112             : 
   11113             :   // fold vector ops
   11114       27386 :   if (VT.isVector()) {
   11115             :     // This just handles C1 * C2 for vectors. Other vector folds are below.
   11116        8696 :     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   11117           3 :       return FoldedVOp;
   11118             :   }
   11119             : 
   11120             :   // fold (fmul c1, c2) -> c1*c2
   11121       27383 :   if (N0CFP && N1CFP)
   11122           0 :     return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
   11123             : 
   11124             :   // canonicalize constant to RHS
   11125             :   if (isConstantFPBuildVectorOrConstantFP(N0) &&
   11126             :      !isConstantFPBuildVectorOrConstantFP(N1))
   11127         309 :     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
   11128             : 
   11129             :   // fold (fmul A, 1.0) -> A
   11130       27074 :   if (N1CFP && N1CFP->isExactlyValue(1.0))
   11131         338 :     return N0;
   11132             : 
   11133       26736 :   if (SDValue NewSel = foldBinOpIntoSelect(N))
   11134           3 :     return NewSel;
   11135             :   // The zero fold needs UnsafeFPMath, or both nnan and nsz on this node.
   11136       22881 :   if (Options.UnsafeFPMath ||
   11137       29136 :       (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
   11138             :     // fold (fmul A, 0) -> 0
   11139        7192 :     if (N1CFP && N1CFP->isZero())
   11140           6 :       return N1;
   11141             :   }
   11142             :   // Reassociation folds: need UnsafeFPMath or the reassoc flag.
   11143       26727 :   if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
   11144             :     // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
   11145        1369 :     if (isConstantFPBuildVectorOrConstantFP(N1) &&
   11146             :         N0.getOpcode() == ISD::FMUL) {
   11147         251 :       SDValue N00 = N0.getOperand(0);
   11148         251 :       SDValue N01 = N0.getOperand(1);
   11149             :       // Avoid an infinite loop by making sure that N00 is not a constant
   11150             :       // (the inner multiply has not been constant folded yet).
   11151             :       if (isConstantFPBuildVectorOrConstantFP(N01) &&
   11152             :           !isConstantFPBuildVectorOrConstantFP(N00)) {
   11153         177 :         SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
   11154         177 :         return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
   11155             :       }
   11156             :     }
   11157             : 
   11158             :     // Match a special-case: we convert X * 2.0 into fadd.
   11159             :     // fmul (fadd X, X), C -> fmul X, 2.0 * C
   11160        6401 :     if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
   11161         207 :         N0.getOperand(0) == N0.getOperand(1)) {
   11162          25 :       const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
   11163          25 :       SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
   11164          50 :       return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
   11165             :     }
   11166             :   }
   11167             : 
   11168             :   // fold (fmul X, 2.0) -> (fadd X, X)
   11169       26525 :   if (N1CFP && N1CFP->isExactlyValue(+2.0))
   11170         239 :     return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
   11171             : 
   11172             :   // fold (fmul X, -1.0) -> (fneg X)
   11173       26286 :   if (N1CFP && N1CFP->isExactlyValue(-1.0))
   11174          57 :     if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
   11175         114 :       return DAG.getNode(ISD::FNEG, DL, VT, N0);
   11176             : 
   11177             :   // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
   11178       26229 :   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
   11179        1197 :     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
   11180             :       // Both can be negated for free, check to see if at least one is cheaper
   11181             :       // negated.
   11182         207 :       if (LHSNeg == 2 || RHSNeg == 2)
   11183          34 :         return DAG.getNode(ISD::FMUL, DL, VT,
   11184          34 :                            GetNegatedExpression(N0, DAG, LegalOperations),
   11185          34 :                            GetNegatedExpression(N1, DAG, LegalOperations),
   11186          34 :                            Flags);
   11187             :     }
   11188             :   }
   11189             : 
   11190             :   // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
   11191             :   // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
   11192        4373 :   if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
   11193       30438 :       (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
   11194           2 :       TLI.isOperationLegal(ISD::FABS, VT)) {
   11195           2 :     SDValue Select = N0, X = N1;
   11196           2 :     if (Select.getOpcode() != ISD::SELECT)
   11197             :       std::swap(Select, X);
   11198             : 
   11199           2 :     SDValue Cond = Select.getOperand(0);
   11200             :     auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
   11201             :     auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
   11202             : 
   11203           2 :     if (TrueOpnd && FalseOpnd &&
   11204           2 :         Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
   11205           4 :         isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
   11206           2 :         cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
   11207           2 :       ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
   11208             :       switch (CC) {
   11209             :       default: break;
   11210             :       case ISD::SETOLT:
   11211             :       case ISD::SETULT:
   11212             :       case ISD::SETOLE:
   11213             :       case ISD::SETULE:
   11214             :       case ISD::SETLT:
   11215             :       case ISD::SETLE:
   11216             :         std::swap(TrueOpnd, FalseOpnd); // less-than: mirror the select arms
   11217             :         LLVM_FALLTHROUGH;
   11218           2 :       case ISD::SETOGT:
   11219             :       case ISD::SETUGT:
   11220             :       case ISD::SETOGE:
   11221             :       case ISD::SETUGE:
   11222             :       case ISD::SETGT:
   11223             :       case ISD::SETGE:
   11224           2 :         if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
   11225           1 :             TLI.isOperationLegal(ISD::FNEG, VT))
   11226           1 :           return DAG.getNode(ISD::FNEG, DL, VT,
   11227           1 :                    DAG.getNode(ISD::FABS, DL, VT, X));
   11228           1 :         if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
   11229           2 :           return DAG.getNode(ISD::FABS, DL, VT, X);
   11230             : 
   11231             :         break;
   11232             :       }
   11233             :     }
   11234             :   }
   11235             : 
   11236             :   // FMUL -> FMA combines:
   11237       26193 :   if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
   11238         166 :     AddToWorklist(Fused.getNode());
   11239         166 :     return Fused;
   11240             :   }
   11241             : 
   11242       26027 :   return SDValue();
   11243             : }
   11244             : // Combines for FMA node N; returns an empty SDValue when no fold applies.
   11245        9771 : SDValue DAGCombiner::visitFMA(SDNode *N) {
   11246        9771 :   SDValue N0 = N->getOperand(0);
   11247        9771 :   SDValue N1 = N->getOperand(1);
   11248        9771 :   SDValue N2 = N->getOperand(2);
   11249             :   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   11250             :   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
   11251       19542 :   EVT VT = N->getValueType(0);
   11252             :   SDLoc DL(N);
   11253        9771 :   const TargetOptions &Options = DAG.getTarget().Options;
   11254             :   // Note the local UnsafeFPMath below is also true when isContractable(N).
   11255             :   // FMA nodes have flags that propagate to the created nodes.
   11256        9771 :   const SDNodeFlags Flags = N->getFlags();
   11257        9771 :   bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);
   11258             : 
   11259             :   // Constant fold FMA.
   11260             :   if (isa<ConstantFPSDNode>(N0) &&
   11261             :       isa<ConstantFPSDNode>(N1) &&
   11262             :       isa<ConstantFPSDNode>(N2)) {
   11263           0 :     return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
   11264             :   }
   11265             :   // (fma 0, x, y) -> y and (fma x, 0, y) -> y
   11266        9771 :   if (UnsafeFPMath) {
   11267        2366 :     if (N0CFP && N0CFP->isZero())
   11268           0 :       return N2;
   11269        2417 :     if (N1CFP && N1CFP->isZero())
   11270           0 :       return N2;
   11271             :   }
   11272             :   // TODO: The FMA node should have flags that propagate to these nodes.
   11273        9771 :   if (N0CFP && N0CFP->isExactlyValue(1.0))
   11274           0 :     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
   11275        9771 :   if (N1CFP && N1CFP->isExactlyValue(1.0))
   11276          10 :     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
   11277             : 
   11278             :   // Canonicalize (fma c, x, y) -> (fma x, c, y)
   11279             :   if (isConstantFPBuildVectorOrConstantFP(N0) &&
   11280             :      !isConstantFPBuildVectorOrConstantFP(N1))
   11281         132 :     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
   11282             : 
   11283        9700 :   if (UnsafeFPMath) {
   11284             :     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
   11285         110 :     if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
   11286        2350 :         isConstantFPBuildVectorOrConstantFP(N1) &&
   11287          17 :         isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
   11288          17 :       return DAG.getNode(ISD::FMUL, DL, VT, N0,
   11289             :                          DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
   11290          17 :                                      Flags), Flags);
   11291             :     }
   11292             : 
   11293             :     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
   11294             :     if (N0.getOpcode() == ISD::FMUL &&
   11295        2333 :         isConstantFPBuildVectorOrConstantFP(N1) &&
   11296          17 :         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
   11297          17 :       return DAG.getNode(ISD::FMA, DL, VT,
   11298             :                          N0.getOperand(0),
   11299             :                          DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
   11300             :                                      Flags),
   11301          17 :                          N2);
   11302             :     }
   11303             :   }
   11304             :   // NOTE(review): N1CFP == 1.0 already returned above; that arm looks dead.
   11305             :   // (fma x, 1, y) -> (fadd x, y)
   11306             :   // (fma x, -1, y) -> (fadd y, (fneg x))
   11307        9666 :   if (N1CFP) {
   11308         340 :     if (N1CFP->isExactlyValue(1.0))
   11309             :       // TODO: The FMA node should have flags that propagate to this node.
   11310           0 :       return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
   11311             : 
   11312         340 :     if (N1CFP->isExactlyValue(-1.0) &&
   11313           1 :         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
   11314           2 :       SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
   11315           1 :       AddToWorklist(RHSNeg.getNode());
   11316             :       // TODO: The FMA node should have flags that propagate to this node.
   11317           2 :       return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
   11318             :     }
   11319             : 
   11320             :     // fma (fneg x), K, y -> fma x, -K, y
   11321         339 :     if (N0.getOpcode() == ISD::FNEG &&
   11322          66 :         (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
   11323           0 :          (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT)))) {
   11324          18 :       return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
   11325          18 :                          DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
   11326             :     }
   11327             :   }
   11328             : 
   11329        9647 :   if (UnsafeFPMath) {
   11330             :     // (fma x, c, x) -> (fmul x, (c+1))
   11331        2313 :     if (N1CFP && N0 == N2) {
   11332           1 :       return DAG.getNode(ISD::FMUL, DL, VT, N0,
   11333             :                          DAG.getNode(ISD::FADD, DL, VT, N1,
   11334             :                                      DAG.getConstantFP(1.0, DL, VT), Flags),
   11335           1 :                          Flags);
   11336             :     }
   11337             : 
   11338             :     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
   11339        2312 :     if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
   11340           1 :       return DAG.getNode(ISD::FMUL, DL, VT, N0,
   11341             :                          DAG.getNode(ISD::FADD, DL, VT, N1,
   11342             :                                      DAG.getConstantFP(-1.0, DL, VT), Flags),
   11343           1 :                          Flags);
   11344             :     }
   11345             :   }
   11346             : 
   11347        9645 :   return SDValue();
   11348             : }
   11349             : 
   11350             : // Combine multiple FDIVs with the same divisor into multiple FMULs by the
   11351             : // reciprocal, e.g. (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip).
   11352             : // This is not always beneficial: targets may have different costs for FDIV
   11353             : // and FMUL, so two FDIVs may be cheaper than one FDIV and two FMULs, and the
   11354             : // critical path grows from "one FDIV" to "one FDIV + one FMUL". The target
   11355             : // hook TLI.combineRepeatedFPDivisors() gives the minimum number of FDIV users.
   11356             : // Returns SDValue(N, 0) after rewriting the users, else an empty SDValue.
   11357        9461 : SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
   11358        9461 :   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
   11359        9461 :   const SDNodeFlags Flags = N->getFlags();
   11360        9461 :   if (!UnsafeMath && !Flags.hasAllowReciprocal())
   11361        8954 :     return SDValue();
   11362             : 
   11363             :   // Skip if the current node is itself a reciprocal (1.0 / x).
   11364         507 :   SDValue N0 = N->getOperand(0);
   11365             :   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   11366          87 :   if (N0CFP && N0CFP->isExactlyValue(1.0))
   11367          84 :     return SDValue();
   11368             : 
   11369             :   // Exit early if the target does not want this transform or if there can't
   11370             :   // possibly be enough uses of the divisor to make the transform worthwhile.
   11371         423 :   SDValue N1 = N->getOperand(1);
   11372         423 :   unsigned MinUses = TLI.combineRepeatedFPDivisors();
   11373         794 :   if (!MinUses || N1->use_size() < MinUses)
   11374         396 :     return SDValue();
   11375             : 
   11376             :   // Find all FDIV users of the same divisor.
   11377             :   // Use a set because duplicates may be present in the user list.
   11378          27 :   SetVector<SDNode *> Users;
   11379          86 :   for (auto *U : N1->uses()) {
   11380          59 :     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
   11381             :       // This division is eligible for optimization only if global unsafe math
   11382             :       // is enabled or if this division allows reciprocal formation.
   11383          57 :       if (UnsafeMath || U->getFlags().hasAllowReciprocal())
   11384          53 :         Users.insert(U);
   11385             :     }
   11386             :   }
   11387             : 
   11388             :   // Now that we have the actual number of divisor uses, make sure it meets
   11389             :   // the minimum threshold specified by the target.
   11390          27 :   if (Users.size() < MinUses)
   11391           7 :     return SDValue();
   11392             : 
   11393          40 :   EVT VT = N->getValueType(0);
   11394             :   SDLoc DL(N);
   11395          20 :   SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
   11396          20 :   SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
   11397             : 
   11398             :   // Dividend / Divisor -> Dividend * Reciprocal
   11399          65 :   for (auto *U : Users) {
   11400          45 :     SDValue Dividend = U->getOperand(0);
   11401             :     if (Dividend != FPOne) {
   11402          88 :       SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
   11403          44 :                                     Reciprocal, Flags);
   11404          44 :       CombineTo(U, NewNode);
   11405           1 :     } else if (U != Reciprocal.getNode()) {
   11406             :       // In the absence of fast-math-flags, this user node is always the
   11407             :       // same node as Reciprocal, but with FMF they may be different nodes.
   11408           0 :       CombineTo(U, Reciprocal);
   11409             :     }
   11410             :   }
   11411          20 :   return SDValue(N, 0);  // N was replaced.
   11412             : }
   11413             : // Apply the first matching FDIV fold; returns an empty SDValue if none match.
   11414       10233 : SDValue DAGCombiner::visitFDIV(SDNode *N) {
   11415       10233 :   SDValue N0 = N->getOperand(0);
   11416       10233 :   SDValue N1 = N->getOperand(1);
   11417             :   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   11418             :   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
   11419       20466 :   EVT VT = N->getValueType(0);
   11420             :   SDLoc DL(N);
   11421       10233 :   const TargetOptions &Options = DAG.getTarget().Options;
   11422       10233 :   SDNodeFlags Flags = N->getFlags();
   11423             : 
   11424             :   // fold vector ops
   11425       10233 :   if (VT.isVector())
   11426        1592 :     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   11427           0 :       return FoldedVOp;
   11428             : 
   11429             :   // fold (fdiv c1, c2) -> c1/c2
   11430       10233 :   if (N0CFP && N1CFP)
   11431           6 :     return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
   11432             : 
   11433       10230 :   if (SDValue NewSel = foldBinOpIntoSelect(N))
   11434           8 :     return NewSel;
   11435             : 
   11436       10222 :   if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
   11437             :     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
   11438        1251 :     if (N1CFP) {
   11439             :       // Compute the reciprocal 1.0 / c2.
   11440          47 :       const APFloat &N1APF = N1CFP->getValueAPF();
   11441          47 :       APFloat Recip(N1APF.getSemantics(), 1); // 1.0
   11442          47 :       APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
   11443             :       // Only do the transform if the reciprocal is a legal fp immediate that
   11444             :       // isn't too nasty (e.g. NaN, denormal, ...).
   11445          47 :       if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
   11446          43 :           (!LegalOperations ||
   11447             :            // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
   11448             :            // backend)... we should handle this gracefully after Legalize.
   11449             :            // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
   11450           0 :            TLI.isOperationLegal(ISD::ConstantFP, VT) ||
   11451           0 :            TLI.isFPImmLegal(Recip, VT)))
   11452          43 :         return DAG.getNode(ISD::FMUL, DL, VT, N0,
   11453          43 :                            DAG.getConstantFP(Recip, DL, VT), Flags);
   11454             :     }
   11455             : 
   11456             :     // If this FDIV is part of a reciprocal square root, it may be folded
   11457             :     // into a target-specific square root estimate instruction.
   11458        1208 :     if (N1.getOpcode() == ISD::FSQRT) {
   11459         124 :       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
   11460          55 :         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
   11461             :       }
   11462        1084 :     } else if (N1.getOpcode() == ISD::FP_EXTEND &&
   11463           2 :                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
   11464           2 :       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
   11465           2 :                                           Flags)) {
   11466           2 :         RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
   11467           2 :         AddToWorklist(RV.getNode());
   11468           2 :         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
   11469             :       }
   11470        1082 :     } else if (N1.getOpcode() == ISD::FP_ROUND &&
   11471           2 :                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
   11472           2 :       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
   11473           2 :                                           Flags)) {
   11474           2 :         RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
   11475           2 :         AddToWorklist(RV.getNode());
   11476           2 :         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
   11477             :       }
   11478        1080 :     } else if (N1.getOpcode() == ISD::FMUL) {
   11479             :       // Look through an FMUL. Even though this won't remove the FDIV directly,
   11480             :       // it's still worthwhile to get rid of the FSQRT if possible.
   11481             :       SDValue SqrtOp;
   11482           2 :       SDValue OtherOp;
   11483           4 :       if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
   11484             :         SqrtOp = N1.getOperand(0);
   11485           2 :         OtherOp = N1.getOperand(1);
   11486           0 :       } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
   11487             :         SqrtOp = N1.getOperand(1);
   11488           0 :         OtherOp = N1.getOperand(0);
   11489             :       }
   11490           2 :       if (SqrtOp.getNode()) {
   11491             :         // We found a FSQRT, so try to make this fold:
   11492             :         // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
   11493           2 :         if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
   11494           2 :           RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
   11495           2 :           AddToWorklist(RV.getNode());
   11496           2 :           return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
   11497             :         }
   11498             :       }
   11499             :     }
   11500             : 
   11501             :     // Fold into a reciprocal estimate and multiply instead of a real divide.
   11502        1147 :     if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
   11503         640 :       AddToWorklist(RV.getNode());
   11504         640 :       return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
   11505             :     }
   11506             :   }
   11507             : 
   11508             :   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
   11509        9478 :   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
   11510         280 :     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
   11511             :       // Both can be negated for free, check to see if at least one is cheaper
   11512             :       // negated.
   11513          21 :       if (LHSNeg == 2 || RHSNeg == 2)
   11514          17 :         return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
   11515          17 :                            GetNegatedExpression(N0, DAG, LegalOperations),
   11516          17 :                            GetNegatedExpression(N1, DAG, LegalOperations),
   11517          34 :                            Flags);
   11518             :     }
   11519             :   }
   11520             : 
   11521        9461 :   if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
   11522          20 :     return CombineRepeatedDivisors;
   11523             : 
   11524        9441 :   return SDValue();
   11525             : }
   11526             : // Folds FREM of two FP constants, then tries foldBinOpIntoSelect.
   11527         283 : SDValue DAGCombiner::visitFREM(SDNode *N) {
   11528         283 :   SDValue N0 = N->getOperand(0);
   11529         283 :   SDValue N1 = N->getOperand(1);
   11530             :   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   11531             :   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
   11532         283 :   EVT VT = N->getValueType(0);
   11533             : 
   11534             :   // fold (frem c1, c2) -> fmod(c1,c2)
   11535         283 :   if (N0CFP && N1CFP)
   11536          27 :     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
   11537             : 
   11538         274 :   if (SDValue NewSel = foldBinOpIntoSelect(N))
   11539           7 :     return NewSel;
   11540             : 
   11541         267 :   return SDValue();
   11542             : }
   11543             : // Replaces FSQRT with buildSqrtEstimate() when approximation is permitted.
   11544        2134 : SDValue DAGCombiner::visitFSQRT(SDNode *N) {
   11545        2134 :   SDNodeFlags Flags = N->getFlags();
   11546        2134 :   if (!DAG.getTarget().Options.UnsafeFPMath &&
   11547             :       !Flags.hasApproximateFuncs())
   11548        1849 :     return SDValue(); // Needs unsafe-fp-math or the approximate-funcs flag.
   11549             : 
   11550         285 :   SDValue N0 = N->getOperand(0);
   11551         285 :   if (TLI.isFsqrtCheap(N0, DAG))
   11552          47 :     return SDValue(); // Target reports FSQRT as cheap; keep the node.
   11553             : 
   11554             :   // FSQRT nodes have flags that propagate to the created nodes.
   11555         238 :   return buildSqrtEstimate(N0, Flags);
   11556             : }
   11557             : 
   11558             : /// True if copysign(x, fp_extend(y)) or copysign(x, fp_round(y)) may become
   11559             : /// copysign(x, y); a conversion from f128 to a different type is left alone.
   11560        1429 : static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
   11561        1429 :   SDValue N1 = N->getOperand(1);
   11562        1429 :   if ((N1.getOpcode() == ISD::FP_EXTEND ||
   11563             :        N1.getOpcode() == ISD::FP_ROUND)) {
   11564             :     // Do not optimize out type conversion of f128 type yet.
   11565             :     // For some targets like x86_64, configuration is changed to keep one f128
   11566             :     // value in one SSE register, but instruction selection cannot handle
   11567             :     // FCOPYSIGN on SSE registers yet.
   11568          91 :     EVT N1VT = N1->getValueType(0);
   11569          91 :     EVT N1Op0VT = N1->getOperand(0).getValueType();
   11570           0 :     return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
   11571             :   }
   11572             :   return false;
   11573             : }
   11574             : // FCOPYSIGN combines; returns the folded node or an empty SDValue.
   11575        1440 : SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
   11576        1440 :   SDValue N0 = N->getOperand(0);
   11577        1440 :   SDValue N1 = N->getOperand(1);
   11578             :   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   11579             :   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
   11580        1440 :   EVT VT = N->getValueType(0);
   11581             : 
   11582        1440 :   if (N0CFP && N1CFP) // Constant fold
   11583           0 :     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
   11584             : 
   11585        1440 :   if (N1CFP) {
   11586           1 :     const APFloat &V = N1CFP->getValueAPF();
   11587             :     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
   11588             :     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
   11589           1 :     if (!V.isNegative()) {
   11590           1 :       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
   11591           2 :         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
   11592             :     } else {
   11593           0 :       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
   11594           0 :         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
   11595           0 :                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
   11596             :     }
   11597             :   }
   11598             : 
   11599             :   // copysign(fabs(x), y) -> copysign(x, y)
   11600             :   // copysign(fneg(x), y) -> copysign(x, y)
   11601             :   // copysign(copysign(x,z), y) -> copysign(x, y)
   11602        1439 :   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
   11603             :       N0.getOpcode() == ISD::FCOPYSIGN)
   11604          12 :     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
   11605             : 
   11606             :   // copysign(x, fabs(y)) -> fabs(x)
   11607        1433 :   if (N1.getOpcode() == ISD::FABS)
   11608           4 :     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
   11609             : 
   11610             :   // copysign(x, copysign(y,z)) -> copysign(x, z)
   11611        1431 :   if (N1.getOpcode() == ISD::FCOPYSIGN)
   11612           4 :     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
   11613             : 
   11614             :   // copysign(x, fp_extend(y)) -> copysign(x, y)
   11615             :   // copysign(x, fp_round(y)) -> copysign(x, y)
   11616        1429 :   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
   11617         158 :     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
   11618             : 
   11619        1350 :   return SDValue();
   11620             : }
   11621             : // Folds pow(x, 1/3) -> cbrt(x) and pow(x, 0.25) -> sqrt(sqrt(x)) under FMF.
   11622           0 : SDValue DAGCombiner::visitFPOW(SDNode *N) {
   11623           0 :   ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
   11624           0 :   if (!ExponentC)
   11625           0 :     return SDValue();
   11626             : 
   11627             :   // Try to convert x ** (1/3) into cube root.
   11628             :   // TODO: Handle the various flavors of long double.
   11629             :   // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
   11630             :   //       Some range near 1/3 should be fine.
   11631           0 :   EVT VT = N->getValueType(0);
   11632           0 :   if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
   11633           0 :       (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
   11634             :     // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
   11635             :     // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
   11636             :     // pow(-val, 1/3) =  nan; cbrt(-val) = -num.
   11637             :     // For regular numbers, rounding may cause the results to differ.
   11638             :     // Therefore, we require { nsz ninf nnan afn } for this transform.
   11639             :     // TODO: We could select out the special cases if we don't have nsz/ninf.
   11640           0 :     SDNodeFlags Flags = N->getFlags();
   11641           0 :     if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
   11642             :         !Flags.hasApproximateFuncs())
   11643           0 :       return SDValue();
   11644             : 
   11645             :     // Do not create a cbrt() libcall if the target does not have it, and do not
   11646             :     // turn a pow that has lowering support into a cbrt() libcall.
   11647           0 :     if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
   11648           0 :         (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
   11649             :          DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
   11650           0 :       return SDValue();
   11651             : 
   11652           0 :     return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
   11653             :   }
   11654             : 
   11655             :   // Try to convert x ** (1/4) into square roots.
   11656             :   // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
   11657             :   // TODO: This could be extended (using a target hook) to handle smaller
   11658             :   // power-of-2 fractional exponents.
   11659           0 :   if (ExponentC->getValueAPF().isExactlyValue(0.25)) {
   11660             :     // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
   11661             :     // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) =  NaN.
   11662             :     // For regular numbers, rounding may cause the results to differ.
   11663             :     // Therefore, we require { nsz ninf afn } for this transform.
   11664             :     // TODO: We could select out the special cases if we don't have nsz/ninf.
   11665           0 :     SDNodeFlags Flags = N->getFlags();
   11666           0 :     if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() ||
   11667             :         !Flags.hasApproximateFuncs())
   11668           0 :       return SDValue();
   11669             : 
   11670             :     // Don't double the number of libcalls. We are trying to inline fast code.
   11671           0 :     if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
   11672           0 :       return SDValue();
   11673             : 
   11674             :     // Assume that libcalls are the smallest code.
   11675             :     // TODO: This restriction should probably be lifted for vectors.
   11676           0 :     if (DAG.getMachineFunction().getFunction().optForSize())
   11677           0 :       return SDValue();
   11678             : 
   11679             :     // pow(X, 0.25) --> sqrt(sqrt(X))
   11680             :     SDLoc DL(N);
   11681           0 :     SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
   11682           0 :     return DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
   11683             :   }
   11684             : 
   11685           0 :   return SDValue();
   11686             : }
   11687             : // Folds [us]itofp (fpto[us]i X) --> ftrunc X when the guards below allow it.
   11688       34408 : static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
   11689             :                                const TargetLowering &TLI) {
   11690             :   // This optimization is guarded by a function attribute because it may produce
   11691             :   // unexpected results. I.e., programs may be relying on the platform-specific
   11692             :   // undefined behavior when the float-to-int conversion overflows.
   11693       34408 :   const Function &F = DAG.getMachineFunction().getFunction();
   11694       34408 :   Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
   11695       68801 :   if (StrictOverflow.getValueAsString().equals("false"))
   11696          15 :     return SDValue();
   11697             : 
   11698             :   // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
   11699             :   // replacing casts with a libcall. We also must be allowed to ignore -0.0
   11700             :   // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
   11701             :   // conversions would return +0.0.
   11702             :   // FIXME: We should be able to use node-level FMF here.
   11703             :   // TODO: If strict math, should we use FABS (+ range check for signed cast)?
   11704       68786 :   EVT VT = N->getValueType(0);
   11705       11533 :   if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
   11706       11533 :       !DAG.getTarget().Options.NoSignedZerosFPMath)
   11707       34262 :     return SDValue();
   11708             : 
   11709             :   // fptosi/fptoui round towards zero, so converting from FP to integer and
   11710             :   // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
   11711         131 :   SDValue N0 = N->getOperand(0);
   11712         262 :   if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
   11713          20 :       N0.getOperand(0).getValueType() == VT)
   11714          40 :     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
   11715             : 
   11716         111 :   if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
   11717          19 :       N0.getOperand(0).getValueType() == VT)
   11718          38 :     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
   11719             : 
   11720          92 :   return SDValue();
   11721             : }
   11722             : // SINT_TO_FP combines; returns the folded node or an empty SDValue.
   11723       21933 : SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
   11724       21933 :   SDValue N0 = N->getOperand(0);
   11725       43866 :   EVT VT = N->getValueType(0);
   11726       21933 :   EVT OpVT = N0.getValueType();
   11727             : 
   11728             :   // fold (sint_to_fp c1) -> c1fp
   11729       21933 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
   11730             :       // ...but only if the target supports immediate floating-point values
   11731           5 :       (!LegalOperations ||
   11732           1 :        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
   11733           8 :     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
   11734             : 
   11735             :   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
   11736             :   // but UINT_TO_FP is legal on this target, try to convert.
   11737       36135 :   if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
   11738       14206 :       hasOperation(ISD::UINT_TO_FP, OpVT)) {
   11739             :     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
   11740           0 :     if (DAG.SignBitIsZero(N0))
   11741           0 :       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
   11742             :   }
   11743             : 
   11744             :   // The next optimizations are desirable only if SELECT_CC can be lowered.
   11745       42846 :   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
   11746             :     // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
   11747           4 :     if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
   11748        7486 :         !VT.isVector() &&
   11749           4 :         (!LegalOperations ||
   11750           0 :          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
   11751             :       SDLoc DL(N);
   11752             :       SDValue Ops[] =
   11753             :         { N0.getOperand(0), N0.getOperand(1),
   11754           4 :           DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
   11755           4 :           N0.getOperand(2) };
   11756           8 :       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
   11757             :     }
   11758             : 
   11759             :     // fold (sint_to_fp (zext (setcc x, y, cc))) ->
   11760             :     //      (select_cc x, y, 1.0, 0.0, cc)
   11761         654 :     if (N0.getOpcode() == ISD::ZERO_EXTEND &&
   11762        7597 :         N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
   11763          26 :         (!LegalOperations ||
   11764           0 :          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
   11765             :       SDLoc DL(N);
   11766             :       SDValue Ops[] =
   11767          26 :         { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
   11768          26 :           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
   11769          78 :           N0.getOperand(0).getOperand(2) };
   11770          52 :       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
   11771             :     }
   11772             :   }
   11773             : 
   11774       21899 :   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
   11775          20 :     return FTrunc;
   11776             : 
   11777       21879 :   return SDValue();
   11778             : }
   11779             : // UINT_TO_FP combines; returns the folded node or an empty SDValue.
   11780       12560 : SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
   11781       12560 :   SDValue N0 = N->getOperand(0);
   11782       25120 :   EVT VT = N->getValueType(0);
   11783       12560 :   EVT OpVT = N0.getValueType();
   11784             : 
   11785             :   // fold (uint_to_fp c1) -> c1fp
   11786       12560 :   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
   11787             :       // ...but only if the target supports immediate floating-point values
   11788           0 :       (!LegalOperations ||
   11789           0 :        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
   11790           0 :     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
   11791             : 
   11792             :   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
   11793             :   // but SINT_TO_FP is legal on this target, try to convert.
   11794       16427 :   if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
   11795        3867 :       hasOperation(ISD::SINT_TO_FP, OpVT)) {
   11796             :     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
   11797        1292 :     if (DAG.SignBitIsZero(N0))
   11798          40 :       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
   11799             :   }
   11800             : 
   11801             :   // The next optimizations are desirable only if SELECT_CC can be lowered.
   11802       24208 :   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
   11803             :     // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc)
   11804       10709 :     if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
   11805          31 :         (!LegalOperations ||
   11806           0 :          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
   11807             :       SDLoc DL(N);
   11808             :       SDValue Ops[] =
   11809             :         { N0.getOperand(0), N0.getOperand(1),
   11810          31 :           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
   11811          31 :           N0.getOperand(2) };
   11812          62 :       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
   11813             :     }
   11814             :   }
   11815             : 
   11816       12509 :   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
   11817          19 :     return FTrunc;
   11818             : 
   11819       12490 :   return SDValue();
   11820             : }
   11821             : 
   11822             : // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
   11823       15241 : static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
   11824       15241 :   SDValue N0 = N->getOperand(0);
   11825       30482 :   EVT VT = N->getValueType(0);
   11826             : 
   11827       15241 :   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
   11828       15206 :     return SDValue();
   11829             : 
   11830          35 :   SDValue Src = N0.getOperand(0);
   11831          35 :   EVT SrcVT = Src.getValueType();
   11832          35 :   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
   11833          35 :   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
   11834             : 
   11835             :   // We can safely assume the conversion won't overflow the output range,
   11836             :   // because (for example) (uint8_t)18293.f is undefined behavior.
   11837             : 
   11838             :   // Since we can assume the conversion won't overflow, our decision as to
   11839             :   // whether the input will fit in the float should depend on the minimum
   11840             :   // of the input range and output range.
   11841             : 
   11842             :   // This means this is also safe for a signed input and unsigned output, since
   11843             :   // a negative input would lead to undefined behavior.
   11844          35 :   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
   11845          35 :   unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
   11846          35 :   unsigned ActualSize = std::min(InputSize, OutputSize);
   11847          35 :   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
   11848             : 
   11849             :   // We can only fold away the float conversion if the input range can be
   11850             :   // represented exactly in the float range.
   11851          35 :   if (APFloat::semanticsPrecision(sem) >= ActualSize) {
   11852           5 :     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
   11853           3 :       unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
   11854             :                                                        : ISD::ZERO_EXTEND;
   11855           6 :       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
   11856             :     }
   11857           2 :     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
   11858           2 :       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
   11859           1 :     return DAG.getBitcast(VT, Src); // Equal scalar widths: reuse Src as VT.
   11860             :   }
   11861          30 :   return SDValue();
   11862             : }
   11863             : 
   11864           0 : SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
   11865           0 :   SDValue N0 = N->getOperand(0);
   11866           0 :   EVT VT = N->getValueType(0);
   11867             : 
   11868             :   // fold (fp_to_sint c1fp) -> c1
   11869             :   if (isConstantFPBuildVectorOrConstantFP(N0))
   11870           0 :     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
   11871             : 
   11872           0 :   return FoldIntToFPToInt(N, DAG);
   11873             : }
   11874             : 
   11875           0 : SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
   11876           0 :   SDValue N0 = N->getOperand(0);
   11877           0 :   EVT VT = N->getValueType(0);
   11878             : 
   11879             :   // fold (fp_to_uint c1fp) -> c1
   11880             :   if (isConstantFPBuildVectorOrConstantFP(N0))
   11881           0 :     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
   11882             : 
   11883           0 :   return FoldIntToFPToInt(N, DAG);
   11884             : }
   11885             : 
   11886        4291 : SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
   11887        4291 :   SDValue N0 = N->getOperand(0);
   11888        4291 :   SDValue N1 = N->getOperand(1);
   11889             :   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   11890        4291 :   EVT VT = N->getValueType(0);
   11891             : 
   11892             :   // fold (fp_round c1fp) -> c1fp
   11893        4291 :   if (N0CFP)
   11894           4 :     return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
   11895             : 
   11896             :   // fold (fp_round (fp_extend x)) -> x
   11897        4289 :   if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
   11898           6 :     return N0.getOperand(0);
   11899             : 
   11900             :   // fold (fp_round (fp_round x)) -> (fp_round x)
   11901        4283 :   if (N0.getOpcode() == ISD::FP_ROUND) {
   11902          19 :     const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
   11903          19 :     const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
   11904             : 
   11905             :     // Skip this folding if it results in an fp_round from f80 to f16.
   11906             :     //
   11907             :     // f80 to f16 always generates an expensive (and as yet, unimplemented)
   11908             :     // libcall to __truncxfhf2 instead of selecting native f16 conversion
   11909             :     // instructions from f32 or f64.  Moreover, the first (value-preserving)
   11910             :     // fp_round from f80 to either f32 or f64 may become a NOP on platforms like
   11911             :     // x86.
   11912          19 :     if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
   11913           1 :       return SDValue();
   11914             : 
   11915             :     // If the first fp_round isn't a value preserving truncation, it might
   11916             :     // introduce a tie in the second fp_round, that wouldn't occur in the
   11917             :     // single-step fp_round we want to fold to.
   11918             :     // In other words, double rounding isn't the same as rounding.
   11919             :     // Also, this is a value preserving truncation iff both fp_round's are.
   11920          18 :     if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
   11921             :       SDLoc DL(N);
   11922           7 :       return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
   11923           7 :                          DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
   11924             :     }
   11925             :   }
   11926             : 
   11927             :   // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
   11928        4275 :   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
   11929           8 :     SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
   11930           8 :                               N0.getOperand(0), N1);
   11931           8 :     AddToWorklist(Tmp.getNode());
   11932           8 :     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
   11933          16 :                        Tmp, N0.getOperand(1));
   11934             :   }
   11935             : 
   11936        4267 :   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
   11937           4 :     return NewVSel;
   11938             : 
   11939        4263 :   return SDValue();
   11940             : }
   11941             : 
   11942           0 : SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
   11943           0 :   SDValue N0 = N->getOperand(0);
   11944           0 :   EVT VT = N->getValueType(0);
   11945           0 :   EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
   11946             :   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   11947             : 
   11948             :   // fold (fp_round_inreg c1fp) -> c1fp
   11949           0 :   if (N0CFP && isTypeLegal(EVT)) {
   11950             :     SDLoc DL(N);
   11951           0 :     SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
   11952           0 :     return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
   11953             :   }
   11954             : 
   11955           0 :   return SDValue();
   11956             : }
   11957             : 
   11958        9165 : SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
   11959        9165 :   SDValue N0 = N->getOperand(0);
   11960       18330 :   EVT VT = N->getValueType(0);
   11961             : 
   11962             :   // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
   11963        8648 :   if (N->hasOneUse() &&
   11964        8648 :       N->use_begin()->getOpcode() == ISD::FP_ROUND)
   11965           0 :     return SDValue();
   11966             : 
   11967             :   // fold (fp_extend c1fp) -> c1fp
   11968             :   if (isConstantFPBuildVectorOrConstantFP(N0))
   11969          20 :     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
   11970             : 
   11971             :   // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
   11972        9995 :   if (N0.getOpcode() == ISD::FP16_TO_FP &&
   11973         840 :       TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
   11974          10 :     return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
   11975             : 
   11976             :   // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
   11977             :   // value of X.
   11978             :   if (N0.getOpcode() == ISD::FP_ROUND
   11979        9386 :       && N0.getConstantOperandVal(1) == 1) {
   11980         133 :     SDValue In = N0.getOperand(0);
   11981         261 :     if (In.getValueType() == VT) return In;
   11982           5 :     if (VT.bitsLT(In.getValueType()))
   11983           5 :       return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
   11984          10 :                          In, N0.getOperand(1));
   11985           0 :     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
   11986             :   }
   11987             : 
   11988             :   // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
   11989        3548 :   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
   11990        3242 :        TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
   11991             :     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   11992         810 :     SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
   11993             :                                      LN0->getChain(),
   11994             :                                      LN0->getBasePtr(), N0.getValueType(),
   11995         811 :                                      LN0->getMemOperand());
   11996         810 :     CombineTo(N, ExtLoad);
   11997             :     CombineTo(N0.getNode(),
   11998         810 :               DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
   11999             :                           N0.getValueType(), ExtLoad,
   12000         810 :                           DAG.getIntPtrConstant(1, SDLoc(N0))),
   12001        1621 :               ExtLoad.getValue(1));
   12002         810 :     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   12003             :   }
   12004             : 
   12005        8207 :   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
   12006           2 :     return NewVSel;
   12007             : 
   12008        8205 :   return SDValue();
   12009             : }
   12010             : 
   12011           0 : SDValue DAGCombiner::visitFCEIL(SDNode *N) {
   12012           0 :   SDValue N0 = N->getOperand(0);
   12013           0 :   EVT VT = N->getValueType(0);
   12014             : 
   12015             :   // fold (fceil c1) -> fceil(c1)
   12016             :   if (isConstantFPBuildVectorOrConstantFP(N0))
   12017           0 :     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
   12018             : 
   12019           0 :   return SDValue();
   12020             : }
   12021             : 
   12022           0 : SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
   12023           0 :   SDValue N0 = N->getOperand(0);
   12024           0 :   EVT VT = N->getValueType(0);
   12025             : 
   12026             :   // fold (ftrunc c1) -> ftrunc(c1)
   12027             :   if (isConstantFPBuildVectorOrConstantFP(N0))
   12028           0 :     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
   12029             : 
   12030             :   // fold ftrunc (known rounded int x) -> x
   12031             :   // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
   12032             :   // likely to be generated to extract integer from a rounded floating value.
   12033           0 :   switch (N0.getOpcode()) {
   12034             :   default: break;
   12035           0 :   case ISD::FRINT:
   12036             :   case ISD::FTRUNC:
   12037             :   case ISD::FNEARBYINT:
   12038             :   case ISD::FFLOOR:
   12039             :   case ISD::FCEIL:
   12040           0 :     return N0;
   12041             :   }
   12042             : 
   12043           0 :   return SDValue();
   12044             : }
   12045             : 
   12046           0 : SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
   12047           0 :   SDValue N0 = N->getOperand(0);
   12048           0 :   EVT VT = N->getValueType(0);
   12049             : 
   12050             :   // fold (ffloor c1) -> ffloor(c1)
   12051             :   if (isConstantFPBuildVectorOrConstantFP(N0))
   12052           0 :     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
   12053             : 
   12054           0 :   return SDValue();
   12055             : }
   12056             : 
   12057             : // FIXME: FNEG and FABS have a lot in common; refactor.
   12058        6789 : SDValue DAGCombiner::visitFNEG(SDNode *N) {
   12059        6789 :   SDValue N0 = N->getOperand(0);
   12060        6789 :   EVT VT = N->getValueType(0);
   12061             : 
   12062             :   // Constant fold FNEG.
   12063        6789 :   if (isConstantFPBuildVectorOrConstantFP(N0))
   12064           0 :     return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
   12065             : 
   12066        6789 :   if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
   12067        6789 :                          &DAG.getTarget().Options))
   12068         101 :     return GetNegatedExpression(N0, DAG, LegalOperations);
   12069             : 
   12070             :   // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
   12071             :   // constant pool values.
   12072        9424 :   if (!TLI.isFNegFree(VT) &&
   12073        6688 :       N0.getOpcode() == ISD::BITCAST &&
   12074             :       N0.getNode()->hasOneUse()) {
   12075         204 :     SDValue Int = N0.getOperand(0);
   12076         204 :     EVT IntVT = Int.getValueType();
   12077         384 :     if (IntVT.isInteger() && !IntVT.isVector()) {
   12078             :       APInt SignMask;
   12079         126 :       if (N0.getValueType().isVector()) {
   12080             :         // For a vector, get a mask such as 0x80... per scalar element
   12081             :         // and splat it.
   12082          32 :         SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
   12083          64 :         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
   12084             :       } else {
   12085             :         // For a scalar, just generate 0x80...
   12086          20 :         SignMask = APInt::getSignMask(IntVT.getSizeInBits());
   12087             :       }
   12088             :       SDLoc DL0(N0);
   12089          42 :       Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
   12090          42 :                         DAG.getConstant(SignMask, DL0, IntVT));
   12091          42 :       AddToWorklist(Int.getNode());
   12092          42 :       return DAG.getBitcast(VT, Int);
   12093             :     }
   12094             :   }
   12095             : 
   12096             :   // (fneg (fmul x, c)) -> (fmul x, -c)
   12097       13292 :   if (N0.getOpcode() == ISD::FMUL &&
   12098          71 :       (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
   12099         450 :     ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
   12100             :     if (CFP1) {
   12101           6 :       APFloat CVal = CFP1->getValueAPF();
   12102           6 :       CVal.changeSign();
   12103           8 :       if (Level >= AfterLegalizeDAG &&
   12104           2 :           (TLI.isFPImmLegal(CVal, VT) ||
   12105           1 :            TLI.isOperationLegal(ISD::ConstantFP, VT)))
   12106           1 :         return DAG.getNode(
   12107           2 :             ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
   12108           1 :             DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
   12109           4 :             N0->getFlags());
   12110             :     }
   12111             :   }
   12112             : 
   12113        6645 :   return SDValue();
   12114             : }
   12115             : 
   12116           0 : SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
   12117           0 :   SDValue N0 = N->getOperand(0);
   12118           0 :   SDValue N1 = N->getOperand(1);
   12119           0 :   EVT VT = N->getValueType(0);
   12120           0 :   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
   12121           0 :   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
   12122             : 
   12123           0 :   if (N0CFP && N1CFP) {
   12124           0 :     const APFloat &C0 = N0CFP->getValueAPF();
   12125           0 :     const APFloat &C1 = N1CFP->getValueAPF();
   12126           0 :     return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
   12127             :   }
   12128             : 
   12129             :   // Canonicalize to constant on RHS.
   12130             :   if (isConstantFPBuildVectorOrConstantFP(N0) &&
   12131             :      !isConstantFPBuildVectorOrConstantFP(N1))
   12132           0 :     return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
   12133             : 
   12134           0 :   return SDValue();
   12135             : }
   12136             : 
   12137           0 : SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
   12138           0 :   SDValue N0 = N->getOperand(0);
   12139           0 :   SDValue N1 = N->getOperand(1);
   12140           0 :   EVT VT = N->getValueType(0);
   12141           0 :   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
   12142           0 :   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
   12143             : 
   12144           0 :   if (N0CFP && N1CFP) {
   12145           0 :     const APFloat &C0 = N0CFP->getValueAPF();
   12146           0 :     const APFloat &C1 = N1CFP->getValueAPF();
   12147           0 :     return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
   12148             :   }
   12149             : 
   12150             :   // Canonicalize to constant on RHS.
   12151             :   if (isConstantFPBuildVectorOrConstantFP(N0) &&
   12152             :      !isConstantFPBuildVectorOrConstantFP(N1))
   12153           0 :     return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
   12154             : 
   12155           0 :   return SDValue();
   12156             : }
   12157             : 
   12158        5148 : SDValue DAGCombiner::visitFABS(SDNode *N) {
   12159        5148 :   SDValue N0 = N->getOperand(0);
   12160        5148 :   EVT VT = N->getValueType(0);
   12161             : 
   12162             :   // fold (fabs c1) -> fabs(c1)
   12163        5148 :   if (isConstantFPBuildVectorOrConstantFP(N0))
   12164           0 :     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
   12165             : 
   12166             :   // fold (fabs (fabs x)) -> (fabs x)
   12167       10296 :   if (N0.getOpcode() == ISD::FABS)
   12168           4 :     return N->getOperand(0);
   12169             : 
   12170             :   // fold (fabs (fneg x)) -> (fabs x)
   12171             :   // fold (fabs (fcopysign x, y)) -> (fabs x)
   12172        5144 :   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
   12173           8 :     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
   12174             : 
   12175             :   // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads.
   12176        5140 :   if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) {
   12177         202 :     SDValue Int = N0.getOperand(0);
   12178         101 :     EVT IntVT = Int.getValueType();
   12179         179 :     if (IntVT.isInteger() && !IntVT.isVector()) {
   12180             :       APInt SignMask;
   12181         168 :       if (N0.getValueType().isVector()) {
   12182             :         // For a vector, get a mask such as 0x7f... per scalar element
   12183             :         // and splat it.
   12184          46 :         SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
   12185          92 :         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
   12186             :       } else {
   12187             :         // For a scalar, just generate 0x7f...
   12188          20 :         SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
   12189             :       }
   12190             :       SDLoc DL(N0);
   12191          56 :       Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
   12192          56 :                         DAG.getConstant(SignMask, DL, IntVT));
   12193          56 :       AddToWorklist(Int.getNode());
   12194         112 :       return DAG.getBitcast(N->getValueType(0), Int);
   12195             :     }
   12196             :   }
   12197             : 
   12198        5084 :   return SDValue();
   12199             : }
   12200             : 
   12201      258065 : SDValue DAGCombiner::visitBRCOND(SDNode *N) {
   12202      258065 :   SDValue Chain = N->getOperand(0);
   12203      258065 :   SDValue N1 = N->getOperand(1);
   12204      258065 :   SDValue N2 = N->getOperand(2);
   12205             : 
   12206             :   // If N is a constant we could fold this into a fallthrough or unconditional
   12207             :   // branch. However that doesn't happen very often in normal code, because
   12208             :   // Instcombine/SimplifyCFG should have handled the available opportunities.
   12209             :   // If we did this folding here, it would be necessary to update the
   12210             :   // MachineBasicBlock CFG, which is awkward.
   12211             : 
   12212             :   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
   12213             :   // on the target.
   12214      258065 :   if (N1.getOpcode() == ISD::SETCC &&
   12215      434520 :       TLI.isOperationLegalOrCustom(ISD::BR_CC,
   12216             :                                    N1.getOperand(0).getValueType())) {
   12217        4240 :     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
   12218             :                        Chain, N1.getOperand(2),
   12219        8480 :                        N1.getOperand(0), N1.getOperand(1), N2);
   12220             :   }
   12221             : 
   12222      253825 :   if (N1.hasOneUse()) {
   12223      248932 :     if (SDValue NewN1 = rebuildSetCC(N1))
   12224       79736 :       return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
   12225             :   }
   12226             : 
   12227      223965 :   return SDValue();
   12228             : }
   12229             : 
   12230      249617 : SDValue DAGCombiner::rebuildSetCC(SDValue N) {
   12231      249617 :   if (N.getOpcode() == ISD::SRL ||
   12232        1485 :       (N.getOpcode() == ISD::TRUNCATE &&
   12233        2897 :        (N.getOperand(0).hasOneUse() &&
   12234        1412 :         N.getOperand(0).getOpcode() == ISD::SRL))) {
   12235             :     // Look past the truncate.
   12236         119 :     if (N.getOpcode() == ISD::TRUNCATE)
   12237          89 :       N = N.getOperand(0);
   12238             : 
   12239             :     // Match this pattern so that we can generate simpler code:
   12240             :     //
   12241             :     //   %a = ...
   12242             :     //   %b = and i32 %a, 2
   12243             :     //   %c = srl i32 %b, 1
   12244             :     //   brcond i32 %c ...
   12245             :     //
   12246             :     // into
   12247             :     //
   12248             :     //   %a = ...
   12249             :     //   %b = and i32 %a, 2
   12250             :     //   %c = setcc ne %b, 0
   12251             :     //   brcond %c ...
   12252             :     //
   12253             :     // This applies only when the AND constant value has one bit set and the
   12254             :     // SRL constant is equal to the log2 of the AND constant. The back-end is
   12255             :     // smart enough to convert the result into a TEST/JMP sequence.
   12256         119 :     SDValue Op0 = N.getOperand(0);
   12257         119 :     SDValue Op1 = N.getOperand(1);
   12258             : 
   12259         119 :     if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
   12260         118 :       SDValue AndOp1 = Op0.getOperand(1);
   12261             : 
   12262         118 :       if (AndOp1.getOpcode() == ISD::Constant) {
   12263         118 :         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
   12264             : 
   12265         118 :         if (AndConst.isPowerOf2() &&
   12266         236 :             cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
   12267             :           SDLoc DL(N);
   12268         118 :           return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
   12269             :                               Op0, DAG.getConstant(0, DL, Op0.getValueType()),
   12270         118 :                               ISD::SETNE);
   12271             :         }
   12272             :       }
   12273             :     }
   12274             :   }
   12275             : 
   12276             :   // Transform br(xor(x, y)) -> br(x != y)
   12277             :   // Transform br(xor(xor(x,y), 1)) -> br (x == y)
   12278      249499 :   if (N.getOpcode() == ISD::XOR) {
   12279             :     // Because we may call this on a speculatively constructed
   12280             :     // SimplifiedSetCC Node, we need to simplify this node first.
   12281             :     // Ideally this should be folded into SimplifySetCC and not
   12282             :     // here. For now, grab a handle to N so we don't lose it from
   12283             :     // replacements internal to the visit.
   12284       30835 :     HandleSDNode XORHandle(N);
   12285       52403 :     while (N.getOpcode() == ISD::XOR) {
   12286       30353 :       SDValue Tmp = visitXOR(N.getNode());
   12287             :       // No simplification done.
   12288       30353 :       if (!Tmp.getNode())
   12289             :         break;
   12290             :       // Returning N is a form of in-visit replacement that may have
   12291             :       // invalidated N. Grab the value from the handle.
   12292       22057 :       if (Tmp.getNode() == N.getNode())
   12293           8 :         N = XORHandle.getValue();
   12294             :       else // Node simplified. Try simplifying again.
   12295       22049 :         N = Tmp;
   12296             :     }
   12297             : 
   12298       30346 :     if (N.getOpcode() != ISD::XOR)
   12299       29857 :       return N;
   12300             : 
   12301             :     SDNode *TheXor = N.getNode();
   12302             : 
   12303        8296 :     SDValue Op0 = TheXor->getOperand(0);
   12304        8296 :     SDValue Op1 = TheXor->getOperand(1);
   12305             : 
   12306        8296 :     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
   12307             :       bool Equal = false;
   12308        7807 :       if (isOneConstant(Op0) && Op0.hasOneUse() &&
   12309             :           Op0.getOpcode() == ISD::XOR) {
   12310             :         TheXor = Op0.getNode();
   12311             :         Equal = true;
   12312             :       }
   12313             : 
   12314        7807 :       EVT SetCCVT = N.getValueType();
   12315        7807 :       if (LegalTypes)
   12316           4 :         SetCCVT = getSetCCResultType(SetCCVT);
   12317             :       // Replace the uses of XOR with SETCC
   12318        7807 :       return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
   12319       15614 :                           Equal ? ISD::SETEQ : ISD::SETNE);
   12320             :     }
   12321             :   }
   12322             : 
   12323      219642 :   return SDValue();
   12324             : }
   12325             : 
   12326             : // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
   12327             : //
   12328        6356 : SDValue DAGCombiner::visitBR_CC(SDNode *N) {
   12329        6356 :   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
   12330        6356 :   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
   12331             : 
   12332             :   // If N is a constant we could fold this into a fallthrough or unconditional
   12333             :   // branch. However that doesn't happen very often in normal code, because
   12334             :   // Instcombine/SimplifyCFG should have handled the available opportunities.
   12335             :   // If we did this folding here, it would be necessary to update the
   12336             :   // MachineBasicBlock CFG, which is awkward.
   12337             : 
   12338             :   // Use SimplifySetCC to simplify SETCC's.
   12339             :   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
   12340        6356 :                                CondLHS, CondRHS, CC->get(), SDLoc(N),
   12341       19068 :                                false);
   12342        6356 :   if (Simp.getNode()) AddToWorklist(Simp.getNode());
   12343             : 
   12344             :   // fold to a simpler setcc
   12345        6356 :   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
   12346         958 :     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
   12347             :                        N->getOperand(0), Simp.getOperand(2),
   12348             :                        Simp.getOperand(0), Simp.getOperand(1),
   12349         961 :                        N->getOperand(4));
   12350             : 
   12351        5877 :   return SDValue();
   12352             : }
   12353             : 
   12354             : /// Return true if 'Use' is a load or a store that uses N as its base pointer
   12355             : /// and that N may be folded in the load / store addressing mode.
   12356       12898 : static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
   12357             :                                     SelectionDAG &DAG,
   12358             :                                     const TargetLowering &TLI) {
   12359       12898 :   EVT VT;
   12360             :   unsigned AS;
   12361             : 
   12362             :   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(Use)) {
   12363        4644 :     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
   12364             :       return false;
   12365        4635 :     VT = LD->getMemoryVT();
   12366             :     AS = LD->getAddressSpace();
   12367             :   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(Use)) {
   12368        6962 :     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
   12369             :       return false;
   12370        6824 :     VT = ST->getMemoryVT();
   12371             :     AS = ST->getAddressSpace();
   12372             :   } else
   12373             :     return false;
   12374             : 
   12375       11459 :   TargetLowering::AddrMode AM;
   12376       22918 :   if (N->getOpcode() == ISD::ADD) {
   12377       11459 :     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
   12378             :     if (Offset)
   12379             :       // [reg +/- imm]
   12380       22858 :       AM.BaseOffs = Offset->getSExtValue();
   12381             :     else
   12382             :       // [reg +/- reg]
   12383          30 :       AM.Scale = 1;
   12384           0 :   } else if (N->getOpcode() == ISD::SUB) {
   12385           0 :     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
   12386             :     if (Offset)
   12387             :       // [reg +/- imm]
   12388           0 :       AM.BaseOffs = -Offset->getSExtValue();
   12389             :     else
   12390             :       // [reg +/- reg]
   12391           0 :       AM.Scale = 1;
   12392             :   } else
   12393             :     return false;
   12394             : 
   12395       11459 :   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
   12396       22918 :                                    VT.getTypeForEVT(*DAG.getContext()), AS);
   12397             : }
   12398             : 
   12399             : /// Try turning a load/store into a pre-indexed load/store when the base
   12400             : /// pointer is an add or subtract and it has other uses besides the load/store.
   12401             : /// After the transformation, the new indexed load/store has effectively folded
   12402             : /// the add/subtract in and all of its other uses are redirected to the
   12403             : /// new load/store.
   12404    13578213 : bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
   12405    13578213 :   if (Level < AfterLegalizeDAG)
   12406             :     return false;
   12407             : 
   12408             :   bool isLoad = true;
   12409     5529896 :   SDValue Ptr;
   12410             :   EVT VT;
   12411             :   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
   12412     2592171 :     if (LD->isIndexed())
   12413             :       return false;
   12414             :     VT = LD->getMemoryVT();
   12415     2591763 :     if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
   12416             :         !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
   12417             :       return false;
   12418       19138 :     Ptr = LD->getBasePtr();
   12419             :   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
   12420     2937725 :     if (ST->isIndexed())
   12421             :       return false;
   12422             :     VT = ST->getMemoryVT();
   12423     2937488 :     if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
   12424             :         !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
   12425             :       return false;
   12426       16827 :     Ptr = ST->getBasePtr();
   12427             :     isLoad = false;
   12428             :   } else {
   12429             :     return false;
   12430             :   }
   12431             : 
   12432             :   // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
   12433             :   // out.  There is no reason to make this a preinc/predec.
   12434       35965 :   if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
   12435             :       Ptr.getNode()->hasOneUse())
   12436             :     return false;
   12437             : 
   12438             :   // Ask the target to do addressing mode selection.
   12439        4878 :   SDValue BasePtr;
   12440        4878 :   SDValue Offset;
   12441        4878 :   ISD::MemIndexedMode AM = ISD::UNINDEXED;
   12442        4878 :   if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
   12443             :     return false;
   12444             : 
   12445             :   // Backends without true r+i pre-indexed forms may need to pass a
   12446             :   // constant base with a variable offset so that constant coercion
   12447             :   // will work with the patterns in canonical form.
   12448             :   bool Swapped = false;
   12449             :   if (isa<ConstantSDNode>(BasePtr)) {
   12450             :     std::swap(BasePtr, Offset);
   12451             :     Swapped = true;
   12452             :   }
   12453             : 
   12454             :   // Don't create a indexed load / store with zero offset.
   12455        4797 :   if (isNullConstant(Offset))
   12456             :     return false;
   12457             : 
   12458             :   // Try turning it into a pre-indexed load / store except when:
   12459             :   // 1) The new base ptr is a frame index.
   12460             :   // 2) If N is a store and the new base ptr is either the same as or is a
   12461             :   //    predecessor of the value being stored.
   12462             :   // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
   12463             :   //    that would create a cycle.
   12464             :   // 4) All uses are load / store ops that use it as old base ptr.
   12465             : 
   12466             :   // Check #1.  Preinc'ing a frame index would require copying the stack pointer
   12467             :   // (plus the implicit offset) to a register to preinc anyway.
   12468        3849 :   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
   12469             :     return false;
   12470             : 
   12471             :   // Check #2.
   12472        3739 :   if (!isLoad) {
   12473        1793 :     SDValue Val = cast<StoreSDNode>(N)->getValue();
   12474        1788 :     if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
   12475             :       return false;
   12476             :   }
   12477             : 
   12478             :   // Caches for hasPredecessorHelper.
   12479             :   SmallPtrSet<const SDNode *, 32> Visited;
   12480             :   SmallVector<const SDNode *, 16> Worklist;
   12481        2163 :   Worklist.push_back(N);
   12482             : 
   12483             :   // If the offset is a constant, there may be other adds of constants that
   12484             :   // can be folded with this one. We should do this to avoid having to keep
   12485             :   // a copy of the original base pointer.
   12486             :   SmallVector<SDNode *, 16> OtherUses;
   12487             :   if (isa<ConstantSDNode>(Offset))
   12488        2017 :     for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
   12489             :                               UE = BasePtr.getNode()->use_end();
   12490        3882 :          UI != UE; ++UI) {
   12491             :       SDUse &Use = UI.getUse();
   12492             :       // Skip the use that is Ptr and uses of other results from BasePtr's
   12493             :       // node (important for nodes that return multiple results).
   12494        3394 :       if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
   12495             :         continue;
   12496             : 
   12497        2779 :       if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
   12498             :         continue;
   12499             : 
   12500        4632 :       if (Use.getUser()->getOpcode() != ISD::ADD &&
   12501             :           Use.getUser()->getOpcode() != ISD::SUB) {
   12502             :         OtherUses.clear();
   12503             :         break;
   12504             :       }
   12505             : 
   12506        1574 :       SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
   12507             :       if (!isa<ConstantSDNode>(Op1)) {
   12508             :         OtherUses.clear();
   12509             :         break;
   12510             :       }
   12511             : 
   12512             :       // FIXME: In some cases, we can be smarter about this.
   12513         787 :       if (Op1.getValueType() != Offset.getValueType()) {
   12514             :         OtherUses.clear();
   12515             :         break;
   12516             :       }
   12517             : 
   12518         787 :       OtherUses.push_back(Use.getUser());
   12519             :     }
   12520             : 
   12521        2163 :   if (Swapped)
   12522             :     std::swap(BasePtr, Offset);
   12523             : 
   12524             :   // Now check for #3 and #4.
   12525             :   bool RealUse = false;
   12526             : 
   12527        6597 :   for (SDNode *Use : Ptr.getNode()->uses()) {
   12528        4611 :     if (Use == N)
   12529             :       continue;
   12530        2493 :     if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
   12531             :       return false;
   12532             : 
   12533             :     // If Ptr may be folded in addressing mode of other use, then it's
   12534             :     // not profitable to do this transformation.
   12535        2316 :     if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
   12536             :       RealUse = true;
   12537             :   }
   12538             : 
   12539        1986 :   if (!RealUse)
   12540             :     return false;
   12541             : 
   12542             :   SDValue Result;
   12543         421 :   if (isLoad)
   12544         308 :     Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
   12545         308 :                                 BasePtr, Offset, AM);
   12546             :   else
   12547         113 :     Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
   12548         113 :                                  BasePtr, Offset, AM);
   12549             :   ++PreIndexedNodes;
   12550             :   ++NodesCombined;
   12551             :   LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
   12552             :              Result.getNode()->dump(&DAG); dbgs() << '\n');
   12553             :   WorklistRemover DeadNodes(*this);
   12554         421 :   if (isLoad) {
   12555         616 :     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
   12556         616 :     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
   12557             :   } else {
   12558         226 :     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
   12559             :   }
   12560             : 
   12561             :   // Finally, since the node is now dead, remove it from the graph.
   12562         421 :   deleteAndRecombine(N);
   12563             : 
   12564         421 :   if (Swapped)
   12565             :     std::swap(BasePtr, Offset);
   12566             : 
   12567             :   // Replace other uses of BasePtr that can be updated to use Ptr
   12568         625 :   for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
   12569             :     unsigned OffsetIdx = 1;
   12570         408 :     if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
   12571             :       OffsetIdx = 0;
   12572             :     assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
   12573             :            BasePtr.getNode() && "Expected BasePtr operand");
   12574             : 
   12575             :     // We need to replace ptr0 in the following expression:
   12576             :     //   x0 * offset0 + y0 * ptr0 = t0
   12577             :     // knowing that
   12578             :     //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
   12579             :     //
   12580             :     // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
   12581             :     // indexed load/store and the expression that needs to be re-written.
   12582             :     //
   12583             :     // Therefore, we have:
   12584             :     //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
   12585             : 
   12586             :     ConstantSDNode *CN =
   12587             :       cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
   12588             :     int X0, X1, Y0, Y1;
   12589         204 :     const APInt &Offset0 = CN->getAPIntValue();
   12590         204 :     APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
   12591             : 
   12592         408 :     X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
   12593         204 :     Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
   12594         204 :     X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
   12595         204 :     Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
   12596             : 
   12597         204 :     unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
   12598             : 
   12599             :     APInt CNV = Offset0;
   12600         204 :     if (X0 < 0) CNV = -CNV;
   12601         204 :     if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
   12602         204 :     else CNV = CNV - Offset1;
   12603             : 
   12604         204 :     SDLoc DL(OtherUses[i]);
   12605             : 
   12606             :     // We can now generate the new expression.
   12607         408 :     SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
   12608         204 :     SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
   12609             : 
   12610         204 :     SDValue NewUse = DAG.getNode(Opcode,
   12611             :                                  DL,
   12612         408 :                                  OtherUses[i]->getValueType(0), NewOp1, NewOp2);
   12613         612 :     DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
   12614         204 :     deleteAndRecombine(OtherUses[i]);
   12615             :   }
   12616             : 
   12617             :   // Replace the uses of Ptr with uses of the updated base value.
   12618         534 :   DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
   12619         421 :   deleteAndRecombine(Ptr.getNode());
   12620         421 :   AddToWorklist(Result.getNode());
   12621             : 
   12622             :   return true;
   12623             : }
   12624             : 
   12625             : /// Try to combine a load/store with a add/sub of the base pointer node into a
   12626             : /// post-indexed load/store. The transformation folded the add/subtract into the
   12627             : /// new indexed load/store effectively and all of its uses are redirected to the
   12628             : /// new load/store.
   12629    13577792 : bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
   12630    13577792 :   if (Level < AfterLegalizeDAG)
   12631             :     return false;
   12632             : 
   12633             :   bool isLoad = true;
   12634             :   SDValue Ptr;
   12635             :   EVT VT;
   12636             :   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
   12637     2591863 :     if (LD->isIndexed())
   12638             :       return false;
   12639             :     VT = LD->getMemoryVT();
   12640     2591455 :     if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
   12641             :         !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
   12642             :       return false;
   12643       19477 :     Ptr = LD->getBasePtr();
   12644             :   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
   12645     2937612 :     if (ST->isIndexed())
   12646             :       return false;
   12647             :     VT = ST->getMemoryVT();
   12648     2937375 :     if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
   12649             :         !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
   12650             :       return false;
   12651       15678 :     Ptr = ST->getBasePtr();
   12652             :     isLoad = false;
   12653             :   } else {
   12654             :     return false;
   12655             :   }
   12656             : 
   12657             :   if (Ptr.getNode()->hasOneUse())
   12658             :     return false;
   12659             : 
   12660      232311 :   for (SDNode *Op : Ptr.getNode()->uses()) {
   12661      218474 :     if (Op == N ||
   12662      204208 :         (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
   12663      217108 :       continue;
   12664             : 
   12665        9557 :     SDValue BasePtr;
   12666        9557 :     SDValue Offset;
   12667        9557 :     ISD::MemIndexedMode AM = ISD::UNINDEXED;
   12668        9557 :     if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
   12669             :       // Don't create a indexed load / store with zero offset.
   12670        8711 :       if (isNullConstant(Offset))
   12671        8191 :         continue;
   12672             : 
   12673             :       // Try turning it into a post-indexed load / store except when
   12674             :       // 1) All uses are load / store ops that use it as base ptr (and
   12675             :       //    it may be folded as addressing mmode).
   12676             :       // 2) Op must be independent of N, i.e. Op is neither a predecessor
   12677             :       //    nor a successor of N. Otherwise, if Op is folded that would
   12678             :       //    create a cycle.
   12679             : 
   12680        7106 :       if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
   12681             :         continue;
   12682             : 
   12683             :       // Check for #1.
   12684             :       bool TryNext = false;
   12685       12912 :       for (SDNode *Use : BasePtr.getNode()->uses()) {
   12686       12392 :         if (Use == Ptr.getNode())
   12687             :           continue;
   12688             : 
   12689             :         // If all the uses are load / store addresses, then don't do the
   12690             :         // transformation.
   12691       24784 :         if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
   12692             :           bool RealUse = false;
   12693       18061 :           for (SDNode *UseUse : Use->uses()) {
   12694       10582 :             if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
   12695             :               RealUse = true;
   12696             :           }
   12697             : 
   12698        7479 :           if (!RealUse) {
   12699             :             TryNext = true;
   12700             :             break;
   12701             :           }
   12702             :         }
   12703             :       }
   12704             : 
   12705        7106 :       if (TryNext)
   12706             :         continue;
   12707             : 
   12708             :       // Check for #2.
   12709             :       SmallPtrSet<const SDNode *, 32> Visited;
   12710             :       SmallVector<const SDNode *, 8> Worklist;
   12711             :       // Ptr is predecessor to both N and Op.
   12712         520 :       Visited.insert(Ptr.getNode());
   12713         520 :       Worklist.push_back(N);
   12714         520 :       Worklist.push_back(Op);
   12715        1035 :       if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
   12716         515 :           !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) {
   12717             :         SDValue Result = isLoad
   12718         452 :           ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
   12719         256 :                                BasePtr, Offset, AM)
   12720         452 :           : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
   12721         708 :                                 BasePtr, Offset, AM);
   12722             :         ++PostIndexedNodes;
   12723             :         ++NodesCombined;
   12724             :         LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
   12725             :                    dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
   12726             :                    dbgs() << '\n');
   12727             :         WorklistRemover DeadNodes(*this);
   12728         452 :         if (isLoad) {
   12729         510 :           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
   12730         510 :           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
   12731             :         } else {
   12732         394 :           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
   12733             :         }
   12734             : 
   12735             :         // Finally, since the node is now dead, remove it from the graph.
   12736         452 :         deleteAndRecombine(N);
   12737             : 
   12738             :         // Replace the uses of Use with uses of the updated base value.
   12739         649 :         DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
   12740             :                                       Result.getValue(isLoad ? 1 : 0));
   12741         452 :         deleteAndRecombine(Op);
   12742             :         return true;
   12743             :       }
   12744             :     }
   12745             :   }
   12746             : 
   12747             :   return false;
   12748             : }
   12749             : 
   12750             : /// Return the base-pointer arithmetic from an indexed \p LD.
   12751           0 : SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
   12752             :   ISD::MemIndexedMode AM = LD->getAddressingMode();
   12753             :   assert(AM != ISD::UNINDEXED);
   12754           0 :   SDValue BP = LD->getOperand(1);
   12755           0 :   SDValue Inc = LD->getOperand(2);
   12756             : 
   12757             :   // Some backends use TargetConstants for load offsets, but don't expect
   12758             :   // TargetConstants in general ADD nodes. We can convert these constants into
   12759             :   // regular Constants (if the constant is not opaque).
   12760             :   assert((Inc.getOpcode() != ISD::TargetConstant ||
   12761             :           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
   12762             :          "Cannot split out indexing using opaque target constants");
   12763           0 :   if (Inc.getOpcode() == ISD::TargetConstant) {
   12764             :     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
   12765           0 :     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
   12766           0 :                           ConstInc->getValueType(0));
   12767             :   }
   12768             : 
   12769             :   unsigned Opc =
   12770           0 :       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
   12771           0 :   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
   12772             : }
   12773             : 
   12774        8856 : static inline int numVectorEltsOrZero(EVT T) {
   12775       11247 :   return T.isVector() ? T.getVectorNumElements() : 0;
   12776             : }
   12777             : 
   12778        8866 : bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
   12779        8866 :   Val = ST->getValue();
   12780        8866 :   EVT STType = Val.getValueType();
   12781        8866 :   EVT STMemType = ST->getMemoryVT();
   12782           0 :   if (STType == STMemType)
   12783             :     return true;
   12784        1620 :   if (isTypeLegal(STMemType))
   12785             :     return false; // fail.
   12786        1594 :   if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
   12787             :       TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
   12788           0 :     Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
   12789           0 :     return true;
   12790             :   }
   12791        3188 :   if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
   12792        1594 :       STType.isInteger() && STMemType.isInteger()) {
   12793        1594 :     Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
   12794        1594 :     return true;
   12795             :   }
   12796           0 :   if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
   12797           0 :     Val = DAG.getBitcast(STMemType, Val);
   12798           0 :     return true;
   12799             :   }
   12800             :   return false; // fail.
   12801             : }
   12802             : 
   12803           0 : bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
   12804           0 :   EVT LDMemType = LD->getMemoryVT();
   12805           0 :   EVT LDType = LD->getValueType(0);
   12806             :   assert(Val.getValueType() == LDMemType &&
   12807             :          "Attempting to extend value of non-matching type");
   12808           0 :   if (LDType == LDMemType)
   12809           0 :     return true;
   12810           0 :   if (LDMemType.isInteger() && LDType.isInteger()) {
   12811           0 :     switch (LD->getExtensionType()) {
   12812           0 :     case ISD::NON_EXTLOAD:
   12813           0 :       Val = DAG.getBitcast(LDType, Val);
   12814           0 :       return true;
   12815           0 :     case ISD::EXTLOAD:
   12816           0 :       Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
   12817           0 :       return true;
   12818           0 :     case ISD::SEXTLOAD:
   12819           0 :       Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
   12820           0 :       return true;
   12821           0 :     case ISD::ZEXTLOAD:
   12822           0 :       Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
   12823           0 :       return true;
   12824             :     }
   12825             :   }
   12826             :   return false;
   12827             : }
   12828             : 
   12829     6353867 : SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
   12830     6353867 :   if (OptLevel == CodeGenOpt::None || LD->isVolatile())
   12831     3885058 :     return SDValue();
   12832     2468809 :   SDValue Chain = LD->getOperand(0);
   12833             :   StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
   12834      335918 :   if (!ST || ST->isVolatile())
   12835     2135182 :     return SDValue();
   12836             : 
   12837      333627 :   EVT LDType = LD->getValueType(0);
   12838      333627 :   EVT LDMemType = LD->getMemoryVT();
   12839      333627 :   EVT STMemType = ST->getMemoryVT();
   12840      333627 :   EVT STType = ST->getValue().getValueType();
   12841             : 
   12842      333627 :   BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
   12843      333627 :   BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
   12844             :   int64_t Offset;
   12845             : 
   12846             :   bool STCoversLD =
   12847      391529 :       BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset) && (Offset >= 0) &&
   12848      390355 :       (Offset * 8 <= LDMemType.getSizeInBits()) &&
   12849       20922 :       (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());
   12850             : 
   12851             :   if (!STCoversLD)
   12852      314277 :     return SDValue();
   12853             : 
   12854             :   // Normalize for Endianness.
   12855       19350 :   if (DAG.getDataLayout().isBigEndian())
   12856        2466 :     Offset =
   12857        2466 :         (STMemType.getSizeInBits() - LDMemType.getSizeInBits()) / 8 - Offset;
   12858             : 
   12859             :   // Memory as copy space (potentially masked).
   12860       26235 :   if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
   12861             :     // Simple case: Direct non-truncating forwarding
   12862        4132 :     if (LDType.getSizeInBits() == LDMemType.getSizeInBits())
   12863        4086 :       return CombineTo(LD, ST->getValue(), Chain);
   12864             :     // Can we model the truncate and extension with an and mask?
   12865         138 :     if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
   12866          92 :         !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
   12867             :       // Mask to size of LDMemType
   12868             :       auto Mask =
   12869          92 :           DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(),
   12870             :                                                STMemType.getSizeInBits()),
   12871          46 :                           SDLoc(ST), STType);
   12872          92 :       auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
   12873             :       return CombineTo(LD, Val, Chain);
   12874             :     }
   12875             :   }
   12876             : 
   12877             :   // TODO: Deal with nonzero offset.
   12878       30436 :   if (LD->getBasePtr().isUndef() || Offset != 0)
   12879        6352 :     return SDValue();
   12880             :   // Model necessary truncations / extenstions.
   12881        8866 :   SDValue Val;
   12882             :   // Truncate Value To Stored Memory Size.
   12883             :   do {
   12884        8866 :     if (!getTruncatedStoreValue(ST, Val))
   12885             :       continue;
   12886        8840 :     if (!isTypeLegal(LDMemType))
   12887             :       continue;
   12888        3114 :     if (STMemType != LDMemType) {
   12889        4722 :       if (numVectorEltsOrZero(STMemType) == numVectorEltsOrZero(LDMemType) &&
   12890        2834 :           STMemType.isInteger() && LDMemType.isInteger())
   12891          69 :         Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
   12892             :       else
   12893             :         continue;
   12894             :     }
   12895         347 :     if (!extendLoadedValueToExtension(LD, Val))
   12896             :       continue;
   12897         333 :     return CombineTo(LD, Val, Chain);
   12898             :   } while (false);
   12899             : 
   12900             :   // On failure, cleanup dead nodes we may have created.
   12901        8533 :   if (Val->use_empty())
   12902        1594 :     deleteAndRecombine(Val.getNode());
   12903        8533 :   return SDValue();
   12904             : }
   12905             : 
   12906     6474249 : SDValue DAGCombiner::visitLOAD(SDNode *N) {
   12907             :   LoadSDNode *LD  = cast<LoadSDNode>(N);
   12908     6474249 :   SDValue Chain = LD->getChain();
   12909     6474249 :   SDValue Ptr   = LD->getBasePtr();
   12910             : 
   12911             :   // If load is not volatile and there are no uses of the loaded value (and
   12912             :   // the updated indexed value in case of indexed loads), change uses of the
   12913             :   // chain value into uses of the chain input (i.e. delete the dead load).
   12914     6474249 :   if (!LD->isVolatile()) {
   12915     6432490 :     if (N->getValueType(1) == MVT::Other) {
   12916             :       // Unindexed loads.
   12917     6432083 :       if (!N->hasAnyUseOfValue(0)) {
   12918             :         // It's not safe to use the two value CombineTo variant here. e.g.
   12919             :         // v1, chain2 = load chain1, loc
   12920             :         // v2, chain3 = load chain2, loc
   12921             :         // v3         = add v2, c
   12922             :         // Now we replace use of chain2 with chain1.  This makes the second load
   12923             :         // isomorphic to the one we are deleting, and thus makes this load live.
   12924             :         LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
   12925             :                    dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
   12926             :                    dbgs() << "\n");
   12927             :         WorklistRemover DeadNodes(*this);
   12928      240758 :         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
   12929             :         AddUsersToWorklist(Chain.getNode());
   12930      120379 :         if (N->use_empty())
   12931      120365 :           deleteAndRecombine(N);
   12932             : 
   12933      120379 :         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   12934             :       }
   12935             :     } else {
   12936             :       // Indexed loads.
   12937             :       assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
   12938             : 
   12939             :       // If this load has an opaque TargetConstant offset, then we cannot split
   12940             :       // the indexing into an add/sub directly (that TargetConstant may not be
   12941             :       // valid for a different type of node, and we cannot convert an opaque
   12942             :       // target constant into a regular constant).
   12943         814 :       bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
   12944         173 :                        cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
   12945             : 
   12946         407 :       if (!N->hasAnyUseOfValue(0) &&
   12947           3 :           ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
   12948           6 :         SDValue Undef = DAG.getUNDEF(N->getValueType(0));
   12949           3 :         SDValue Index;
   12950           3 :         if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
   12951           3 :           Index = SplitIndexingFromLoad(LD);
   12952             :           // Try to fold the base pointer arithmetic into subsequent loads and
   12953             :           // stores.
   12954             :           AddUsersToWorklist(N);
   12955             :         } else
   12956           0 :           Index = DAG.getUNDEF(N->getValueType(1));
   12957             :         LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
   12958             :                    dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
   12959             :                    dbgs() << " and 2 other values\n");
   12960             :         WorklistRemover DeadNodes(*this);
   12961           6 :         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
   12962           6 :         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
   12963           6 :         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
   12964           3 :         deleteAndRecombine(N);
   12965           3 :         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   12966             :       }
   12967             :     }
   12968             :   }
   12969             : 
   12970             :   // If this load is directly stored, replace the load value with the stored
   12971             :   // value.
   12972     6353867 :   if (auto V = ForwardStoreValueToDirectLoad(LD))
   12973        4465 :     return V;
   12974             : 
   12975             :   // Try to infer better alignment information than the load already has.
   12976     6349402 :   if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
   12977     2504543 :     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
   12978     1207066 :       if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
   12979       27799 :         SDValue NewLoad = DAG.getExtLoad(
   12980       27799 :             LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
   12981             :             LD->getPointerInfo(), LD->getMemoryVT(), Align,
   12982       92763 :             LD->getMemOperand()->getFlags(), LD->getAAInfo());
   12983             :         // NewLoad will always be N as we are only refining the alignment
   12984             :         assert(NewLoad.getNode() == N);
   12985             :         (void)NewLoad;
   12986             :       }
   12987             :     }
   12988             :   }
   12989             : 
   12990     6349402 :   if (LD->isUnindexed()) {
   12991             :     // Walk up chain skipping non-aliasing memory nodes.
   12992     6348994 :     SDValue BetterChain = FindBetterChain(N, Chain);
   12993             : 
   12994             :     // If there is a better chain.
   12995             :     if (Chain != BetterChain) {
   12996             :       SDValue ReplLoad;
   12997             : 
   12998             :       // Replace the chain to void dependency.
   12999      194523 :       if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
   13000      362800 :         ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
   13001      535459 :                                BetterChain, Ptr, LD->getMemOperand());
   13002             :       } else {
   13003        4929 :         ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
   13004             :                                   LD->getValueType(0),
   13005             :                                   BetterChain, Ptr, LD->getMemoryVT(),
   13006        4929 :                                   LD->getMemOperand());
   13007             :       }
   13008             : 
   13009             :       // Create token factor to keep old chain connected.
   13010      194523 :       SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
   13011      389046 :                                   MVT::Other, Chain, ReplLoad.getValue(1));
   13012             : 
   13013             :       // Replace uses with load result and token factor
   13014             :       return CombineTo(N, ReplLoad.getValue(0), Token);
   13015             :     }
   13016             :   }
   13017             : 
   13018             :   // Try transforming N to an indexed load.
   13019     6154879 :   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
   13020         563 :     return SDValue(N, 0);
   13021             : 
   13022             :   // Try to slice up N to more direct loads if the slices are mapped to
   13023             :   // different register banks or pairing can take place.
   13024     6154316 :   if (SliceUpLoad(N))
   13025           5 :     return SDValue(N, 0);
   13026             : 
   13027     6154311 :   return SDValue();
   13028             : }
   13029             : 
   13030             : namespace {
   13031             : 
   13032             : /// Helper structure used to slice a load into smaller loads.
   13033             : /// Basically a slice is obtained from the following sequence:
   13034             : /// Origin = load Ty1, Base
   13035             : /// Shift = srl Ty1 Origin, CstTy Amount
   13036             : /// Inst = trunc Shift to Ty2
   13037             : ///
   13038             : /// Then, it will be rewritten into:
   13039             : /// Slice = load SliceTy, Base + SliceOffset
   13040             : /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
   13041             : ///
   13042             : /// SliceTy is deduced from the number of bits that are actually used to
   13043             : /// build Inst.
   13044             : struct LoadedSlice {
   13045             :   /// Helper structure used to compute the cost of a slice.
   13046             :   struct Cost {
   13047             :     /// Are we optimizing for code size?
   13048             :     bool ForCodeSize;
   13049             : 
   13050             :     /// Per-operation counters that make up the cost.
   13051             :     unsigned Loads = 0;
   13052             :     unsigned Truncates = 0;
   13053             :     unsigned CrossRegisterBanksCopies = 0;
   13054             :     unsigned ZExts = 0;
   13055             :     unsigned Shift = 0;
   13056             : 
   13057          20 :     Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {}
   13058             : 
   13059             :     /// Get the cost of one isolated slice.
   13060          40 :     Cost(const LoadedSlice &LS, bool ForCodeSize = false)
   13061          40 :         : ForCodeSize(ForCodeSize), Loads(1) {
   13062          40 :       EVT TruncType = LS.Inst->getValueType(0);
   13063          40 :       EVT LoadedType = LS.getLoadedType();
   13064          40 :       if (TruncType != LoadedType &&
   13065           0 :           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
   13066           0 :         ZExts = 1;
   13067          40 :     }
   13068             : 
   13069             :     /// Account for slicing gain in the current cost.
   13070             :     /// Slicing provides a few gains like removing a shift or a
   13071             :     /// truncate. This method grows the cost of the original
   13072             :     /// load with the gain from this slice.
   13073          40 :     void addSliceGain(const LoadedSlice &LS) {
   13074             :       // Each slice saves a truncate, unless truncation is free.
   13075          40 :       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
   13076          80 :       if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
   13077          40 :                               LS.Inst->getValueType(0)))
   13078          12 :         ++Truncates;
   13079             :       // If there is a shift amount, this slice gets rid of it.
   13080          40 :       if (LS.Shift)
   13081          20 :         ++Shift;
   13082             :       // If this slice can merge a cross register bank copy, account for it.
   13083          40 :       if (LS.canMergeExpensiveCrossRegisterBankCopy())
   13084           4 :         ++CrossRegisterBanksCopies;
   13085          40 :     }
   13086             : 
   13087             :     Cost &operator+=(const Cost &RHS) {
   13088          40 :       Loads += RHS.Loads;
   13089          40 :       Truncates += RHS.Truncates;
   13090          40 :       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
   13091          40 :       ZExts += RHS.ZExts;
   13092          40 :       Shift += RHS.Shift;
   13093             :       return *this;
   13094             :     }
   13095             : 
   13096             :     bool operator==(const Cost &RHS) const {
   13097             :       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
   13098             :              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
   13099             :              ZExts == RHS.ZExts && Shift == RHS.Shift;
   13100             :     }
   13101             : 
   13102             :     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
   13103             : 
   13104          20 :     bool operator<(const Cost &RHS) const {
   13105             :       // Assume cross register banks copies are as expensive as loads.
   13106             :       // FIXME: Do we want some more target hooks?
   13107          20 :       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
   13108          20 :       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
   13109             :       // Unless we are optimizing for code size, consider the
   13110             :       // expensive operations first.
   13111          20 :       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
   13112          19 :         return ExpensiveOpsLHS < ExpensiveOpsRHS;
   13113           1 :       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
   13114           1 :              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
   13115             :     }
   13116             : 
   13117          20 :     bool operator>(const Cost &RHS) const { return RHS < *this; }
   13118             : 
   13119             :     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
   13120             : 
   13121             :     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
   13122             :   };
   13123             : 
   13124             :   // The last instruction that represents the slice. This should be a
   13125             :   // truncate instruction.
   13126             :   SDNode *Inst;
   13127             : 
   13128             :   // The original load instruction.
   13129             :   LoadSDNode *Origin;
   13130             : 
   13131             :   // The right shift amount in bits from the original load.
   13132             :   unsigned Shift;
   13133             : 
   13134             :   // The DAG that Origin comes from.
   13135             :   // This is used to get some contextual information about legal types, etc.
   13136             :   SelectionDAG *DAG;
   13137             : 
   13138             :   LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
   13139             :               unsigned Shift = 0, SelectionDAG *DAG = nullptr)
   13140         991 :       : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
   13141             : 
   13142             :   /// Get the bits used in a chunk of bits \p BitWidth large.
   13143             :   /// \return Result is \p BitWidth wide and has used bits set to 1 and
   13144             :   ///         unused bits set to 0.
   13145        2143 :   APInt getUsedBits() const {
   13146             :     // Reproduce the trunc(lshr) sequence:
   13147             :     // - Start from the truncated value.
   13148             :     // - Zero extend to the desired bit width.
   13149             :     // - Shift left.
   13150             :     assert(Origin && "No original load to compare against.");
   13151        2143 :     unsigned BitWidth = Origin->getValueSizeInBits(0);
   13152             :     assert(Inst && "This slice is not bound to an instruction");
   13153             :     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
   13154             :            "Extracted slice is bigger than the whole type!");
   13155        2143 :     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
   13156        2143 :     UsedBits.setAllBits();
   13157        2143 :     UsedBits = UsedBits.zext(BitWidth);
   13158        2143 :     UsedBits <<= Shift;
   13159        2143 :     return UsedBits;
   13160             :   }
   13161             : 
   13162             :   /// Get the size of the slice to be loaded in bytes.
   13163        1148 :   unsigned getLoadedSize() const {
   13164        1148 :     unsigned SliceSize = getUsedBits().countPopulation();
   13165             :     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
   13166        1148 :     return SliceSize / 8;
   13167             :   }
   13168             : 
   13169             :   /// Get the type that will be loaded for this slice.
   13170             :   /// Note: This may not be the final type for the slice.
   13171        1068 :   EVT getLoadedType() const {
   13172             :     assert(DAG && "Missing context");
   13173        1068 :     LLVMContext &Ctxt = *DAG->getContext();
   13174        1068 :     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
   13175             :   }
   13176             : 
   13177             :   /// Get the alignment of the load used for this slice.
   13178          16 :   unsigned getAlignment() const {
   13179          16 :     unsigned Alignment = Origin->getAlignment();
   13180          16 :     unsigned Offset = getOffsetFromBase();
   13181          16 :     if (Offset != 0)
   13182          14 :       Alignment = MinAlign(Alignment, Alignment + Offset);
   13183          16 :     return Alignment;
   13184             :   }
   13185             : 
   13186             :   /// Check if this slice can be rewritten with legal operations.
   13187         974 :   bool isLegal() const {
   13188             :     // An invalid slice is not legal.
   13189         974 :     if (!Origin || !Inst || !DAG)
   13190             :       return false;
   13191             : 
   13192             :     // Offsets are for indexed loads only; we do not handle those.
   13193        1948 :     if (!Origin->getOffset().isUndef())
   13194             :       return false;
   13195             : 
   13196         974 :     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
   13197             : 
   13198             :     // Check that the type is legal.
   13199         974 :     EVT SliceType = getLoadedType();
   13200             :     if (!TLI.isTypeLegal(SliceType))
   13201             :       return false;
   13202             : 
   13203             :     // Check that the load is legal for this type.
   13204             :     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
   13205         670 :       return false;
   13206             : 
   13207             :     // Check that the offset can be computed.
   13208             :     // 1. Check its type.
   13209         570 :     EVT PtrType = Origin->getBasePtr().getValueType();
   13210         285 :     if (PtrType == MVT::Untyped || PtrType.isExtended())
   13211           0 :       return false;
   13212             : 
   13213             :     // 2. Check that it fits in the immediate.
   13214         285 :     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
   13215             :       return false;
   13216             : 
   13217             :     // 3. Check that the computation is legal.
   13218             :     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
   13219           0 :       return false;
   13220             : 
   13221             :     // Check that the zext is legal if it needs one.
   13222         285 :     EVT TruncateType = Inst->getValueType(0);
   13223             :     if (TruncateType != SliceType &&
   13224             :         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
   13225           0 :       return false;
   13226             : 
   13227             :     return true;
   13228             :   }
   13229             : 
   13230             :   /// Get the offset in bytes of this slice in the original chunk of
   13231             :   /// bits.
   13232             :   /// \pre DAG != nullptr.
   13233         377 :   uint64_t getOffsetFromBase() const {
   13234             :     assert(DAG && "Missing context.");
   13235         377 :     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
   13236             :     assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
   13237         377 :     uint64_t Offset = Shift / 8;
   13238         377 :     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
   13239             :     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
   13240             :            "The size of the original loaded type is not a multiple of a"
   13241             :            " byte.");
   13242             :     // If Offset is not smaller than TySizeInBytes, it means we are loading
   13243             :     // all zeros. This should have been optimized earlier in the process.
   13244             :     assert(TySizeInBytes > Offset &&
   13245             :            "Invalid shift amount for given loaded size");
   13246         377 :     if (IsBigEndian)
   13247          80 :       Offset = TySizeInBytes - Offset - getLoadedSize();
   13248         377 :     return Offset;
   13249             :   }
   13250             : 
   13251             :   /// Generate the sequence of instructions to load the slice
   13252             :   /// represented by this object and redirect the uses of this slice to
   13253             :   /// this new sequence of instructions.
   13254             :   /// \pre this->Inst && this->Origin are valid Instructions and this
   13255             :   /// object passed the legal check: LoadedSlice::isLegal returned true.
   13256             :   /// \return The last instruction of the sequence used to load the slice.
   13257          10 :   SDValue loadSlice() const {
   13258             :     assert(Inst && Origin && "Unable to replace a non-existing slice.");
   13259          10 :     const SDValue &OldBaseAddr = Origin->getBasePtr();
   13260          10 :     SDValue BaseAddr = OldBaseAddr;
   13261             :     // Get the offset in that chunk of bytes w.r.t. the endianness.
   13262          10 :     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
   13263             :     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
   13264          10 :     if (Offset) {
   13265             :       // BaseAddr = BaseAddr + Offset.
   13266           5 :       EVT ArithType = BaseAddr.getValueType();
   13267           5 :       SDLoc DL(Origin);
   13268           5 :       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
   13269           5 :                               DAG->getConstant(Offset, DL, ArithType));
   13270             :     }
   13271             : 
   13272             :     // Create the type of the loaded slice according to its size.
   13273          10 :     EVT SliceType = getLoadedType();
   13274             : 
   13275             :     // Create the load for the slice.
   13276             :     SDValue LastInst =
   13277          20 :         DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
   13278          10 :                      Origin->getPointerInfo().getWithOffset(Offset),
   13279          20 :                      getAlignment(), Origin->getMemOperand()->getFlags());
   13280             :     // If the final type is not the same as the loaded type, this means that
   13281             :     // we have to pad with zeros. Create a zero extend for that.
   13282          10 :     EVT FinalType = Inst->getValueType(0);
   13283          10 :     if (SliceType != FinalType)
   13284           1 :       LastInst =
   13285           1 :           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
   13286          10 :     return LastInst;
   13287             :   }
   13288             : 
   13289             :   /// Check if this slice can be merged with an expensive cross register
   13290             :   /// bank copy. E.g.,
   13291             :   /// i = load i32
   13292             :   /// f = bitcast i32 i to float
   13293          40 :   bool canMergeExpensiveCrossRegisterBankCopy() const {
   13294          40 :     if (!Inst || !Inst->hasOneUse())
   13295             :       return false;
   13296             :     SDNode *Use = *Inst->use_begin();
   13297          36 :     if (Use->getOpcode() != ISD::BITCAST)
   13298             :       return false;
   13299             :     assert(DAG && "Missing context");
   13300           4 :     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
   13301           4 :     EVT ResVT = Use->getValueType(0);
   13302           4 :     const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
   13303             :     const TargetRegisterClass *ArgRC =
   13304           8 :         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
   13305           4 :     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
   13306           0 :       return false;
   13307             : 
   13308             :     // At this point, we know that we perform a cross-register-bank copy.
   13309             :     // Check if it is expensive.
   13310           8 :     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
   13311             :     // Assume bitcasts are cheap, unless both register classes do not
   13312             :     // explicitly share a common sub class.
   13313           4 :     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
   13314           0 :       return false;
   13315             : 
   13316             :     // Check if it will be merged with the load.
   13317             :     // 1. Check the alignment constraint.
   13318           8 :     unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
   13319           4 :         ResVT.getTypeForEVT(*DAG->getContext()));
   13320             : 
   13321           4 :     if (RequiredAlignment > getAlignment())
   13322             :       return false;
   13323             : 
   13324             :     // 2. Check that the load is legal for that type (rechecked from above).
   13325             :     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
   13326           0 :       return false;
   13327             : 
   13328             :     // 3. Check that we do not have a zext in the way.
   13329           4 :     if (Inst->getValueType(0) != getLoadedType())
   13330           0 :       return false;
   13331             : 
   13332             :     return true;
   13333             :   }
   13334             : };
   13335             : 
   13336             : } // end anonymous namespace
   13337             : 
   13338             : /// Check that all bits set in \p UsedBits form a dense region, i.e.,
   13339             : /// \p UsedBits looks like 0..0 1..1 0..0.
   13340          24 : static bool areUsedBitsDense(const APInt &UsedBits) {
   13341             :   // If all the bits are one, this is dense!
   13342          24 :   if (UsedBits.isAllOnesValue())
   13343             :     return true;
   13344             : 
   13345             :   // Drop the unused low-order (trailing zero) bits by shifting right.
   13346           2 :   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
   13347             :   // Drop the unused high-order (leading zero) bits by truncating.
   13348           2 :   if (NarrowedUsedBits.countLeadingZeros())
   13349           2 :     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
   13350             :   // Check that the remaining chunk of bits is completely used.
   13351             :   return NarrowedUsedBits.isAllOnesValue();
   13352             : }
   13353             : 
   13354             : /// Check whether or not \p First and \p Second are next to each other
   13355             : /// in memory, i.e., the union of their used bits is dense (no hole).
   13356             : /// \pre Both slices come from the same, non-null origin load.
   13357           2 : static bool areSlicesNextToEachOther(const LoadedSlice &First,
   13358             :                                      const LoadedSlice &Second) {
   13359             :   assert(First.Origin == Second.Origin && First.Origin &&
   13360             :          "Unable to match different memory origins.");
   13361           2 :   APInt UsedBits = First.getUsedBits();
   13362             :   assert((UsedBits & Second.getUsedBits()) == 0 &&
   13363             :          "Slices are not supposed to overlap.");
   13364           2 :   UsedBits |= Second.getUsedBits();
   13365           2 :   return areUsedBitsDense(UsedBits);
   13366             : }
   13367             : 
   13368             : /// Adjust the \p GlobalLSCost according to the target
   13369             : /// pairing capabilities and the layout of the slices.
   13370             : /// \pre \p GlobalLSCost should account for at least as many loads as
   13371             : /// there are slices in \p LoadedSlices.
   13372           0 : static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
   13373             :                                  LoadedSlice::Cost &GlobalLSCost) {
   13374           0 :   unsigned NumberOfSlices = LoadedSlices.size();
   13375             :   // If there are fewer than 2 elements, no pairing is possible.
   13376           0 :   if (NumberOfSlices < 2)
   13377           0 :     return;
   13378             : 
   13379             :   // Sort the slices so that elements that are likely to be next to each
   13380             :   // other in memory are next to each other in the list.
   13381           0 :   llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
   13382             :     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
   13383           0 :     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
   13384             :   });
   13385           0 :   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
   13386             :   // First (resp. Second) is the first (resp. second) potential candidate
   13387             :   // to be placed in a paired load.
   13388             :   const LoadedSlice *First = nullptr;
   13389             :   const LoadedSlice *Second = nullptr;
   13390           0 :   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
   13391             :                 // Set the beginning of the pair.
   13392             :                                                            First = Second) {
   13393           0 :     Second = &LoadedSlices[CurrSlice];
   13394             : 
   13395             :     // If First is NULL, it means we start a new pair.
   13396             :     // Get to the next slice.
   13397           0 :     if (!First)
   13398           0 :       continue;
   13399             : 
   13400           0 :     EVT LoadedType = First->getLoadedType();
   13401             : 
   13402             :     // If the types of the slices are different, we cannot pair them.
   13403           0 :     if (LoadedType != Second->getLoadedType())
   13404           0 :       continue;
   13405             : 
   13406             :     // Check if the target supplies paired loads for this type.
   13407           0 :     unsigned RequiredAlignment = 0;
   13408           0 :     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
   13409             :       // Move to the next pair; this type is hopeless.
   13410             :       Second = nullptr;
   13411           0 :       continue;
   13412             :     }
   13413             :     // Check if we meet the alignment requirement.
   13414           0 :     if (RequiredAlignment > First->getAlignment())
   13415           0 :       continue;
   13416             : 
   13417             :     // Check that both loads are next to each other in memory.
   13418           0 :     if (!areSlicesNextToEachOther(*First, *Second))
   13419           0 :       continue;
   13420             : 
   13421             :     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
   13422           0 :     --GlobalLSCost.Loads;
   13423             :     // Move to the next pair.
   13424             :     Second = nullptr;
   13425             :   }
   13426             : }
   13427             : 
   13428             : /// Check the profitability of all involved LoadedSlice.
   13429             : /// Currently, it is considered profitable if there are exactly two
   13430             : /// involved slices (1) which are (2) next to each other in memory, and
   13431             : /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
   13432             : /// When StressLoadSlicing is set, any split into more than one slice passes.
   13433             : /// Note: The order of the elements in \p LoadedSlices may be modified, but not
   13434             : /// the elements themselves.
   13435             : ///
   13436             : /// FIXME: When the cost model is mature enough, we can relax
   13437             : /// constraints (1) and (2).
   13438         127 : static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
   13439             :                                 const APInt &UsedBits, bool ForCodeSize) {
   13440         127 :   unsigned NumberOfSlices = LoadedSlices.size();
   13441         127 :   if (StressLoadSlicing)
   13442           2 :     return NumberOfSlices > 1;
   13443             : 
   13444             :   // Check (1).
   13445         125 :   if (NumberOfSlices != 2)
   13446             :     return false;
   13447             : 
   13448             :   // Check (2).
   13449          22 :   if (!areUsedBitsDense(UsedBits))
   13450             :     return false;
   13451             : 
   13452             :   // Check (3).
   13453          20 :   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
   13454             :   // The original code has one big load.
   13455          20 :   OrigCost.Loads = 1;
   13456          60 :   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
   13457          40 :     const LoadedSlice &LS = LoadedSlices[CurrSlice];
   13458             :     // Accumulate the cost of all the slices.
   13459          40 :     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
   13460             :     GlobalSlicingCost += SliceCost;
   13461             : 
   13462             :     // Account as cost in the original configuration the gain obtained
   13463             :     // with the current slices.
   13464          40 :     OrigCost.addSliceGain(LS);
   13465             :   }
   13466             : 
   13467             :   // If the target supports paired load, adjust the cost accordingly.
   13468          20 :   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
   13469          20 :   return OrigCost > GlobalSlicingCost;
   13470             : }
   13471             : 
   13472             : /// If the given load, \p N, is used only by trunc or trunc(lshr)
   13473             : /// operations, split it into the various pieces being extracted.
   13474             : ///
   13475             : /// This sort of thing is introduced by SROA.
   13476             : /// This slicing takes care not to insert overlapping loads.
   13477             : /// \returns true iff the load was sliced; volatile loads are rejected.
   13478     6154316 : bool DAGCombiner::SliceUpLoad(SDNode *N) {
   13479     6154316 :   if (Level < AfterLegalizeDAG)
   13480             :     return false;
   13481             : 
   13482             :   LoadSDNode *LD = cast<LoadSDNode>(N);
   13483     2591608 :   if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
   13484     5048014 :       !LD->getValueType(0).isInteger())
   13485      129236 :     return false;
   13486             : 
   13487             :   // Keep track of already used bits to detect overlapping values.
   13488             :   // In that case, we will just abort the transformation.
   13489             :   APInt UsedBits(LD->getValueSizeInBits(0), 0);
   13490             : 
   13491     2462372 :   SmallVector<LoadedSlice, 4> LoadedSlices;
   13492             : 
   13493             :   // Check if this load is used as several smaller chunks of bits.
   13494             :   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
   13495             :   // of computation for each trunc.
   13496     2462372 :   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
   13497     2979559 :        UI != UIEnd; ++UI) {
   13498             :     // Skip the uses of the chain (only result 0, the loaded value, matters).
   13499     2979432 :     if (UI.getUse().getResNo() != 0)
   13500      516902 :       continue;
   13501             : 
   13502             :     SDNode *User = *UI;
   13503             :     unsigned Shift = 0;
   13504             : 
   13505             :     // Check if this is a trunc(lshr) with a constant shift amount.
   13506     2462530 :     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
   13507        8208 :         isa<ConstantSDNode>(User->getOperand(1))) {
   13508        3376 :       Shift = User->getConstantOperandVal(1);
   13509             :       User = *User->use_begin();
   13510             :     }
   13511             : 
   13512             :     // At this point, User is a TRUNCATE iff we encountered trunc or
   13513             :     // trunc(lshr). Any other kind of use aborts the whole transformation.
   13514     2462530 :     if (User->getOpcode() != ISD::TRUNCATE)
   13515     2462245 :       return false;
   13516             : 
   13517             :     // The width of the type must be a power of 2 and at least 8 bits.
   13518             :     // Otherwise the load cannot be represented in LLVM IR.
   13519             :     // Moreover, if we shifted with a non-8-bits multiple, the slice
   13520             :     // will be across several bytes. We do not support that.
   13521             :     unsigned Width = User->getValueSizeInBits(0);
   13522        2076 :     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
   13523             :       return false;
   13524             : 
   13525             :     // Build the slice for this chain of computations.
   13526         991 :     LoadedSlice LS(User, LD, Shift, &DAG);
   13527         991 :     APInt CurrentUsedBits = LS.getUsedBits();
   13528             : 
   13529             :     // Check if this slice overlaps with another.
   13530         991 :     if ((CurrentUsedBits & UsedBits) != 0)
   13531             :       return false;
   13532             :     // Update the bits used globally.
   13533             :     UsedBits |= CurrentUsedBits;
   13534             : 
   13535             :     // Check if the new slice would be legal.
   13536         974 :     if (!LS.isLegal())
   13537             :       return false;
   13538             : 
   13539             :     // Record the slice.
   13540         285 :     LoadedSlices.push_back(LS);
   13541             :   }
   13542             : 
   13543             :   // Abort slicing if it does not seem to be profitable.
   13544         127 :   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
   13545             :     return false;
   13546             : 
   13547             :   ++SlicedLoads;
   13548             : 
   13549             :   // Rewrite each chain to use an independent load.
   13550             :   // By construction, each chain can be represented by a unique load.
   13551             : 
   13552             :   // Collect the output chain of every new load for the new token factor.
   13553             :   SmallVector<SDValue, 8> ArgChains;
   13554          10 :   for (SmallVectorImpl<LoadedSlice>::const_iterator
   13555             :            LSIt = LoadedSlices.begin(),
   13556             :            LSItEnd = LoadedSlices.end();
   13557          15 :        LSIt != LSItEnd; ++LSIt) {
   13558          10 :     SDValue SliceInst = LSIt->loadSlice();
   13559          10 :     CombineTo(LSIt->Inst, SliceInst, true);
   13560          10 :     if (SliceInst.getOpcode() != ISD::LOAD) // Look through one wrapping node.
   13561           1 :       SliceInst = SliceInst.getOperand(0);
   13562             :     assert(SliceInst->getOpcode() == ISD::LOAD &&
   13563             :            "It takes more than a zext to get to the loaded slice!!");
   13564          10 :     ArgChains.push_back(SliceInst.getValue(1));
   13565             :   }
   13566             : 
   13567           5 :   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
   13568           5 :                               ArgChains);
   13569          10 :   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
   13570           5 :   AddToWorklist(Chain.getNode());
   13571             :   return true;
   13572             : }
   13573             : 
   13574             : /// Check to see if V is (and load (ptr), imm), where the load has
   13575             : /// specific bytes cleared out.  If so, return the number of bytes being masked
   13576             : /// out and their byte offset (from the low bits); otherwise return (0, 0).
   13577             : static std::pair<unsigned, unsigned>
   13578           0 : CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
   13579             :   std::pair<unsigned, unsigned> Result(0, 0);
   13580             : 
   13581             :   // Check for the structure we're looking for.
   13582           0 :   if (V->getOpcode() != ISD::AND ||
   13583           0 :       !isa<ConstantSDNode>(V->getOperand(1)) ||
   13584           0 :       !ISD::isNormalLoad(V->getOperand(0).getNode()))
   13585           0 :     return Result;
   13586             : 
   13587             :   // Check the chain and pointer.
   13588             :   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
   13589           0 :   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
   13590             : 
   13591             :   // This only handles simple types.
   13592           0 :   if (V.getValueType() != MVT::i16 &&
   13593           0 :       V.getValueType() != MVT::i32 &&
   13594           0 :       V.getValueType() != MVT::i64)
   13595           0 :     return Result;
   13596             : 
   13597             :   // Check the constant mask.  Invert it so that the bits being masked out are
   13598             :   // 1 and the bits being kept are 0.  Use getSExtValue so that leading bits
   13599             :   // follow the sign bit for uniformity.
   13600           0 :   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
   13601           0 :   unsigned NotMaskLZ = countLeadingZeros(NotMask);
   13602           0 :   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
   13603           0 :   unsigned NotMaskTZ = countTrailingZeros(NotMask);
   13604           0 :   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
   13605           0 :   if (NotMaskLZ == 64) return Result;  // All zero mask.
   13606             : 
   13607             :   // See if we have a contiguous run of bits.  If so, we have 0*1+0*
   13608           0 :   if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
   13609           0 :     return Result;
   13610             : 
   13611             :   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
   13612           0 :   if (V.getValueType() != MVT::i64 && NotMaskLZ)
   13613           0 :     NotMaskLZ -= 64-V.getValueSizeInBits();
   13614             : 
   13615           0 :   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
   13616           0 :   switch (MaskedBytes) {
   13617             :   case 1:
   13618             :   case 2:
   13619             :   case 4: break;
   13620           0 :   default: return Result; // Any other width, e.g. all one mask or 5-byte mask.
   13621             :   }
   13622             : 
   13623             :   // Verify that the first bit starts at a multiple of mask so that the access
   13624             :   // is aligned the same as the access width.
   13625           0 :   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
   13626             : 
   13627             :   // For narrowing to be valid, the load must be the immediately
   13628             :   // preceding memory operation before the store.
   13629           0 :   if (LD == Chain.getNode())
   13630             :     ; // ok.
   13631           0 :   else if (Chain->getOpcode() == ISD::TokenFactor &&
   13632           0 :            SDValue(LD, 1).hasOneUse()) {
   13633             :     // LD has only 1 chain use so there are no indirect dependencies.
   13634             :     bool isOk = false;
   13635           0 :     for (const SDValue &ChainOp : Chain->op_values())
   13636           0 :       if (ChainOp.getNode() == LD) {
   13637             :         isOk = true;
   13638             :         break;
   13639             :       }
   13640           0 :     if (!isOk)
   13641           0 :       return Result;
   13642             :   } else
   13643           0 :     return Result; // Fail.
   13644             : 
   13645             :   Result.first = MaskedBytes;
   13646           0 :   Result.second = NotMaskTZ/8;
   13647           0 :   return Result;
   13648             : }
   13649             : 
   13650             : /// Check to see if IVal is something that provides a value as specified by
   13651             : /// MaskInfo (byte count, byte shift). If so, build a narrower store of the
   13652             : /// truncated IVal to replace \p St and return it; otherwise return nullptr.
   13653             : static SDNode *
   13654           0 : ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
   13655             :                                 SDValue IVal, StoreSDNode *St,
   13656             :                                 DAGCombiner *DC) {
   13657           0 :   unsigned NumBytes = MaskInfo.first;
   13658           0 :   unsigned ByteShift = MaskInfo.second;
   13659           0 :   SelectionDAG &DAG = DC->getDAG();
   13660             : 
   13661             :   // Check to see if IVal is all zeros outside the part being masked in by the
   13662             :   // 'or' that uses this.  If not, this is not a replacement.
   13663           0 :   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
   13664           0 :                                   ByteShift*8, (ByteShift+NumBytes)*8);
   13665           0 :   if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
   13666             : 
   13667             :   // Check that it is legal on the target to do this.  It is legal if the new
   13668             :   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
   13669             :   // legalization.
   13670           0 :   MVT VT = MVT::getIntegerVT(NumBytes*8);
   13671           0 :   if (!DC->isTypeLegal(VT))
   13672           0 :     return nullptr;
   13673             : 
   13674             :   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
   13675             :   // shifted by ByteShift and truncated down to NumBytes.
   13676           0 :   if (ByteShift) {
   13677           0 :     SDLoc DL(IVal);
   13678           0 :     IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
   13679             :                        DAG.getConstant(ByteShift*8, DL,
   13680           0 :                                     DC->getShiftAmountTy(IVal.getValueType())));
   13681             :   }
   13682             : 
   13683             :   // Figure out the offset for the store and the alignment of the access.
   13684             :   unsigned StOffset;
   13685           0 :   unsigned NewAlign = St->getAlignment();
   13686             : 
   13687           0 :   if (DAG.getDataLayout().isLittleEndian())
   13688             :     StOffset = ByteShift;
   13689             :   else
   13690           0 :     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
   13691             : 
   13692           0 :   SDValue Ptr = St->getBasePtr();
   13693           0 :   if (StOffset) {
   13694           0 :     SDLoc DL(IVal);
   13695           0 :     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
   13696           0 :                       Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
   13697           0 :     NewAlign = MinAlign(NewAlign, StOffset);
   13698             :   }
   13699             : 
   13700             :   // Truncate down to the new size.
   13701           0 :   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
   13702             : 
   13703             :   ++OpsNarrowed;
   13704             :   return DAG
   13705           0 :       .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
   13706           0 :                 St->getPointerInfo().getWithOffset(StOffset), NewAlign)
   13707           0 :       .getNode();
   13708             : }
   13709             : 
   13710             : /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
   13711             : /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
   13712             : /// narrowing the load and store if it would end up being a win for performance
   13713             : /// or code size. Returns the replacement store, or an empty SDValue if none.
   13714     7415489 : SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
   13715             :   StoreSDNode *ST  = cast<StoreSDNode>(N);
   13716     7415489 :   if (ST->isVolatile())
   13717       40346 :     return SDValue();
   13718             : 
   13719     7375143 :   SDValue Chain = ST->getChain();
   13720     7375143 :   SDValue Value = ST->getValue();
   13721     7375143 :   SDValue Ptr   = ST->getBasePtr();
   13722     7375143 :   EVT VT = Value.getValueType();
   13723             : 
   13724    20867955 :   if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
   13725     2579393 :     return SDValue();
   13726             : 
   13727             :   unsigned Opc = Value.getOpcode();
   13728             : 
   13729             :   // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
   13730             :   // is a byte mask indicating a consecutive number of bytes, check to see if
   13731             :   // Y is known to provide just those bytes.  If so, we try to replace the
   13732             :   // load + replace + store sequence with a single (narrower) store, which makes
   13733             :   // the load dead.
   13734     4795750 :   if (Opc == ISD::OR) {
   13735             :     std::pair<unsigned, unsigned> MaskedLoad;
   13736       16638 :     MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
   13737       16638 :     if (MaskedLoad.first)
   13738        1020 :       if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
   13739             :                                                   Value.getOperand(1), ST,this))
   13740          20 :         return SDValue(NewST, 0);
   13741             : 
   13742             :     // Or is commutative, so try swapping X and Y.
   13743       16618 :     MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
   13744       16618 :     if (MaskedLoad.first)
   13745           4 :       if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
   13746             :                                                   Value.getOperand(0), ST,this))
   13747           4 :         return SDValue(NewST, 0);
   13748             :   }
   13749             : 
   13750     4795726 :   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
   13751       33595 :       Value.getOperand(1).getOpcode() != ISD::Constant)
   13752     4770879 :     return SDValue();
   13753             : 
   13754       24847 :   SDValue N0 = Value.getOperand(0);
   13755        9068 :   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
   13756             :       Chain == SDValue(N0.getNode(), 1)) {
   13757             :     LoadSDNode *LD = cast<LoadSDNode>(N0);
   13758         798 :     if (LD->getBasePtr() != Ptr ||
   13759        1596 :         LD->getPointerInfo().getAddrSpace() !=
   13760        1596 :         ST->getPointerInfo().getAddrSpace())
   13761        2620 :       return SDValue();
   13762             : 
   13763             :     // Find the type to narrow the load / op / store to.
   13764         798 :     SDValue N1 = Value.getOperand(1);
   13765         798 :     unsigned BitWidth = N1.getValueSizeInBits();
   13766         798 :     APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
   13767         798 :     if (Opc == ISD::AND)
   13768         344 :       Imm ^= APInt::getAllOnesValue(BitWidth); // Invert: set bits = cleared bits.
   13769        1596 :     if (Imm == 0 || Imm.isAllOnesValue())
   13770          11 :       return SDValue();
   13771         787 :     unsigned ShAmt = Imm.countTrailingZeros();
   13772         787 :     unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
   13773         787 :     unsigned NewBW = NextPowerOf2(MSB - ShAmt);
   13774         787 :     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
   13775             :     // The narrowing should be profitable, the load/store operation should be
   13776             :     // legal (or custom) and the store size should be equal to the NewVT width.
   13777        2659 :     while (NewBW < BitWidth &&
   13778         242 :            (NewVT.getStoreSizeInBits() != NewBW ||
   13779         376 :             !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
   13780         134 :             !TLI.isNarrowingProfitable(VT, NewVT))) {
   13781        1872 :       NewBW = NextPowerOf2(NewBW);
   13782        1872 :       NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
   13783             :     }
   13784         787 :     if (NewBW >= BitWidth)
   13785         662 :       return SDValue();
   13786             : 
   13787             :     // If the lowest changed bit does not start at a NewBW boundary,
   13788             :     // round ShAmt down to the previous boundary.
   13789         125 :     if (ShAmt % NewBW)
   13790         105 :       ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
   13791             :     APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
   13792         242 :                                    std::min(BitWidth, ShAmt + NewBW));
   13793         125 :     if ((Imm & Mask) == Imm) {
   13794         233 :       APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
   13795         116 :       if (Opc == ISD::AND)
   13796           4 :         NewImm ^= APInt::getAllOnesValue(NewBW);
   13797         116 :       uint64_t PtrOff = ShAmt / 8;
   13798             :       // For big endian targets, we need to adjust the offset to the pointer to
   13799             :       // load the correct bytes.
   13800         116 :       if (DAG.getDataLayout().isBigEndian())
   13801           0 :         PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
   13802             : 
   13803         116 :       unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
   13804         116 :       Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
   13805         116 :       if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
   13806           0 :         return SDValue();
   13807             : 
   13808         232 :       SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
   13809             :                                    Ptr.getValueType(), Ptr,
   13810         116 :                                    DAG.getConstant(PtrOff, SDLoc(LD),
   13811         232 :                                                    Ptr.getValueType()));
   13812             :       SDValue NewLD =
   13813         232 :           DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
   13814             :                       LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
   13815         464 :                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
   13816         232 :       SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
   13817         116 :                                    DAG.getConstant(NewImm, SDLoc(Value),
   13818         232 :                                                    NewVT));
   13819             :       SDValue NewST =
   13820         116 :           DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
   13821         348 :                        ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
   13822             : 
   13823         116 :       AddToWorklist(NewPtr.getNode());
   13824         116 :       AddToWorklist(NewLD.getNode());
   13825         116 :       AddToWorklist(NewVal.getNode());
   13826             :       WorklistRemover DeadNodes(*this);
   13827         116 :       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
   13828             :       ++OpsNarrowed;
   13829         116 :       return NewST;
   13830             :     }
   13831             :   }
   13832             : 
   13833       21438 :   return SDValue();
   13834             : }
   13835             : 
   13836             : /// For a given floating point load / store pair, if the load value isn't used
   13837             : /// by any other operations, then consider transforming the pair to integer
   13838             : /// load / store operations if the target deems the transformation profitable.
   13839     7832750 : SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { // Empty SDValue if not transformed.
   13840             :   StoreSDNode *ST  = cast<StoreSDNode>(N);
   13841     7832750 :   SDValue Chain = ST->getChain();
   13842     7832750 :   SDValue Value = ST->getValue();
   13843     2018023 :   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
   13844             :       Value.hasOneUse() &&
   13845             :       Chain == SDValue(Value.getNode(), 1)) {
   13846             :     LoadSDNode *LD = cast<LoadSDNode>(Value);
   13847      640615 :     EVT VT = LD->getMemoryVT();
   13848          36 :     if (!VT.isFloatingPoint() ||
   13849        6941 :         VT != ST->getMemoryVT() ||
   13850        6941 :         LD->isNonTemporal() ||
   13851       13882 :         ST->isNonTemporal() ||
   13852      660748 :         LD->getPointerInfo().getAddrSpace() != 0 ||
   13853       12502 :         ST->getPointerInfo().getAddrSpace() != 0)
   13854      634364 :       return SDValue();
   13855             : 
   13856        6251 :     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
   13857        6251 :     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
   13858        2308 :         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
   13859        2311 :         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
   13860           3 :         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
   13861        6248 :       return SDValue();
   13862             : 
   13863           3 :     unsigned LDAlign = LD->getAlignment();
   13864           3 :     unsigned STAlign = ST->getAlignment();
   13865           3 :     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
   13866           3 :     unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
   13867           3 :     if (LDAlign < ABIAlign || STAlign < ABIAlign)
   13868           0 :       return SDValue();
   13869             : 
   13870             :     SDValue NewLD = // NOTE(review): LD's flags/AAInfo are not forwarded here - confirm.
   13871           3 :         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
   13872           3 :                     LD->getPointerInfo(), LDAlign);
   13873             : 
   13874             :     SDValue NewST =
   13875           3 :         DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
   13876           3 :                      ST->getPointerInfo(), STAlign);
   13877             : 
   13878           3 :     AddToWorklist(NewLD.getNode());
   13879           3 :     AddToWorklist(NewST.getNode());
   13880             :     WorklistRemover DeadNodes(*this);
   13881           3 :     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
   13882             :     ++LdStFP2Int;
   13883           3 :     return NewST;
   13884             :   }
   13885             : 
   13886     7192135 :   return SDValue();
   13887             : }
   13888             : 
   13889             : // This is a helper function for visitMUL to check the profitability
   13890             : // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
   13891             : // MulNode is the original multiply, AddNode is (add x, c1),
   13892             : // and ConstNode is c2.
   13893             : //
   13894             : // If the (add x, c1) has multiple uses, we could increase
   13895             : // the number of adds if we make this transformation.
   13896             : // It would only be worth doing this if we can remove a
   13897             : // multiply in the process. Check for that here.
   13898             : // To illustrate:
   13899             : //     (A + c1) * c3
   13900             : //     (A + c2) * c3
   13901             : // We're checking for cases where we have common "c3 * A" expressions.
   13902           0 : bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
   13903             :                                               SDValue &AddNode,
   13904             :                                               SDValue &ConstNode) {
   13905             :   APInt Val; // NOTE(review): never read in this function.
   13906             : 
   13907             :   // If the add only has one use, this would be OK to do.
   13908           0 :   if (AddNode.getNode()->hasOneUse())
   13909           0 :     return true;
   13910             : 
   13911             :   // Walk all the users of the constant with which we're multiplying.
   13912           0 :   for (SDNode *Use : ConstNode->uses()) {
   13913           0 :     if (Use == MulNode) // This use is the one we're on right now. Skip it.
   13914           0 :       continue;
   13915             : 
   13916           0 :     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
   13917             :       SDNode *OtherOp;
   13918           0 :       SDNode *MulVar = AddNode.getOperand(0).getNode();
   13919             : 
   13920             :       // OtherOp is what we're multiplying against the constant.
   13921           0 :       if (Use->getOperand(0) == ConstNode)
   13922           0 :         OtherOp = Use->getOperand(1).getNode();
   13923             :       else
   13924             :         OtherOp = Use->getOperand(0).getNode();
   13925             : 
   13926             :       // Check to see if the multiply is with the same operand as our "add".
   13927             :       //
   13928             :       //     ConstNode  = CONST
   13929             :       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
   13930             :       //     ...
   13931             :       //     AddNode  = (A + c1)  <-- MulVar is A.
   13932             :       //         = AddNode * ConstNode   <-- current visiting instruction.
   13933             :       //
   13934             :       // If we make this transformation, we will have a common
   13935             :       // multiply (ConstNode * A) that we can save.
   13936           0 :       if (OtherOp == MulVar)
   13937           0 :         return true;
   13938             : 
   13939             :       // Now check to see if a future expansion will give us a common
   13940             :       // multiply.
   13941             :       //
   13942             :       //     ConstNode  = CONST
   13943             :       //     AddNode    = (A + c1)
   13944             :       //     ...   = AddNode * ConstNode <-- current visiting instruction.
   13945             :       //     ...
   13946             :       //     OtherOp = (A + c2)
   13947             :       //     Use     = OtherOp * ConstNode <-- visiting Use.
   13948             :       //
   13949             :       // If we make this transformation, we will have a common
   13950             :       // multiply (CONST * A) after we also do the same transformation
   13951             :       // to the "Use" instruction.
   13952           0 :       if (OtherOp->getOpcode() == ISD::ADD &&
   13953           0 :           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
   13954           0 :           OtherOp->getOperand(0).getNode() == MulVar)
   13955           0 :         return true;
   13956             :     }
   13957             :   }
   13958             : 
   13959             :   // Didn't find a case where this would be profitable.
   13960             :   return false;
   13961             : }
   13962             : 
   13963           0 : SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
   13964             :                                          unsigned NumStores) { // TokenFactor of the stores' outside chains.
   13965             :   SmallVector<SDValue, 8> Chains;
   13966             :   SmallPtrSet<const SDNode *, 8> Visited;
   13967           0 :   SDLoc StoreDL(StoreNodes[0].MemNode);
   13968             : 
   13969           0 :   for (unsigned i = 0; i < NumStores; ++i) {
   13970           0 :     Visited.insert(StoreNodes[i].MemNode);
   13971             :   }
   13972             : 
   13973             :   // Don't include chains that are themselves one of the first NumStores stores.
   13974           0 :   for (unsigned i = 0; i < NumStores; ++i) {
   13975           0 :     if (Visited.count(StoreNodes[i].MemNode->getChain().getNode()) == 0)
   13976           0 :       Chains.push_back(StoreNodes[i].MemNode->getChain());
   13977             :   }
   13978             : 
   13979             :   assert(Chains.size() > 0 && "Chain should have generated a chain");
   13980           0 :   return DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, Chains);
   13981             : }
   13982             : // Replaces the first NumStores entries of StoreNodes with a single wider store; returns true if the merge was performed.
   13983        1087 : bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
   13984             :     SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
   13985             :     bool IsConstantSrc, bool UseVector, bool UseTrunc) {
   13986             :   // Make sure we have something to merge.
   13987        1087 :   if (NumStores < 2)
   13988             :     return false;
   13989             : 
   13990             :   // Location (SDLoc) for the new nodes, taken from the first candidate store.
   13991        1087 :   SDLoc DL(StoreNodes[0].MemNode);
   13992             : 
   13993             :   int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
   13994        1087 :   unsigned SizeInBits = NumStores * ElementSizeBits;
   13995        1087 :   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
   13996             : 
   13997             :   EVT StoreTy;
   13998        1087 :   if (UseVector) {
   13999         309 :     unsigned Elts = NumStores * NumMemElts;
   14000             :     // Get the type for the merged vector store.
   14001         309 :     StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
   14002             :   } else
   14003         778 :     StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
   14004             : 
   14005             :   SDValue StoredVal;
   14006        1087 :   if (UseVector) {
   14007         309 :     if (IsConstantSrc) {
   14008             :       SmallVector<SDValue, 8> BuildVector;
   14009         655 :       for (unsigned I = 0; I != NumStores; ++I) {
   14010         948 :         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
   14011         474 :         SDValue Val = St->getValue();
   14012             :         // If constant is of the wrong type, convert it now.
   14013         948 :         if (MemVT != Val.getValueType()) {
   14014           2 :           Val = peekThroughBitcasts(Val);
   14015             :           // Deal with constants of wrong size.
   14016           2 :           if (ElementSizeBits != Val.getValueSizeInBits()) {
   14017             :             EVT IntMemVT =
   14018           0 :                 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
   14019             :             if (isa<ConstantFPSDNode>(Val)) {
   14020             :               // Not clear how to truncate FP values, so abandon the merge.
   14021           0 :               return false;
   14022             :             } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
   14023           0 :               Val = DAG.getConstant(C->getAPIntValue()
   14024           0 :                                         .zextOrTrunc(Val.getValueSizeInBits())
   14025           0 :                                         .zextOrTrunc(ElementSizeBits),
   14026           0 :                                     SDLoc(C), IntMemVT);
   14027             :           }
   14028             :           // Bitcast the (now correctly sized) value to MemVT.
   14029           2 :           Val = DAG.getBitcast(MemVT, Val);
   14030             :         }
   14031         474 :         BuildVector.push_back(Val);
   14032             :       }
   14033         181 :       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
   14034             :                                                : ISD::BUILD_VECTOR,
   14035         362 :                               DL, StoreTy, BuildVector);
   14036             :     } else {
   14037             :       SmallVector<SDValue, 8> Ops;
   14038         446 :       for (unsigned i = 0; i < NumStores; ++i) {
   14039         636 :         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
   14040         318 :         SDValue Val = peekThroughBitcasts(St->getValue());
   14041             :         // All operands of BUILD_VECTOR / CONCAT_VECTORS must be of
   14042             :         // type MemVT. If the underlying value is not the correct
   14043             :         // type, but it is an extraction of an appropriate vector we
   14044             :         // can recast Val to be of the correct type. This may require
   14045             :         // converting between EXTRACT_VECTOR_ELT and
   14046             :         // EXTRACT_SUBVECTOR.
   14047         636 :         if ((MemVT != Val.getValueType()) &&
   14048           8 :             (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
   14049             :              Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
   14050          10 :           EVT MemVTScalarTy = MemVT.getScalarType();
   14051             :           // We may need to add a bitcast here to get types to line up.
   14052          20 :           if (MemVTScalarTy != Val.getValueType().getScalarType()) {
   14053          10 :             Val = DAG.getBitcast(MemVT, Val);
   14054             :           } else {
   14055           0 :             unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
   14056             :                                             : ISD::EXTRACT_VECTOR_ELT;
   14057           0 :             SDValue Vec = Val.getOperand(0);
   14058           0 :             SDValue Idx = Val.getOperand(1);
   14059           0 :             Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
   14060             :           }
   14061             :         }
   14062         318 :         Ops.push_back(Val);
   14063             :       }
   14064             : 
   14065             :       // Build the extracted vector elements back into a vector.
   14066         128 :       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
   14067             :                                                : ISD::BUILD_VECTOR,
   14068         240 :                               DL, StoreTy, Ops);
   14069             :     }
   14070             :   } else {
   14071             :     // We should always use a vector store when merging extracted vector
   14072             :     // elements, so this path implies a store of constants.
   14073             :     assert(IsConstantSrc && "Merged vector elements should use vector store");
   14074             : 
   14075             :     APInt StoreInt(SizeInBits, 0);
   14076             : 
   14077             :     // Construct a single integer constant which is made of the smaller
   14078             :     // constant inputs. On little-endian targets the entries are visited in reverse index order, so StoreNodes[0] ends up in the lowest bits.
   14079         778 :     bool IsLE = DAG.getDataLayout().isLittleEndian();
   14080        2978 :     for (unsigned i = 0; i < NumStores; ++i) {
   14081        2200 :       unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
   14082        4400 :       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
   14083             : 
   14084        2200 :       SDValue Val = St->getValue();
   14085        2200 :       Val = peekThroughBitcasts(Val);
   14086        2200 :       StoreInt <<= ElementSizeBits;
   14087             :       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
   14088        2164 :         StoreInt |= C->getAPIntValue()
   14089        4328 :                         .zextOrTrunc(ElementSizeBits)
   14090        4328 :                         .zextOrTrunc(SizeInBits);
   14091             :       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
   14092          36 :         StoreInt |= C->getValueAPF()
   14093          36 :                         .bitcastToAPInt()
   14094          72 :                         .zextOrTrunc(ElementSizeBits)
   14095          72 :                         .zextOrTrunc(SizeInBits);
   14096             :         // If fp truncation is necessary give up for now.
   14097          36 :         if (MemVT.getSizeInBits() != ElementSizeBits)
   14098           0 :           return false;
   14099             :       } else {
   14100           0 :         llvm_unreachable("Invalid constant element type");
   14101             :       }
   14102             :     }
   14103             : 
   14104             :     // Materialize the combined integer as a single constant of type StoreTy.
   14105         778 :     StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
   14106             :   }
   14107             : 
   14108        1087 :   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
   14109        1087 :   SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
   14110             : 
   14111             :   // Make sure we use a trunc store if the caller requested one (UseTrunc).
   14112             :   SDValue NewStore;
   14113        1087 :   if (!UseTrunc) {
   14114        1048 :     NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
   14115        1048 :                             FirstInChain->getPointerInfo(),
   14116        2096 :                             FirstInChain->getAlignment());
   14117             :   } else { // Must be realized as a trunc store; StoredVal must be a ConstantSDNode here (see the cast below).
   14118             :     EVT LegalizedStoredValTy =
   14119          78 :         TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
   14120          39 :     unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
   14121             :     ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
   14122             :     SDValue ExtendedStoreVal =
   14123          78 :         DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
   14124          39 :                         LegalizedStoredValTy);
   14125          39 :     NewStore = DAG.getTruncStore(
   14126             :         NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
   14127          39 :         FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
   14128             :         FirstInChain->getAlignment(),
   14129          78 :         FirstInChain->getMemOperand()->getFlags());
   14130             :   }
   14131             : 
   14132             :   // Replace all merged stores with the new store.
   14133        4079 :   for (unsigned i = 0; i < NumStores; ++i)
   14134        5984 :     CombineTo(StoreNodes[i].MemNode, NewStore);
   14135             : 
   14136        1087 :   AddToWorklist(NewChain.getNode());
   14137        1087 :   return true;
   14138             : }
   14139             : // Appends to StoreNodes the stores hanging off St's chain root that pass CandidateMatch, and reports that root via RootNode.
   14140      292861 : void DAGCombiner::getStoreMergeCandidates(
   14141             :     StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
   14142             :     SDNode *&RootNode) {
   14143             :   // This holds the base pointer, index, and the offset in bytes from the base
   14144             :   // pointer.
   14145      292861 :   BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
   14146      292861 :   EVT MemVT = St->getMemoryVT();
   14147             : 
   14148      292861 :   SDValue Val = peekThroughBitcasts(St->getValue());
   14149             :   // We must have a base and an offset. Note: RootNode is not written on any of the early returns below.
   14150      292861 :   if (!BasePtr.getBase().getNode())
   14151       22512 :     return;
   14152             : 
   14153             :   // Do not handle stores to undef base pointers.
   14154      292856 :   if (BasePtr.getBase().isUndef())
   14155             :     return;
   14156             : 
   14157      291565 :   bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
   14158      291565 :   bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
   14159      291565 :                           Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
   14160      291565 :   bool IsLoadSrc = isa<LoadSDNode>(Val);
   14161      291565 :   BaseIndexOffset LBasePtr;
   14162             :   // When the stored value is a load, also match the load's base pointer.
   14163      291565 :   EVT LoadVT;
   14164      291565 :   if (IsLoadSrc) {
   14165             :     auto *Ld = cast<LoadSDNode>(Val);
   14166       70374 :     LBasePtr = BaseIndexOffset::match(Ld, DAG);
   14167       70374 :     LoadVT = Ld->getMemoryVT();
   14168             :     // Load and store should be the same type.
   14169       70374 :     if (MemVT != LoadVT)
   14170             :       return;
   14171             :     // Loads must only have one use.
   14172       66746 :     if (!Ld->hasNUsesOfValue(1, 0))
   14173             :       return;
   14174             :     // The load must be neither volatile nor indexed.
   14175       56258 :     if (Ld->isVolatile() || Ld->isIndexed())
   14176             :       return;
   14177             :   }
   14178             :   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
   14179             :                             int64_t &Offset) -> bool { // Returns true if Other is a merge candidate for St.
   14180             :     if (Other->isVolatile() || Other->isIndexed())
   14181             :       return false;
   14182             :     SDValue Val = peekThroughBitcasts(Other->getValue());
   14183             :     // Allow merging constants of different types as integers.
   14184             :     bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
   14185             :                                            : Other->getMemoryVT() != MemVT;
   14186             :     if (IsLoadSrc) {
   14187             :       if (NoTypeMatch)
   14188             :         return false;
   14189             :       // The Load's Base Ptr must also match
   14190             :       if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
   14191             :         auto LPtr = BaseIndexOffset::match(OtherLd, DAG);
   14192             :         if (LoadVT != OtherLd->getMemoryVT())
   14193             :           return false;
   14194             :         // Loads must only have one use.
   14195             :         if (!OtherLd->hasNUsesOfValue(1, 0))
   14196             :           return false;
   14197             :         // The load must be neither volatile nor indexed.
   14198             :         if (OtherLd->isVolatile() || OtherLd->isIndexed())
   14199             :           return false;
   14200             :         if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
   14201             :           return false;
   14202             :       } else
   14203             :         return false;
   14204             :     }
   14205             :     if (IsConstantSrc) {
   14206             :       if (NoTypeMatch)
   14207             :         return false;
   14208             :       if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val)))
   14209             :         return false;
   14210             :     }
   14211             :     if (IsExtractVecSrc) {
   14212             :       // Do not merge truncated stores here.
   14213             :       if (Other->isTruncatingStore())
   14214             :         return false;
   14215             :       if (!MemVT.bitsEq(Val.getValueType()))
   14216             :         return false;
   14217             :       if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
   14218             :           Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
   14219             :         return false;
   14220             :     }
   14221             :     Ptr = BaseIndexOffset::match(Other, DAG);
   14222             :     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
   14223      270349 :   };
   14224             : 
   14225             :   // We are looking for a root node which is an ancestor to all mergeable
   14226             :   // stores. We search up through a load, to our root and then down
   14227             :   // through all children. For instance we will find Store{1,2,3} if
   14228             :   // St is Store1, Store2, or Store3 where the root is not a load,
   14229             :   // which is always true for nonvolatile ops. TODO: Expand
   14230             :   // the search to find all valid candidates through multiple layers of loads.
   14231             :   //
   14232             :   // Root
   14233             :   // |-------|-------|
   14234             :   // Load    Load    Store3
   14235             :   // |       |
   14236             :   // Store1   Store2
   14237             :   //
   14238             :   // FIXME: We should be able to climb and
   14239             :   // descend TokenFactors to find candidates as well.
   14240             : 
   14241      270349 :   RootNode = St->getChain().getNode();
   14242             : 
   14243             :   if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
   14244       16733 :     RootNode = Ldn->getChain().getNode();
   14245       57750 :     for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
   14246       41017 :       if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain: loads using the root as operand 0
   14247      157841 :         for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
   14248      134984 :           if (I2.getOperandNo() == 0)
   14249             :             if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
   14250      105519 :               BaseIndexOffset Ptr;
   14251             :               int64_t PtrDiff;
   14252      105519 :               if (CandidateMatch(OtherST, Ptr, PtrDiff))
   14253       82631 :                 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
   14254             :             }
   14255             :   } else
   14256     1855035 :     for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
   14257     1601419 :       if (I.getOperandNo() == 0)
   14258             :         if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
   14259     1205878 :           BaseIndexOffset Ptr;
   14260             :           int64_t PtrDiff;
   14261     1205878 :           if (CandidateMatch(OtherST, Ptr, PtrDiff))
   14262      826809 :             StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
   14263             :         }
   14264             : }
   14265             : 
   14266             : // We need to check that merging these stores does not cause a loop in
   14267             : // the DAG. Any store candidate may depend on another candidate
   14268             : // indirectly through its operand (we already consider dependencies
   14269             : // through the chain). Check in parallel by searching up from
   14270             : // non-chain operands of candidates. Returns false as soon as SDNode::hasPredecessorHelper reports a hit for a candidate, true otherwise.
   14271           0 : bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
   14272             :     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
   14273             :     SDNode *RootNode) {
   14274             :   // FIXME: We should be able to truncate a full search of
   14275             :   // predecessors by doing a BFS and keeping tabs on the originating
   14276             :   // stores from which worklist nodes come from in a similar way to
   14277             :   // TokenFactor simplification.
   14278             : 
   14279             :   SmallPtrSet<const SDNode *, 32> Visited;
   14280             :   SmallVector<const SDNode *, 8> Worklist;
   14281             : 
   14282             :   // RootNode is a predecessor to all candidates so we need not search
   14283             :   // past it. Add RootNode (peeking through TokenFactors). Do not count
   14284             :   // these towards size check.
   14285             : 
   14286           0 :   Worklist.push_back(RootNode);
   14287           0 :   while (!Worklist.empty()) {
   14288             :     auto N = Worklist.pop_back_val();
   14289           0 :     if (!Visited.insert(N).second)
   14290           0 :       continue; // Already present in Visited.
   14291           0 :     if (N->getOpcode() == ISD::TokenFactor) {
   14292           0 :       for (SDValue Op : N->ops())
   14293           0 :         Worklist.push_back(Op.getNode());
   14294             :     }
   14295             :   }
   14296             : 
   14297             :   // Don't count pruning nodes towards max: the limit is 1024 plus the nodes already in Visited.
   14298           0 :   unsigned int Max = 1024 + Visited.size();
   14299             :   // Search Ops of store candidates.
   14300           0 :   for (unsigned i = 0; i < NumStores; ++i) {
   14301           0 :     SDNode *N = StoreNodes[i].MemNode;
   14302             :     // Of the 4 Store Operands:
   14303             :     //   * Chain (Op 0) -> We have already considered these
   14304             :     //                    in candidate selection and can be
   14305             :     //                    safely ignored
   14306             :     //   * Value (Op 1) -> Cycles may happen (e.g. through load chains)
   14307             :     //   * Address (Op 2) -> Merged addresses may only vary by a fixed constant
   14308             :     //                      and so no cycles are possible.
   14309             :     //   * (Op 3) -> appears to always be undef. Cannot be source of cycle.
   14310             :     //
   14311             :     // Thus we need only check predecessors of the value operands.
   14312           0 :     auto *Op = N->getOperand(1).getNode();
   14313           0 :     if (Visited.insert(Op).second)
   14314           0 :       Worklist.push_back(Op);
   14315             :   }
   14316             :   // Search through DAG. We can stop early if we find a store node.
   14317           0 :   for (unsigned i = 0; i < NumStores; ++i)
   14318           0 :     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
   14319             :                                      Max))
   14320           0 :       return false;
   14321             :   return true;
   14322             : }
   14323             : 
   14324     7161723 : bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
   14325     7161723 :   if (OptLevel == CodeGenOpt::None)
   14326             :     return false;
   14327             : 
   14328     2948982 :   EVT MemVT = St->getMemoryVT();
   14329     2948982 :   int64_t ElementSizeBytes = MemVT.getStoreSize();
   14330     2948982 :   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
   14331             : 
   14332     2948982 :   if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
   14333             :     return false;
   14334             : 
   14335     2173596 :   bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
   14336             :       Attribute::NoImplicitFloat);
   14337             : 
   14338             :   // This function cannot currently deal with non-byte-sized memory sizes.
   14339     2173596 :   if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
   14340             :     return false;
   14341             : 
   14342     2169832 :   if (!MemVT.isSimple())
   14343             :     return false;
   14344             : 
   14345             :   // Perform an early exit check. Do not bother looking at stored values that
   14346             :   // are not constants, loads, or extracted vector elements.
   14347     2168096 :   SDValue StoredVal = peekThroughBitcasts(St->getValue());
   14348             :   bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
   14349             :   bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
   14350             :                        isa<ConstantFPSDNode>(StoredVal);
   14351     2168096 :   bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
   14352             :                           StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
   14353             : 
   14354     2168096 :   if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
   14355             :     return false;
   14356             : 
   14357      292861 :   SmallVector<MemOpLink, 8> StoreNodes;
   14358             :   SDNode *RootNode;
   14359             :   // Find potential store merge candidates by searching through chain sub-DAG
   14360      292861 :   getStoreMergeCandidates(St, StoreNodes, RootNode);
   14361             : 
   14362             :   // Check if there is anything to merge.
   14363      292861 :   if (StoreNodes.size() < 2)
   14364             :     return false;
   14365             : 
   14366             :   // Sort the memory operands according to their distance from the
   14367             :   // base pointer.
   14368      128311 :   llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
   14369           0 :     return LHS.OffsetFromBase < RHS.OffsetFromBase;
   14370             :   });
   14371             : 
   14372             :   // Store Merge attempts to merge the lowest stores. This generally
   14373             :   // works out as if successful, as the remaining stores are checked
   14374             :   // after the first collection of stores is merged. However, in the
   14375             :   // case that a non-mergeable store is found first, e.g., {p[-2],
   14376             :   // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
   14377             :   // mergeable cases. To prevent this, we prune such stores from the
   14378             :   // front of StoreNodes here.
   14379             : 
   14380             :   bool RV = false;
   14381      484864 :   while (StoreNodes.size() > 1) {
   14382             :     unsigned StartIdx = 0;
   14383      185704 :     while ((StartIdx + 1 < StoreNodes.size()) &&
   14384      335462 :            StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
   14385      167731 :                StoreNodes[StartIdx + 1].OffsetFromBase)
   14386             :       ++StartIdx;
   14387             : 
   14388             :     // Bail if we don't have enough candidates to merge.
   14389      132094 :     if (StartIdx + 1 >= StoreNodes.size())
   14390       17973 :       return RV;
   14391             : 
   14392      114121 :     if (StartIdx)
   14393       11397 :       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
   14394             : 
   14395             :     // Scan the memory operations on the chain and find the first
   14396             :     // non-consecutive store memory address.
   14397      114121 :     unsigned NumConsecutiveStores = 1;
   14398      114121 :     int64_t StartAddress = StoreNodes[0].OffsetFromBase;
   14399             :     // Check that the addresses are consecutive starting from the second
   14400             :     // element in the list of stores.
   14401      702071 :     for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
   14402      595111 :       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
   14403      595111 :       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
   14404             :         break;
   14405      587950 :       NumConsecutiveStores = i + 1;
   14406             :     }
   14407             : 
   14408      114121 :     if (NumConsecutiveStores < 2) {
   14409             :       StoreNodes.erase(StoreNodes.begin(),
   14410           0 :                        StoreNodes.begin() + NumConsecutiveStores);
   14411       98454 :       continue;
   14412             :     }
   14413             : 
   14414             :     // The node with the lowest store address.
   14415      114121 :     LLVMContext &Context = *DAG.getContext();
   14416      114121 :     const DataLayout &DL = DAG.getDataLayout();
   14417             : 
   14418             :     // Store the constants into memory as one consecutive store.
   14419      114121 :     if (IsConstantSrc) {
   14420      196515 :       while (NumConsecutiveStores >= 2) {
   14421      100025 :         LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
   14422             :         unsigned FirstStoreAS = FirstInChain->getAddressSpace();
   14423      100025 :         unsigned FirstStoreAlign = FirstInChain->getAlignment();
   14424             :         unsigned LastLegalType = 1;
   14425             :         unsigned LastLegalVectorType = 1;
   14426             :         bool LastIntegerTrunc = false;
   14427             :         bool NonZero = false;
   14428             :         unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
   14429      430666 :         for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
   14430      702772 :           StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
   14431      351386 :           SDValue StoredVal = ST->getValue();
   14432             :           bool IsElementZero = false;
   14433             :           if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
   14434      346375 :             IsElementZero = C->isNullValue();
   14435             :           else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
   14436        5007 :             IsElementZero = C->getConstantFPValue()->isNullValue();
   14437      351382 :           if (IsElementZero) {
   14438      315458 :             if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
   14439             :               FirstZeroAfterNonZero = i;
   14440             :           }
   14441      351386 :           NonZero |= !IsElementZero;
   14442             : 
   14443             :           // Find a legal type for the constant store.
   14444      351386 :           unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
   14445      351386 :           EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
   14446      351386 :           bool IsFast = false;
   14447             : 
   14448             :           // Break early when size is too large to be legal.
   14449      351386 :           if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
   14450             :             break;
   14451             : 
   14452      431059 :           if (TLI.isTypeLegal(StoreTy) &&
   14453      200119 :               TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
   14454       99701 :               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
   14455       97520 :                                      FirstStoreAlign, &IsFast) &&
   14456             :               IsFast) {
   14457             :             LastIntegerTrunc = false;
   14458             :             LastLegalType = i + 1;
   14459             :             // Or check whether a truncstore is legal.
   14460      233121 :           } else if (TLI.getTypeAction(Context, StoreTy) ==
   14461             :                      TargetLowering::TypePromoteInteger) {
   14462             :             EVT LegalizedStoredValTy =
   14463       71783 :                 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
   14464       73639 :             if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
   14465        3712 :                 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
   14466        1856 :                 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
   14467        1185 :                                        FirstStoreAlign, &IsFast) &&
   14468             :                 IsFast) {
   14469             :               LastIntegerTrunc = true;
   14470             :               LastLegalType = i + 1;
   14471             :             }
   14472             :           }
   14473             : 
   14474             :           // We only use vectors if the constant is known to be zero or the
   14475             :           // target allows it and the function is not marked with the
   14476             :           // noimplicitfloat attribute.
   14477       40595 :           if ((!NonZero ||
   14478      341654 :                TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
   14479             :               !NoVectors) {
   14480             :             // Find a legal type for the vector store.
   14481      300787 :             unsigned Elts = (i + 1) * NumMemElts;
   14482      300787 :             EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
   14483      423925 :             if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
   14484      121582 :                 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
   14485       60061 :                 TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
   14486       58942 :                                        FirstStoreAlign, &IsFast) &&
   14487             :                 IsFast)
   14488             :               LastLegalVectorType = i + 1;
   14489             :           }
   14490             :         }
   14491             : 
   14492      100025 :         bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
   14493      100025 :         unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
   14494             : 
   14495             :         // Check if we found a legal integer type that creates a meaningful
   14496             :         // merge.
   14497      100025 :         if (NumElem < 2) {
   14498             :           // We know that candidate stores are in order and of correct
   14499             :           // shape. While there is no mergeable sequence from the
   14500             :           // beginning one may start later in the sequence. The only
   14501             :           // reason a merge of size N could have failed where another of
   14502             :           // the same size would not have, is if the alignment has
   14503             :           // improved or we've dropped a non-zero value. Drop as many
   14504             :           // candidates as we can here.
   14505             :           unsigned NumSkip = 1;
   14506      483495 :           while (
   14507      492419 :               (NumSkip < NumConsecutiveStores) &&
   14508     1068510 :               (NumSkip < FirstZeroAfterNonZero) &&
   14509      971898 :               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
   14510      483495 :             NumSkip++;
   14511             : 
   14512       99066 :           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
   14513       99066 :           NumConsecutiveStores -= NumSkip;
   14514       99066 :           continue;
   14515             :         }
   14516             : 
   14517             :         // Check that we can merge these candidates without causing a cycle.
   14518         959 :         if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
   14519             :                                                       RootNode)) {
   14520           0 :           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
   14521           0 :           NumConsecutiveStores -= NumElem;
   14522           0 :           continue;
   14523             :         }
   14524             : 
   14525         959 :         RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true,
   14526             :                                               UseVector, LastIntegerTrunc);
   14527             : 
   14528             :         // Remove merged stores for next iteration.
   14529         959 :         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
   14530         959 :         NumConsecutiveStores -= NumElem;
   14531             :       }
   14532       96490 :       continue;
   14533             :     }
   14534             : 
   14535             :     // When extracting multiple vector elements, try to store them
   14536             :     // in one vector store rather than a sequence of scalar stores.
   14537       17631 :     if (IsExtractVecSrc) {
   14538             :       // Loop on Consecutive Stores on success.
   14539        3978 :       while (NumConsecutiveStores >= 2) {
   14540        2014 :         LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
   14541             :         unsigned FirstStoreAS = FirstInChain->getAddressSpace();
   14542        2014 :         unsigned FirstStoreAlign = FirstInChain->getAlignment();
   14543             :         unsigned NumStoresToMerge = 1;
   14544       11213 :         for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
   14545             :           // Find a legal type for the vector store.
   14546        9612 :           unsigned Elts = (i + 1) * NumMemElts;
   14547             :           EVT Ty =
   14548        9612 :               EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
   14549             :           bool IsFast;
   14550             : 
   14551             :           // Break early when size is too large to be legal.
   14552        9612 :           if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
   14553             :             break;
   14554             : 
   14555       10466 :           if (TLI.isTypeLegal(Ty) &&
   14556        2220 :               TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
   14557         953 :               TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
   14558         357 :                                      FirstStoreAlign, &IsFast) &&
   14559             :               IsFast)
   14560             :             NumStoresToMerge = i + 1;
   14561             :         }
   14562             : 
   14563             :         // Check if we found a legal integer type creating a meaningful
   14564             :         // merge.
   14565        2014 :         if (NumStoresToMerge < 2) {
   14566             :           // We know that candidate stores are in order and of correct
   14567             :           // shape. While there is no mergeable sequence from the
   14568             :           // beginning one may start later in the sequence. The only
   14569             :           // reason a merge of size N could have failed where another of
   14570             :           // the same size would not have, is if the alignment has
   14571             :           // improved. Drop as many candidates as we can here.
   14572             :           unsigned NumSkip = 1;
   14573       14323 :           while (
   14574       30535 :               (NumSkip < NumConsecutiveStores) &&
   14575       28660 :               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
   14576       14323 :             NumSkip++;
   14577             : 
   14578        1882 :           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
   14579        1882 :           NumConsecutiveStores -= NumSkip;
   14580        1882 :           continue;
   14581             :         }
   14582             : 
   14583             :         // Check that we can merge these candidates without causing a cycle.
   14584         132 :         if (!checkMergeStoreCandidatesForDependencies(
   14585             :                 StoreNodes, NumStoresToMerge, RootNode)) {
   14586             :           StoreNodes.erase(StoreNodes.begin(),
   14587           4 :                            StoreNodes.begin() + NumStoresToMerge);
   14588           4 :           NumConsecutiveStores -= NumStoresToMerge;
   14589           4 :           continue;
   14590             :         }
   14591             : 
   14592         128 :         RV |= MergeStoresOfConstantsOrVecElts(
   14593             :             StoreNodes, MemVT, NumStoresToMerge, false, true, false);
   14594             : 
   14595             :         StoreNodes.erase(StoreNodes.begin(),
   14596         128 :                          StoreNodes.begin() + NumStoresToMerge);
   14597         128 :         NumConsecutiveStores -= NumStoresToMerge;
   14598             :       }
   14599        1964 :       continue;
   14600             :     }
   14601             : 
   14602             :     // Below we handle the case of multiple consecutive stores that
   14603             :     // come from multiple consecutive loads. We merge them into a single
   14604             :     // wide load and a single wide store.
   14605             : 
   14606             :     // Look for load nodes which are used by the stored values.
   14607       15667 :     SmallVector<MemOpLink, 8> LoadNodes;
   14608             : 
   14609             :     // Find acceptable loads. Loads need to have the same chain (token factor),
   14610             :     // must not be zext, volatile, indexed, and they must be consecutive.
   14611       15667 :     BaseIndexOffset LdBasePtr;
   14612             : 
   14613      110375 :     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
   14614      189416 :       StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
   14615       94708 :       SDValue Val = peekThroughBitcasts(St->getValue());
   14616             :       LoadSDNode *Ld = cast<LoadSDNode>(Val);
   14617             : 
   14618       94708 :       BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
   14619             :       // If this is not the first ptr that we check.
   14620       94708 :       int64_t LdOffset = 0;
   14621       94708 :       if (LdBasePtr.getBase().getNode()) {
   14622             :         // The base ptr must be the same.
   14623       79041 :         if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
   14624             :           break;
   14625             :       } else {
   14626             :         // Check that all other base pointers are the same as this one.
   14627       15667 :         LdBasePtr = LdPtr;
   14628             :       }
   14629             : 
   14630             :       // We found a potential memory operand to merge.
   14631       94708 :       LoadNodes.push_back(MemOpLink(Ld, LdOffset));
   14632             :     }
   14633             : 
   14634       30956 :     while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
   14635             :       // If we have load/store pair instructions and we only have two values,
   14636             :       // don't bother merging.
   14637             :       unsigned RequiredAlignment;
   14638        8239 :       if (LoadNodes.size() == 2 &&
   14639       15909 :           TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
   14640         620 :           StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
   14641         620 :         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
   14642         620 :         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
   14643         620 :         break;
   14644             :       }
   14645       15289 :       LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
   14646             :       unsigned FirstStoreAS = FirstInChain->getAddressSpace();
   14647       15289 :       unsigned FirstStoreAlign = FirstInChain->getAlignment();
   14648       15289 :       LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
   14649             :       unsigned FirstLoadAS = FirstLoad->getAddressSpace();
   14650       15289 :       unsigned FirstLoadAlign = FirstLoad->getAlignment();
   14651             : 
   14652             :       // Scan the memory operations on the chain and find the first
   14653             :       // non-consecutive load memory address. These variables hold the index in
   14654             :       // the store node array.
   14655             : 
   14656             :       unsigned LastConsecutiveLoad = 1;
   14657             : 
   14658             :       // This variable refers to the size and not index in the array.
   14659       15289 :       unsigned LastLegalVectorType = 1;
   14660       15289 :       unsigned LastLegalIntegerType = 1;
   14661             :       bool isDereferenceable = true;
   14662             :       bool DoIntegerTruncate = false;
   14663       15289 :       StartAddress = LoadNodes[0].OffsetFromBase;
   14664       15289 :       SDValue FirstChain = FirstLoad->getChain();
   14665       75719 :       for (unsigned i = 1; i < LoadNodes.size(); ++i) {
   14666             :         // All loads must share the same chain.
   14667       65721 :         if (LoadNodes[i].MemNode->getChain() != FirstChain)
   14668             :           break;
   14669             : 
   14670       64552 :         int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
   14671       64552 :         if (CurrAddress - StartAddress != (ElementSizeBytes * i))
   14672             :           break;
   14673             :         LastConsecutiveLoad = i;
   14674             : 
   14675       64160 :         if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
   14676             :           isDereferenceable = false;
   14677             : 
   14678             :         // Find a legal type for the vector store.
   14679       64160 :         unsigned Elts = (i + 1) * NumMemElts;
   14680       64160 :         EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
   14681             : 
   14682             :         // Break early when size is too large to be legal.
   14683       64160 :         if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
   14684             :           break;
   14685             : 
   14686             :         bool IsFastSt, IsFastLd;
   14687       65713 :         if (TLI.isTypeLegal(StoreTy) &&
   14688        8163 :             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
   14689        2880 :             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
   14690        1948 :                                    FirstStoreAlign, &IsFastSt) &&
   14691         131 :             IsFastSt &&
   14692         131 :             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
   14693         131 :                                    FirstLoadAlign, &IsFastLd) &&
   14694             :             IsFastLd) {
   14695         125 :           LastLegalVectorType = i + 1;
   14696             :         }
   14697             : 
   14698             :         // Find a legal type for the integer store.
   14699       60430 :         unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
   14700       60430 :         StoreTy = EVT::getIntegerVT(Context, SizeInBits);
   14701       67080 :         if (TLI.isTypeLegal(StoreTy) &&
   14702       13282 :             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
   14703        6632 :             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
   14704         174 :                                    FirstStoreAlign, &IsFastSt) &&
   14705         174 :             IsFastSt &&
   14706         174 :             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
   14707         174 :                                    FirstLoadAlign, &IsFastLd) &&
   14708             :             IsFastLd) {
   14709         174 :           LastLegalIntegerType = i + 1;
   14710             :           DoIntegerTruncate = false;
   14711             :           // Or check whether a truncstore and extload is legal.
   14712       60256 :         } else if (TLI.getTypeAction(Context, StoreTy) ==
   14713             :                    TargetLowering::TypePromoteInteger) {
   14714       39602 :           EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
   14715       40551 :           if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
   14716        1898 :               TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
   14717         949 :               TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy,
   14718         949 :                                  StoreTy) &&
   14719             :               TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
   14720         933 :                                  StoreTy) &&
   14721         933 :               TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
   14722         933 :               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
   14723          25 :                                      FirstStoreAlign, &IsFastSt) &&
   14724          25 :               IsFastSt &&
   14725          25 :               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
   14726          25 :                                      FirstLoadAlign, &IsFastLd) &&
   14727             :               IsFastLd) {
   14728          25 :             LastLegalIntegerType = i + 1;
   14729             :             DoIntegerTruncate = true;
   14730             :           }
   14731             :         }
   14732             :       }
   14733             : 
   14734             :       // Only use vector types if the vector type is larger than the integer
   14735             :       // type. If they are the same, use integers.
   14736       15289 :       bool UseVectorTy =
   14737       15289 :           LastLegalVectorType > LastLegalIntegerType && !NoVectors;
   14738             :       unsigned LastLegalType =
   14739       15289 :           std::max(LastLegalVectorType, LastLegalIntegerType);
   14740             : 
   14741             :       // We add +1 here because the LastXXX variables refer to location while
   14742             :       // the NumElem refers to array/index size.
   14743             :       unsigned NumElem =
   14744       18581 :           std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
   14745       15289 :       NumElem = std::min(LastLegalType, NumElem);
   14746             : 
   14747       15289 :       if (NumElem < 2) {
   14748             :         // We know that candidate stores are in order and of correct
   14749             :         // shape. While there is no mergeable sequence from the
   14750             :         // beginning one may start later in the sequence. The only
   14751             :         // reason a merge of size N could have failed where another of
   14752             :         // the same size would not have is if the alignment of either
   14753             :         // the load or store has improved. Drop as many candidates as we
   14754             :         // can here.
   14755             :         unsigned NumSkip = 1;
   14756       77890 :         while ((NumSkip < LoadNodes.size()) &&
   14757      170256 :                (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
   14758       77632 :                (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
   14759       77582 :           NumSkip++;
   14760       15042 :         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
   14761       15042 :         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
   14762       15042 :         NumConsecutiveStores -= NumSkip;
   14763       15042 :         continue;
   14764             :       }
   14765             : 
   14766             :       // Check that we can merge these candidates without causing a cycle.
   14767         247 :       if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
   14768             :                                                     RootNode)) {
   14769           0 :         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
   14770           0 :         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
   14771           0 :         NumConsecutiveStores -= NumElem;
   14772           0 :         continue;
   14773             :       }
   14774             : 
   14775             :       // Find if it is better to use vectors or integers to load and store
   14776             :       // to memory.
   14777             :       EVT JointMemOpVT;
   14778         247 :       if (UseVectorTy) {
   14779             :         // Find a legal type for the vector store.
   14780         110 :         unsigned Elts = NumElem * NumMemElts;
   14781         110 :         JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
   14782             :       } else {
   14783         137 :         unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
   14784         137 :         JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
   14785             :       }
   14786             : 
   14787         247 :       SDLoc LoadDL(LoadNodes[0].MemNode);
   14788         247 :       SDLoc StoreDL(StoreNodes[0].MemNode);
   14789             : 
   14790             :       // The merged loads are required to have the same incoming chain, so
   14791             :       // using the first's chain is acceptable.
   14792             : 
   14793         247 :       SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
   14794         247 :       AddToWorklist(NewStoreChain.getNode());
   14795             : 
   14796             :       MachineMemOperand::Flags MMOFlags =
   14797         247 :           isDereferenceable ? MachineMemOperand::MODereferenceable
   14798             :                             : MachineMemOperand::MONone;
   14799             : 
   14800         247 :       SDValue NewLoad, NewStore;
   14801         247 :       if (UseVectorTy || !DoIntegerTruncate) {
   14802         238 :         NewLoad =
   14803         238 :             DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
   14804         238 :                         FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
   14805         476 :                         FirstLoadAlign, MMOFlags);
   14806         238 :         NewStore = DAG.getStore(
   14807             :             NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
   14808         238 :             FirstInChain->getPointerInfo(), FirstStoreAlign);
   14809             :       } else { // This must be the truncstore/extload case
   14810             :         EVT ExtendedTy =
   14811           9 :             TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
   14812           9 :         NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
   14813             :                                  FirstLoad->getChain(), FirstLoad->getBasePtr(),
   14814           9 :                                  FirstLoad->getPointerInfo(), JointMemOpVT,
   14815          18 :                                  FirstLoadAlign, MMOFlags);
   14816           9 :         NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
   14817             :                                      FirstInChain->getBasePtr(),
   14818           9 :                                      FirstInChain->getPointerInfo(),
   14819             :                                      JointMemOpVT, FirstInChain->getAlignment(),
   14820          18 :                                      FirstInChain->getMemOperand()->getFlags());
   14821             :       }
   14822             : 
   14823             :       // Transfer chain users from old loads to the new load.
   14824         923 :       for (unsigned i = 0; i < NumElem; ++i) {
   14825         676 :         LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
   14826        1352 :         DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
   14827             :                                       SDValue(NewLoad.getNode(), 1));
   14828             :       }
   14829             : 
   14830             :       // Replace all the stores with the new store. Recursively remove the
   14831             :       // corresponding value if it is no longer used.
   14832         923 :       for (unsigned i = 0; i < NumElem; ++i) {
   14833        1352 :         SDValue Val = StoreNodes[i].MemNode->getOperand(1);
   14834         676 :         CombineTo(StoreNodes[i].MemNode, NewStore);
   14835         676 :         if (Val.getNode()->use_empty())
   14836         676 :           recursivelyDeleteUnusedNodes(Val.getNode());
   14837             :       }
   14838             : 
   14839             :       RV = true;
   14840         247 :       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
   14841         247 :       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
   14842         247 :       NumConsecutiveStores -= NumElem;
   14843             :     }
   14844             :   }
   14845             :   return RV;
   14846             : }
   14847             : 
   14848      420021 : SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
   14849             :   SDLoc SL(ST);
   14850      420021 :   SDValue ReplStore;
   14851             : 
   14852             :   // Replace the chain to avoid dependency.
   14853      420021 :   if (ST->isTruncatingStore()) {
   14854        1839 :     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
   14855             :                                   ST->getBasePtr(), ST->getMemoryVT(),
   14856        3678 :                                   ST->getMemOperand());
   14857             :   } else {
   14858      418182 :     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
   14859      836364 :                              ST->getMemOperand());
   14860             :   }
   14861             : 
   14862             :   // Create token to keep both nodes around.
   14863      420021 :   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
   14864      420021 :                               MVT::Other, ST->getChain(), ReplStore);
   14865             : 
   14866             :   // Make sure the new and old chains are cleaned up.
   14867      420021 :   AddToWorklist(Token.getNode());
   14868             : 
   14869             :   // Don't add users to work list.
   14870      420021 :   return CombineTo(ST, Token, false);
   14871             : }
   14872             : 
   14873        9477 : SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
   14874        9477 :   SDValue Value = ST->getValue();
   14875        9477 :   if (Value.getOpcode() == ISD::TargetConstantFP)
   14876           0 :     return SDValue();
   14877             : 
   14878             :   SDLoc DL(ST);
   14879             : 
   14880        9477 :   SDValue Chain = ST->getChain();
   14881        9477 :   SDValue Ptr = ST->getBasePtr();
   14882             : 
   14883             :   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
   14884             : 
   14885             :   // NOTE: If the original store is volatile, this transform must not increase
   14886             :   // the number of stores.  For example, on x86-32 an f64 can be stored in one
   14887             :   // processor operation but an i64 (which is not legal) requires two.  So the
   14888             :   // transform should not be done in this case.
   14889             : 
   14890        9477 :   SDValue Tmp;
   14891        9477 :   switch (CFP->getSimpleValueType(0).SimpleTy) {
   14892           0 :   default:
   14893           0 :     llvm_unreachable("Unknown FP type");
   14894        1930 :   case MVT::f16:    // We don't do this for these yet.
   14895             :   case MVT::f80:
   14896             :   case MVT::f128:
   14897             :   case MVT::ppcf128:
   14898        1930 :     return SDValue();
   14899             :   case MVT::f32:
   14900        1425 :     if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
   14901             :         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
   14902             :       ;
   14903        2788 :       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
   14904        2788 :                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
   14905        1394 :                             MVT::i32);
   14906        1394 :       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
   14907             :     }
   14908             : 
   14909           0 :     return SDValue();
   14910        6153 :   case MVT::f64:
   14911        6153 :     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
   14912        1232 :          !ST->isVolatile()) ||
   14913             :         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
   14914             :       ;
   14915        2464 :       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
   14916        1232 :                             getZExtValue(), SDLoc(CFP), MVT::i64);
   14917        1232 :       return DAG.getStore(Chain, DL, Tmp,
   14918        1232 :                           Ptr, ST->getMemOperand());
   14919             :     }
   14920             : 
   14921        4921 :     if (!ST->isVolatile() &&
   14922             :         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
   14923             :       // Many FP stores are not made apparent until after legalize, e.g. for
   14924             :       // argument passing.  Since this is so common, custom legalize the
   14925             :       // 64-bit integer store into two 32-bit stores.
   14926        9818 :       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
   14927        4909 :       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
   14928        4909 :       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
   14929        4909 :       if (DAG.getDataLayout().isBigEndian())
   14930             :         std::swap(Lo, Hi);
   14931             : 
   14932        4909 :       unsigned Alignment = ST->getAlignment();
   14933        4909 :       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
   14934             :       AAMDNodes AAInfo = ST->getAAInfo();
   14935             : 
   14936        4909 :       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
   14937        4909 :                                  ST->getAlignment(), MMOFlags, AAInfo);
   14938        4909 :       Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
   14939        4909 :                         DAG.getConstant(4, DL, Ptr.getValueType()));
   14940        4909 :       Alignment = MinAlign(Alignment, 4U);
   14941        4909 :       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
   14942        4909 :                                  ST->getPointerInfo().getWithOffset(4),
   14943        4909 :                                  Alignment, MMOFlags, AAInfo);
   14944        4909 :       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
   14945        4909 :                          St0, St1);
   14946             :     }
   14947             : 
   14948          12 :     return SDValue();
   14949             :   }
   14950             : }
   14951             : /// Combine STORE node \p N: the folds below are tried in order; SDValue(N, 0) is returned when a fold already updated the DAG itself.
   14952     8005537 : SDValue DAGCombiner::visitSTORE(SDNode *N) {
   14953             :   StoreSDNode *ST  = cast<StoreSDNode>(N);
   14954     8005537 :   SDValue Chain = ST->getChain();
   14955     8005537 :   SDValue Value = ST->getValue();
   14956     8005537 :   SDValue Ptr   = ST->getBasePtr();
   14957             : 
   14958             :   // If this is a store of a bit convert, store the input value if the
   14959             :   // resultant store does not need a higher alignment than the original.
   14960    16011074 :   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
   14961             :       ST->isUnindexed()) {
   14962      214911 :     EVT SVT = Value.getOperand(0).getValueType();
   14963             :     // If the store is volatile, we only want to change the store type if the
   14964             :     // resulting store is legal. Otherwise we might increase the number of
   14965             :     // memory accesses. We don't care if the original type was legal or not
   14966             :     // as we assume software couldn't rely on the number of accesses of an
   14967             :     // illegal type.
   14968       12530 :     if (((!LegalOperations && !ST->isVolatile()) ||
   14969      416733 :          TLI.isOperationLegal(ISD::STORE, SVT)) &&
   14970      402462 :         TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
   14971      170915 :       unsigned OrigAlign = ST->getAlignment();
   14972      170915 :       bool Fast = false;
   14973      170915 :       if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
   14974      170915 :                                  ST->getAddressSpace(), OrigAlign, &Fast) &&
   14975             :           Fast) {
   14976      170675 :         return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
   14977             :                             ST->getPointerInfo(), OrigAlign,
   14978      513526 :                             ST->getMemOperand()->getFlags(), ST->getAAInfo());
   14979             :       }
   14980             :     }
   14981             :   }
   14982             : 
   14983             :   // Turn 'store undef, Ptr' -> nothing.
   14984    15669724 :   if (Value.isUndef() && ST->isUnindexed())
   14985        2112 :     return Chain;
   14986             : 
   14987             :   // Try to infer better alignment information than the store already has.
   14988     7832750 :   if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
   14989     3619290 :     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
   14990     2123509 :       if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
   14991             :         SDValue NewStore =
   14992        9135 :             DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
   14993             :                               ST->getMemoryVT(), Align,
   14994       23593 :                               ST->getMemOperand()->getFlags(), ST->getAAInfo());
   14995             :         // NewStore will always be N as we are only refining the alignment
   14996             :         assert(NewStore.getNode() == N);
   14997             :         (void)NewStore;
   14998             :       }
   14999             :     }
   15000             :   }
   15001             : 
   15002             :   // Try transforming a pair floating point load / store ops to integer
   15003             :   // load / store ops.
   15004     7832750 :   if (SDValue NewST = TransformFPLoadStorePair(N))
   15005           3 :     return NewST;
   15006             : 
   15007     7832747 :   if (ST->isUnindexed()) {
   15008             :     // Walk up chain skipping non-aliasing memory nodes, on this store and any
   15009             :     // adjacent stores.
   15010     7832510 :     if (findBetterNeighborChains(ST)) {
   15011             :       // replaceStoreChain uses CombineTo, which handled all of the worklist
   15012             :       // manipulation. Return the original node to not do anything else.
   15013      401881 :       return SDValue(ST, 0);
   15014             :     }
   15015     7430629 :     Chain = ST->getChain();
   15016             :   }
   15017             : 
   15018             :   // FIXME: is there such a thing as a truncating indexed store?
   15019     7430866 :   if (ST->isTruncatingStore() && ST->isUnindexed() &&
   15020     7484374 :       Value.getValueType().isInteger()) {
   15021             :     // See if we can simplify the input to this truncstore with knowledge that
   15022             :     // only the low bits are being used.  For example:
   15023             :     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
   15024       53272 :     SDValue Shorter = DAG.GetDemandedBits(
   15025       53272 :         Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
   15026      106544 :                                     ST->getMemoryVT().getScalarSizeInBits()));
   15027       53272 :     AddToWorklist(Value.getNode());
   15028       53272 :     if (Shorter.getNode())
   15029        1489 :       return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
   15030        1489 :                                Ptr, ST->getMemoryVT(), ST->getMemOperand());
   15031             : 
   15032             :     // Otherwise, see if we can simplify the operation with
   15033             :     // SimplifyDemandedBits, which only works if the value has a single use.
   15034       51783 :     if (SimplifyDemandedBits(
   15035             :             Value,
   15036       51783 :             APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
   15037      103566 :                                  ST->getMemoryVT().getScalarSizeInBits()))) {
   15038             :       // Re-visit the store if anything changed and the store hasn't been merged
   15039             :       // with another node (N is deleted). SimplifyDemandedBits will add Value's
   15040             :       // node back to the worklist if necessary, but we also need to re-visit
   15041             :       // the Store node itself.
   15042        2270 :       if (N->getOpcode() != ISD::DELETED_NODE)
   15043        2270 :         AddToWorklist(N);
   15044        2270 :       return SDValue(N, 0);
   15045             :     }
   15046             :   }
   15047             : 
   15048             :   // If this is a load followed by a store to the same location, then the store
   15049             :   // is dead/noop.
   15050             :   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
   15051     2037692 :     if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
   15052        6784 :         ST->isUnindexed() && !ST->isVolatile() &&
   15053             :         // There can't be any side effects between the load and store, such as
   15054             :         // a call or store.
   15055     2032605 :         Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
   15056             :       // The store is dead, remove it.
   15057         107 :       return Chain;
   15058             :     }
   15059             :   }
   15060             : 
   15061             :   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
   15062     1136710 :     if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
   15063     2258925 :         !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
   15064             :         ST->getMemoryVT() == ST1->getMemoryVT()) {
   15065             :       // If this is a store followed by a store with the same value to the same
   15066             :       // location, then the store is dead/noop.
   15067        1602 :       if (ST1->getValue() == Value) {
   15068             :         // The store is dead, remove it.
   15069          67 :         return Chain;
   15070             :       }
   15071             : 
   15072             :       // If the preceding store on the chain (ST1) writes the same location
   15073             :       // and no other node is chained to that store we can effectively
   15074             :       // drop that store. Do not remove stores to undef as they may be used as
   15075             :       // data sinks.
   15076        2048 :       if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
   15077             :           !ST1->getBasePtr().isUndef()) {
   15078             :         // ST1 is fully overwritten and can be elided. Combine with its chain
   15079             :         // value.
   15080         382 :         CombineTo(ST1, ST1->getChain());
   15081         382 :         return SDValue();
   15082             :       }
   15083             :     }
   15084             :   }
   15085             : 
   15086             :   // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
   15087             :   // truncating store.  We can do this even if this is already a truncstore.
   15088     7426551 :   if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
   15089     7477833 :       && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
   15090      102564 :       TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
   15091             :                             ST->getMemoryVT())) {
   15092        2218 :     return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
   15093        2227 :                              Ptr, ST->getMemoryVT(), ST->getMemOperand());
   15094             :   }
   15095             : 
   15096             :   // Always perform this optimization before types are legal. If the target
   15097             :   // prefers, also try this after legalization to catch stores that were created
   15098             :   // by intrinsics or other nodes.
   15099     7424333 :   if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) {
   15100             :     while (true) {
   15101             :       // There can be multiple store sequences on the same chain.
   15102             :       // Keep trying to merge store sequences until we are unable to do so
   15103             :       // or until we merge the last store on the chain.
   15104     7161723 :       bool Changed = MergeConsecutiveStores(ST);
   15105     7161723 :       if (!Changed) break;
   15106             :       // Return N as merge only uses CombineTo and no worklist clean
   15107             :       // up is necessary.
   15108        1123 :       if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
   15109         999 :         return SDValue(N, 0);
   15110             :     }
   15111             :   }
   15112             : 
   15113             :   // Try transforming N to an indexed store.
   15114     7423334 :   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
   15115         310 :     return SDValue(N, 0);
   15116             : 
   15117             :   // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
   15118             :   //
   15119             :   // Make sure to do this only after attempting to merge stores in order to
   15120             :   //  avoid changing the types of some subset of stores due to visit order,
   15121             :   //  preventing their merging.
   15122             :   if (isa<ConstantFPSDNode>(ST->getValue())) {
   15123        9477 :     if (SDValue NewSt = replaceStoreOfFPConstant(ST))
   15124        7535 :       return NewSt;
   15125             :   }
   15126             : 
   15127     7415489 :   if (SDValue NewSt = splitMergedValStore(ST))
   15128           0 :     return NewSt;
   15129             : 
   15130     7415489 :   return ReduceLoadOpStoreWidth(N);
   15131             : }
   15132             : 
   15133             : /// For the instruction sequence of store below, F and I values
   15134             : /// are bundled together as an i64 value before being stored into memory.
   15135             : /// Sometimes it is more efficient to generate separate stores for F and I,
   15136             : /// which can remove the bitwise instructions or sink them to colder places.
   15137             : ///
   15138             : ///   (store (or (zext (bitcast F to i32) to i64),
   15139             : ///              (shl (zext I to i64), 32)), addr)  -->
   15140             : ///   (store F, addr) and (store I, addr+4)
   15141             : ///
   15142             : /// Similarly, splitting for other merged store can also be beneficial, like:
   15143             : /// For pair of {i32, i32}, i64 store --> two i32 stores.
   15144             : /// For pair of {i32, i16}, i64 store --> two i32 stores.
   15145             : /// For pair of {i16, i16}, i32 store --> two i16 stores.
   15146             : /// For pair of {i16, i8},  i32 store --> two i16 stores.
   15147             : /// For pair of {i8, i8},   i16 store --> two i8 stores.
   15148             : ///
   15149             : /// We allow each target to determine specifically which kind of splitting is
   15150             : /// supported.
   15151             : ///
   15152             : /// The store patterns are commonly seen from the simple code snippet below
   15153             : /// if only std::make_pair(...) is sroa transformed before inlined into hoo.
   15154             : ///   void goo(const std::pair<int, float> &);
   15155             : ///   hoo() {
   15156             : ///     ...
   15157             : ///     goo(std::make_pair(tmp, ftmp));
   15158             : ///     ...
   15159             : ///   }
   15160             : /// Returns the high-half store (chained after the low-half store), or an empty SDValue if \p ST is left alone.
   15161     7415489 : SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
   15162     7415489 :   if (OptLevel == CodeGenOpt::None)
   15163     4211383 :     return SDValue();
   15164             : 
   15165     3204106 :   SDValue Val = ST->getValue();
   15166             :   SDLoc DL(ST);
   15167             : 
   15168             :   // Match OR operand.
   15169     9612318 :   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
   15170     3185740 :     return SDValue();
   15171             : 
   15172             :   // Match SHL operand and get Lower and Higher parts of Val.
   15173       18366 :   SDValue Op1 = Val.getOperand(0);
   15174       18366 :   SDValue Op2 = Val.getOperand(1);
   15175             :   SDValue Lo, Hi;
   15176       18366 :   if (Op1.getOpcode() != ISD::SHL) {
   15177             :     std::swap(Op1, Op2);
   15178       16763 :     if (Op1.getOpcode() != ISD::SHL)
   15179       13643 :       return SDValue();
   15180             :   }
   15181        4723 :   Lo = Op2;
   15182        4723 :   Hi = Op1.getOperand(0);
   15183        4723 :   if (!Op1.hasOneUse())
   15184           9 :     return SDValue();
   15185             : 
   15186             :   // Match shift amount to HalfValBitSize.
   15187        4714 :   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
   15188             :   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
   15189        6748 :   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
   15190        3135 :     return SDValue();
   15191             : 
   15192             :   // Lo and Hi must each be a single-use zero-extension of a scalar integer
   15193             :   // no wider than HalfValBitSize.
   15194         300 :   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
   15195         450 :       !Lo.getOperand(0).getValueType().isScalarInteger() ||
   15196         300 :       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
   15197          12 :       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
   15198        1597 :       !Hi.getOperand(0).getValueType().isScalarInteger() ||
   15199           6 :       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
   15200        1573 :     return SDValue();
   15201             : 
   15202             :   // Pick the EVTs for the target query. NOTE(review): for a BITCAST input this is
   15203             :   // the bitcast's result type, not the type before the bitcast - confirm intent.
   15204           6 :   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
   15205           0 :                   ? Lo.getOperand(0).getValueType()
   15206           6 :                   : Lo.getValueType();
   15207           6 :   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
   15208           0 :                    ? Hi.getOperand(0).getValueType()
   15209           6 :                    : Hi.getValueType();
   15210           6 :   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
   15211           6 :     return SDValue();
   15212             : 
   15213             :   // Start to split store.
   15214           0 :   unsigned Alignment = ST->getAlignment();
   15215           0 :   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
   15216             :   AAMDNodes AAInfo = ST->getAAInfo();
   15217             : 
   15218             :   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
   15219           0 :   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
   15220           0 :   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
   15221           0 :   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
   15222             : 
   15223           0 :   SDValue Chain = ST->getChain();
   15224           0 :   SDValue Ptr = ST->getBasePtr();
   15225             :   // Lower value store.
   15226           0 :   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
   15227           0 :                              Alignment, MMOFlags, AAInfo);
   15228           0 :   Ptr =
   15229           0 :       DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
   15230           0 :                   DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
   15231             :   // Higher value store. Its alignment is MinAlign(base alignment, byte offset); halving the base would yield 0 for align 1 and over-claim for large alignments.
   15232             :   SDValue St1 =
   15233           0 :       DAG.getStore(St0, DL, Hi, Ptr,
   15234           0 :                    ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
   15235           0 :                    MinAlign(Alignment, HalfValBitSize / 8), MMOFlags, AAInfo);
   15236           0 :   return St1;
   15237             : }
   15238             : 
   15239             : /// Convert a disguised subvector insertion \p N (constant index \p InsIndex) into
   15240             : /// a shuffle: insert_vector_elt V, (bitcast X from vector type), IdxC -->
   15241             : ///            bitcast(shuffle (bitcast V), (extended X), Mask)
   15242             : /// Returns an empty SDValue when the pattern does not match or the target rejects
   15243             : /// the mask. insert_subvector is avoided because it requires a legal subvector type.
   15244       50985 : SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
   15245       50985 :   SDValue InsertVal = N->getOperand(1);
   15246       53745 :   if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
   15247        2730 :       !InsertVal.getOperand(0).getValueType().isVector())
   15248       49911 :     return SDValue();
   15249             : 
   15250        1074 :   SDValue SubVec = InsertVal.getOperand(0);
   15251        1074 :   SDValue DestVec = N->getOperand(0);
   15252        1074 :   EVT SubVecVT = SubVec.getValueType();
   15253        1074 :   EVT VT = DestVec.getValueType();
   15254             :   unsigned NumSrcElts = SubVecVT.getVectorNumElements();
   15255        1074 :   unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
   15256        1074 :   unsigned NumMaskVals = ExtendRatio * NumSrcElts;
   15257             : 
   15258             :   // Step 1: Create a shuffle mask that implements this insert operation. The
   15259             :   // vector that we are inserting into will be operand 0 of the shuffle, so
   15260             :   // those elements are just 'i'. The inserted subvector is in the first
   15261             :   // positions of operand 1 of the shuffle. Example:
   15262             :   // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
   15263        1074 :   SmallVector<int, 16> Mask(NumMaskVals);
   15264      106808 :   for (unsigned i = 0; i != NumMaskVals; ++i) {
   15265      105734 :     if (i / NumSrcElts == InsIndex)
   15266       27190 :       Mask[i] = (i % NumSrcElts) + NumMaskVals;
   15267             :     else
   15268      184278 :       Mask[i] = i;
   15269             :   }
   15270             : 
   15271             :   // Bail out if the target cannot handle the shuffle we want to create.
   15272        1074 :   EVT SubVecEltVT = SubVecVT.getVectorElementType();
   15273        1074 :   EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
   15274        2148 :   if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
   15275        1048 :     return SDValue();
   15276             : 
   15277             :   // Step 2: Create a wide vector from the inserted source vector by appending
   15278             :   // undefined elements. This is the same size as our destination vector.
   15279             :   SDLoc DL(N);
   15280          26 :   SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
   15281          26 :   ConcatOps[0] = SubVec;
   15282          52 :   SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
   15283             : 
   15284             :   // Step 3: Shuffle in the padded subvector, then cast back to the original type.
   15285          26 :   SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
   15286          52 :   SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
   15287          26 :   AddToWorklist(PaddedSubV.getNode());
   15288          26 :   AddToWorklist(DestVecBC.getNode());
   15289          26 :   AddToWorklist(Shuf.getNode());
   15290          26 :   return DAG.getBitcast(VT, Shuf);
   15291             : }
   15292             : /// Combine INSERT_VECTOR_ELT node \p N (operands: vector, inserted value, index); returns the folded value or an empty SDValue.
   15293       52048 : SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
   15294       52048 :   SDValue InVec = N->getOperand(0);
   15295       52048 :   SDValue InVal = N->getOperand(1);
   15296       52048 :   SDValue EltNo = N->getOperand(2);
   15297             :   SDLoc DL(N);
   15298             : 
   15299             :   // If the inserted element is an UNDEF, just use the input vector.
   15300      104096 :   if (InVal.isUndef())
   15301         323 :     return InVec;
   15302             : 
   15303       51725 :   EVT VT = InVec.getValueType();
   15304             : 
   15305             :   // Remove redundant insertions:
   15306             :   // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
   15307             :   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
   15308       51758 :       InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
   15309          11 :     return InVec;
   15310             : 
   15311             :   auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
   15312             :   if (!IndexC) {
   15313             :     // If this is variable insert to undef vector, it might be better to splat:
   15314             :     // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
   15315         729 :     if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
   15316          60 :       SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
   15317         120 :       return DAG.getBuildVector(VT, DL, Ops);
   15318             :     }
   15319         669 :     return SDValue();
   15320             :   }
   15321             : 
   15322             :   // We must know which element is being inserted for folds below here.
   15323       50985 :   unsigned Elt = IndexC->getZExtValue();
   15324       50985 :   if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
   15325          26 :     return Shuf;
   15326             : 
   15327             :   // Canonicalize insert_vector_elt dag nodes.
   15328             :   // Example:
   15329             :   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
   15330             :   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
   15331             :   //
   15332             :   // Do this only if the child insert_vector node has one use; also
   15333             :   // do this only if indices are both constants and Idx1 < Idx0.
   15334       18210 :   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
   15335       50959 :       && isa<ConstantSDNode>(InVec.getOperand(2))) {
   15336       18179 :     unsigned OtherElt = InVec.getConstantOperandVal(2);
   15337       18179 :     if (Elt < OtherElt) {
   15338             :       // Swap nodes.
   15339         290 :       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
   15340         290 :                                   InVec.getOperand(0), InVal, EltNo);
   15341         290 :       AddToWorklist(NewOp.getNode());
   15342         290 :       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
   15343         580 :                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
   15344             :     }
   15345             :   }
   15346             : 
   15347             :   // If we can't generate a legal BUILD_VECTOR, exit
   15348       50669 :   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
   15349        3597 :     return SDValue();
   15350             : 
   15351             :   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
   15352             :   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
   15353             :   // vector elements.
   15354             :   SmallVector<SDValue, 8> Ops;
   15355             :   // Do not combine these two vectors if the output vector will not replace
   15356             :   // the input vector (i.e. the BUILD_VECTOR has other users).
   15357       64199 :   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
   15358       34030 :     Ops.append(InVec.getNode()->op_begin(),
   15359             :                InVec.getNode()->op_end());
   15360       30057 :   } else if (InVec.isUndef()) {
   15361             :     unsigned NElts = VT.getVectorNumElements();
   15362       15268 :     Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
   15363             :   } else {
   15364       22423 :     return SDValue();
   15365             :   }
   15366             : 
   15367             :   // Insert the element; a constant index past the end leaves Ops untouched.
   15368       49298 :   if (Elt < Ops.size()) {
   15369             :     // All the operands of BUILD_VECTOR must have the same type;
   15370             :     // we enforce that here.
   15371       24649 :     EVT OpVT = Ops[0].getValueType();
   15372       24649 :     Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
   15373             :   }
   15374             : 
   15375             :   // Return the new vector
   15376       49298 :   return DAG.getBuildVector(VT, DL, Ops);
   15377             : }
   15378             : /// Replace extract_vector_elt \p EVE of non-volatile load \p OriginalLoad with a load of just element \p EltNo; returns SDValue(EVE, 0), or an empty SDValue if the narrow load is rejected.
   15379         744 : SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
   15380             :     SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
   15381             :   assert(!OriginalLoad->isVolatile());
   15382             : 
   15383         744 :   EVT ResultVT = EVE->getValueType(0);
   15384         744 :   EVT VecEltVT = InVecVT.getVectorElementType();
   15385         744 :   unsigned Align = OriginalLoad->getAlignment();
   15386        1488 :   unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
   15387         744 :       VecEltVT.getTypeForEVT(*DAG.getContext()));
   15388             : 
   15389         744 :   if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
   15390          46 :     return SDValue();
   15391             :   // Tell the target which kind of load is built below: extending only if the result is wider than the element.
   15392         698 :   ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
   15393             :     ISD::EXTLOAD : ISD::NON_EXTLOAD;
   15394         698 :   if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
   15395          12 :     return SDValue();
   15396             : 
   15397             :   Align = NewAlign;
   15398             : 
   15399         686 :   SDValue NewPtr = OriginalLoad->getBasePtr();
   15400         686 :   SDValue Offset;
   15401         686 :   EVT PtrType = NewPtr.getValueType();
   15402             :   MachinePointerInfo MPI;
   15403             :   SDLoc DL(EVE);
   15404             :   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
   15405         675 :     int Elt = ConstEltNo->getZExtValue();
   15406         675 :     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
   15407         675 :     Offset = DAG.getConstant(PtrOff, DL, PtrType);
   15408        1350 :     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
   15409             :   } else {
   15410          11 :     Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
   15411          11 :     Offset = DAG.getNode(
   15412             :         ISD::MUL, DL, PtrType, Offset,
   15413          11 :         DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
   15414          11 :     MPI = OriginalLoad->getPointerInfo();
   15415             :   }
   15416        1372 :   NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
   15417             : 
   15418             :   // The replacement we need to do here is a little tricky: we need to
   15419             :   // replace an extractelement of a load with a load.
   15420             :   // Use ReplaceAllUsesOfValuesWith to do the replacement.
   15421             :   // Note that this replacement assumes that the extract_vector_elt is the only
   15422             :   // use of the load; that's okay because we don't want to perform this
   15423             :   // transformation in other cases anyway.
   15424             :   SDValue Load;
   15425             :   SDValue Chain;
   15426         686 :   if (ResultVT.bitsGT(VecEltVT)) {
   15427             :     // If the result type of vextract is wider than the load, then issue an
   15428             :     // extending load instead.
   15429           0 :     ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
   15430             :                                                   VecEltVT)
   15431           0 :                                    ? ISD::ZEXTLOAD
   15432             :                                    : ISD::EXTLOAD;
   15433           0 :     Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
   15434             :                           OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
   15435           0 :                           Align, OriginalLoad->getMemOperand()->getFlags(),
   15436           0 :                           OriginalLoad->getAAInfo());
   15437             :     Chain = Load.getValue(1);
   15438             :   } else {
   15439         686 :     Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
   15440         686 :                        MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
   15441        1372 :                        OriginalLoad->getAAInfo());
   15442         686 :     Chain = Load.getValue(1);
   15443         686 :     if (ResultVT.bitsLT(VecEltVT))
   15444           0 :       Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
   15445             :     else
   15446         686 :       Load = DAG.getBitcast(ResultVT, Load);
   15447             :   }
   15448             :   WorklistRemover DeadNodes(*this);
   15449             :   SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
   15450         686 :   SDValue To[] = { Load, Chain };
   15451         686 :   DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
   15452             :   // Since we're explicitly calling ReplaceAllUses, add the new node to the
   15453             :   // worklist explicitly as well.
   15454         686 :   AddToWorklist(Load.getNode());
   15455             :   AddUsersToWorklist(Load.getNode()); // Add users too
   15456             :   // Make sure to revisit this node to clean it up; it will usually be dead.
   15457         686 :   AddToWorklist(EVE);
   15458             :   ++OpsNarrowed;
   15459         686 :   return SDValue(EVE, 0);
   15460             : }
   15461             : 
   15462      416312 : SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // Try the EXTRACT_VECTOR_ELT folds below in order; an empty SDValue means no fold applied.
   15463      416312 :   SDValue InVec = N->getOperand(0);
   15464      416312 :   EVT VT = InVec.getValueType();
   15465      832624 :   EVT NVT = N->getValueType(0);
   15466      416312 :   if (InVec.isUndef())
   15467          31 :     return DAG.getUNDEF(NVT);
   15468             : 
   15469             :   // (vextract (scalar_to_vector val), idx) -> val (the index is not inspected here)
   15470      416281 :   if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
   15471             :     // Check if the result type doesn't match the inserted element type. A
   15472             :     // SCALAR_TO_VECTOR may truncate the inserted element and the
   15473             :     // EXTRACT_VECTOR_ELT may widen the extracted vector.
   15474         337 :     SDValue InOp = InVec.getOperand(0);
   15475           0 :     if (InOp.getValueType() != NVT) {
   15476             :       assert(InOp.getValueType().isInteger() && NVT.isInteger());
   15477          10 :       return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
   15478             :     }
   15479         332 :     return InOp;
   15480             :   }
   15481             : 
   15482      415944 :   SDValue EltNo = N->getOperand(1);
   15483             :   ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
   15484             : 
   15485             :   // extract_vector_elt of a constant out-of-bounds element -> UNDEF
   15486     1234011 :   if (ConstEltNo && ConstEltNo->getAPIntValue().uge(VT.getVectorNumElements()))
   15487           6 :     return DAG.getUNDEF(NVT);
   15488             : 
   15489             :   // extract_vector_elt (build_vector x, y), 1 -> y
   15490      411331 :   if (ConstEltNo &&
   15491             :       InVec.getOpcode() == ISD::BUILD_VECTOR &&
   15492      514496 :       TLI.isTypeLegal(VT) &&
   15493       34190 :       (InVec.hasOneUse() ||
   15494       34190 :        TLI.aggressivelyPreferBuildVectorSources(VT))) {
   15495      145236 :     SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
   15496             :     EVT InEltVT = Elt.getValueType();
   15497             : 
   15498             :     // Sometimes build_vector's scalar input types do not match the result
   15499           0 :     if (NVT == InEltVT) // type; only forward the operand when they agree.
   15500       48412 :       return Elt;
   15501             : 
   15502             :     // TODO: It may be useful to truncate if free if the build_vector implicitly
   15503             :     // converts.
   15504             :   }
   15505             : 
   15506      367526 :   if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST) {
   15507             :     // The vector index of the LSBs of the source depends on the endianness.
   15508       99371 :     bool IsLE = DAG.getDataLayout().isLittleEndian();
   15509             : 
   15510             :     // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
   15511      100909 :     unsigned BCTruncElt = IsLE ? 0 : VT.getVectorNumElements() - 1;
   15512       99371 :     SDValue BCSrc = InVec.getOperand(0);
   15513      140589 :     if (InVec.hasOneUse() && ConstEltNo->getZExtValue() == BCTruncElt &&
   15514      126295 :         VT.isInteger() && BCSrc.getValueType().isScalarInteger())
   15515        2620 :       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
   15516             :   }
   15517             : 
   15518             :   // extract_vector_elt (insert_vector_elt vec, val, idx), idx -> val
   15519             :   //
   15520             :   // This only really matters if the index is non-constant since other combines
   15521             :   // on the constant elements already work.
   15522      732432 :   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT &&
   15523        1188 :       EltNo == InVec.getOperand(2)) {
   15524          30 :     SDValue Elt = InVec.getOperand(1);
   15525          49 :     return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt;
   15526             :   }
   15527             : 
   15528             :   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
   15529             :   // We only perform this optimization before the op legalization phase because
   15530             :   // we may introduce new vector instructions which are not backed by TD
   15531             :   // patterns. For example on AVX, extracting elements from a wide vector
   15532             :   // without using extract_subvector. However, if we can find an underlying
   15533             :   // scalar value, then we can always use that.
   15534      366186 :   if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
   15535         917 :     int NumElem = VT.getVectorNumElements();
   15536             :     ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
   15537             :     // Find the new index to extract from.
   15538        1834 :     int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());
   15539             : 
   15540             :     // Extracting an undef index is undef.
   15541         917 :     if (OrigElt == -1)
   15542         387 :       return DAG.getUNDEF(NVT);
   15543             : 
   15544             :     // Select the shuffle operand to extract from and rebase the index onto it.
   15545             :     SDValue SVInVec;
   15546         890 :     if (OrigElt < NumElem) {
   15547         554 :       SVInVec = InVec->getOperand(0);
   15548             :     } else {
   15549         336 :       SVInVec = InVec->getOperand(1);
   15550         336 :       OrigElt -= NumElem;
   15551             :     }
   15552             : 
   15553         890 :     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
   15554         260 :       SDValue InOp = SVInVec.getOperand(OrigElt);
   15555         130 :       if (InOp.getValueType() != NVT) {
   15556             :         assert(InOp.getValueType().isInteger() && NVT.isInteger());
   15557           0 :         InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
   15558             :       }
   15559             : 
   15560         130 :       return InOp;
   15561             :     }
   15562             : 
   15563             :     // FIXME: We should handle recursing on other vector shuffles and
   15564             :     // scalar_to_vector here as well.
   15565             : 
   15566         760 :     if (!LegalOperations ||
   15567             :         // FIXME: Should really be just isOperationLegalOrCustom.
   15568         760 :         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VT) ||
   15569             :         TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VT)) {
   15570         230 :       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
   15571         460 :       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
   15572         690 :                          DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
   15573             :     }
   15574             :   }
   15575             : 
   15576             :   // If only EXTRACT_VECTOR_ELT nodes with constant indices use the source
   15577             :   // vector, we can simplify it based on the (in-range) extraction indices.
   15578      731598 :   if (llvm::all_of(InVec->uses(), [&](SDNode *Use) {
   15579     1588386 :         return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
   15580     1588386 :                Use->getOperand(0) == InVec &&
   15581             :                isa<ConstantSDNode>(Use->getOperand(1));
   15582             :       })) {
   15583      271577 :     APInt DemandedElts = APInt::getNullValue(VT.getVectorNumElements());
   15584     1581452 :     for (SDNode *Use : InVec->uses()) {
   15585     1309875 :       auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
   15586     2619750 :       if (CstElt->getAPIntValue().ult(VT.getVectorNumElements()))
   15587     1309875 :         DemandedElts.setBit(CstElt->getZExtValue());
   15588             :     }
   15589      271577 :     if (SimplifyDemandedVectorElts(InVec, DemandedElts, true))
   15590         575 :       return SDValue(N, 0);
   15591             :   }
   15592             : 
   15593             :   bool BCNumEltsChanged = false;
   15594      365224 :   EVT ExtVT = VT.getVectorElementType();
   15595      365224 :   EVT LVT = ExtVT;
   15596             : 
   15597             :   // If the result of the load has to be truncated, then it's not necessarily
   15598             :   // profitable.
   15599      365224 :   if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
   15600           0 :     return SDValue();
   15601             : 
   15602      730448 :   if (InVec.getOpcode() == ISD::BITCAST) {
   15603             :     // Don't duplicate a load with other uses.
   15604       97820 :     if (!InVec.hasOneUse())
   15605       78733 :       return SDValue();
   15606             : 
   15607       57261 :     EVT BCVT = InVec.getOperand(0).getValueType();
   15608       19087 :     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
   15609       12598 :       return SDValue();
   15610        6489 :     if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
   15611             :       BCNumEltsChanged = true;
   15612        6489 :     InVec = InVec.getOperand(0);
   15613        6489 :     ExtVT = BCVT.getVectorElementType();
   15614             :   }
   15615             : 
   15616             :   // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
   15617      114780 :   if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
   15618      274717 :       ISD::isNormalLoad(InVec.getNode()) &&
   15619          67 :       !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
   15620          67 :     SDValue Index = N->getOperand(1);
   15621             :     if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) {
   15622          67 :       if (!OrigLoad->isVolatile()) {
   15623             :         return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
   15624          54 :                                                              OrigLoad);
   15625             :       }
   15626             :     }
   15627             :   }
   15628             : 
   15629             :   // Perform only after legalization to ensure build_vector / vector_shuffle
   15630             :   // optimizations have already been done.
   15631      273839 :   if (!LegalOperations) return SDValue();
   15632             : 
   15633             :   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
   15634             :   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
   15635             :   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr+1*size)
   15636             : 
   15637      159113 :   if (ConstEltNo) {
   15638      158926 :     int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
   15639             : 
   15640             :     LoadSDNode *LN0 = nullptr;
   15641             :     const ShuffleVectorSDNode *SVN = nullptr;
   15642      158926 :     if (ISD::isNormalLoad(InVec.getNode())) {
   15643             :       LN0 = cast<LoadSDNode>(InVec);
   15644           0 :     } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
   15645       38347 :                InVec.getOperand(0).getValueType() == ExtVT &&
   15646             :                ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
   15647             :       // Don't duplicate a load with other uses.
   15648          17 :       if (!InVec.hasOneUse())
   15649          15 :         return SDValue();
   15650             : 
   15651           2 :       LN0 = cast<LoadSDNode>(InVec.getOperand(0));
   15652             :     } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
   15653             :       // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 0)
   15654             :       // =>
   15655             :       // (load $addr+1*size)
   15656             : 
   15657             :       // Don't duplicate a load with other uses.
   15658         505 :       if (!InVec.hasOneUse())
   15659         443 :         return SDValue();
   15660             : 
   15661             :       // If the bit convert changed the number of elements, it is unsafe
   15662             :       // to examine the mask.
   15663          62 :       if (BCNumEltsChanged)
   15664           0 :         return SDValue();
   15665             : 
   15666             :       // Select the input vector, guarding against out of range extract vector.
   15667             :       unsigned NumElems = VT.getVectorNumElements();
   15668          62 :       int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt); // NOTE(review): Elt == NumElems passes this guard; `>=` looks intended, though the out-of-bounds return near the top of this function may make it moot - confirm.
   15669          62 :       InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
   15670             : 
   15671         124 :       if (InVec.getOpcode() == ISD::BITCAST) {
   15672             :         // Don't duplicate a load with other uses.
   15673          51 :         if (!InVec.hasOneUse())
   15674          42 :           return SDValue();
   15675             : 
   15676          18 :         InVec = InVec.getOperand(0);
   15677             :       }
   15678          20 :       if (ISD::isNormalLoad(InVec.getNode())) {
   15679             :         LN0 = cast<LoadSDNode>(InVec);
   15680          13 :         Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
   15681          13 :         EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
   15682             :       }
   15683             :     }
   15684             : 
   15685             :     // Make sure we found a non-volatile load and the extractelement is
   15686             :     // the only use.
   15687      120594 :     if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
   15688      157736 :       return SDValue();
   15689             : 
   15690             :     // If Idx was -1 above, Elt is going to be -1, so just return undef.
   15691         690 :     if (Elt == -1)
   15692           0 :       return DAG.getUNDEF(LVT); // NOTE(review): LVT is the source element type, not N's result type NVT - confirm the two always agree on this path.
   15693             : 
   15694         690 :     return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
   15695             :   }
   15696             : 
   15697         187 :   return SDValue();
   15698             : }
   15699             : 
   15700             : // Simplify (build_vec (ext x)...) to (bitcast (build_vec x, filler...)); returns an empty SDValue if not applicable.
   15701      697780 : SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
   15702             :   // We perform this optimization post type-legalization because
   15703             :   // the type-legalizer often scalarizes integer-promoted vectors.
   15704             :   // Performing this optimization before may create bit-casts which
   15705             :   // will be type-legalized to complex code sequences.
   15706             :   // We perform this optimization only before the operation legalizer because we
   15707             :   // may introduce illegal operations.
   15708      697780 :   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
   15709      572087 :     return SDValue();
   15710             : 
   15711      125693 :   unsigned NumInScalars = N->getNumOperands();
   15712             :   SDLoc DL(N);
   15713      125693 :   EVT VT = N->getValueType(0);
   15714             : 
   15715             :   // Check to see if this is a BUILD_VECTOR of a bunch of values
   15716             :   // which come from any_extend or zero_extend nodes. If so, we can create
   15717             :   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
   15718             :   // optimizations. We do not handle sign-extend because we can't fill the sign
   15719             :   // using shuffles.
   15720      125693 :   EVT SourceType = MVT::Other;
   15721             :   bool AllAnyExt = true;
   15722             : 
   15723      136494 :   for (unsigned i = 0; i != NumInScalars; ++i) {
   15724      267554 :     SDValue In = N->getOperand(i);
   15725             :     // Ignore undef inputs.
   15726      133777 :     if (In.isUndef()) continue;
   15727             : 
   15728      132453 :     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
   15729             :     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
   15730             : 
   15731             :     // Abort if the element is not an extension.
   15732      132453 :     if (!ZeroExt && !AnyExt) {
   15733      122974 :       SourceType = MVT::Other;
   15734      122974 :       break;
   15735             :     }
   15736             : 
   15737             :     // The input is a ZeroExt or AnyExt. Check the original type.
   15738        9479 :     EVT InTy = In.getOperand(0).getValueType();
   15739             : 
   15740             :     // Check that all of the pre-extension source types are the same.
   15741             :     if (SourceType == MVT::Other)
   15742             :       // First time.
   15743        3342 :       SourceType = InTy;
   15744        6137 :     else if (InTy != SourceType) {
   15745             :       // Multiple incoming types. Abort.
   15746           2 :       SourceType = MVT::Other;
   15747           2 :       break;
   15748             :     }
   15749             : 
   15750             :     // Check if all of the extends are ANY_EXTENDs.
   15751             :     AllAnyExt &= AnyExt;
   15752             :   }
   15753             : 
   15754             :   // In order to have valid types, all of the inputs must be extended from the
   15755             :   // same source type and all of the inputs must be any or zero extend.
   15756             :   // Scalar sizes must be a power of two.
   15757      125693 :   EVT OutScalarTy = VT.getScalarType();
   15758      125693 :   bool ValidTypes = SourceType != MVT::Other &&
   15759        2717 :                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
   15760        2717 :                  isPowerOf2_32(SourceType.getSizeInBits());
   15761             : 
   15762             :   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
   15763             :   // turn into a single shuffle instruction.
   15764             :   if (!ValidTypes)
   15765      122976 :     return SDValue();
   15766             : 
   15767        2717 :   bool isLE = DAG.getDataLayout().isLittleEndian();
   15768        2717 :   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
   15769             :   assert(ElemRatio > 1 && "Invalid element size ratio");
   15770         982 :   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
   15771        2717 :                                DAG.getConstant(0, DL, SourceType);
   15772             : 
   15773        2717 :   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
   15774        2717 :   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
   15775             : 
   15776             :   // Populate the new build_vector: each source goes in the first slot of its ElemRatio-wide group on little-endian targets and the last slot on big-endian ones; other slots keep Filler.
   15777       11469 :   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
   15778       17504 :     SDValue Cast = N->getOperand(i);
   15779             :     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
   15780             :             Cast.getOpcode() == ISD::ZERO_EXTEND ||
   15781             :             Cast.isUndef()) && "Invalid cast opcode");
   15782             :     SDValue In;
   15783        8752 :     if (Cast.isUndef())
   15784          74 :       In = DAG.getUNDEF(SourceType);
   15785             :     else
   15786        8678 :       In = Cast->getOperand(0);
   15787        8752 :     unsigned Index = isLE ? (i * ElemRatio) :
   15788          22 :                             (i * ElemRatio + (ElemRatio - 1));
   15789             : 
   15790             :     assert(Index < Ops.size() && "Invalid index");
   15791       17504 :     Ops[Index] = In;
   15792             :   }
   15793             : 
   15794             :   // The type of the new BUILD_VECTOR node.
   15795        2717 :   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
   15796             :   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
   15797             :          "Invalid vector size");
   15798             :   // Bail out if the new vector type is not legal, or if BUILD_VECTOR is legal for VT but not for the new type.
   15799        2717 :   if (!isTypeLegal(VecVT) ||
   15800             :       (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
   15801             :        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
   15802        1304 :     return SDValue();
   15803             : 
   15804             :   // Make the new BUILD_VECTOR.
   15805        2826 :   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
   15806             : 
   15807             :   // The new BUILD_VECTOR node has the potential to be further optimized.
   15808        1413 :   AddToWorklist(BV.getNode());
   15809             :   // Bitcast to the desired type.
   15810        1413 :   return DAG.getBitcast(VT, BV);
   15811             : }
   15812             : 
   15813      696367 : SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { // Fold a BUILD_VECTOR of scalar [US]INT_TO_FP nodes into one vector conversion of a BUILD_VECTOR of their sources; empty SDValue if not applicable.
   15814      696367 :   EVT VT = N->getValueType(0);
   15815             : 
   15816      696367 :   unsigned NumInScalars = N->getNumOperands();
   15817             :   SDLoc DL(N);
   15818             : 
   15819             :   EVT SrcVT = MVT::Other;
   15820             :   unsigned Opcode = ISD::DELETED_NODE;
   15821             :   unsigned NumDefs = 0;
   15822             : 
   15823      703634 :   for (unsigned i = 0; i != NumInScalars; ++i) {
   15824     1406202 :     SDValue In = N->getOperand(i);
   15825             :     unsigned Opc = In.getOpcode();
   15826             : 
   15827      703101 :     if (Opc == ISD::UNDEF)
   15828             :       continue;
   15829             : 
   15830             :     // Latch the conversion opcode from the first defined operand that is an int-to-FP conversion.
   15831      697929 :     if (Opcode == ISD::DELETED_NODE &&
   15832      696367 :         (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
   15833             :       Opcode = Opc;
   15834             :     }
   15835             : 
   15836      697929 :     if (Opc != Opcode) // Every defined operand must use that same opcode.
   15837      695834 :       return SDValue();
   15838             : 
   15839        2095 :     EVT InVT = In.getOperand(0).getValueType();
   15840             : 
   15841             :     // If the scalar values do not all have the same source type, bail out.
   15842             :     // It's chosen to simplify BUILD_VECTOR of integer types.
   15843             :     if (SrcVT == MVT::Other)
   15844         543 :       SrcVT = InVT;
   15845           0 :     if (SrcVT != InVT)
   15846           0 :       return SDValue();
   15847        2095 :     NumDefs++;
   15848             :   }
   15849             : 
   15850             :   // If the vector has just one element defined, it's not worth folding it into
   15851             :   // a vectorized one.
   15852         533 :   if (NumDefs < 2)
   15853           5 :     return SDValue();
   15854             : 
   15855             :   assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
   15856             :          && "Should only handle conversion from integer to float.");
   15857             :   assert(SrcVT != MVT::Other && "Cannot determine source type!");
   15858             : 
   15859         528 :   EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
   15860             : 
   15861         528 :   if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
   15862         510 :     return SDValue();
   15863             : 
   15864             :   // Just because the floating-point vector type is legal does not necessarily
   15865             :   // mean that the corresponding integer vector type is.
   15866          18 :   if (!isTypeLegal(NVT))
   15867           0 :     return SDValue();
   15868             : 
   15869             :   SmallVector<SDValue, 8> Opnds;
   15870          78 :   for (unsigned i = 0; i != NumInScalars; ++i) {
   15871         120 :     SDValue In = N->getOperand(i);
   15872             : 
   15873          60 :     if (In.isUndef())
   15874          12 :       Opnds.push_back(DAG.getUNDEF(SrcVT));
   15875             :     else
   15876          48 :       Opnds.push_back(In.getOperand(0));
   15877             :   }
   15878          36 :   SDValue BV = DAG.getBuildVector(NVT, DL, Opnds);
   15879          18 :   AddToWorklist(BV.getNode());
   15880             : 
   15881          36 :   return DAG.getNode(Opcode, DL, VT, BV);
   15882             : }
   15883             : 
   15884           0 : SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
   15885             :                                            ArrayRef<int> VectorMask,
   15886             :                                            SDValue VecIn1, SDValue VecIn2,
   15887             :                                            unsigned LeftIdx) { // Build a shuffle of VecIn1/VecIn2 (VecIn2 may be null) producing N's type; empty SDValue on unsupported type combinations.
   15888           0 :   MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
   15889           0 :   SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
   15890             : 
   15891           0 :   EVT VT = N->getValueType(0);
   15892           0 :   EVT InVT1 = VecIn1.getValueType();
   15893           0 :   EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
   15894             : 
   15895             :   unsigned Vec2Offset = 0;
   15896             :   unsigned NumElems = VT.getVectorNumElements();
   15897             :   unsigned ShuffleNumElems = NumElems;
   15898             : 
   15899             :   // In case both the input vectors are extracted from the same base
   15900             :   // vector we do not need an extra addend (Vec2Offset) while
   15901             :   // computing the shuffle mask.
   15902           0 :   if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
   15903           0 :       !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
   15904           0 :       !(VecIn1.getOperand(0) == VecIn2.getOperand(0)))
   15905             :     Vec2Offset = InVT1.getVectorNumElements();
   15906             : 
   15907             :   // We can't generate a shuffle node with mismatched input and output types.
   15908             :   // Try to make the types match the type of the output.
   15909           0 :   if (InVT1 != VT || InVT2 != VT) {
   15910           0 :     if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
   15911             :       // If the output vector length is a multiple of both input lengths,
   15912             :       // we can concatenate them and pad the rest with undefs.
   15913           0 :       unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
   15914             :       assert(NumConcats >= 2 && "Concat needs at least two inputs!");
   15915           0 :       SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
   15916           0 :       ConcatOps[0] = VecIn1;
   15917           0 :       ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
   15918           0 :       VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
   15919           0 :       VecIn2 = SDValue();
   15920           0 :     } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
   15921           0 :       if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
   15922           0 :         return SDValue();
   15923             : 
   15924           0 :       if (!VecIn2.getNode()) {
   15925             :         // If we only have one input vector, and it's twice the size of the
   15926             :         // output, split it in two.
   15927           0 :         VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
   15928           0 :                              DAG.getConstant(NumElems, DL, IdxTy));
   15929           0 :         VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
   15930             :         // Since we now have shorter input vectors, adjust the offset of the
   15931             :         // second vector's start.
   15932             :         Vec2Offset = NumElems;
   15933           0 :       } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
   15934             :         // VecIn1 is wider than the output, and we have another, possibly
   15935             :         // smaller input. Pad the smaller input with undefs, shuffle at the
   15936             :         // input vector width, and extract the output.
   15937             :         // The shuffle type is different than VT, so check legality again.
   15938           0 :         if (LegalOperations &&
   15939           0 :             !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
   15940           0 :           return SDValue();
   15941             : 
   15942             :         // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
   15943             :         // lower it back into a BUILD_VECTOR. So if the inserted type is
   15944             :         // illegal, don't even try.
   15945           0 :         if (InVT1 != InVT2) {
   15946           0 :           if (!TLI.isTypeLegal(InVT2))
   15947           0 :             return SDValue();
   15948           0 :           VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
   15949           0 :                                DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
   15950             :         }
   15951           0 :         ShuffleNumElems = NumElems * 2;
   15952             :       } else {
   15953             :         // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
   15954             :         // than VecIn1. We can't handle this for now - this case will disappear
   15955             :         // when we start sorting the vectors by type.
   15956           0 :         return SDValue();
   15957             :       }
   15958           0 :     } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
   15959             :                InVT1.getSizeInBits() == VT.getSizeInBits()) {
   15960           0 :       SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2)); // VecIn2 is half the output width: widen it by concatenating with undef.
   15961           0 :       ConcatOps[0] = VecIn2;
   15962           0 :       VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
   15963             :     } else {
   15964             :       // TODO: Support cases where the length mismatch isn't exactly by a
   15965             :       // factor of 2.
   15966             :       // TODO: Move this check upwards, so that if we have bad type
   15967             :       // mismatches, we don't create any DAG nodes.
   15968           0 :       return SDValue();
   15969             :     }
   15970             :   }
   15971             : 
   15972             :   // Initialize mask to undef.
   15973           0 :   SmallVector<int, 8> Mask(ShuffleNumElems, -1);
   15974             : 
   15975             :   // Only need to run up to the number of elements actually used, not the
   15976             :   // total number of elements in the shuffle - if we are shuffling a wider
   15977             :   // vector, the high lanes should be set to undef.
   15978           0 :   for (unsigned i = 0; i != NumElems; ++i) {
   15979           0 :     if (VectorMask[i] <= 0) // Non-positive entries are not drawn from an input vector; leave the lane undef.
   15980           0 :       continue;
   15981             : 
   15982           0 :     unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
   15983           0 :     if (VectorMask[i] == (int)LeftIdx) {
   15984           0 :       Mask[i] = ExtIndex;
   15985           0 :     } else if (VectorMask[i] == (int)LeftIdx + 1) {
   15986           0 :       Mask[i] = Vec2Offset + ExtIndex;
   15987             :     }
   15988             :   }
   15989             : 
   15990             :   // The type of the input vectors may have changed above.
   15991           0 :   InVT1 = VecIn1.getValueType();
   15992             : 
   15993             :   // If we already have a VecIn2, it should have the same type as VecIn1.
   15994             :   // If we don't, get an undef vector of the appropriate type.
   15995           0 :   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
   15996             :   assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
   15997             : 
   15998           0 :   SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
   15999           0 :   if (ShuffleNumElems > NumElems)
   16000           0 :     Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
   16001             : 
   16002           0 :   return Shuffle;
   16003             : }
   16004             : 
   16005             : // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
   16006             : // operations. If the types of the vectors we're extracting from allow it,
   16007             : // turn this into a vector_shuffle node.
                       //
                       // Operands may be undef, zero constants, or EXTRACT_VECTOR_ELT nodes with an
                       // in-range constant index whose source vector has the same element type as
                       // the result. Returns an empty SDValue if the result type is not legal, if
                       // VECTOR_SHUFFLE is not legal for it once operations are legalized, if any
                       // operand has another form, if no input vector was found, or if
                       // createBuildVecShuffle fails for a pair of inputs. Otherwise returns a
                       // single shuffle or the root of a tree of blending shuffles.
   16008      696349 : SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
   16009             :   SDLoc DL(N);
   16010      696349 :   EVT VT = N->getValueType(0);
   16011             : 
   16012             :   // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
   16013      696349 :   if (!isTypeLegal(VT))
   16014           0 :     return SDValue();
   16015             : 
   16016             :   // May only combine to shuffle after legalize if shuffle is legal.
   16017      696349 :   if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
   16018      369950 :     return SDValue();
   16019             : 
   16020             :   bool UsesZeroVector = false;
   16021      326399 :   unsigned NumElems = N->getNumOperands();
   16022             : 
   16023             :   // Record, for each element of the newly built vector, which input vector
   16024             :   // that element comes from. -1 stands for undef, 0 for the zero vector,
   16025             :   // and positive values for the input vectors.
   16026             :   // VectorMask maps each element to its vector number, and VecIn maps vector
   16027             :   // numbers to their initial SDValues.
   16028             : 
   16029      326399 :   SmallVector<int, 8> VectorMask(NumElems, -1);
   16030             :   SmallVector<SDValue, 8> VecIn;
                         // Slot 0 is a placeholder: vector number 0 is reserved for the zero vector,
                         // so real input vectors are numbered starting from 1.
   16031      326399 :   VecIn.push_back(SDValue());
   16032             : 
   16033     2040534 :   for (unsigned i = 0; i != NumElems; ++i) {
   16034     3845394 :     SDValue Op = N->getOperand(i);
   16035             : 
                           // Undef operands keep the initial -1 entry in VectorMask.
   16036     1922697 :     if (Op.isUndef())
   16037     1685484 :       continue;
   16038             : 
   16039             :     // See if we can use a blend with a zero vector.
   16040             :     // TODO: Should we generalize this to a blend with an arbitrary constant
   16041             :     // vector?
   16042     1888797 :     if (isNullConstant(Op) || isNullFPConstant(Op)) {
   16043             :       UsesZeroVector = true;
   16044     1651584 :       VectorMask[i] = 0;
   16045     1651584 :       continue;
   16046             :     }
   16047             : 
   16048             :     // Not an undef or zero. If the input is something other than an
   16049             :     // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
   16050      237213 :     if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
   16051             :         !isa<ConstantSDNode>(Op.getOperand(1)))
   16052      208457 :       return SDValue();
   16053       28756 :     SDValue ExtractedFromVec = Op.getOperand(0);
   16054             : 
   16055       28756 :     APInt ExtractIdx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
   16056      115024 :     if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
   16057           2 :       return SDValue();
   16058             : 
   16059             :     // All inputs must have the same element type as the output.
   16060       28754 :     if (VT.getVectorElementType() !=
   16061       57508 :         ExtractedFromVec.getValueType().getVectorElementType())
   16062         103 :       return SDValue();
   16063             : 
   16064             :     // Have we seen this input vector before?
   16065             :     // The vectors are expected to be tiny (usually 1 or 2 elements), so using
   16066             :     // a map back from SDValues to numbers isn't worth it.
   16067             :     unsigned Idx = std::distance(
   16068             :         VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
                           // Not found: the linear search returned end(), so append a new input.
   16069       28651 :     if (Idx == VecIn.size())
   16070        7880 :       VecIn.push_back(ExtractedFromVec);
   16071             : 
   16072       28651 :     VectorMask[i] = Idx;
   16073             :   }
   16074             : 
   16075             :   // If we didn't find at least one input vector, bail out.
                         // (VecIn always contains the placeholder, hence the comparison against 2.)
   16076      235674 :   if (VecIn.size() < 2)
   16077      111226 :     return SDValue();
   16078             : 
   16079             :   // If all the operands of the BUILD_VECTOR extract from the same
   16080             :   // vector, then split the vector efficiently based on the maximum
   16081             :   // vector access index and adjust the VectorMask and
   16082             :   // VecIn accordingly.
   16083        6611 :   if (VecIn.size() == 2) {
   16084        5817 :     unsigned MaxIndex = 0;
   16085             :     unsigned NearestPow2 = 0;
   16086        5817 :     SDValue Vec = VecIn.back();
   16087        5817 :     EVT InVT = Vec.getValueType();
   16088        5817 :     MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
   16089        5817 :     SmallVector<unsigned, 8> IndexVec(NumElems, 0);
   16090             : 
                           // Remember the extract index of every element that comes from the input
                           // vector (mask value > 0) and track the largest index used.
   16091       56436 :     for (unsigned i = 0; i < NumElems; i++) {
   16092      101238 :       if (VectorMask[i] <= 0)
   16093       27627 :         continue;
   16094       68976 :       unsigned Index = N->getOperand(i).getConstantOperandVal(1);
   16095       22992 :       IndexVec[i] = Index;
   16096       22992 :       MaxIndex = std::max(MaxIndex, Index);
   16097             :     }
   16098             : 
                           // Only split when InVT is a simple type, NearestPow2 is greater than 2 and
                           // strictly greater than MaxIndex, and NearestPow2 is more than twice the
                           // number of elements being built.
   16099        5817 :     NearestPow2 = PowerOf2Ceil(MaxIndex);
   16100        5817 :     if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
   16101        2981 :         NumElems * 2 < NearestPow2) {
   16102         462 :       unsigned SplitSize = NearestPow2 / 2;
   16103         462 :       EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
   16104         462 :                                      InVT.getVectorElementType(), SplitSize);
   16105         462 :       if (TLI.isTypeLegal(SplitVT)) {
                               // VecIn2 starts at element SplitSize of Vec, VecIn1 at element 0.
   16106         324 :         SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
   16107         324 :                                      DAG.getConstant(SplitSize, DL, IdxTy));
   16108         324 :         SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
   16109         324 :                                      DAG.getConstant(0, DL, IdxTy));
                               // Replace the single input by its two pieces (numbers 1 and 2).
   16110             :         VecIn.pop_back();
   16111         324 :         VecIn.push_back(VecIn1);
   16112         324 :         VecIn.push_back(VecIn2);
   16113             : 
                               // Re-point every extracted element at the piece that holds its index.
   16114        2850 :         for (unsigned i = 0; i < NumElems; i++) {
   16115        5052 :           if (VectorMask[i] <= 0)
   16116             :             continue;
   16117        3538 :           VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
   16118             :         }
   16119             :       }
   16120             :     }
   16121             :   }
   16122             : 
   16123             :   // TODO: We want to sort the vectors by descending length, so that adjacent
   16124             :   // pairs have similar length, and the longer vector is always first in the
   16125             :   // pair.
   16126             : 
   16127             :   // TODO: Should this fire if some of the input vectors have an illegal type
   16128             :   // (like it does now), or should we let legalization run its course first?
   16129             : 
   16130             :   // Shuffle phase:
   16131             :   // Take pairs of vectors, and shuffle them so that the result has elements
   16132             :   // from these vectors in the correct places.
   16133             :   // For example, given:
   16134             :   // t10: i32 = extract_vector_elt t1, Constant:i64<0>
   16135             :   // t11: i32 = extract_vector_elt t2, Constant:i64<0>
   16136             :   // t12: i32 = extract_vector_elt t3, Constant:i64<0>
   16137             :   // t13: i32 = extract_vector_elt t1, Constant:i64<1>
   16138             :   // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
   16139             :   // We will generate:
   16140             :   // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
   16141             :   // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
   16142             :   SmallVector<SDValue, 4> Shuffles;
                         // VecIn[0] is the placeholder, so pair number In covers VecIn[2*In+1] and
                         // VecIn[2*In+2]; a trailing unpaired vector gets an empty SDValue as partner.
   16143       19172 :   for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
   16144        6782 :     unsigned LeftIdx = 2 * In + 1;
   16145        6782 :     SDValue VecLeft = VecIn[LeftIdx];
   16146             :     SDValue VecRight =
   16147        6782 :         (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
   16148             : 
   16149        6782 :     if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
   16150        6782 :                                                 VecRight, LeftIdx))
   16151        5950 :       Shuffles.push_back(Shuffle);
   16152             :     else
   16153         832 :       return SDValue();
   16154             :   }
   16155             : 
   16156             :   // If we need the zero vector as an "ingredient" in the blend tree, add it
   16157             :   // to the list of shuffles.
   16158        5779 :   if (UsesZeroVector)
   16159         737 :     Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
   16160         158 :                                       : DAG.getConstantFP(0.0, DL, VT));
   16161             : 
   16162             :   // If we only have one shuffle, we're done.
   16163        5779 :   if (Shuffles.size() == 1)
   16164        5061 :     return Shuffles[0];
   16165             : 
   16166             :   // Update the vector mask to point to the post-shuffle vectors.
                         // Zero elements map to the last entry of Shuffles (the zero vector appended
                         // above); input vector number k maps to shuffle (k - 1) / 2. Undef entries
                         // stay at -1 because (-1 - 1) / 2 is -1.
   16167        6310 :   for (int &Vec : VectorMask)
   16168        5592 :     if (Vec == 0)
   16169        2210 :       Vec = Shuffles.size() - 1;
   16170             :     else
   16171        3382 :       Vec = (Vec - 1) / 2;
   16172             : 
   16173             :   // More than one shuffle. Generate a binary tree of blends, e.g. if from
   16174             :   // the previous step we got the set of shuffles t10, t11, t12, t13, we will
   16175             :   // generate:
   16176             :   // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
   16177             :   // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
   16178             :   // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
   16179             :   // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
   16180             :   // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
   16181             :   // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
   16182             :   // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
   16183             : 
   16184             :   // Make sure the initial size of the shuffle list is even.
   16185         718 :   if (Shuffles.size() % 2)
   16186           2 :     Shuffles.push_back(DAG.getUNDEF(VT));
   16187             : 
                         // Each pass blends adjacent pairs in place, writing the results to the front
                         // of Shuffles, and halves the number of live entries.
   16188        1451 :   for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
                           // An odd level is padded with an undef entry so every shuffle has a
                           // partner; the slot being overwritten is no longer live at this level.
   16189         733 :     if (CurSize % 2) {
   16190           0 :       Shuffles[CurSize] = DAG.getUNDEF(VT);
   16191           0 :       CurSize++;
   16192             :     }
   16193        1485 :     for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
   16194         752 :       int Left = 2 * In;
   16195         752 :       int Right = 2 * In + 1;
   16196         752 :       SmallVector<int, 8> Mask(NumElems, -1);
                             // Lanes owned by the left shuffle keep index i, lanes owned by the right
                             // one use i + NumElems; either way the lane now belongs to entry In.
   16197        6872 :       for (unsigned i = 0; i != NumElems; ++i) {
   16198       12240 :         if (VectorMask[i] == Left) {
   16199        2792 :           Mask[i] = i;
   16200        2792 :           VectorMask[i] = In;
   16201        3328 :         } else if (VectorMask[i] == Right) {
   16202        2912 :           Mask[i] = i + NumElems;
   16203        2912 :           VectorMask[i] = In;
   16204             :         }
   16205             :       }
   16206             : 
   16207         752 :       Shuffles[In] =
   16208        3008 :           DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
   16209             :     }
   16210             :   }
   16211         718 :   return Shuffles[0];
   16212             : }
   16213             : 
   16214             : // Try to turn a build vector of zero extends of extract vector elts into
   16215             : // a vector zero extend and possibly an extract subvector.
   16216             : // TODO: Support sign extend or any extend?
   16217             : // TODO: Allow undef elements?
   16218             : // TODO: Don't require the extracts to start at element 0.
                       //
                       // Returns zero_extend(extract_subvector(X, C)) when operand i of N is
                       // zext(extract_vector_elt X, C + i) for every i, and an empty SDValue
                       // otherwise. The fold is never attempted once operations are legalized.
   16219      697793 : SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
   16220      697793 :   if (LegalOperations)
   16221      370717 :     return SDValue();
   16222             : 
   16223      327076 :   EVT VT = N->getValueType(0);
   16224             : 
   16225      327076 :   SDValue Op0 = N->getOperand(0);
                         // Returns the constant extract index when Op is
                         // (zext (extract_vector_elt X, C)) and X is the same vector that operand 0
                         // extracts from; returns -1 for anything else. Op0's inner operands are only
                         // inspected after Op itself has matched the zext-of-extract shape.
   16226             :   auto checkElem = [&](SDValue Op) -> int64_t {
   16227             :     if (Op.getOpcode() == ISD::ZERO_EXTEND &&
   16228             :         Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
   16229             :         Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
   16230             :       if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
   16231             :         return C->getZExtValue();
   16232             :     return -1;
   16233      327076 :   };
   16234             : 
   16235             :   // Make sure the first element matches
   16236             :   // (zext (extract_vector_elt X, C))
                         // NOTE(review): Offset later becomes the EXTRACT_SUBVECTOR index but is not
                         // checked to be a multiple of the result element count, unlike the
                         // subvector-extract fold in visitBUILD_VECTOR ("IDX must be multiple of
                         // output size") -- confirm whether the same restriction is needed here.
   16237      327076 :   int64_t Offset = checkElem(Op0);
   16238      327076 :   if (Offset < 0)
   16239      327057 :     return SDValue();
   16240             : 
   16241          19 :   unsigned NumElems = N->getNumOperands();
   16242          38 :   SDValue In = Op0.getOperand(0).getOperand(0);
                         // InVT: NumElems elements of the source vector's scalar type, i.e. the
                         // pre-extension counterpart of the result type.
   16243          19 :   EVT InSVT = In.getValueType().getScalarType();
   16244          19 :   EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
   16245             : 
   16246             :   // Don't create an illegal input type after type legalization.
   16247          19 :   if (LegalTypes && !TLI.isTypeLegal(InVT))
   16248           0 :     return SDValue();
   16249             : 
   16250             :   // Ensure all the elements come from the same vector and are adjacent.
   16251          53 :   for (unsigned i = 1; i != NumElems; ++i) {
   16252          80 :     if ((Offset + i) != checkElem(N->getOperand(i)))
   16253           6 :       return SDValue();
   16254             :   }
   16255             : 
   16256             :   SDLoc DL(N);
                         // Reuse the first extract's index operand as the subvector start index.
   16257          13 :   In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
   16258          26 :                    Op0.getOperand(0).getOperand(1));
   16259          26 :   return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, In);
   16260             : }
   16261             : 
                       // Combine a BUILD_VECTOR node. The folds are tried in this order:
                       //   1. all operands undef                      -> undef
                       //   2. splat of a bitcast from a vector        -> bitcast of concat_vectors
                       //   3. consecutive extracts from one vector    -> that vector, or an
                       //                                                 extract_subvector of it
                       //   4. convertBuildVecZextToZext, reduceBuildVecExtToExtBuildVec,
                       //      reduceBuildVecConvertToConvertBuildVec, reduceBuildVecToShuffle
                       // Returns the replacement value, or an empty SDValue if nothing applied.
   16262      698113 : SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
   16263      698113 :   EVT VT = N->getValueType(0);
   16264             : 
   16265             :   // A vector built entirely of undefs is undef.
   16266      698113 :   if (ISD::allOperandsUndef(N))
   16267         154 :     return DAG.getUNDEF(VT);
   16268             : 
   16269             :   // If this is a splat of a bitcast from another vector, change to a
   16270             :   // concat_vector.
   16271             :   // For example:
   16272             :   //   (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
   16273             :   //     (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
   16274             :   //
   16275             :   // If X is a build_vector itself, the concat can become a larger build_vector.
   16276             :   // TODO: Maybe this is useful for non-splat too?
   16277      697959 :   if (!LegalOperations) {
   16278      327242 :     if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
   16279      253623 :       Splat = peekThroughBitcasts(Splat);
   16280      507246 :       EVT SrcVT = Splat.getValueType();
   16281      253623 :       if (SrcVT.isVector()) {
                               // The concatenation holds one copy of the source vector per operand.
   16282           9 :         unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
   16283           9 :         EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
   16284           9 :                                      SrcVT.getVectorElementType(), NumElts);
   16285           9 :         if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
   16286           8 :           SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
   16287           8 :           SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
   16288           8 :                                        NewVT, Ops);
   16289           8 :           return DAG.getBitcast(VT, Concat);
   16290             :         }
   16291             :       }
   16292             :     }
   16293             :   }
   16294             : 
   16295             :   // Check if we can express BUILD VECTOR via subvector extract.
   16296      697951 :   if (!LegalTypes && (N->getNumOperands() > 1)) {
   16297      230896 :     SDValue Op0 = N->getOperand(0);
                           // Returns the constant extract index when Op is an EXTRACT_VECTOR_ELT
                           // from the same vector as operand 0; otherwise returns -1 converted to
                           // uint64_t, which serves as the "no match" sentinel.
   16298             :     auto checkElem = [&](SDValue Op) -> uint64_t {
   16299             :       if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
   16300             :           (Op0.getOperand(0) == Op.getOperand(0)))
   16301             :         if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
   16302             :           return CNode->getZExtValue();
   16303             :       return -1;
   16304      230896 :     };
   16305             : 
                           // Offset starts as operand 0's extract index and is reset to -1 as soon
                           // as some operand i does not extract element Offset + i of that vector.
   16306      230896 :     int Offset = checkElem(Op0);
   16307      234797 :     for (unsigned i = 0; i < N->getNumOperands(); ++i) {
   16308      469254 :       if (Offset + i != checkElem(N->getOperand(i))) {
   16309             :         Offset = -1;
   16310             :         break;
   16311             :       }
   16312             :     }
   16313             : 
                           // Elements 0..N-1 of a vector that already has the result type: the
                           // BUILD_VECTOR is just that vector.
   16314      230896 :     if ((Offset == 0) &&
   16315         274 :         (Op0.getOperand(0).getValueType() == N->getValueType(0)))
   16316         158 :       return Op0.getOperand(0);
                           // Otherwise extract the run as a subvector, reusing operand 0's index.
   16317      230805 :     if ((Offset != -1) &&
   16318      230963 :         ((Offset % N->getValueType(0).getVectorNumElements()) ==
   16319             :          0)) // IDX must be multiple of output size.
   16320          67 :       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
   16321         134 :                          Op0.getOperand(0), Op0.getOperand(1));
   16322             :   }
   16323             : 
                         // Remaining folds live in helpers; the first one that succeeds wins.
   16324      697793 :   if (SDValue V = convertBuildVecZextToZext(N))
   16325          13 :     return V;
   16326             : 
   16327      697780 :   if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
   16328        1413 :     return V;
   16329             : 
   16330      696367 :   if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
   16331          18 :     return V;
   16332             : 
   16333      696349 :   if (SDValue V = reduceBuildVecToShuffle(N))
   16334        5779 :     return V;
   16335             : 
   16336      690570 :   return SDValue();
   16337             : }
   16338             : 
                       // Fold a CONCAT_VECTORS whose operands are all either UNDEF or a BITCAST of
                       // a non-vector value into a bitcast of a BUILD_VECTOR of those scalars.
                       // Nothing is done when the operand vector type is legal. Returns the
                       // replacement value, or an empty SDValue if any operand has another form or
                       // a scalar that is neither integer nor floating point.
   16339       26693 : static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
   16340       26693 :   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   16341       53386 :   EVT OpVT = N->getOperand(0).getValueType();
   16342             : 
   16343             :   // If the operands are legal vectors, leave them alone.
   16344             :   if (TLI.isTypeLegal(OpVT))
   16345       22718 :     return SDValue();
   16346             : 
   16347             :   SDLoc DL(N);
   16348        7950 :   EVT VT = N->getValueType(0);
   16349             :   SmallVector<SDValue, 8> Ops;
   16350             : 
                         // Start with an integer scalar type as wide as one whole operand vector;
                         // UNDEF operands are represented by an UNDEF of that type.
   16351        3975 :   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
   16352        3975 :   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
   16353             : 
   16354             :   // Keep track of what we encounter.
   16355             :   bool AnyInteger = false;
   16356             :   bool AnyFP = false;
   16357        4055 :   for (const SDValue &Op : N->ops()) {
                           // Collect the scalar behind each bitcast, or the scalar undef.
   16358        8256 :     if (ISD::BITCAST == Op.getOpcode() &&
   16359         462 :         !Op.getOperand(0).getValueType().isVector())
   16360          66 :       Ops.push_back(Op.getOperand(0));
   16361        3963 :     else if (ISD::UNDEF == Op.getOpcode())
   16362          17 :       Ops.push_back(ScalarUndef);
   16363             :     else
   16364        3946 :       return SDValue();
   16365             : 
   16366             :     // Note whether we encounter an integer or floating point scalar.
   16367             :     // If it's neither, bail out, it could be something weird like x86mmx.
   16368          83 :     EVT LastOpVT = Ops.back().getValueType();
   16369          83 :     if (LastOpVT.isFloatingPoint())
   16370             :       AnyFP = true;
   16371          74 :     else if (LastOpVT.isInteger())
   16372             :       AnyInteger = true;
   16373             :     else
   16374           3 :       return SDValue();
   16375             :   }
   16376             : 
   16377             :   // If any of the operands is a floating point scalar bitcast to a vector,
   16378             :   // use floating point types throughout, and bitcast everything.
   16379             :   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
   16380          26 :   if (AnyFP) {
   16381           3 :     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
   16382           3 :     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
   16383           3 :     if (AnyInteger) {
                             // Only operands not already of the floating point type need rewriting.
   16384          10 :       for (SDValue &Op : Ops) {
   16385          16 :         if (Op.getValueType() == SVT)
   16386           5 :           continue;
   16387           3 :         if (Op.isUndef())
   16388           1 :           Op = ScalarUndef;
   16389             :         else
   16390           2 :           Op = DAG.getBitcast(SVT, Op);
   16391             :       }
   16392             :     }
   16393             :   }
   16394             : 
                         // Build a vector of the chosen scalar type with as many elements as fit in
                         // the result's bit width, then bitcast it back to the original result type.
   16395          26 :   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
   16396          26 :                                VT.getSizeInBits() / SVT.getSizeInBits());
   16397          26 :   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
   16398             : }
   16399             : 
   16400             : // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
   16401             : // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
   16402             : // most two distinct vectors the same size as the result, attempt to turn this
   16403             : // into a legal shuffle.
                       //
                       // Operands are looked at through bitcasts and may also be UNDEF. Returns the
                       // shuffle, or an empty SDValue if an operand has another form, an extract
                       // index is not constant, a source vector differs in bit size from the
                       // result, the element counts do not divide evenly, more than two sources
                       // are used, or the target rejects the resulting mask.
   16404       12000 : static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
   16405       12000 :   EVT VT = N->getValueType(0);
   16406       24000 :   EVT OpVT = N->getOperand(0).getValueType();
   16407       12000 :   int NumElts = VT.getVectorNumElements();
   16408       12000 :   int NumOpElts = OpVT.getVectorNumElements();
   16409             : 
                         // Both shuffle inputs start out undef; an undef slot means "still unused".
   16410       12000 :   SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
   16411             :   SmallVector<int, 8> Mask;
   16412             : 
   16413       16285 :   for (SDValue Op : N->ops()) {
   16414       14900 :     Op = peekThroughBitcasts(Op);
   16415             : 
   16416             :     // UNDEF nodes convert to UNDEF shuffle mask values.
   16417       14900 :     if (Op.isUndef()) {
   16418        2775 :       Mask.append((unsigned)NumOpElts, -1);
   16419        2775 :       continue;
   16420             :     }
   16421             : 
   16422       12125 :     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
   16423       10144 :       return SDValue();
   16424             : 
   16425             :     // What vector are we extracting the subvector from and at what index?
   16426        1981 :     SDValue ExtVec = Op.getOperand(0);
   16427             : 
   16428             :     // We want the EVT of the original extraction to correctly scale the
   16429             :     // extraction index.
   16430        1981 :     EVT ExtVT = ExtVec.getValueType();
   16431        1981 :     ExtVec = peekThroughBitcasts(ExtVec);
   16432             : 
   16433             :     // UNDEF nodes convert to UNDEF shuffle mask values.
   16434        1981 :     if (ExtVec.isUndef()) {
   16435           0 :       Mask.append((unsigned)NumOpElts, -1);
   16436           0 :       continue;
   16437             :     }
   16438             : 
   16439             :     if (!isa<ConstantSDNode>(Op.getOperand(1)))
   16440           0 :       return SDValue();
   16441        1981 :     int ExtIdx = Op.getConstantOperandVal(1);
   16442             : 
   16443             :     // Ensure that we are extracting a subvector from a vector the same
   16444             :     // size as the result.
   16445        1981 :     if (ExtVT.getSizeInBits() != VT.getSizeInBits())
   16446         471 :       return SDValue();
   16447             : 
   16448             :     // Scale the subvector index to account for any bitcast.
                           // ExtIdx counts elements of ExtVT; convert it to elements of VT by the
                           // ratio of the two element counts, which must divide evenly.
   16449        1510 :     int NumExtElts = ExtVT.getVectorNumElements();
   16450        1510 :     if (0 == (NumExtElts % NumElts))
   16451        1489 :       ExtIdx /= (NumExtElts / NumElts);
   16452          21 :     else if (0 == (NumElts % NumExtElts))
   16453          21 :       ExtIdx *= (NumElts / NumExtElts);
   16454             :     else
   16455           0 :       return SDValue();
   16456             : 
   16457             :     // At most we can reference 2 inputs in the final shuffle.
                           // Mask entries for the first input are plain element indices; entries for
                           // the second input are offset by NumElts.
   16458        1510 :     if (SV0.isUndef() || SV0 == ExtVec) {
   16459        1428 :       SV0 = ExtVec;
   16460        7083 :       for (int i = 0; i != NumOpElts; ++i)
   16461        5655 :         Mask.push_back(i + ExtIdx);
   16462          82 :     } else if (SV1.isUndef() || SV1 == ExtVec) {
   16463          82 :       SV1 = ExtVec;
   16464         619 :       for (int i = 0; i != NumOpElts; ++i)
   16465         537 :         Mask.push_back(i + ExtIdx + NumElts);
   16466             :     } else {
   16467           0 :       return SDValue();
   16468             :     }
   16469             :   }
   16470             : 
   16471        2770 :   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
   16472          14 :     return SDValue();
   16473             : 
                         // The sources were found through bitcasts, so cast them to the result type
                         // before building the shuffle.
   16474        1371 :   return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
   16475        4113 :                               DAG.getBitcast(VT, SV1), Mask);
   16476             : }
   16477             : 
   16478       30650 : SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
   16479             :   // If we only have one input vector, we don't need to do any concatenation.
   16480       30650 :   if (N->getNumOperands() == 1)
   16481           0 :     return N->getOperand(0);
   16482             : 
   16483             :   // Check if all of the operands are undefs.
   16484       30650 :   EVT VT = N->getValueType(0);
   16485       30650 :   if (ISD::allOperandsUndef(N))
   16486           2 :     return DAG.getUNDEF(VT);
   16487             : 
   16488             :   // Optimize concat_vectors where all but the first of the vectors are undef.
   16489       61296 :   if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
   16490           0 :         return Op.isUndef();
   16491             :       })) {
   16492        7728 :     SDValue In = N->getOperand(0);
   16493             :     assert(In.getValueType().isVector() && "Must concat vectors");
   16494             : 
   16495             :     // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr).
   16496        8184 :     if (In->getOpcode() == ISD::BITCAST &&
   16497        8543 :         !In->getOperand(0).getValueType().isVector()) {
   16498          97 :       SDValue Scalar = In->getOperand(0);
   16499             : 
   16500             :       // If the bitcast type isn't legal, it might be a trunc of a legal type;
   16501             :       // look through the trunc so we can still do the transform:
   16502             :       //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
   16503          97 :       if (Scalar->getOpcode() == ISD::TRUNCATE &&
   16504          97 :           !TLI.isTypeLegal(Scalar.getValueType()) &&
   16505           4 :           TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
   16506           4 :         Scalar = Scalar->getOperand(0);
   16507             : 
   16508          97 :       EVT SclTy = Scalar->getValueType(0);
   16509             : 
   16510          97 :       if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
   16511          17 :         return SDValue();
   16512             : 
   16513             :       // Bail out if the vector size is not a multiple of the scalar size.
   16514          80 :       if (VT.getSizeInBits() % SclTy.getSizeInBits())
   16515           3 :         return SDValue();
   16516             : 
   16517          77 :       unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
   16518          77 :       if (VNTNumElms < 2)
   16519           0 :         return SDValue();
   16520             : 
   16521          77 :       EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
   16522          77 :       if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
   16523           5 :         return SDValue();
   16524             : 
   16525          72 :       SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
   16526          72 :       return DAG.getBitcast(VT, Res);
   16527             :     }
   16528             :   }
   16529             : 
   16530             :   // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
   16531             :   // We have already tested above for an UNDEF only concatenation.
   16532             :   // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
   16533             :   // -> (BUILD_VECTOR A, B, ..., C, D, ...)
   16534             :   auto IsBuildVectorOrUndef = [](const SDValue &Op) {
   16535           0 :     return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
   16536             :   };
   16537       30551 :   if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
   16538             :     SmallVector<SDValue, 8> Opnds;
   16539        3858 :     EVT SVT = VT.getScalarType();
   16540             : 
   16541        3858 :     EVT MinVT = SVT;
   16542        3858 :     if (!SVT.isFloatingPoint()) {
   16543             :       // If the BUILD_VECTORs are built from integers, they may have different
   16544             :       // operand types. Get the smallest type and truncate all operands to it.
   16545             :       bool FoundMinVT = false;
   16546       12226 :       for (const SDValue &Op : N->ops())
   16547       17408 :         if (ISD::BUILD_VECTOR == Op.getOpcode()) {
   16548        6747 :           EVT OpSVT = Op.getOperand(0).getValueType();
   16549        9972 :           MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
   16550             :           FoundMinVT = true;
   16551             :         }
   16552             :       assert(FoundMinVT && "Concat vector type mismatch");
   16553             :     }
   16554             : 
   16555       13414 :     for (const SDValue &Op : N->ops()) {
   16556       19112 :       EVT OpVT = Op.getValueType();
   16557             :       unsigned NumElts = OpVT.getVectorNumElements();
   16558             : 
   16559        9556 :       if (ISD::UNDEF == Op.getOpcode())
   16560        2243 :         Opnds.append(NumElts, DAG.getUNDEF(MinVT));
   16561             : 
   16562       19112 :       if (ISD::BUILD_VECTOR == Op.getOpcode()) {
   16563        7313 :         if (SVT.isFloatingPoint()) {
   16564             :           assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
   16565         566 :           Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
   16566             :         } else {
   16567       29584 :           for (unsigned i = 0; i != NumElts; ++i)
   16568       22837 :             Opnds.push_back(
   16569       68511 :                 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
   16570             :         }
   16571             :       }
   16572             :     }
   16573             : 
   16574             :     assert(VT.getVectorNumElements() == Opnds.size() &&
   16575             :            "Concat vector type mismatch");
   16576        7716 :     return DAG.getBuildVector(VT, SDLoc(N), Opnds);
   16577             :   }
   16578             : 
   16579             :   // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
   16580       26693 :   if (SDValue V = combineConcatVectorOfScalars(N, DAG))
   16581          26 :     return V;
   16582             : 
   16583             :   // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
   16584       26667 :   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
   16585       12000 :     if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
   16586        1371 :       return V;
   16587             : 
   16588             :   // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
   16589             :   // nodes often generate nop CONCAT_VECTOR nodes.
   16590             :   // Scan the CONCAT_VECTOR operands and look for CONCAT operations that
   16591             :   // place the incoming vectors at the exact same location.
   16592             :   SDValue SingleSource = SDValue();
   16593       50592 :   unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
   16594             : 
   16595       25616 :   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
   16596       25589 :     SDValue Op = N->getOperand(i);
   16597             : 
   16598       25589 :     if (Op.isUndef())
   16599             :       continue;
   16600             : 
   16601             :     // Check if this is the identity extract:
   16602       25484 :     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
   16603       24554 :       return SDValue();
   16604             : 
   16605             :     // Find the single incoming vector for the extract_subvector.
   16606         930 :     if (SingleSource.getNode()) {
   16607             :       if (Op.getOperand(0) != SingleSource)
   16608         140 :         return SDValue();
   16609             :     } else {
   16610         777 :       SingleSource = Op.getOperand(0);
   16611             : 
   16612             :       // Check the source type is the same as the type of the result.
   16613             :       // If not, this concat may extend the vector, so we can not
   16614             :       // optimize it away.
   16615         777 :       if (SingleSource.getValueType() != N->getValueType(0))
   16616         573 :         return SDValue();
   16617             :     }
   16618             : 
   16619         217 :     unsigned IdentityIndex = i * PartNumElem;
   16620             :     ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
   16621             :     // The extract index must be constant.
   16622             :     if (!CS)
   16623           0 :       return SDValue();
   16624             : 
   16625             :     // Check that we are reading from the identity index.
   16626         434 :     if (CS->getZExtValue() != IdentityIndex)
   16627           2 :       return SDValue();
   16628             :   }
   16629             : 
   16630          27 :   if (SingleSource.getNode())
   16631          27 :     return SingleSource;
   16632             : 
   16633           0 :   return SDValue();
   16634             : }
   16635             : 
   16636             : /// If we are extracting a subvector produced by a wide binary operator with
   16637             : /// at least one operand that was the result of a vector concatenation, then try
   16638             : /// to use the narrow vector operands directly to avoid the concatenation and
   16639             : /// extraction.
   16640       67764 : static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
   16641             :   // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
   16642             :   // some of these bailouts with other transforms.
   16643             : 
   16644             :   // The extract index must be a constant, so we can map it to a concat operand.
   16645       67764 :   auto *ExtractIndex = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
   16646             :   if (!ExtractIndex)
   16647           0 :     return SDValue();
   16648             : 
   16649             :   // Only handle the case where we are doubling and then halving. A larger ratio
   16650             :   // may require more than two narrow binops to replace the wide binop.
   16651      135528 :   EVT VT = Extract->getValueType(0);
   16652             :   unsigned NumElems = VT.getVectorNumElements();
   16653             :   assert((ExtractIndex->getZExtValue() % NumElems) == 0 &&
   16654             :          "Extract index is not a multiple of the vector length.");
   16655       67764 :   if (Extract->getOperand(0).getValueSizeInBits() != VT.getSizeInBits() * 2)
   16656        9117 :     return SDValue();
   16657             : 
   16658             :   // We are looking for an optionally bitcasted wide vector binary operator
   16659             :   // feeding an extract subvector.
   16660      117294 :   SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
   16661             : 
   16662             :   // TODO: The motivating case for this transform is an x86 AVX1 target. That
   16663             :   // target has temptingly almost legal versions of bitwise logic ops in 256-bit
   16664             :   // flavors, but no other 256-bit integer support. This could be extended to
   16665             :   // handle any binop, but that may require fixing/adding other folds to avoid
   16666             :   // codegen regressions.
   16667             :   unsigned BOpcode = BinOp.getOpcode();
   16668       58647 :   if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
   16669       56085 :     return SDValue();
   16670             : 
   16671             :   // The binop must be a vector type, so we can chop it in half.
   16672        5124 :   EVT WideBVT = BinOp.getValueType();
   16673        2562 :   if (!WideBVT.isVector())
   16674          26 :     return SDValue();
   16675             : 
   16676             :   // Bail out if the target does not support a narrower version of the binop.
   16677        2536 :   EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
   16678        2536 :                                    WideBVT.getVectorNumElements() / 2);
   16679        2536 :   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   16680        2536 :   if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
   16681          14 :     return SDValue();
   16682             : 
   16683        2522 :   SDValue LHS = peekThroughBitcasts(BinOp.getOperand(0));
   16684        2522 :   SDValue RHS = peekThroughBitcasts(BinOp.getOperand(1));
   16685             : 
   16686             :   // We need at least one concatenation operation of a binop operand to make
   16687             :   // this transform worthwhile. The concat must double the input vector sizes.
   16688             :   // TODO: Should we also handle INSERT_SUBVECTOR patterns?
   16689             :   bool ConcatL =
   16690        2522 :       LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
   16691             :   bool ConcatR =
   16692        2522 :       RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2;
   16693        2522 :   if (!ConcatL && !ConcatR)
   16694        2377 :     return SDValue();
   16695             : 
   16696             :   // If one of the binop operands was not the result of a concat, we must
   16697             :   // extract a half-sized operand for our new narrow binop. We can't just reuse
   16698             :   // the original extract index operand because we may have bitcasted.
   16699         290 :   unsigned ConcatOpNum = ExtractIndex->getZExtValue() / NumElems;
   16700         145 :   unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
   16701         290 :   EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
   16702             :   SDLoc DL(Extract);
   16703             : 
   16704             :   // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
   16705             :   // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N)
   16706             :   // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN
   16707         103 :   SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum))
   16708             :                       : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
   16709             :                                     BinOp.getOperand(0),
   16710         145 :                                     DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
   16711             : 
   16712          74 :   SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum))
   16713             :                       : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
   16714             :                                     BinOp.getOperand(1),
   16715         219 :                                     DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
   16716             : 
   16717         145 :   SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
   16718         145 :   return DAG.getBitcast(VT, NarrowBinOp);
   16719             : }
   16720             : 
   16721             : /// If we are extracting a subvector from a wide vector load, convert to a
   16722             : /// narrow load to eliminate the extraction:
   16723             : /// (extract_subvector (load wide vector)) --> (load narrow vector)
   16724       70710 : static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
   16725             :   // TODO: Add support for big-endian. The offset calculation must be adjusted.
   16726       70710 :   if (DAG.getDataLayout().isBigEndian())
   16727         525 :     return SDValue();
   16728             : 
   16729             :   // TODO: The one-use check is overly conservative. Check the cost of the
   16730             :   // extract instead or remove that condition entirely.
   16731       70185 :   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
   16732             :   auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
   16733       70406 :   if (!Ld || !Ld->hasOneUse() || Ld->getExtensionType() || Ld->isVolatile() ||
   16734             :       !ExtIdx)
   16735       69975 :     return SDValue();
   16736             : 
   16737             :   // The narrow load will be offset from the base address of the old load if
   16738             :   // we are extracting from something besides index 0 (little-endian).
   16739         420 :   EVT VT = Extract->getValueType(0);
   16740             :   SDLoc DL(Extract);
   16741         210 :   SDValue BaseAddr = Ld->getOperand(1);
   16742         420 :   unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
   16743             : 
   16744             :   // TODO: Use "BaseIndexOffset" to make this more effective.
   16745         210 :   SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
   16746         210 :   MachineFunction &MF = DAG.getMachineFunction();
   16747         210 :   MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
   16748             :                                                    VT.getStoreSize());
   16749         210 :   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
   16750         210 :   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
   16751         210 :   return NewLd;
   16752             : }
   16753             : 
   16754       74221 : SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
   16755       74221 :   EVT NVT = N->getValueType(0);
   16756       74221 :   SDValue V = N->getOperand(0);
   16757             : 
   16758             :   // Extract from UNDEF is UNDEF.
   16759       74221 :   if (V.isUndef())
   16760           3 :     return DAG.getUNDEF(NVT);
   16761             : 
   16762       74218 :   if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
   16763       70710 :     if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
   16764         210 :       return NarrowLoad;
   16765             : 
   16766             :   // Combine:
   16767             :   //    (extract_subvec (concat V1, V2, ...), i)
   16768             :   // Into:
   16769             :   //    Vi if possible
   16770             :   // Only operand 0 is checked as 'concat' assumes all inputs of the same
   16771             :   // type.
   16772       74008 :   if (V->getOpcode() == ISD::CONCAT_VECTORS &&
   16773       74008 :       isa<ConstantSDNode>(N->getOperand(1)) &&
   16774        5654 :       V->getOperand(0).getValueType() == NVT) {
   16775        1846 :     unsigned Idx = N->getConstantOperandVal(1);
   16776             :     unsigned NumElems = NVT.getVectorNumElements();
   16777             :     assert((Idx % NumElems) == 0 &&
   16778             :            "IDX in concat is not a multiple of the result vector length.");
   16779        3692 :     return V->getOperand(Idx / NumElems);
   16780             :   }
   16781             : 
   16782       72162 :   V = peekThroughBitcasts(V);
   16783             : 
   16784             :   // If the input is a build vector, try to make a smaller build vector.
   16785       72162 :   if (V->getOpcode() == ISD::BUILD_VECTOR) {
   16786        1892 :     if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
   16787        1892 :       EVT InVT = V->getValueType(0);
   16788        1892 :       unsigned ExtractSize = NVT.getSizeInBits();
   16789             :       unsigned EltSize = InVT.getScalarSizeInBits();
   16790             :       // Only do this if we won't split any elements.
   16791        1892 :       if (ExtractSize % EltSize == 0) {
   16792        1891 :         unsigned NumElems = ExtractSize / EltSize;
   16793        1891 :         EVT EltVT = InVT.getVectorElementType();
   16794             :         EVT ExtractVT = NumElems == 1 ? EltVT :
   16795        1891 :           EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
   16796         120 :         if ((Level < AfterLegalizeDAG ||
   16797             :              (NumElems == 1 ||
   16798        1894 :               TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
   16799        1888 :             (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
   16800        1888 :           unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) /
   16801             :                             EltSize;
   16802        1888 :           if (NumElems == 1) {
   16803         268 :             SDValue Src = V->getOperand(IdxVal);
   16804           0 :             if (EltVT != Src.getValueType())
   16805           0 :               Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
   16806             : 
   16807         134 :             return DAG.getBitcast(NVT, Src);
   16808             :           }
   16809             : 
   16810             :           // Extract the pieces from the original build_vector.
   16811        3508 :           SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
   16812        1754 :                                             makeArrayRef(V->op_begin() + IdxVal,
   16813        3508 :                                                          NumElems));
   16814        1754 :           return DAG.getBitcast(NVT, BuildVec);
   16815             :         }
   16816             :       }
   16817             :     }
   16818             :   }
   16819             : 
   16820       70274 :   if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
   16821             :     // Handle only the simple case where the vector being inserted and the
   16822             :     // vector being extracted are of the same size.
   16823        2510 :     EVT SmallVT = V->getOperand(1).getValueType();
   16824        2510 :     if (!NVT.bitsEq(SmallVT))
   16825         499 :       return SDValue();
   16826             : 
   16827             :     // Only handle cases where both indexes are constants.
   16828        2011 :     ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
   16829             :     ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
   16830             : 
   16831        2011 :     if (InsIdx && ExtIdx) {
   16832             :       // Combine:
   16833             :       //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
   16834             :       // Into:
   16835             :       //    indices are equal or bit offsets are equal => V2
   16836             :       //    otherwise => (extract_subvec V1, ExtIdx)
   16837        2011 :       if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
   16838        4022 :           ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
   16839        3352 :         return DAG.getBitcast(NVT, V->getOperand(1));
   16840         335 :       return DAG.getNode(
   16841         335 :           ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
   16842         335 :           DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)),
   16843        1675 :           N->getOperand(1));
   16844             :     }
   16845             :   }
   16846             : 
   16847       67764 :   if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
   16848         145 :     return NarrowBOp;
   16849             : 
   16850       67619 :   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
   16851         214 :     return SDValue(N, 0);
   16852             : 
   16853       67405 :   return SDValue();
   16854             : }
   16855             : 
   16856             : // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
   16857             : // or turn a shuffle of a single concat into a simpler shuffle then concat.
   16858        2336 : static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
   16859        4672 :   EVT VT = N->getValueType(0);
   16860             :   unsigned NumElts = VT.getVectorNumElements();
   16861             : 
   16862        2336 :   SDValue N0 = N->getOperand(0);
   16863        2336 :   SDValue N1 = N->getOperand(1);
   16864             :   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
   16865             : 
   16866             :   SmallVector<SDValue, 4> Ops;
   16867        4672 :   EVT ConcatVT = N0.getOperand(0).getValueType();
   16868             :   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
   16869        2336 :   unsigned NumConcats = NumElts / NumElemsPerConcat;
   16870             : 
   16871             :   // Special case: shuffle(concat(A,B)) can be more efficiently represented
   16872             :   // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
   16873             :   // half vector elements.
   16874        3626 :   if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
   16875        2580 :       std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
   16876        3106 :                   SVN->getMask().end(), [](int i) { return i == -1; })) {
   16877         520 :     N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
   16878         520 :                               makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
   16879         520 :     N1 = DAG.getUNDEF(ConcatVT);
   16880        1040 :     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
   16881             :   }
   16882             : 
   16883             :   // Look at every vector that's inserted. We're looking for exact
   16884             :   // subvector-sized copies from a concatenated vector
   16885        3744 :   for (unsigned I = 0; I != NumConcats; ++I) {
   16886             :     // Make sure we're dealing with a copy.
   16887        3361 :     unsigned Begin = I * NumElemsPerConcat;
   16888             :     bool AllUndef = true, NoUndef = true;
   16889       26092 :     for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
   16890       45462 :       if (SVN->getMaskElt(J) >= 0)
   16891             :         AllUndef = false;
   16892             :       else
   16893             :         NoUndef = false;
   16894             :     }
   16895             : 
   16896        3361 :     if (NoUndef) {
   16897        4644 :       if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
   16898         255 :         return SDValue();
   16899             : 
   16900        8047 :       for (unsigned J = 1; J != NumElemsPerConcat; ++J)
   16901       21153 :         if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
   16902        1071 :           return SDValue();
   16903             : 
   16904         996 :       unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
   16905         996 :       if (FirstElt < N0.getNumOperands())
   16906         956 :         Ops.push_back(N0.getOperand(FirstElt));
   16907             :       else
   16908          80 :         Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
   16909             : 
   16910        1039 :     } else if (AllUndef) {
   16911        1864 :       Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
   16912             :     } else { // Mixed with general masks and undefs, can't do optimization.
   16913         107 :       return SDValue();
   16914             :     }
   16915             :   }
   16916             : 
   16917         766 :   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
   16918             : }
   16919             : 
   16920             : // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
   16921             : // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
   16922             : //
   16923             : // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
   16924             : // a simplification in some sense, but it isn't appropriate in general: some
   16925             : // BUILD_VECTORs are substantially cheaper than others. The general case
   16926             : // of a BUILD_VECTOR requires inserting each element individually (or
   16927             : // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
   16928             : // all constants is a single constant pool load.  A BUILD_VECTOR where each
   16929             : // element is identical is a splat.  A BUILD_VECTOR where most of the operands
   16930             : // are undef lowers to a small number of element insertions.
   16931             : //
   16932             : // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
   16933             : // We don't fold shuffles where one side is a non-zero constant, and we don't
   16934             : // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
   16935             : // non-constant operands. This seems to work out reasonably well in practice.
   16936       43114 : static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
   16937             :                                        SelectionDAG &DAG,
   16938             :                                        const TargetLowering &TLI) {
   16939       86228 :   EVT VT = SVN->getValueType(0);
   16940             :   unsigned NumElts = VT.getVectorNumElements();
   16941       43114 :   SDValue N0 = SVN->getOperand(0);
   16942       43114 :   SDValue N1 = SVN->getOperand(1);
   16943             : 
   16944       75026 :   if (!N0->hasOneUse() || !N1->hasOneUse())
   16945       17728 :     return SDValue();
   16946             : 
   16947             :   // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
   16948             :   // discussed above.
   16949       25386 :   if (!N1.isUndef()) {
   16950       12717 :     bool N0AnyConst = isAnyConstantBuildVector(N0.getNode());
   16951       12717 :     bool N1AnyConst = isAnyConstantBuildVector(N1.getNode());
   16952       12717 :     if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
   16953          27 :       return SDValue();
   16954       12690 :     if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
   16955          31 :       return SDValue();
   16956             :   }
   16957             : 
   16958             :   // If both inputs are splats of the same value then we can safely merge this
   16959             :   // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
   16960             :   bool IsSplat = false;
   16961             :   auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
   16962             :   auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
   16963       25328 :   if (BV0 && BV1)
   16964          73 :     if (SDValue Splat0 = BV0->getSplatValue())
   16965         110 :       IsSplat = (Splat0 == BV1->getSplatValue());
   16966             : 
   16967             :   SmallVector<SDValue, 8> Ops;
   16968       25328 :   SmallSet<SDValue, 16> DuplicateOps;
   16969       36888 :   for (int M : SVN->getMask()) {
   16970       36329 :     SDValue Op = DAG.getUNDEF(VT.getScalarType());
   16971       36329 :     if (M >= 0) {
   16972       29541 :       int Idx = M < (int)NumElts ? M : M - NumElts;
   16973       29541 :       SDValue &S = (M < (int)NumElts ? N0 : N1);
   16974       59082 :       if (S.getOpcode() == ISD::BUILD_VECTOR) {
   16975        9412 :         Op = S.getOperand(Idx);
   16976       24835 :       } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
   16977             :         assert(Idx == 0 && "Unexpected SCALAR_TO_VECTOR operand index.");
   16978         184 :         Op = S.getOperand(0);
   16979             :       } else {
   16980             :         // Operand can't be combined - bail out.
   16981       24651 :         return SDValue();
   16982             :       }
   16983             :     }
   16984             : 
   16985             :     // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
   16986             :     // generating a splat; semantically, this is fine, but it's likely to
   16987             :     // generate low-quality code if the target can't reconstruct an appropriate
   16988             :     // shuffle.
   16989       23356 :     if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
   16990        2505 :       if (!IsSplat && !DuplicateOps.insert(Op).second)
   16991         118 :         return SDValue();
   16992             : 
   16993       11560 :     Ops.push_back(Op);
   16994             :   }
   16995             : 
   16996             :   // BUILD_VECTOR requires all inputs to be of the same type, find the
   16997             :   // maximum type and extend them all.
   16998         559 :   EVT SVT = VT.getScalarType();
   16999         559 :   if (SVT.isInteger())
   17000        7103 :     for (SDValue &Op : Ops)
   17001       13096 :       SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
   17002         559 :   if (SVT != VT.getScalarType())
   17003          70 :     for (SDValue &Op : Ops)
   17004         128 :       Op = TLI.isZExtFree(Op.getValueType(), SVT)
   17005         128 :                ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
   17006         128 :                : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
   17007        1118 :   return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
   17008             : }
   17009             : 
   17010             : // Match shuffles that can be converted to any_vector_extend_in_reg.
   17011             : // This is often generated during legalization.
   17012             : // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
   17013             : // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
   17014       66089 : static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
   17015             :                                             SelectionDAG &DAG,
   17016             :                                             const TargetLowering &TLI,
   17017             :                                             bool LegalOperations,
   17018             :                                             bool LegalTypes) {
                         // Try to rewrite the shuffle SVN as
                         //   bitcast(VT, ANY_EXTEND_VECTOR_INREG(operand 0))
                         // where the extend produces NumElts / Scale integer elements that are
                         // Scale times wider than the elements of VT.
                         //
                         // Returns the replacement value, or an empty SDValue() when VT is not an
                         // integer type, the data layout is big-endian, or no candidate Scale both
                         // matches the mask and passes the legality checks below.
                         //
                         // LegalTypes:      when set, the widened vector type must be legal.
                         // LegalOperations: when set, ANY_EXTEND_VECTOR_INREG must be legal or
                         //                  custom for the widened vector type.
   17019       66089 :   EVT VT = SVN->getValueType(0);
   17020       66089 :   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
   17021             : 
   17022             :   // TODO Add support for big-endian when we have a test case.
   17023       66089 :   if (!VT.isInteger() || IsBigEndian)
   17024       14816 :     return SDValue();
   17025             : 
   17026       51273 :   unsigned NumElts = VT.getVectorNumElements();
   17027             :   unsigned EltSizeInBits = VT.getScalarSizeInBits();
   17028       51273 :   ArrayRef<int> Mask = SVN->getMask();
   17029       51273 :   SDValue N0 = SVN->getOperand(0);
   17030             : 
   17031             :   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
                         // A mask matches at a given Scale when every defined element sits at an
                         // index that is a multiple of Scale and selects source element
                         // (index / Scale). Undefined (negative) elements match anything.
   17032             :   auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
   17033      220318 :     for (unsigned i = 0; i != NumElts; ++i) {
   17034      436044 :       if (Mask[i] < 0)
   17035             :         continue;
   17036      185748 :       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
   17037             :         continue;
   17038             :       return false;
   17039             :     }
   17040             :     return true;
   17041             :   };
   17042             : 
   17043             :   // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
   17044             :   // power-of-2 extensions as they are the most likely.
                         // Scale takes the values 2, 4, 8, ... and stops before reaching NumElts.
   17045      171646 :   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
   17046             :     // Skip scales that do not evenly divide a non power of 2 vector size.
   17047      120659 :     if (NumElts % Scale != 0)
   17048      118363 :       continue;
   17049      120640 :     if (!isAnyExtend(Scale))
   17050             :       continue;
   17051             : 
                           // Build the widened type: Scale times wider integer elements and
                           // NumElts / Scale of them.
   17052        2296 :     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
   17053        2296 :     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
   17054        2296 :     if (!LegalTypes || TLI.isTypeLegal(OutVT))
   17055        2288 :       if (!LegalOperations ||
   17056             :           TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
   17057             :         return DAG.getBitcast(VT,
   17058         572 :                             DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT));
   17059             :   }
   17060             : 
   17061       50987 :   return SDValue();
   17062             : }
   17063             : 
   17064             : // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
   17065             : // each source element of a large type into the lowest elements of a smaller
   17066             : // destination type. This is often generated during legalization.
   17067             : // If the source node itself was a '*_extend_vector_inreg' node then we should
   17068             : // be able to remove it.
                       //
                       // Returns bitcast(VT, X) when operand 0 of the shuffle (looking through
                       // bitcasts) is an ANY/SIGN/ZERO_EXTEND_VECTOR_INREG of X, the scalar size of
                       // X equals the scalar size of VT, and the shuffle mask truncates at the same
                       // scale as the extension. Otherwise returns an empty SDValue(). Only
                       // integer result types on little-endian data layouts are handled.
   17069       65803 : static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
   17070             :                                         SelectionDAG &DAG) {
   17071       65803 :   EVT VT = SVN->getValueType(0);
   17072       65803 :   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
   17073             : 
   17074             :   // TODO Add support for big-endian when we have a test case.
   17075       65803 :   if (!VT.isInteger() || IsBigEndian)
   17076       14816 :     return SDValue();
   17077             : 
   17078      101974 :   SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
   17079             : 
                         // Only an extend-in-register source can be undone by a truncating shuffle.
   17080       50987 :   unsigned Opcode = N0.getOpcode();
   17081       50987 :   if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
   17082       50987 :       Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
   17083             :       Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
   17084       50878 :     return SDValue();
   17085             : 
                         // N00 is the value that was extended; the Ext* sizes are the scalar sizes
                         // before and after that extension.
   17086         109 :   SDValue N00 = N0.getOperand(0);
   17087         109 :   ArrayRef<int> Mask = SVN->getMask();
   17088         109 :   unsigned NumElts = VT.getVectorNumElements();
   17089             :   unsigned EltSizeInBits = VT.getScalarSizeInBits();
   17090         109 :   unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
   17091         109 :   unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
   17092             : 
   17093         109 :   if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
   17094           0 :     return SDValue();
   17095         109 :   unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
   17096             : 
   17097             :   // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
   17098             :   // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
   17099             :   // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
                         // A mask matches when every defined element i selects source element
                         // i * Scale, and i * Scale is itself below NumElts. Undefined (negative)
                         // elements match anything.
   17100             :   auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
   17101          39 :     for (unsigned i = 0; i != NumElts; ++i) {
   17102          76 :       if (Mask[i] < 0)
   17103             :         continue;
   17104          32 :       if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
   17105             :         continue;
   17106             :       return false;
   17107             :     }
   17108             :     return true;
   17109             :   };
   17110             : 
   17111             :   // At the moment we just handle the case where we've truncated back to the
   17112             :   // same size as before the extension.
   17113             :   // TODO: handle more extension/truncation cases as cases arise.
   17114         109 :   if (EltSizeInBits != ExtSrcSizeInBits)
   17115          91 :     return SDValue();
   17116             : 
   17117             :   // We can remove *extend_vector_inreg only if the truncation happens at
   17118             :   // the same scale as the extension.
   17119          18 :   if (isTruncate(ExtScale))
   17120           1 :     return DAG.getBitcast(VT, N00);
   17121             : 
   17122          17 :   return SDValue();
   17123             : }
   17124             : 
   17125             : // Combine shuffles of splat-shuffles of the form:
   17126             : // shuffle (shuffle V, undef, splat-mask), undef, M
   17127             : // If splat-mask contains undef elements, we need to be careful about
   17128             : // introducing undef's in the folded mask which are not the result of composing
   17129             : // the masks of the shuffles.
                       //
                       // UserMask is the mask M of the outer shuffle and Splat is the inner
                       // splat-shuffle. Returns either Splat itself (when reusing it is safe) or a
                       // new shuffle of Splat's two operands whose mask is the composition of both
                       // masks.
                       // NOTE(review): SplatMask is indexed with UserMask values, so every defined
                       // UserMask element is assumed to be below SplatMask.size() - confirm callers
                       // never pass indices that refer to the outer shuffle's second operand.
   17130         101 : static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
   17131             :                                      ShuffleVectorSDNode *Splat,
   17132             :                                      SelectionDAG &DAG) {
   17133         101 :   ArrayRef<int> SplatMask = Splat->getMask();
   17134             :   assert(UserMask.size() == SplatMask.size() && "Mask length mismatch");
   17135             : 
   17136             :   // Prefer simplifying to the splat-shuffle, if possible. This is legal if
   17137             :   // every undef mask element in the splat-shuffle has a corresponding undef
   17138             :   // element in the user-shuffle's mask or if the composition of mask elements
   17139             :   // would result in undef.
   17140             :   // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
   17141             :   // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
   17142             :   //   In this case it is not legal to simplify to the splat-shuffle because we
   17143             :   //   may be exposing the users of the shuffle an undef element at index 1
   17144             :   //   which was not there before the combine.
   17145             :   // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
   17146             :   //   In this case the composition of masks yields SplatMask, so it's ok to
   17147             :   //   simplify to the splat-shuffle.
   17148             :   // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
   17149             :   //   In this case the composed mask includes all undef elements of SplatMask
   17150             :   //   and in addition sets element zero to undef. It is safe to simplify to
   17151             :   //   the splat-shuffle.
                         // The check fails as soon as some lane i is defined in UserMask, undefined
                         // in SplatMask, and the composed element SplatMask[UserMask[i]] is defined:
                         // reusing the splat-shuffle would turn that defined lane into undef.
   17152             :   auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
   17153             :                                        ArrayRef<int> SplatMask) {
   17154         660 :     for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
   17155        1234 :       if (UserMask[i] != -1 && SplatMask[i] == -1 &&
   17156         392 :           SplatMask[UserMask[i]] != -1)
   17157             :         return false;
   17158             :     return true;
   17159             :   };
   17160         101 :   if (CanSimplifyToExistingSplat(UserMask, SplatMask))
   17161          43 :     return SDValue(Splat, 0);
   17162             : 
   17163             :   // Create a new shuffle with a mask that is composed of the two shuffles'
   17164             :   // masks.
                         // Undef user elements stay undef; every other element is looked up in
                         // SplatMask.
   17165             :   SmallVector<int, 32> NewMask;
   17166         402 :   for (int Idx : UserMask)
   17167         344 :     NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
   17168             : 
   17169          58 :   return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
   17170          58 :                               Splat->getOperand(0), Splat->getOperand(1),
   17171         174 :                               NewMask);
   17172             : }
   17173             : 
   17174             : /// If the shuffle mask is taking exactly one element from the first vector
   17175             : /// operand and passing through all other elements from the second vector
   17176             : /// operand, return the index of the mask element that is choosing an element
   17177             : /// from the first operand. Otherwise, return -1.
                       /// A mask with no element from the first operand, or with any undef (-1)
                       /// element, also yields -1.
   17178             : static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
   17179       68101 :   int MaskSize = Mask.size();
   17180             :   int EltFromOp0 = -1;
   17181             :   // TODO: This does not match if there are undef elements in the shuffle mask.
   17182             :   // Should we ignore undefs in the shuffle mask instead? The trade-off is
   17183             :   // removing an instruction (a shuffle), but losing the knowledge that some
   17184             :   // vector lanes are not needed.
   17185      309007 :   for (int i = 0; i != MaskSize; ++i) {
                           // Mask values in [0, MaskSize) select an element of operand 0.
   17186      609474 :     if (Mask[i] >= 0 && Mask[i] < MaskSize) {
   17187             :       // We're looking for a shuffle of exactly one element from operand 0.
   17188      143438 :       if (EltFromOp0 != -1)
   17189             :         return -1;
   17190             :       EltFromOp0 = i;
                           // Any other lane must be operand 1's own lane i, encoded as i + MaskSize.
                           // An undef element (-1) never equals i + MaskSize, so it is rejected here.
   17191      161299 :     } else if (Mask[i] != i + MaskSize) {
   17192             :       // Nothing from operand 1 can change lanes.
   17193             :       return -1;
   17194             :     }
   17195             :   }
   17196             :   return EltFromOp0;
   17197             : }
   17198             : 
   17199             : /// If a shuffle inserts exactly one element from a source vector operand into
   17200             : /// another vector operand and we can access the specified element as a scalar,
   17201             : /// then we can eliminate the shuffle.
                       /// Returns the replacement INSERT_VECTOR_ELT node, or an empty SDValue() when
                       /// the mask does not have the one-element-insert shape (in either operand
                       /// order) or the inserted element does not come from an INSERT_VECTOR_ELT
                       /// with a matching constant index.
   17202       70737 : static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
   17203             :                                       SelectionDAG &DAG) {
   17204             :   // First, check if we are taking one element of a vector and shuffling that
   17205             :   // element into another vector.
   17206       70737 :   ArrayRef<int> Mask = Shuf->getMask();
                         // Mutable copy of the mask, used only if the commuted form must be tried.
   17207             :   SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
   17208       70737 :   SDValue Op0 = Shuf->getOperand(0);
   17209       70737 :   SDValue Op1 = Shuf->getOperand(1);
   17210             :   int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
   17211       70737 :   if (ShufOp0Index == -1) {
   17212             :     // Commute mask and check again.
   17213             :     ShuffleVectorSDNode::commuteMask(CommutedMask);
   17214             :     ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
   17215       68101 :     if (ShufOp0Index == -1)
   17216       66467 :       return SDValue();
   17217             :     // Commute operands to match the commuted shuffle mask.
   17218             :     std::swap(Op0, Op1);
                           // From here on Mask views the local CommutedMask, which stays alive
                           // until the function returns.
   17219             :     Mask = CommutedMask;
   17220             :   }
   17221             : 
   17222             :   // The shuffle inserts exactly one element from operand 0 into operand 1.
   17223             :   // Now see if we can access that element as a scalar via a real insert element
   17224             :   // instruction.
   17225             :   // TODO: We can try harder to locate the element as a scalar. Examples: it
   17226             :   // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
   17227             :   assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
   17228             :          "Shuffle mask value must be from operand 0");
   17229        4270 :   if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
   17230        3911 :     return SDValue();
   17231             : 
                         // The insert must use a constant index, and that index must be the very
                         // element of operand 0 that the shuffle selects.
   17232             :   auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
   17233        1077 :   if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
   17234           7 :     return SDValue();
   17235             : 
   17236             :   // There's an existing insertelement with constant insertion index, so we
   17237             :   // don't need to check the legality/profitability of a replacement operation
   17238             :   // that differs at most in the constant value. The target should be able to
   17239             :   // lower any of those in a similar way. If not, legalization will expand this
   17240             :   // to a scalar-to-vector plus shuffle.
   17241             :   //
   17242             :   // Note that the shuffle may move the scalar from the position that the insert
   17243             :   // element used. Therefore, our new insert element occurs at the shuffle's
   17244             :   // mask index value, not the insert's index value.
   17245             :   // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
   17246         352 :   SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf),
   17247         352 :                                         Op0.getOperand(2).getValueType());
   17248         352 :   return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
   17249         704 :                      Op1, Op0.getOperand(1), NewInsIndex);
   17250             : }
   17251             : 
   17252       70837 : SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
                         // Combine entry point for a vector shuffle node N. The folds are tried in
                         // this order, and the first one that applies returns:
                         //   1. operand canonicalization (undef/undef, v/v, undef/v, undef rhs);
                         //   2. shuffle of an insertelement -> insertelement;
                         //   3. shuffle of a splat-shuffle, and splat of a BUILD_VECTOR;
                         //   4. demanded-elements simplification of the operands;
                         //   5. any_extend_vector_inreg and truncation-style shuffles;
                         //   6. shuffles of CONCAT_VECTORS and of scalar sources;
                         //   7. merging with a bitcasted inner shuffle;
                         //   8. commuting so an inner shuffle becomes operand 0, then merging the
                         //      two shuffles into one.
                         // Returns the replacement value, SDValue(N, 0) when
                         // SimplifyDemandedVectorElts reports a change, or an empty SDValue() when
                         // nothing applies.
   17253      141674 :   EVT VT = N->getValueType(0);
   17254             :   unsigned NumElts = VT.getVectorNumElements();
   17255             : 
   17256       70837 :   SDValue N0 = N->getOperand(0);
   17257       70837 :   SDValue N1 = N->getOperand(1);
   17258             : 
   17259             :   assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
   17260             : 
   17261             :   // Canonicalize shuffle undef, undef -> undef
   17262       70837 :   if (N0.isUndef() && N1.isUndef())
   17263           2 :     return DAG.getUNDEF(VT);
   17264             : 
   17265             :   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
   17266             : 
   17267             :   // Canonicalize shuffle v, v -> v, undef
   17268             :   if (N0 == N1) {
   17269             :     SmallVector<int, 8> NewMask;
   17270         189 :     for (unsigned i = 0; i != NumElts; ++i) {
   17271         160 :       int Idx = SVN->getMaskElt(i);
                             // Redirect references to the second operand onto the first operand.
   17272         160 :       if (Idx >= (int)NumElts) Idx -= NumElts;
   17273         160 :       NewMask.push_back(Idx);
   17274             :     }
   17275         116 :     return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
   17276             :   }
   17277             : 
   17278             :   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
   17279       70806 :   if (N0.isUndef())
   17280          25 :     return DAG.getCommutedVectorShuffle(*SVN);
   17281             : 
   17282             :   // Remove references to rhs if it is undef
   17283       70781 :   if (N1.isUndef()) {
   17284             :     bool Changed = false;
   17285             :     SmallVector<int, 8> NewMask;
   17286      871280 :     for (unsigned i = 0; i != NumElts; ++i) {
   17287      827741 :       int Idx = SVN->getMaskElt(i);
   17288      827741 :       if (Idx >= (int)NumElts) {
   17289         127 :         Idx = -1;
   17290             :         Changed = true;
   17291             :       }
   17292      827741 :       NewMask.push_back(Idx);
   17293             :     }
   17294       43539 :     if (Changed)
   17295          88 :       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
   17296             :   }
   17297             : 
   17298       70737 :   if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
   17299         352 :     return InsElt;
   17300             : 
   17301             :   // A shuffle of a single vector that is a splat can always be folded.
   17302             :   if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
   17303        6831 :     if (N1->isUndef() && N0Shuf->isSplat())
   17304         101 :       return combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG);
   17305             : 
   17306             :   // If it is a splat, check if the argument vector is another splat or a
   17307             :   // build_vector.
                         // Only splats whose index refers to the first operand are handled here.
   17308       70284 :   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
   17309             :     SDNode *V = N0.getNode();
   17310             : 
   17311             :     // If this is a bit convert that changes the element type of the vector but
   17312             :     // not the number of vector elements, look through it.  Be careful not to
   17313             :     // look through conversions that change things like v4f32 to v2f64.
   17314       12865 :     if (V->getOpcode() == ISD::BITCAST) {
   17315         698 :       SDValue ConvInput = V->getOperand(0);
   17316        2033 :       if (ConvInput.getValueType().isVector() &&
   17317        1335 :           ConvInput.getValueType().getVectorNumElements() == NumElts)
   17318             :         V = ConvInput.getNode();
   17319             :     }
   17320             : 
   17321       12865 :     if (V->getOpcode() == ISD::BUILD_VECTOR) {
   17322             :       assert(V->getNumOperands() == NumElts &&
   17323             :              "BUILD_VECTOR has wrong number of operands");
   17324             :       SDValue Base;
   17325             :       bool AllSame = true;
                             // Base becomes the first operand that is not undef, if there is one.
   17326        2457 :       for (unsigned i = 0; i != NumElts; ++i) {
   17327        7371 :         if (!V->getOperand(i).isUndef()) {
   17328        2411 :           Base = V->getOperand(i);
   17329        2411 :           break;
   17330             :         }
   17331             :       }
   17332             :       // Splat of <u, u, u, u>, return <u, u, u, u>
   17333        2411 :       if (!Base.getNode())
   17334           0 :         return N0;
   17335        4819 :       for (unsigned i = 0; i != NumElts; ++i) {
   17336        4816 :         if (V->getOperand(i) != Base) {
   17337             :           AllSame = false;
   17338             :           break;
   17339             :         }
   17340             :       }
   17341             :       // Splat of <x, x, x, x>, return <x, x, x, x>
   17342        2411 :       if (AllSame)
   17343           3 :         return N0;
   17344             : 
   17345             :       // Canonicalize any other splat as a build_vector.
   17346        2408 :       const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
   17347        2408 :       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
   17348        2424 :       SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
   17349             : 
   17350             :       // We may have jumped through bitcasts, so the type of the
   17351             :       // BUILD_VECTOR may not match the type of the shuffle.
   17352        4820 :       if (V->getValueType(0) != VT)
   17353           0 :         NewBV = DAG.getBitcast(VT, NewBV);
   17354        2408 :       return NewBV;
   17355             :     }
   17356             :   }
   17357             : 
   17358             :   // Simplify source operands based on shuffle mask.
   17359       67873 :   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
   17360        1784 :     return SDValue(N, 0);
   17361             : 
   17362             :   // Match shuffles that can be converted to any_vector_extend_in_reg.
   17363       66089 :   if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations, LegalTypes))
   17364         286 :     return V;
   17365             : 
   17366             :   // Combine "truncate_vector_in_reg" style shuffles.
   17367       65803 :   if (SDValue V = combineTruncationShuffle(SVN, DAG))
   17368           1 :     return V;
   17369             : 
                         // Shuffle of CONCAT_VECTORS (with an undef rhs, or a rhs concat whose
                         // pieces have the same type), only before vector-op legalization.
   17370        2988 :   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
   17371       65802 :       Level < AfterLegalizeVectorOps &&
   17372         663 :       (N1.isUndef() ||
   17373           0 :       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
   17374         720 :        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
   17375        2336 :     if (SDValue V = partitionShuffleOfConcats(N, DAG))
   17376         903 :       return V;
   17377             :   }
   17378             : 
   17379             :   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
   17380             :   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
   17381       64899 :   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
   17382       43114 :     if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
   17383         559 :       return Res;
   17384             : 
   17385             :   // If this shuffle only has a single input that is a bitcasted shuffle,
   17386             :   // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
   17387             :   // back to their original types.
   17388       11777 :   if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
   17389       70209 :       N1.isUndef() && Level < AfterLegalizeVectorOps &&
   17390        2552 :       TLI.isTypeLegal(VT)) {
                           // Re-express a mask over elements that are Scale times narrower: each
                           // element M expands to Scale*M .. Scale*M + Scale-1, undef stays undef.
   17391             :     auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
   17392             :       if (Scale == 1)
   17393             :         return SmallVector<int, 8>(Mask.begin(), Mask.end());
   17394             : 
   17395             :       SmallVector<int, 8> NewMask;
   17396             :       for (int M : Mask)
   17397             :         for (int s = 0; s != Scale; ++s)
   17398             :           NewMask.push_back(M < 0 ? -1 : Scale * M + s);
   17399             :       return NewMask;
   17400             :     };
   17401             :     
   17402        2510 :     SDValue BC0 = peekThroughOneUseBitcasts(N0);
   17403        2510 :     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
   17404         104 :       EVT SVT = VT.getScalarType();
   17405         104 :       EVT InnerVT = BC0->getValueType(0);
   17406         104 :       EVT InnerSVT = InnerVT.getScalarType();
   17407             : 
   17408             :       // Determine which shuffle works with the smaller scalar type.
   17409         104 :       EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
   17410         104 :       EVT ScaleSVT = ScaleVT.getScalarType();
   17411             : 
   17412         104 :       if (TLI.isTypeLegal(ScaleVT) &&
   17413         104 :           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
   17414         104 :           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
   17415         104 :         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
   17416         104 :         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
   17417             : 
   17418             :         // Scale the shuffle masks to the smaller scalar type.
   17419             :         ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
   17420             :         SmallVector<int, 8> InnerMask =
   17421         104 :             ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
   17422             :         SmallVector<int, 8> OuterMask =
   17423         104 :             ScaleShuffleMask(SVN->getMask(), OuterScale);
   17424             : 
   17425             :         // Merge the shuffle masks.
   17426             :         SmallVector<int, 8> NewMask;
   17427        1340 :         for (int M : OuterMask)
   17428        1236 :           NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
   17429             : 
   17430             :         // Test for shuffle mask legality over both commutations.
   17431         104 :         SDValue SV0 = BC0->getOperand(0);
   17432         104 :         SDValue SV1 = BC0->getOperand(1);
   17433         208 :         bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
   17434         104 :         if (!LegalMask) {
   17435             :           std::swap(SV0, SV1);
   17436             :           ShuffleVectorSDNode::commuteMask(NewMask);
   17437           4 :           LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
   17438             :         }
   17439             : 
   17440         104 :         if (LegalMask) {
   17441         102 :           SV0 = DAG.getBitcast(ScaleVT, SV0);
   17442         102 :           SV1 = DAG.getBitcast(ScaleVT, SV1);
   17443         102 :           return DAG.getBitcast(
   17444         204 :               VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
   17445             :         }
   17446             :       }
   17447             :     }
   17448             :   }
   17449             : 
   17450             :   // Canonicalize shuffles according to rules:
   17451             :   //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
   17452             :   //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
   17453             :   //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
   17454        1951 :   if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
   17455       65052 :       N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
   17456         782 :       TLI.isTypeLegal(VT)) {
   17457             :     // The incoming shuffle must be of the same type as the result of the
   17458             :     // current shuffle.
   17459             :     assert(N1->getOperand(0).getValueType() == VT &&
   17460             :            "Shuffle types don't match");
   17461             : 
   17462         778 :     SDValue SV0 = N1->getOperand(0);
   17463         778 :     SDValue SV1 = N1->getOperand(1);
   17464             :     bool HasSameOp0 = N0 == SV0;
   17465             :     bool IsSV1Undef = SV1.isUndef();
   17466         778 :     if (HasSameOp0 || IsSV1Undef || N0 == SV1)
   17467             :       // Commute the operands of this shuffle so that next rule
   17468             :       // will trigger.
   17469         500 :       return DAG.getCommutedVectorShuffle(*SVN);
   17470             :   }
   17471             : 
   17472             :   // Try to fold according to rules:
   17473             :   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
   17474             :   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
   17475             :   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
   17476             :   // Don't try to fold shuffles with illegal type.
   17477             :   // Only fold if this shuffle is the only user of the other shuffle.
   17478        4348 :   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
   17479       66888 :       Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
   17480             :     ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
   17481             : 
   17482             :     // Don't try to fold splats; they're likely to simplify somehow, or they
   17483             :     // might be free.
   17484        3008 :     if (OtherSV->isSplat())
   17485          95 :       return SDValue();
   17486             : 
   17487             :     // The incoming shuffle must be of the same type as the result of the
   17488             :     // current shuffle.
   17489             :     assert(OtherSV->getOperand(0).getValueType() == VT &&
   17490             :            "Shuffle types don't match");
   17491             : 
                           // SV0/SV1 are assigned lazily: the first distinct source vector seen
                           // becomes SV0, the second becomes SV1, and a third one aborts the fold.
   17492             :     SDValue SV0, SV1;
   17493             :     SmallVector<int, 4> Mask;
   17494             :     // Compute the combined shuffle mask for a shuffle with SV0 as the first
   17495             :     // operand, and SV1 as the second operand.
   17496       26353 :     for (unsigned i = 0; i != NumElts; ++i) {
   17497       24118 :       int Idx = SVN->getMaskElt(i);
   17498       24118 :       if (Idx < 0) {
   17499             :         // Propagate Undef.
   17500        4026 :         Mask.push_back(Idx);
   17501       14593 :         continue;
   17502             :       }
   17503             : 
   17504             :       SDValue CurrentVec;
   17505       20092 :       if (Idx < (int)NumElts) {
   17506             :         // This shuffle index refers to the inner shuffle N0. Lookup the inner
   17507             :         // shuffle mask to identify which vector is actually referenced.
   17508       12189 :         Idx = OtherSV->getMaskElt(Idx);
   17509       12189 :         if (Idx < 0) {
   17510             :           // Propagate Undef.
   17511         200 :           Mask.push_back(Idx);
   17512         200 :           continue;
   17513             :         }
   17514             : 
   17515       14576 :         CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
   17516        2587 :                                            : OtherSV->getOperand(1);
   17517             :       } else {
   17518             :         // This shuffle index references an element within N1.
   17519        7903 :         CurrentVec = N1;
   17520             :       }
   17521             : 
   17522             :       // Simple case where 'CurrentVec' is UNDEF.
   17523       19892 :       if (CurrentVec.isUndef()) {
   17524          16 :         Mask.push_back(-1);
   17525          16 :         continue;
   17526             :       }
   17527             : 
   17528             :       // Canonicalize the shuffle index. We don't know yet if CurrentVec
   17529             :       // will be the first or second operand of the combined shuffle.
   17530       19876 :       Idx = Idx % NumElts;
   17531       19876 :       if (!SV0.getNode() || SV0 == CurrentVec) {
   17532             :         // Ok. CurrentVec is the left hand side.
   17533             :         // Update the mask accordingly.
   17534       10351 :         SV0 = CurrentVec;
   17535       10351 :         Mask.push_back(Idx);
   17536       10351 :         continue;
   17537             :       }
   17538             : 
   17539             :       // Bail out if we cannot convert the shuffle pair into a single shuffle.
   17540        9525 :       if (SV1.getNode() && SV1 != CurrentVec)
   17541         678 :         return SDValue();
   17542             : 
   17543             :       // Ok. CurrentVec is the right hand side.
   17544             :       // Update the mask accordingly.
   17545        8847 :       SV1 = CurrentVec;
   17546        8847 :       Mask.push_back(Idx + NumElts);
   17547             :     }
   17548             : 
   17549             :     // Check if all indices in Mask are Undef. In case, propagate Undef.
   17550             :     bool isUndefMask = true;
   17551        4629 :     for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
   17552        4788 :       isUndefMask &= Mask[i] < 0;
   17553             : 
   17554        2235 :     if (isUndefMask)
   17555           8 :       return DAG.getUNDEF(VT);
   17556             : 
                           // Any source slot that was never assigned becomes an undef operand.
   17557        2227 :     if (!SV0.getNode())
   17558           0 :       SV0 = DAG.getUNDEF(VT);
   17559        2227 :     if (!SV1.getNode())
   17560         760 :       SV1 = DAG.getUNDEF(VT);
   17561             : 
   17562             :     // Avoid introducing shuffles with illegal mask.
   17563        4454 :     if (!TLI.isShuffleMaskLegal(Mask, VT)) {
   17564             :       ShuffleVectorSDNode::commuteMask(Mask);
   17565             : 
   17566           8 :       if (!TLI.isShuffleMaskLegal(Mask, VT))
   17567           4 :         return SDValue();
   17568             : 
   17569             :       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
   17570             :       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
   17571             :       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
   17572             :       std::swap(SV0, SV1);
   17573             :     }
   17574             : 
   17575             :     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
   17576             :     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
   17577             :     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
   17578        4458 :     return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
   17579             :   }
   17580             : 
   17581       60730 :   return SDValue();
   17582             : }
   17583             : /// Fold SCALAR_TO_VECTOR fed by a constant-index EXTRACT_VECTOR_ELT; empty SDValue if no fold.
   17584       23174 : SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
   17585       23174 :   SDValue InVal = N->getOperand(0);
   17586       46348 :   EVT VT = N->getValueType(0);
   17587             : 
   17588             :   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
   17589             :   // with a VECTOR_SHUFFLE and possible truncate.
   17590       23174 :   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
   17591         484 :     SDValue InVec = InVal->getOperand(0);
   17592         484 :     SDValue EltNo = InVal->getOperand(1);
   17593         484 :     auto InVecT = InVec.getValueType();
   17594             :     if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
   17595         968 :       SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
   17596         968 :       int Elt = C0->getZExtValue();
   17597         484 :       NewMask[0] = Elt;
   17598             :       SDValue Val;
   17599             :       // If we have an implicit truncate, do the truncate here as long as it's
   17600             :       // legal; if it's not legal, fall through to the shuffle check below.
   17601         510 :       if (VT.getScalarType() != InVal.getValueType() &&
   17602          26 :           InVal.getValueType().isScalarInteger() &&
   17603          26 :           isTypeLegal(VT.getScalarType())) {
   17604           0 :         Val =
   17605           0 :             DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
   17606           0 :         return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
   17607             :       }
   17608         942 :       if (VT.getScalarType() == InVecT.getScalarType() &&
   17609         458 :           VT.getVectorNumElements() <= InVecT.getVectorNumElements() &&
   17610         942 :           TLI.isShuffleMaskLegal(NewMask, VT)) {
   17611         916 :         Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec,
   17612         916 :                                    DAG.getUNDEF(InVecT), NewMask);
   17613             :         // If the initial vector is the correct size this shuffle is a
   17614             :         // valid result.
   17615         458 :         if (VT == InVecT)
   17616         309 :           return Val;
   17617             :         // If not, we must truncate the vector by extracting its low subvector.
   17618         149 :         if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
   17619         149 :           MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
   17620         298 :           SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
   17621             :           EVT SubVT =
   17622         149 :               EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
   17623         149 :                                VT.getVectorNumElements());
   17624         298 :           Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val,
   17625         149 :                             ZeroIdx);
   17626         149 :           return Val;
   17627             :         }
   17628             :       }
   17629             :     }
   17630             :   }
   17631             : 
   17632       22716 :   return SDValue();
   17633             : }
   17634             : /// Try to simplify an INSERT_SUBVECTOR node (N0 = vector, N1 = subvector, N2 = index).
   17635       23652 : SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
   17636       23652 :   EVT VT = N->getValueType(0);
   17637       23652 :   SDValue N0 = N->getOperand(0);
   17638       23652 :   SDValue N1 = N->getOperand(1);
   17639       23652 :   SDValue N2 = N->getOperand(2);
   17640             : 
   17641             :   // If inserting an UNDEF, just return the original vector.
   17642       23652 :   if (N1.isUndef())
   17643          55 :     return N0;
   17644             : 
   17645             :   // For nested INSERT_SUBVECTORs, attempt to combine inner node first to allow
   17646             :   // us to pull BITCASTs from input to output.
   17647       23597 :   if (N0.hasOneUse() && N0->getOpcode() == ISD::INSERT_SUBVECTOR)
   17648        4330 :     if (SDValue NN0 = visitINSERT_SUBVECTOR(N0.getNode()))
   17649         288 :       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, NN0, N1, N2);
   17650             : 
   17651             :   // If this is an insert of an extracted vector into an undef vector, we can
   17652             :   // just use the input to the extract.
   17653       15954 :   if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
   17654       25728 :       N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
   17655         191 :     return N1.getOperand(0);
   17656             : 
   17657             :   // If we are inserting a bitcast of an extract_subvector into an undef at the
   17658             :   // same index, and the extract source matches VT's element count and size:
   17659             :   // i.e. INSERT_SUBVECTOR UNDEF (BITCAST (EXTRACT_SUBVECTOR X, N2)) N2 ->
   17660             :   //        BITCAST X
   17661       15763 :   if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
   17662        3433 :       N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
   17663         335 :       N1.getOperand(0).getOperand(1) == N2 &&
   17664         332 :       N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
   17665       23262 :           VT.getVectorNumElements() &&
   17666       23265 :       N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
   17667           3 :           VT.getSizeInBits()) {
   17668           6 :     return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
   17669             :   }
   17670             : 
   17671             :   // If both N0 and N1 are bitcast values on which insert_subvector
   17672             :   // would make sense, pull the bitcast through.
   17673             :   // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
   17674             :   //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
   17675       23259 :   if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
   17676         213 :     SDValue CN0 = N0.getOperand(0);
   17677         213 :     SDValue CN1 = N1.getOperand(0);
   17678         213 :     EVT CN0VT = CN0.getValueType();
   17679         213 :     EVT CN1VT = CN1.getValueType();
   17680         213 :     if (CN0VT.isVector() && CN1VT.isVector() &&
   17681         564 :         CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
   17682             :         CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
   17683           4 :       SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
   17684           4 :                                       CN0.getValueType(), CN0, CN1, N2);
   17685           4 :       return DAG.getBitcast(VT, NewINSERT);
   17686             :     }
   17687             :   }
   17688             : 
   17689             :   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
   17690             :   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
   17691             :   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
   17692           0 :   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
   17693       23255 :       N0.getOperand(1).getValueType() == N1.getValueType() &&
   17694        4364 :       N0.getOperand(2) == N2)
   17695          46 :     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
   17696          92 :                        N1, N2);
   17697             : 
   17698             :   if (!isa<ConstantSDNode>(N2))
   17699           0 :     return SDValue();
   17700             : 
   17701       23209 :   unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
   17702             : 
   17703             :   // Canonicalize insert_subvector dag nodes.
   17704             :   // Example:
   17705             :   // (insert_subvector (insert_subvector A, Idx0), Idx1)
   17706             :   // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
   17707        4318 :   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
   17708       27395 :       N1.getValueType() == N0.getOperand(1).getValueType() &&
   17709             :       isa<ConstantSDNode>(N0.getOperand(2))) {
   17710        4186 :     unsigned OtherIdx = N0.getConstantOperandVal(2);
   17711        4186 :     if (InsIdx < OtherIdx) {
   17712             :       // Swap nodes.
   17713           0 :       SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
   17714           0 :                                   N0.getOperand(0), N1, N2);
   17715           0 :       AddToWorklist(NewOp.getNode());
   17716           0 :       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
   17717           0 :                          VT, NewOp, N0.getOperand(1), N0.getOperand(2));
   17718             :     }
   17719             :   }
   17720             : 
   17721             :   // If the input vector is a concatenation, and the insert replaces
   17722             :   // one of the pieces, we can optimize into a single concat_vectors.
   17723       23209 :   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
   17724           0 :       N0.getOperand(0).getValueType() == N1.getValueType()) {
   17725           0 :     unsigned Factor = N1.getValueType().getVectorNumElements();
   17726             : 
   17727           0 :     SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
   17728           0 :     Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
   17729             : 
   17730           0 :     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
   17731             :   }
   17732             : 
   17733       23209 :   return SDValue();
   17734             : }
   17735             : /// Fold an FP_TO_FP16 node whose operand is an FP16_TO_FP back to that node's input.
   17736           0 : SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
   17737        4584 :   SDValue N0 = N->getOperand(0);
   17738             : 
   17739             :   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
   17740        4584 :   if (N0->getOpcode() == ISD::FP16_TO_FP)
   17741        1018 :     return N0->getOperand(0);
   17742             : 
   17743           0 :   return SDValue();
   17744             : }
   17745             : /// Drop a redundant (and op, 0xffff) mask feeding an FP16_TO_FP node.
   17746           0 : SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
   17747           0 :   SDValue N0 = N->getOperand(0);
   17748             : 
   17749             :   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
   17750           0 :   if (N0->getOpcode() == ISD::AND) {
   17751           0 :     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
   17752           0 :     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
   17753           0 :       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
   17754           0 :                          N0.getOperand(0));
   17755             :     }
   17756             :   }
   17757             : 
   17758           0 :   return SDValue();
   17759             : }
   17760             : 
   17761             : /// Returns a vector_shuffle if it is able to transform an AND to a
   17762             : /// vector_shuffle with the destination vector and a zero vector.
   17763             : /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
   17764             : ///      vector_shuffle V, Zero, <0, 5, 2, 7>
   17765       53297 : SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
   17766             :   assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
   17767             : 
   17768       53297 :   EVT VT = N->getValueType(0);
   17769       53297 :   SDValue LHS = N->getOperand(0);
   17770       53297 :   SDValue RHS = peekThroughBitcasts(N->getOperand(1));
   17771             :   SDLoc DL(N);
   17772             : 
   17773             :   // Make sure we're not running after operation legalization where it
   17774             :   // may have custom lowered the vector shuffles.
   17775       53297 :   if (LegalOperations)
   17776       41311 :     return SDValue();
   17777             : 
   17778       23972 :   if (RHS.getOpcode() != ISD::BUILD_VECTOR)
   17779        6918 :     return SDValue();
   17780             : 
   17781       10136 :   EVT RVT = RHS.getValueType();
   17782        5068 :   unsigned NumElts = RHS.getNumOperands();
   17783             : 
   17784             :   // Attempt to create a valid clear mask, splitting the mask into
   17785             :   // sub elements and checking to see if each is
   17786             :   // all zeros or all ones - suitable for shuffle masking.
   17787             :   auto BuildClearMask = [&](int Split) {
   17788             :     int NumSubElts = NumElts * Split;
   17789             :     int NumSubBits = RVT.getScalarSizeInBits() / Split;
   17790             : 
   17791             :     SmallVector<int, 8> Indices;
   17792             :     for (int i = 0; i != NumSubElts; ++i) {
   17793             :       int EltIdx = i / Split;
   17794             :       int SubIdx = i % Split;
   17795             :       SDValue Elt = RHS.getOperand(EltIdx);
   17796             :       if (Elt.isUndef()) {
   17797             :         Indices.push_back(-1);
   17798             :         continue;
   17799             :       }
   17800             : 
   17801             :       APInt Bits;
   17802             :       if (isa<ConstantSDNode>(Elt))
   17803             :         Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
   17804             :       else if (isa<ConstantFPSDNode>(Elt))
   17805             :         Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
   17806             :       else
   17807             :         return SDValue();
   17808             : 
   17809             :       // Extract the sub element from the constant bit mask.
   17810             :       if (DAG.getDataLayout().isBigEndian()) {
   17811             :         Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
   17812             :       } else {
   17813             :         Bits.lshrInPlace(SubIdx * NumSubBits);
   17814             :       }
   17815             : 
   17816             :       if (Split > 1)
   17817             :         Bits = Bits.trunc(NumSubBits);
   17818             : 
   17819             :       if (Bits.isAllOnesValue())
   17820             :         Indices.push_back(i);
   17821             :       else if (Bits == 0)
   17822             :         Indices.push_back(i + NumSubElts);
   17823             :       else
   17824             :         return SDValue();
   17825             :     }
   17826             : 
   17827             :     // Let's see if the target supports this vector_shuffle.
   17828             :     EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
   17829             :     EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
   17830             :     if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
   17831             :       return SDValue();
   17832             : 
   17833             :     SDValue Zero = DAG.getConstant(0, DL, ClearVT);
   17834             :     return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
   17835             :                                                    DAG.getBitcast(ClearVT, LHS),
   17836             :                                                    Zero, Indices));
   17837        5068 :   };
   17838             : 
   17839             :   // Determine maximum split level (byte level masking).
   17840             :   int MaxSplit = 1;
   17841        5068 :   if (RVT.getScalarSizeInBits() % 8 == 0)
   17842        5056 :     MaxSplit = RVT.getScalarSizeInBits() / 8;
   17843             : 
   17844       22688 :   for (int Split = 1; Split <= MaxSplit; ++Split)
   17845       18709 :     if (RVT.getScalarSizeInBits() % Split == 0)
   17846       13296 :       if (SDValue S = BuildClearMask(Split))
   17847        1089 :         return S;
   17848             : 
   17849        3979 :   return SDValue();
   17850             : }
   17851             : 
   17852             : /// Simplify a binary vector operation, like ADD: constant-fold it or hoist matching shuffles.
   17853      799028 : SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
   17854             :   assert(N->getValueType(0).isVector() &&
   17855             :          "SimplifyVBinOp only works on vectors!");
   17856             : 
   17857      799028 :   SDValue LHS = N->getOperand(0);
   17858      799028 :   SDValue RHS = N->getOperand(1);
   17859      799028 :   SDValue Ops[] = {LHS, RHS};
   17860             : 
   17861             :   // See if we can constant fold the vector operation.
   17862      799028 :   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
   17863     2537783 :           N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
   17864         438 :     return Fold;
   17865             : 
   17866             :   // Type legalization might introduce new shuffles in the DAG.
   17867             :   // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
   17868             :   //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
   17869      310020 :   if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
   17870        2247 :       isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
   17871      799640 :       LHS.getOperand(1).isUndef() &&
   17872         938 :       RHS.getOperand(1).isUndef()) {
   17873             :     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
   17874             :     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
   17875             : 
   17876         934 :     if (SVN0->getMask().equals(SVN1->getMask())) {
   17877          50 :       EVT VT = N->getValueType(0);
   17878          25 :       SDValue UndefVector = LHS.getOperand(1);
   17879          50 :       SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
   17880             :                                      LHS.getOperand(0), RHS.getOperand(0),
   17881          25 :                                      N->getFlags());
   17882             :       AddUsersToWorklist(N);
   17883          25 :       return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
   17884          50 :                                   SVN0->getMask());
   17885             :     }
   17886             :   }
   17887             : 
   17888      798565 :   return SDValue();
   17889             : }
   17890             : /// Simplify a select whose condition N0 is a SETCC by delegating to SimplifySelectCC.
   17891           0 : SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
   17892             :                                     SDValue N2) {
   17893             :   assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
   17894             : 
   17895             :   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
   17896           0 :                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
   17897             : 
   17898             :   // If we got a simplified select_cc node back from SimplifySelectCC, then
   17899             :   // break it down into a new SETCC node, and a new SELECT node, and then return
   17900             :   // the SELECT node, since we were called with a SELECT node.
   17901           0 :   if (SCC.getNode()) {
   17902             :     // Check to see if we got a select_cc back (to turn into setcc/select).
   17903             :     // Otherwise, just return whatever node we got back, like fabs.
   17904           0 :     if (SCC.getOpcode() == ISD::SELECT_CC) {
   17905           0 :       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
   17906             :                                   N0.getValueType(),
   17907             :                                   SCC.getOperand(0), SCC.getOperand(1),
   17908           0 :                                   SCC.getOperand(4));
   17909           0 :       AddToWorklist(SETCC.getNode());
   17910           0 :       return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
   17911           0 :                            SCC.getOperand(2), SCC.getOperand(3));
   17912             :     }
   17913             : 
   17914           0 :     return SCC;
   17915             :   }
   17916           0 :   return SDValue();
   17917             : }
   17918             : 
   17919             : /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
   17920             : /// being selected between, see if we can simplify the select.  Callers of this
   17921             : /// should assume that TheSelect is deleted if this returns true.  As such, they
   17922             : /// should return the appropriate thing (e.g. the node) back to the top-level of
   17923             : /// the DAG combiner loop to avoid it being looked at.
   17924           0 : bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
   17925             :                                     SDValue RHS) {
   17926             :   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
   17927             :   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
   17928           0 :   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
   17929           0 :     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
   17930             :       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
   17931             :       SDValue Sqrt = RHS;
   17932             :       ISD::CondCode CC;
   17933             :       SDValue CmpLHS;
   17934             :       const ConstantFPSDNode *Zero = nullptr;
   17935             : 
   17936           0 :       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
   17937           0 :         CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
   17938           0 :         CmpLHS = TheSelect->getOperand(0);
   17939           0 :         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
   17940             :       } else {
   17941             :         // SELECT or VSELECT
   17942           0 :         SDValue Cmp = TheSelect->getOperand(0);
   17943           0 :         if (Cmp.getOpcode() == ISD::SETCC) {
   17944           0 :           CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
   17945           0 :           CmpLHS = Cmp.getOperand(0);
   17946           0 :           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
   17947             :         }
   17948             :       }
   17949           0 :       if (Zero && Zero->isZero() &&
   17950           0 :           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
   17951           0 :           CC == ISD::SETULT || CC == ISD::SETLT)) {
   17952             :         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
   17953           0 :         CombineTo(TheSelect, Sqrt);
   17954           0 :         return true;
   17955             :       }
   17956             :     }
   17957             :   }
   17958             :   // Cannot simplify select with vector condition
   17959           0 :   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
   17960             : 
   17961             :   // If this is a select from two identical things, try to pull the operation
   17962             :   // through the select.
   17963           0 :   if (LHS.getOpcode() != RHS.getOpcode() ||
   17964           0 :       !LHS.hasOneUse() || !RHS.hasOneUse())
   17965           0 :     return false;
   17966             : 
   17967             :   // If this is a load and the token chain is identical, replace the select
   17968             :   // of two loads with a load through a select of the address to load from.
   17969             :   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
   17970             :   // constants have been dropped into the constant pool.
   17971           0 :   if (LHS.getOpcode() == ISD::LOAD) {
   17972             :     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
   17973             :     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
   17974             : 
   17975             :     // Token chains must be identical.
   17976           0 :     if (LHS.getOperand(0) != RHS.getOperand(0) ||
   17977             :         // Do not let this transformation reduce the number of volatile loads.
   17978           0 :         LLD->isVolatile() || RLD->isVolatile() ||
   17979             :         // FIXME: If either is a pre/post inc/dec load,
   17980             :         // we'd need to split out the address adjustment.
   17981           0 :         LLD->isIndexed() || RLD->isIndexed() ||
   17982             :         // If this is an EXTLOAD, the VT's must match.
   17983           0 :         LLD->getMemoryVT() != RLD->getMemoryVT() ||
   17984             :         // If this is an EXTLOAD, the kind of extension must match.
   17985           0 :         (LLD->getExtensionType() != RLD->getExtensionType() &&
   17986             :          // The only exception is if one of the extensions is anyext.
   17987           0 :          LLD->getExtensionType() != ISD::EXTLOAD &&
   17988           0 :          RLD->getExtensionType() != ISD::EXTLOAD) ||
   17989             :         // FIXME: this discards src value information.  This is
   17990             :         // over-conservative. It would be beneficial to be able to remember
   17991             :         // both potential memory locations.  Since we are discarding
   17992             :         // src value info, don't do the transformation if the memory
   17993             :         // locations are not in the default address space.
   17994           0 :         LLD->getPointerInfo().getAddrSpace() != 0 ||
   17995           0 :         RLD->getPointerInfo().getAddrSpace() != 0 ||
   17996           0 :         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
   17997             :                                       LLD->getBasePtr().getValueType()))
   17998           0 :       return false;
   17999             : 
   18000             :     // The loads must not depend on one another.
   18001           0 :     if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
   18002           0 :       return false;
   18003             : 
   18004             :     // Check that the select condition doesn't reach either load.  If so,
   18005             :     // folding this will induce a cycle into the DAG.  If not, this is safe to
   18006             :     // xform, so create a select of the addresses.
   18007             : 
   18008             :     SmallPtrSet<const SDNode *, 32> Visited;
   18009             :     SmallVector<const SDNode *, 16> Worklist;
   18010             : 
   18011             :     // Always fail if LLD and RLD are not independent. TheSelect is a
   18012             :     // predecessor to all Nodes in question so we need not search past it.
   18013             : 
   18014           0 :     Visited.insert(TheSelect);
   18015           0 :     Worklist.push_back(LLD);
   18016           0 :     Worklist.push_back(RLD);
   18017             : 
   18018           0 :     if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
   18019           0 :         SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
   18020           0 :       return false;
   18021             : 
   18022           0 :     SDValue Addr;
   18023           0 :     if (TheSelect->getOpcode() == ISD::SELECT) {
   18024             :       // We cannot do this optimization if any pair of {RLD, LLD} is a
   18025             :       // predecessor to {RLD, LLD, CondNode}. As we've already compared the
   18026             :       // Loads, we only need to check if CondNode is a successor to one of the
   18027             :       // loads. We can further avoid this if there's no use of their chain
   18028             :       // value.
   18029           0 :       SDNode *CondNode = TheSelect->getOperand(0).getNode();
   18030           0 :       Worklist.push_back(CondNode);
   18031             : 
   18032           0 :       if ((LLD->hasAnyUseOfValue(1) &&
   18033           0 :            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
   18034           0 :           (RLD->hasAnyUseOfValue(1) &&
   18035           0 :            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
   18036           0 :         return false;
   18037             : 
   18038           0 :       Addr = DAG.getSelect(SDLoc(TheSelect),
   18039             :                            LLD->getBasePtr().getValueType(),
   18040           0 :                            TheSelect->getOperand(0), LLD->getBasePtr(),
   18041           0 :                            RLD->getBasePtr());
   18042             :     } else {  // Otherwise SELECT_CC
   18043             :       // We cannot do this optimization if any pair of {RLD, LLD} is a
   18044             :       // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
   18045             :       // the Loads, we only need to check if CondLHS/CondRHS is a successor to
   18046             :       // one of the loads. We can further avoid this if there's no use of their
   18047             :       // chain value.
   18048             : 
   18049           0 :       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
   18050           0 :       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
   18051           0 :       Worklist.push_back(CondLHS);
   18052           0 :       Worklist.push_back(CondRHS);
   18053             : 
   18054           0 :       if ((LLD->hasAnyUseOfValue(1) &&
   18055           0 :            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
   18056           0 :           (RLD->hasAnyUseOfValue(1) &&
   18057           0 :            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
   18058           0 :         return false;
   18059             : 
   18060           0 :       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
   18061             :                          LLD->getBasePtr().getValueType(),
   18062             :                          TheSelect->getOperand(0),
   18063             :                          TheSelect->getOperand(1),
   18064             :                          LLD->getBasePtr(), RLD->getBasePtr(),
   18065           0 :                          TheSelect->getOperand(4));
   18066             :     }
   18067             : 
   18068             :     SDValue Load;
   18069             :     // It is safe to replace the two loads if they have different alignments,
   18070             :     // but the new load must be the minimum (most restrictive) alignment of the
   18071             :     // inputs.
   18072           0 :     unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
   18073           0 :     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
   18074           0 :     if (!RLD->isInvariant())
   18075             :       MMOFlags &= ~MachineMemOperand::MOInvariant;
   18076           0 :     if (!RLD->isDereferenceable())
   18077             :       MMOFlags &= ~MachineMemOperand::MODereferenceable;
   18078           0 :     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
   18079             :       // FIXME: Discards pointer and AA info.
   18080           0 :       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
   18081             :                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
   18082           0 :                          MMOFlags);
   18083             :     } else {
   18084             :       // FIXME: Discards pointer and AA info.
   18085           0 :       Load = DAG.getExtLoad(
   18086             :           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
   18087             :                                                   : LLD->getExtensionType(),
   18088           0 :           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
   18089           0 :           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
   18090             :     }
   18091             : 
   18092             :     // Users of the select now use the result of the load.
   18093           0 :     CombineTo(TheSelect, Load);
   18094             : 
   18095             :     // Users of the old loads now use the new load's chain.  We know the
   18096             :     // old-load value is dead now.
   18097             :     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
   18098             :     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
   18099           0 :     return true;
   18100             :   }
   18101             : 
   18102             :   return false;
   18103             : }
   18104             : 
   18105             : /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
   18106             : /// bitwise 'and'. Returns an empty SDValue when the pattern does not match.
   18107       70497 : SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
   18108             :                                             SDValue N1, SDValue N2, SDValue N3,
   18109             :                                             ISD::CondCode CC) {
   18110             :   // If this is a select where the false operand is zero and the compare is a
   18111             :   // check of the sign bit, see if we can perform the "gzip trick":
   18112             :   // select_cc setlt X,  0, A, 0 -> and (sra X, size(X)-1), A
   18113             :   // select_cc setgt X, -1, A, 0 -> and (not (sra X, size(X)-1)), A
   18114       70497 :   EVT XType = N0.getValueType();
   18115       70497 :   EVT AType = N2.getValueType();
   18116       70497 :   if (!isNullConstant(N3) || !XType.bitsGE(AType)) // Need N3 == 0, X >= A wide.
   18117       33748 :     return SDValue();
   18118             : 
   18119             :   // If the comparison is testing for a positive value, we have to invert
   18120             :   // the sign bit mask, so only do that transform if the target has a bitwise
   18121             :   // 'and not' instruction (the invert is free).
   18122       36749 :   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
   18123             :     // (X > -1) ? A : 0
   18124             :     // (X >  0) ? X : 0 <-- This is canonical signed max.
   18125         162 :     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
   18126         148 :       return SDValue();
   18127       36587 :   } else if (CC == ISD::SETLT) {
   18128             :     // (X <  0) ? A : 0
   18129             :     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
   18130         674 :     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
   18131         299 :       return SDValue();
   18132             :   } else {
   18133       35913 :     return SDValue();
   18134             :   }
   18135             : 
   18136             :   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
   18137             :   // constant; C2 = size(X) - log2(A) - 1 moves the sign bit onto A's set bit.
   18138         389 :   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
   18139             :   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
   18140        2156 :   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
   18141         255 :     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
   18142         255 :     SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
   18143         510 :     SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
   18144         255 :     AddToWorklist(Shift.getNode());
   18145             : 
   18146         255 :     if (XType.bitsGT(AType)) { // Narrow the shifted value to A's type.
   18147         450 :       Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
   18148         225 :       AddToWorklist(Shift.getNode());
   18149             :     }
   18150             : 
   18151         255 :     if (CC == ISD::SETGT) // Positive test: invert the mask.
   18152           5 :       Shift = DAG.getNOT(DL, Shift, AType);
   18153             : 
   18154         510 :     return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
   18155             :   }
   18156             : 
   18157         134 :   SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
   18158         268 :   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
   18159         134 :   AddToWorklist(Shift.getNode());
   18160             : 
   18161         134 :   if (XType.bitsGT(AType)) { // Narrow the sign mask to A's type.
   18162         130 :     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
   18163          65 :     AddToWorklist(Shift.getNode());
   18164             :   }
   18165             : 
   18166         134 :   if (CC == ISD::SETGT) // Positive test: invert the mask.
   18167           9 :     Shift = DAG.getNOT(DL, Shift, AType);
   18168             : 
   18169         268 :   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
   18170             : }
   18171             : 
   18172             : /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
   18173             : /// where 'cond' is the comparison specified by CC. Returns an empty SDValue if no fold applies.
   18174       70586 : SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
   18175             :                                       SDValue N2, SDValue N3, ISD::CondCode CC,
   18176             :                                       bool NotExtCompare) {
   18177             :   // (x ? y : y) -> y.
   18178       70586 :   if (N2 == N3) return N2;
   18179             : 
   18180      141172 :   EVT VT = N2.getValueType();
   18181             :   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
   18182             :   ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
   18183             : 
   18184             :   // Determine if the condition we're dealing with is constant.
   18185             :   SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
   18186       70586 :                               N0, N1, CC, DL, false);
   18187       70586 :   if (SCC.getNode()) AddToWorklist(SCC.getNode());
   18188             : 
   18189             :   if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
   18190             :     // fold select_cc true, x, y -> x
   18191             :     // fold select_cc false, x, y -> y
   18192         119 :     return !SCCC->isNullValue() ? N2 : N3;
   18193             :   }
   18194             : 
   18195             :   // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 4 : 0))"
   18196             :   // where "tmp" is a constant pool entry holding the array {2.0f, 1.0f} (false
   18197             :   // value first).  This is a win when the constant is not otherwise available
   18198             :   // because it replaces two constant pool loads with one.  We only do this if
   18199             :   // the FP type is known to be legal, because if it isn't, then we are before
   18200             :   // legalize types and we want the other legalization to happen first (e.g. to
   18201             :   // avoid messing with soft float) and if the ConstantFP is not legal, because
   18202             :   // if it is legal, we may not need to store the FP constant in a constant pool.
   18203             :   if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
   18204             :     if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
   18205        1558 :       if (TLI.isTypeLegal(N2.getValueType()) &&
   18206             :           (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
   18207         461 :                TargetLowering::Legal &&
   18208        1475 :            !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
   18209         276 :            !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
   18210             :           // If both constants have multiple uses, then we won't need to do an
   18211             :           // extra load, they are likely around in registers for other users.
   18212             :           (TV->hasOneUse() || FV->hasOneUse())) {
   18213             :         Constant *Elts[] = {
   18214          43 :           const_cast<ConstantFP*>(FV->getConstantFPValue()), // [0]: false value
   18215          43 :           const_cast<ConstantFP*>(TV->getConstantFPValue())  // [1]: true value
   18216          86 :         };
   18217          43 :         Type *FPTy = Elts[0]->getType();
   18218          43 :         const DataLayout &TD = DAG.getDataLayout();
   18219             : 
   18220             :         // Create a ConstantArray of the two constants.
   18221          43 :         Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
   18222             :         SDValue CPIdx =
   18223          86 :             DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
   18224          86 :                                 TD.getPrefTypeAlignment(FPTy));
   18225          43 :         unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
   18226             : 
   18227             :         // Get the offsets to the 0 and 1 element of the array so that we can
   18228             :         // select between them.
   18229          43 :         SDValue Zero = DAG.getIntPtrConstant(0, DL);
   18230          43 :         unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
   18231          43 :         SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
   18232             : 
   18233          43 :         SDValue Cond = DAG.getSetCC(DL,
   18234             :                                     getSetCCResultType(N0.getValueType()),
   18235          43 :                                     N0, N1, CC);
   18236          43 :         AddToWorklist(Cond.getNode());
   18237          43 :         SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
   18238          86 :                                           Cond, One, Zero);
   18239          43 :         AddToWorklist(CstOffset.getNode());
   18240          43 :         CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
   18241          43 :                             CstOffset);
   18242          43 :         AddToWorklist(CPIdx.getNode());
   18243          43 :         return DAG.getLoad(
   18244          43 :             TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
   18245             :             MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
   18246          43 :             Alignment);
   18247             :       }
   18248             :     }
   18249             : 
   18250       70497 :   if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
   18251         389 :     return V;
   18252             : 
   18253             :   // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
   18254             :   // where y has a single bit set.
   18255             :   // A plaintext description would be, we can turn the SELECT_CC into an AND
   18256             :   // when the condition can be materialized as an all-ones register.  Any
   18257             :   // single bit-test can be materialized as an all-ones register with
   18258             :   // shift-left and shift-right-arith.
   18259       37882 :   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
   18260       73349 :       N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
   18261          33 :     SDValue AndLHS = N0->getOperand(0);
   18262             :     ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
   18263          66 :     if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
   18264             :       // Shift the tested bit over the sign bit.
   18265             :       const APInt &AndMask = ConstAndRHS->getAPIntValue();
   18266             :       SDValue ShlAmt =
   18267          30 :         DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
   18268          60 :                         getShiftAmountTy(AndLHS.getValueType()));
   18269          54 :       SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
   18270             : 
   18271             :       // Now arithmetic right shift it all the way over, so the result is either
   18272             :       // all-ones, or zero.
   18273             :       SDValue ShrAmt =
   18274          30 :         DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
   18275          60 :                         getShiftAmountTy(Shl.getValueType()));
   18276          54 :       SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
   18277             : 
   18278          60 :       return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
   18279             :     }
   18280             :   }
   18281             : 
   18282             :   // fold select C, 16, 0 -> shl C, 4
   18283       70078 :   if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
   18284       60952 :       TLI.getBooleanContents(N0.getValueType()) ==
   18285             :           TargetLowering::ZeroOrOneBooleanContent) {
   18286             : 
   18287             :     // If the caller doesn't want us to simplify this into a zext of a compare,
   18288             :     // don't do it.
   18289       57702 :     if (NotExtCompare && N2C->isOne())
   18290       28810 :       return SDValue();
   18291             : 
   18292             :     // Get a SetCC of the condition
   18293             :     // NOTE: Don't create a SETCC if it's not legal on this target.
   18294          82 :     if (!LegalOperations ||
   18295             :         TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) {
   18296             :       SDValue Temp, SCC; // NOTE(review): this SCC shadows the outer SCC.
   18297             :       // cast from setcc result type to select result type
   18298          63 :       if (LegalTypes) {
   18299           4 :         SCC  = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
   18300           2 :                             N0, N1, CC);
   18301           6 :         if (N2.getValueType().bitsLT(SCC.getValueType()))
   18302           0 :           Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
   18303           0 :                                         N2.getValueType());
   18304             :         else
   18305           4 :           Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
   18306           4 :                              N2.getValueType(), SCC);
   18307             :       } else {
   18308          64 :         SCC  = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
   18309         122 :         Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
   18310         122 :                            N2.getValueType(), SCC);
   18311             :       }
   18312             : 
   18313          63 :       AddToWorklist(SCC.getNode());
   18314          63 :       AddToWorklist(Temp.getNode());
   18315             : 
   18316         126 :       if (N2C->isOne())
   18317          15 :         return Temp;
   18318             : 
   18319             :       // shl setcc result by log2 n2c
   18320          48 :       return DAG.getNode(
   18321             :           ISD::SHL, DL, N2.getValueType(), Temp,
   18322          48 :           DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
   18323         144 :                           getShiftAmountTy(Temp.getValueType())));
   18324             :     }
   18325             :   }
   18326             : 
   18327             :   // Check to see if this is an integer abs.
   18328             :   // select_cc setg[te] X,  0,  X, -X ->
   18329             :   // select_cc setgt    X, -1,  X, -X ->
   18330             :   // select_cc setl[te] X,  0, -X,  X ->
   18331             :   // select_cc setlt    X,  1, -X,  X ->
   18332             :   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
   18333       41205 :   if (N1C) {
   18334             :     ConstantSDNode *SubC = nullptr;
   18335       66701 :     if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
   18336         891 :          (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
   18337       25836 :         N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
   18338             :       SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
   18339       40347 :     else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
   18340         443 :               (N1C->isOne() && CC == ISD::SETLT)) &&
   18341       26772 :              N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
   18342             :       SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
   18343             : 
   18344       24694 :     EVT XType = N0.getValueType();
   18345       24805 :     if (SubC && SubC->isNullValue() && XType.isInteger()) {
   18346             :       SDLoc DL(N0); // NOTE(review): shadows the DL parameter.
   18347         111 :       SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
   18348             :                                   N0,
   18349         111 :                                   DAG.getConstant(XType.getSizeInBits() - 1, DL,
   18350         111 :                                          getShiftAmountTy(N0.getValueType())));
   18351         111 :       SDValue Add = DAG.getNode(ISD::ADD, DL,
   18352         111 :                                 XType, N0, Shift);
   18353         111 :       AddToWorklist(Shift.getNode());
   18354         111 :       AddToWorklist(Add.getNode());
   18355         222 :       return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
   18356             :     }
   18357             :   }
   18358             : 
   18359             :   // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
   18360             :   // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
   18361             :   // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
   18362             :   // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
   18363             :   // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
   18364             :   // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
   18365             :   // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
   18366             :   // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
   18367       65677 :   if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
   18368       15374 :     SDValue ValueOnZero = N2;
   18369       15374 :     SDValue Count = N3;
   18370             :     // If the condition is NE instead of EQ, swap the operands.
   18371       15374 :     if (CC == ISD::SETNE)
   18372             :       std::swap(ValueOnZero, Count);
   18373             :     // Check if the value on zero is a constant equal to the bits in the type.
   18374             :     if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
   18375        5234 :       if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
   18376             :         // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
   18377             :         // legal, combine to just cttz.
   18378         161 :         if ((Count.getOpcode() == ISD::CTTZ ||
   18379             :              Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
   18380         185 :             N0 == Count.getOperand(0) &&
   18381          22 :             (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
   18382          44 :           return DAG.getNode(ISD::CTTZ, DL, VT, N0);
   18383             :         // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
   18384             :         // legal, combine to just ctlz.
   18385         139 :         if ((Count.getOpcode() == ISD::CTLZ ||
   18386             :              Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
   18387         154 :             N0 == Count.getOperand(0) &&
   18388          15 :             (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
   18389          30 :           return DAG.getNode(ISD::CTLZ, DL, VT, N0);
   18390             :       }
   18391             :     }
   18392             :   }
   18393             : 
   18394       41057 :   return SDValue();
   18395             : }
   18396             : 
   18397             : /// Forwarding wrapper: builds a DAGCombinerInfo for this combiner and calls TargetLowering::SimplifySetCC.
   18398             : SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
   18399             :                                    ISD::CondCode Cond, const SDLoc &DL,
   18400             :                                    bool foldBooleans) {
   18401             :   TargetLowering::DAGCombinerInfo
   18402      391408 :     DagCombineInfo(DAG, Level, false, this);
   18403      391408 :   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
   18404             : }
   18405             : 
   18406             : /// Given an ISD::SDIV node expressing a divide by constant, return
   18407             : /// a DAG expression to select that will generate the same value by multiplying
   18408             : /// by a magic number. Returns an empty SDValue if no expansion is produced.
   18409             : /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
   18410        4025 : SDValue DAGCombiner::BuildSDIV(SDNode *N) {
   18411             :   // When optimising for minimum size, we don't want to expand a div to a mul
   18412             :   // and a shift.
   18413        4025 :   if (DAG.getMachineFunction().getFunction().optForMinSize())
   18414          34 :     return SDValue();
   18415             : 
   18416             :   SmallVector<SDNode *, 8> Built;
   18417        3991 :   if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
   18418        9819 :     for (SDNode *N : Built) // NOTE(review): loop variable shadows parameter N.
   18419        5924 :       AddToWorklist(N); // Queue every node listed in Built.
   18420        3895 :     return S;
   18421             :   }
   18422             : 
   18423          96 :   return SDValue();
   18424             : }
   18425             : 
   18426             : /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
   18427             : /// DAG expression that will generate the same value by right shifting, or an empty SDValue on failure.
   18428         595 : SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
   18429        1190 :   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
   18430         595 :   if (!C) // Divisor is neither a constant nor a constant splat.
   18431         112 :     return SDValue();
   18432             : 
   18433             :   // Avoid division by zero.
   18434         966 :   if (C->isNullValue())
   18435           0 :     return SDValue();
   18436             : 
   18437             :   SmallVector<SDNode *, 8> Built;
   18438         483 :   if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
   18439          89 :     for (SDNode *N : Built) // NOTE(review): loop variable shadows parameter N.
   18440          57 :       AddToWorklist(N); // Queue every node listed in Built.
   18441          32 :     return S;
   18442             :   }
   18443             : 
   18444         451 :   return SDValue();
   18445             : }
   18446             : 
   18447             : /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
   18448             : /// expression that will generate the same value by multiplying by a magic
   18449             : /// number. Returns an empty SDValue if no expansion is produced.
   18450             : /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
   18451         957 : SDValue DAGCombiner::BuildUDIV(SDNode *N) {
   18452             :   // When optimising for minimum size, we don't want to expand a div to a mul
   18453             :   // and a shift.
   18454         957 :   if (DAG.getMachineFunction().getFunction().optForMinSize())
   18455          32 :     return SDValue();
   18456             : 
   18457             :   SmallVector<SDNode *, 8> Built;
   18458         925 :   if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
   18459        4100 :     for (SDNode *N : Built) // NOTE(review): loop variable shadows parameter N.
   18460        3288 :       AddToWorklist(N); // Queue every node listed in Built.
   18461         812 :     return S;
   18462             :   }
   18463             : 
   18464         113 :   return SDValue();
   18465             : }
   18466             : 
   18467             : /// Determines the LogBase2 value for a non-zero input value using the
   18468             : /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V), built as SUB/CTLZ nodes in V's type.
   18469           0 : SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
   18470           0 :   EVT VT = V.getValueType();
   18471             :   unsigned EltBits = VT.getScalarSizeInBits();
   18472           0 :   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
   18473           0 :   SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
   18474           0 :   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
   18475           0 :   return LogBase2;
   18476             : }
   18477             : 
   18478             : /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
   18479             : /// For the reciprocal, we need to find the zero of the function:
   18480             : ///   F(X) = A X - 1 [which has a zero at X = 1/A]
   18481             : ///     =>
   18482             : ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
   18483             : ///     does not require additional intermediate precision]
   18484        1147 : SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
   18485        1147 :   if (Level >= AfterLegalizeDAG) // Not attempted once the DAG is legalized.
   18486         170 :     return SDValue();
   18487             : 
   18488             :   // TODO: Handle half and/or extended types?
   18489         977 :   EVT VT = Op.getValueType();
   18490         977 :   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
   18491          18 :     return SDValue();
   18492             : 
   18493             :   // If estimates are explicitly disabled for this function, we're done.
   18494         959 :   MachineFunction &MF = DAG.getMachineFunction();
   18495         959 :   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
   18496         959 :   if (Enabled == TLI.ReciprocalEstimate::Disabled)
   18497          74 :     return SDValue();
   18498             : 
   18499             :   // Estimates may be explicitly enabled for this type with a custom number of
   18500             :   // refinement steps.
   18501         885 :   int Iterations = TLI.getDivRefinementSteps(VT, MF);
   18502         885 :   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
   18503         640 :     AddToWorklist(Est.getNode());
   18504             : 
   18505         640 :     if (Iterations) {
   18506         276 :       EVT VT = Op.getValueType(); // NOTE(review): redundant; shadows outer VT.
   18507             :       SDLoc DL(Op);
   18508         276 :       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
   18509             : 
   18510             :       // Newton iterations: Est = Est + Est (1 - Arg * Est)
   18511         652 :       for (int i = 0; i < Iterations; ++i) {
   18512         376 :         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
   18513         376 :         AddToWorklist(NewEst.getNode());
   18514             : 
   18515         376 :         NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
   18516         376 :         AddToWorklist(NewEst.getNode());
   18517             : 
   18518         376 :         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
   18519         376 :         AddToWorklist(NewEst.getNode());
   18520             : 
   18521         376 :         Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
   18522         376 :         AddToWorklist(Est.getNode());
   18523             :       }
   18524             :     }
   18525         640 :     return Est;
   18526             :   }
   18527             : 
   18528         245 :   return SDValue(); // The target supplied no estimate.
   18529             : }
   18530             : 
   18531             : /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
   18532             : /// For the reciprocal sqrt, we need to find the zero of the function:
   18533             : ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
   18534             : ///     =>
   18535             : ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
   18536             : /// As a result, we precompute A/2 prior to the iteration loop. If Reciprocal is false, the result is multiplied by Arg.
   18537          21 : SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
   18538             :                                          unsigned Iterations,
   18539             :                                          SDNodeFlags Flags, bool Reciprocal) {
   18540          21 :   EVT VT = Arg.getValueType();
   18541             :   SDLoc DL(Arg);
   18542          21 :   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
   18543             : 
   18544             :   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
   18545             :   // this entire sequence requires only one FP constant.
   18546          21 :   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
   18547          21 :   AddToWorklist(HalfArg.getNode());
   18548             : 
   18549          21 :   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
   18550          21 :   AddToWorklist(HalfArg.getNode());
   18551             : 
   18552             :   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
   18553          52 :   for (unsigned i = 0; i < Iterations; ++i) {
   18554          31 :     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
   18555          31 :     AddToWorklist(NewEst.getNode());
   18556             : 
   18557          31 :     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
   18558          31 :     AddToWorklist(NewEst.getNode());
   18559             : 
   18560          31 :     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
   18561          31 :     AddToWorklist(NewEst.getNode());
   18562             : 
   18563          31 :     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
   18564          31 :     AddToWorklist(Est.getNode());
   18565             :   }
   18566             : 
   18567             :   // If non-reciprocal square root is requested, multiply the result by Arg.
   18568          21 :   if (!Reciprocal) {
   18569           6 :     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
   18570           6 :     AddToWorklist(Est.getNode());
   18571             :   }
   18572             : 
   18573          21 :   return Est;
   18574             : }
   18575             : 
   18576             : /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
   18577             : /// For the reciprocal sqrt, we need to find the zero of the function:
   18578             : ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
   18579             : ///     =>
   18580             : ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
   18581          54 : SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
   18582             :                                          unsigned Iterations,
   18583             :                                          SDNodeFlags Flags, bool Reciprocal) {
   18584          54 :   EVT VT = Arg.getValueType();
   18585             :   SDLoc DL(Arg);
   18586          54 :   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
   18587          54 :   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
   18588             : 
   18589             :   // This routine must enter the loop below to work correctly
   18590             :   // when (Reciprocal == false).
   18591             :   assert(Iterations > 0);
   18592             : 
   18593             :   // Newton iterations for reciprocal square root:
   18594             :   // E = (E * -0.5) * ((A * E) * E + -3.0)
   18595         110 :   for (unsigned i = 0; i < Iterations; ++i) {
   18596          56 :     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
   18597          56 :     AddToWorklist(AE.getNode());
   18598             : 
   18599          56 :     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
   18600          56 :     AddToWorklist(AEE.getNode());
   18601             : 
   18602          56 :     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
   18603          56 :     AddToWorklist(RHS.getNode());
   18604             : 
   18605             :     // When calculating a square root at the last iteration build:
   18606             :     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
   18607             :     // (notice a common subexpression)
   18608          56 :     SDValue LHS;
   18609          56 :     if (Reciprocal || (i + 1) < Iterations) {
   18610             :       // RSQRT: LHS = (E * -0.5)
   18611          26 :       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
   18612             :     } else {
   18613             :       // SQRT: LHS = (A * E) * -0.5
   18614          30 :       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
   18615             :     }
   18616          56 :     AddToWorklist(LHS.getNode());
   18617             : 
   18618          56 :     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
   18619          56 :     AddToWorklist(Est.getNode());
   18620             :   }
   18621             : 
   18622          54 :   return Est;
   18623             : }
   18624             : 
   18625             : /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
   18626             : /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
   18627             : /// Op can be zero.
   18628         368 : SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
   18629             :                                            bool Reciprocal) {
   18630         368 :   if (Level >= AfterLegalizeDAG)
   18631         111 :     return SDValue();
   18632             : 
   18633             :   // TODO: Handle half and/or extended types?
   18634         257 :   EVT VT = Op.getValueType();
   18635         257 :   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
   18636           6 :     return SDValue();
   18637             : 
   18638             :   // If estimates are explicitly disabled for this function, we're done.
   18639         251 :   MachineFunction &MF = DAG.getMachineFunction();
   18640         251 :   int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
   18641         251 :   if (Enabled == TLI.ReciprocalEstimate::Disabled)
   18642          43 :     return SDValue();
   18643             : 
   18644             :   // Estimates may be explicitly enabled for this type with a custom number of
   18645             :   // refinement steps.
   18646         208 :   int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
   18647             : 
   18648         208 :   bool UseOneConstNR = false;
   18649         208 :   if (SDValue Est =
   18650         208 :       TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
   18651         208 :                           Reciprocal)) {
   18652         115 :     AddToWorklist(Est.getNode());
   18653             : 
   18654         115 :     if (Iterations) {
   18655          75 :       Est = UseOneConstNR
   18656          75 :             ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
   18657          54 :             : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
   18658             : 
   18659          75 :       if (!Reciprocal) {
   18660             :         // The estimate is now completely wrong if the input was exactly 0.0 or
   18661             :         // possibly a denormal. Force the answer to 0.0 for those cases.
   18662          36 :         EVT VT = Op.getValueType();
   18663             :         SDLoc DL(Op);
   18664          36 :         EVT CCVT = getSetCCResultType(VT);
   18665          36 :         ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
   18666          36 :         const Function &F = DAG.getMachineFunction().getFunction();
   18667          36 :         Attribute Denorms = F.getFnAttribute("denormal-fp-math");
   18668          66 :         if (Denorms.getValueAsString().equals("ieee")) {
   18669             :           // fabs(X) < SmallestNormal ? 0.0 : Est
   18670           6 :           const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
   18671             :           APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
   18672           6 :           SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
   18673           6 :           SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
   18674          12 :           SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
   18675           6 :           SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
   18676          12 :           Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
   18677           6 :           AddToWorklist(Fabs.getNode());
   18678           6 :           AddToWorklist(IsDenorm.getNode());
   18679           6 :           AddToWorklist(Est.getNode());
   18680             :         } else {
   18681             :           // X == 0.0 ? 0.0 : Est
   18682          30 :           SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
   18683          30 :           SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
   18684          60 :           Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
   18685          30 :           AddToWorklist(IsZero.getNode());
   18686          30 :           AddToWorklist(Est.getNode());
   18687             :         }
   18688             :       }
   18689             :     }
   18690         115 :     return Est;
   18691             :   }
   18692             : 
   18693          93 :   return SDValue();
   18694             : }
   18695             : 
   18696             : SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
   18697         130 :   return buildSqrtEstimateImpl(Op, Flags, true);
   18698             : }
   18699             : 
   18700             : SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
   18701         238 :   return buildSqrtEstimateImpl(Op, Flags, false);
   18702             : }
   18703             : 
   18704             : /// Return true if there is any possibility that the two addresses overlap.
   18705           0 : bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
   18706             :   // If they are the same then they must be aliases.
   18707           0 :   if (Op0->getBasePtr() == Op1->getBasePtr()) return true;
   18708             : 
   18709             :   // If they are both volatile then they cannot be reordered.
   18710           0 :   if (Op0->isVolatile() && Op1->isVolatile()) return true;
   18711             : 
   18712             :   // If one operation reads from invariant memory, and the other may store, they
   18713             :   // cannot alias. These should really be checking the equivalent of mayWrite,
   18714             :   // but it only matters for memory nodes other than load /store.
   18715           0 :   if (Op0->isInvariant() && Op1->writeMem())
   18716           0 :     return false;
   18717             : 
   18718           0 :   if (Op1->isInvariant() && Op0->writeMem())
   18719           0 :     return false;
   18720             : 
   18721           0 :   unsigned NumBytes0 = Op0->getMemoryVT().getStoreSize();
   18722           0 :   unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize();
   18723             : 
   18724             :   // Check for BaseIndexOffset matching.
   18725           0 :   BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0, DAG);
   18726           0 :   BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1, DAG);
   18727             :   int64_t PtrDiff;
   18728           0 :   if (BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()) {
   18729           0 :     if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
   18730           0 :       return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
   18731             : 
   18732             :     // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
   18733             :     // able to calculate their relative offset if at least one arises
   18734             :     // from an alloca. However, these allocas cannot overlap and we
   18735             :     // can infer there is no alias.
   18736           0 :     if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
   18737           0 :       if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
   18738           0 :         MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
   18739             :         // If the base are the same frame index but the we couldn't find a
   18740             :         // constant offset, (indices are different) be conservative.
   18741           0 :         if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
   18742           0 :                        !MFI.isFixedObjectIndex(B->getIndex())))
   18743           0 :           return false;
   18744             :       }
   18745             : 
   18746           0 :     bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
   18747           0 :     bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
   18748           0 :     bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
   18749           0 :     bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
   18750           0 :     bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
   18751           0 :     bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
   18752             : 
   18753             :     // If of mismatched base types or checkable indices we can check
   18754             :     // they do not alias.
   18755           0 :     if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
   18756           0 :          (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
   18757           0 :         (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1))
   18758           0 :       return false;
   18759             :   }
   18760             : 
   18761             :   // If we know required SrcValue1 and SrcValue2 have relatively large
   18762             :   // alignment compared to the size and offset of the access, we may be able
   18763             :   // to prove they do not alias. This check is conservative for now to catch
   18764             :   // cases created by splitting vector types.
   18765           0 :   int64_t SrcValOffset0 = Op0->getSrcValueOffset();
   18766           0 :   int64_t SrcValOffset1 = Op1->getSrcValueOffset();
   18767             :   unsigned OrigAlignment0 = Op0->getOriginalAlignment();
   18768             :   unsigned OrigAlignment1 = Op1->getOriginalAlignment();
   18769           0 :   if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
   18770           0 :       NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) {
   18771           0 :     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
   18772           0 :     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
   18773             : 
   18774             :     // There is no overlap between these relatively aligned accesses of
   18775             :     // similar size. Return no alias.
   18776           0 :     if ((OffAlign0 + NumBytes0) <= OffAlign1 ||
   18777           0 :         (OffAlign1 + NumBytes1) <= OffAlign0)
   18778           0 :       return false;
   18779             :   }
   18780             : 
   18781           0 :   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
   18782           0 :                    ? CombinerGlobalAA
   18783           0 :                    : DAG.getSubtarget().useAA();
   18784             : #ifndef NDEBUG
   18785             :   if (CombinerAAOnlyFunc.getNumOccurrences() &&
   18786             :       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
   18787             :     UseAA = false;
   18788             : #endif
   18789             : 
   18790           0 :   if (UseAA && AA &&
   18791           0 :       Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
   18792             :     // Use alias analysis information.
   18793           0 :     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
   18794           0 :     int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset;
   18795           0 :     int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset;
   18796             :     AliasResult AAResult =
   18797           0 :         AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0,
   18798           0 :                                  UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
   18799           0 :                   MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1,
   18800           0 :                                  UseTBAA ? Op1->getAAInfo() : AAMDNodes()) );
   18801           0 :     if (AAResult == NoAlias)
   18802           0 :       return false;
   18803             :   }
   18804             : 
   18805             :   // Otherwise we have to assume they alias.
   18806             :   return true;
   18807             : }
   18808             : 
   18809             : /// Walk up chain skipping non-aliasing memory nodes,
   18810             : /// looking for aliasing nodes and adding them to the Aliases vector.
   18811     6560055 : void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
   18812             :                                    SmallVectorImpl<SDValue> &Aliases) {
   18813             :   SmallVector<SDValue, 8> Chains;     // List of chains to visit.
   18814             :   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
   18815             : 
   18816             :   // Get alias information for node.
   18817     6560055 :   bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();
   18818             : 
   18819             :   // Starting off.
   18820     6560055 :   Chains.push_back(OriginalChain);
   18821             :   unsigned Depth = 0;
   18822             : 
   18823             :   // Look at each chain and determine if it is an alias.  If so, add it to the
   18824             :   // aliases list.  If not, then continue up the chain looking for the next
   18825             :   // candidate.
   18826    21458263 :   while (!Chains.empty()) {
   18827    15085585 :     SDValue Chain = Chains.pop_back_val();
   18828             : 
   18829             :     // For TokenFactor nodes, look at each operand and only continue up the
   18830             :     // chain until we reach the depth limit.
   18831             :     //
   18832             :     // FIXME: The depth check could be made to return the last non-aliasing
   18833             :     // chain we found before we hit a tokenfactor rather than the original
   18834             :     // chain.
   18835    15085585 :     if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
   18836             :       Aliases.clear();
   18837      187377 :       Aliases.push_back(OriginalChain);
   18838      187377 :       return;
   18839             :     }
   18840             : 
   18841             :     // Don't bother if we've been before.
   18842    14898208 :     if (!Visited.insert(Chain.getNode()).second)
   18843      387763 :       continue;
   18844             : 
   18845    29020890 :     switch (Chain.getOpcode()) {
   18846             :     case ISD::EntryToken:
   18847             :       // Entry token is ideal chain operand, but handled in FindBetterChain.
   18848             :       break;
   18849             : 
   18850     8801760 :     case ISD::LOAD:
   18851             :     case ISD::STORE: {
   18852             :       // Get alias information for Chain.
   18853     8801760 :       bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
   18854     4290841 :           !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();
   18855             : 
   18856             :       // If chain is alias then stop here.
   18857    17216372 :       if (!(IsLoad && IsOpLoad) &&
   18858     8414612 :           isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
   18859     4764900 :         Aliases.push_back(Chain);
   18860             :       } else {
   18861             :         // Look further up the chain.
   18862     8073720 :         Chains.push_back(Chain.getOperand(0));
   18863     4036860 :         ++Depth;
   18864             :       }
   18865             :       break;
   18866             :     }
   18867             : 
   18868     2278834 :     case ISD::TokenFactor:
   18869             :       // We have to check each of the operands of the token factor for "small"
   18870             :       // token factors, so we queue them up.  Adding the operands to the queue
   18871             :       // (stack) in reverse order maintains the original order and increases the
   18872             :       // likelihood that getNode will find a matching token factor (CSE.)
   18873     2278834 :       if (Chain.getNumOperands() > 16) {
   18874       61863 :         Aliases.push_back(Chain);
   18875       61863 :         break;
   18876             :       }
   18877     8834928 :       for (unsigned n = Chain.getNumOperands(); n;)
   18878    15452885 :         Chains.push_back(Chain.getOperand(--n));
   18879     2216971 :       ++Depth;
   18880     2216971 :       break;
   18881             : 
   18882       45016 :     case ISD::CopyFromReg:
   18883             :       // Forward past CopyFromReg.
   18884       45016 :       Chains.push_back(Chain.getOperand(0));
   18885       45016 :       ++Depth;
   18886       45016 :       break;
   18887             : 
   18888     1113133 :     default:
   18889             :       // For all other instructions we will just have to take what we can get.
   18890     1113133 :       Aliases.push_back(Chain);
   18891     1113133 :       break;
   18892             :     }
   18893             :   }
   18894             : }
   18895             : 
   18896             : /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
   18897             : /// (aliasing node.)
   18898    10404506 : SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
   18899    10404506 :   if (OptLevel == CodeGenOpt::None)
   18900     3844451 :     return OldChain;
   18901             : 
   18902             :   // Ops for replacing token factor.
   18903             :   SmallVector<SDValue, 8> Aliases;
   18904             : 
   18905             :   // Accumulate all the aliases to this node.
   18906     6560055 :   GatherAllAliases(N, OldChain, Aliases);
   18907             : 
   18908             :   // If no operands then chain to entry token.
   18909    13120110 :   if (Aliases.size() == 0)
   18910     2011188 :     return DAG.getEntryNode();
   18911             : 
   18912             :   // If a single operand then chain to it.  We don't need to revisit it.
   18913     4548867 :   if (Aliases.size() == 1)
   18914     3812138 :     return Aliases[0];
   18915             : 
   18916             :   // Construct a custom tailored token factor.
   18917     2047551 :   return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
   18918             : }
   18919             : 
   18920             : // This function tries to collect a bunch of potentially interesting
   18921             : // nodes to improve the chains of, all at once. This might seem
   18922             : // redundant, as this function gets called when visiting every store
   18923             : // node, so why not let the work be done on each store as it's visited?
   18924             : //
   18925             : // I believe this is mainly important because MergeConsecutiveStores
   18926             : // is unable to deal with merging stores of different sizes, so unless
   18927             : // we improve the chains of all the potential candidates up-front
   18928             : // before running MergeConsecutiveStores, it might only see some of
   18929             : // the nodes that will eventually be candidates, and then not be able
   18930             : // to go from a partially-merged state to the desired final
   18931             : // fully-merged state.
   18932     7832510 : bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
   18933     7832510 :   if (OptLevel == CodeGenOpt::None)
   18934             :     return false;
   18935             : 
   18936             :   // This holds the base pointer, index, and the offset in bytes from the base
   18937             :   // pointer.
   18938     3619287 :   BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
   18939             : 
   18940             :   // We must have a base and an offset.
   18941     3619287 :   if (!BasePtr.getBase().getNode())
   18942             :     return false;
   18943             : 
   18944             :   // Do not handle stores to undef base pointers.
   18945     3619287 :   if (BasePtr.getBase().isUndef())
   18946             :     return false;
   18947             : 
   18948             :   SmallVector<StoreSDNode *, 8> ChainedStores;
   18949     3607780 :   ChainedStores.push_back(St);
   18950             : 
   18951             :   // Walk up the chain and look for nodes with offsets from the same
   18952             :   // base pointer. Stop when reaching an instruction with a different kind
   18953             :   // or instruction which has a different base pointer.
   18954     3607780 :   StoreSDNode *Index = St;
   18955     7226774 :   while (Index) {
   18956             :     // If the chain has more than one use, then we can't reorder the mem ops.
   18957     4055512 :     if (Index != St && !SDValue(Index, 0)->hasOneUse())
   18958             :       break;
   18959             : 
   18960     3897033 :     if (Index->isVolatile() || Index->isIndexed())
   18961             :       break;
   18962             : 
   18963             :     // Find the base pointer and offset for this memory node.
   18964     3860421 :     BaseIndexOffset Ptr = BaseIndexOffset::match(Index, DAG);
   18965             : 
   18966             :     // Check that the base pointer is the same as the original one.
   18967     3860421 :     if (!BasePtr.equalBaseIndex(Ptr, DAG))
   18968             :       break;
   18969             : 
   18970             :     // Walk up the chain to find the next store node, ignoring any
   18971             :     // intermediate loads. Any other kind of node will halt the loop.
   18972     3622697 :     SDNode *NextInChain = Index->getChain().getNode();
   18973             :     while (true) {
   18974     5523810 :       if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
   18975             :         // We found a store node. Use it for the next iteration.
   18976      451435 :         if (STn->isVolatile() || STn->isIndexed()) {
   18977             :           Index = nullptr;
   18978     3622697 :           break;
   18979             :         }
   18980      447732 :         ChainedStores.push_back(STn);
   18981      447732 :         Index = STn;
   18982      447732 :         break;
   18983             :       } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
   18984     1901113 :         NextInChain = Ldn->getChain().getNode();
   18985     1901113 :         continue;
   18986             :       } else {
   18987             :         Index = nullptr;
   18988             :         break;
   18989             :       }
   18990             :     }// end while
   18991             :   }
   18992             : 
   18993             :   // At this point, ChainedStores lists all of the Store nodes
   18994             :   // reachable by iterating up through chain nodes matching the above
   18995             :   // conditions.  For each such store identified, try to find an
   18996             :   // earlier chain to attach the store to which won't violate the
   18997             :   // required ordering.
   18998             :   bool MadeChangeToSt = false;
   18999             :   SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
   19000             : 
   19001     7663292 :   for (StoreSDNode *ChainedStore : ChainedStores) {
   19002     4055512 :     SDValue Chain = ChainedStore->getChain();
   19003     4055512 :     SDValue BetterChain = FindBetterChain(ChainedStore, Chain);
   19004             : 
   19005             :     if (Chain != BetterChain) {
   19006      420021 :       if (ChainedStore == St)
   19007             :         MadeChangeToSt = true;
   19008      420021 :       BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
   19009             :     }
   19010             :   }
   19011             : 
   19012             :   // Do all replacements after finding the replacements to make to avoid making
   19013             :   // the chains more complicated by introducing new TokenFactors.
   19014     4027801 :   for (auto Replacement : BetterChains)
   19015      420021 :     replaceStoreChain(Replacement.first, Replacement.second);
   19016             : 
   19017             :   return MadeChangeToSt;
   19018             : }
   19019             : 
   19020             : /// This is the entry point for the file.
   19021     2767992 : void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
   19022             :                            CodeGenOpt::Level OptLevel) {
   19023             :   /// This is the main entry point to this class.
   19024     2767992 :   DAGCombiner(*this, AA, OptLevel).Run(Level);
   19025     2767992 : }

Generated by: LCOV version 1.13