// Doxygen-extracted copy of llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp,
// LLVM 8.0.0svn.  NOTE(review): the extraction elided a number of original
// source lines, so several declarations below are incomplete.
1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
11 // both before and after the DAG is legalized.
12 //
13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
14 // primarily intended to handle simplification opportunities that are implicit
15 // in the LLVM IR and exposed by the various codegen lowering phases.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/None.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SetVector.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/ADT/SmallSet.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/ADT/Statistic.h"
48 #include "llvm/IR/Attributes.h"
49 #include "llvm/IR/Constant.h"
50 #include "llvm/IR/DataLayout.h"
51 #include "llvm/IR/DerivedTypes.h"
52 #include "llvm/IR/Function.h"
53 #include "llvm/IR/LLVMContext.h"
54 #include "llvm/IR/Metadata.h"
55 #include "llvm/Support/Casting.h"
56 #include "llvm/Support/CodeGen.h"
58 #include "llvm/Support/Compiler.h"
59 #include "llvm/Support/Debug.h"
61 #include "llvm/Support/KnownBits.h"
67 #include <algorithm>
68 #include <cassert>
69 #include <cstdint>
70 #include <functional>
71 #include <iterator>
72 #include <string>
73 #include <tuple>
74 #include <utility>
75 
76 using namespace llvm;
77 
78 #define DEBUG_TYPE "dagcombine"
79 
80 STATISTIC(NodesCombined , "Number of dag nodes combined");
81 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
82 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
83 STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
84 STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
85 STATISTIC(SlicedLoads, "Number of load sliced");
86 
87 static cl::opt<bool>
88 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
89  cl::desc("Enable DAG combiner's use of IR alias analysis"));
90 
91 static cl::opt<bool>
92 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
93  cl::desc("Enable DAG combiner's use of TBAA"));
94 
95 #ifndef NDEBUG
97 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
98  cl::desc("Only use DAG-combiner alias analysis in this"
99  " function"));
100 #endif
101 
102 /// Hidden option to stress test load slicing, i.e., when this option
103 /// is enabled, load slicing bypasses most of its profitability guards.
104 static cl::opt<bool>
105 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
106  cl::desc("Bypass the profitability model of load slicing"),
107  cl::init(false));
108 
109 static cl::opt<bool>
110  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
111  cl::desc("DAG combiner may split indexing from loads"));
112 
113 namespace {
114 
115  class DAGCombiner {
116  SelectionDAG &DAG;
117  const TargetLowering &TLI;
119  CodeGenOpt::Level OptLevel;
120  bool LegalOperations = false;
121  bool LegalTypes = false;
122  bool ForCodeSize;
123 
124  /// Worklist of all of the nodes that need to be simplified.
125  ///
126  /// This must behave as a stack -- new nodes to process are pushed onto the
127  /// back and when processing we pop off of the back.
128  ///
129  /// The worklist will not contain duplicates but may contain null entries
130  /// due to nodes being deleted from the underlying DAG.
131  SmallVector<SDNode *, 64> Worklist;
132 
133  /// Mapping from an SDNode to its position on the worklist.
134  ///
135  /// This is used to find and remove nodes from the worklist (by nulling
136  /// them) when they are deleted from the underlying DAG. It relies on
137  /// stable indices of nodes within the worklist.
138  DenseMap<SDNode *, unsigned> WorklistMap;
139 
140  /// Set of nodes which have been combined (at least once).
141  ///
142  /// This is used to allow us to reliably add any operands of a DAG node
143  /// which have not yet been combined to the worklist.
144  SmallPtrSet<SDNode *, 32> CombinedNodes;
145 
146  // AA - Used for DAG load/store alias analysis.
147  AliasAnalysis *AA;
148 
149  /// When an instruction is simplified, add all users of the instruction to
150  /// the work lists because they might get more simplified now.
151  void AddUsersToWorklist(SDNode *N) {
152  for (SDNode *Node : N->uses())
153  AddToWorklist(Node);
154  }
155 
156  /// Call the node-specific routine that folds each particular type of node.
157  SDValue visit(SDNode *N);
158 
159  public:
160  DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
161  : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
162  OptLevel(OL), AA(AA) {
163  ForCodeSize = DAG.getMachineFunction().getFunction().optForSize();
164 
165  MaximumLegalStoreInBits = 0;
166  for (MVT VT : MVT::all_valuetypes())
167  if (EVT(VT).isSimple() && VT != MVT::Other &&
168  TLI.isTypeLegal(EVT(VT)) &&
169  VT.getSizeInBits() >= MaximumLegalStoreInBits)
170  MaximumLegalStoreInBits = VT.getSizeInBits();
171  }
172 
173  /// Add to the worklist making sure its instance is at the back (next to be
174  /// processed.)
175  void AddToWorklist(SDNode *N) {
177  "Deleted Node added to Worklist");
178 
179  // Skip handle nodes as they can't usefully be combined and confuse the
180  // zero-use deletion strategy.
181  if (N->getOpcode() == ISD::HANDLENODE)
182  return;
183 
184  if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
185  Worklist.push_back(N);
186  }
187 
188  /// Remove all instances of N from the worklist.
189  void removeFromWorklist(SDNode *N) {
190  CombinedNodes.erase(N);
191 
192  auto It = WorklistMap.find(N);
193  if (It == WorklistMap.end())
194  return; // Not in the worklist.
195 
196  // Null out the entry rather than erasing it to avoid a linear operation.
197  Worklist[It->second] = nullptr;
198  WorklistMap.erase(It);
199  }
200 
201  void deleteAndRecombine(SDNode *N);
202  bool recursivelyDeleteUnusedNodes(SDNode *N);
203 
204  /// Replaces all uses of the results of one DAG node with new values.
205  SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
206  bool AddTo = true);
207 
208  /// Replaces all uses of the results of one DAG node with new values.
209  SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
210  return CombineTo(N, &Res, 1, AddTo);
211  }
212 
213  /// Replaces all uses of the results of one DAG node with new values.
214  SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
215  bool AddTo = true) {
216  SDValue To[] = { Res0, Res1 };
217  return CombineTo(N, To, 2, AddTo);
218  }
219 
220  void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
221 
222  private:
223  unsigned MaximumLegalStoreInBits;
224 
225  /// Check the specified integer node value to see if it can be simplified or
226  /// if things it uses can be simplified by bit propagation.
227  /// If so, return true.
228  bool SimplifyDemandedBits(SDValue Op) {
229  unsigned BitWidth = Op.getScalarValueSizeInBits();
230  APInt Demanded = APInt::getAllOnesValue(BitWidth);
231  return SimplifyDemandedBits(Op, Demanded);
232  }
233 
234  /// Check the specified vector node value to see if it can be simplified or
235  /// if things it uses can be simplified as it only uses some of the
236  /// elements. If so, return true.
237  bool SimplifyDemandedVectorElts(SDValue Op) {
238  unsigned NumElts = Op.getValueType().getVectorNumElements();
239  APInt Demanded = APInt::getAllOnesValue(NumElts);
240  return SimplifyDemandedVectorElts(Op, Demanded);
241  }
242 
243  bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
244  bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
245  bool AssumeSingleUse = false);
246 
247  bool CombineToPreIndexedLoadStore(SDNode *N);
248  bool CombineToPostIndexedLoadStore(SDNode *N);
249  SDValue SplitIndexingFromLoad(LoadSDNode *LD);
250  bool SliceUpLoad(SDNode *N);
251 
252  /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
253  /// load.
254  ///
255  /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
256  /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
257  /// \param EltNo index of the vector element to load.
258  /// \param OriginalLoad load that EVE came from to be replaced.
259  /// \returns EVE on success SDValue() on failure.
260  SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
261  SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
262  void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
263  SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
264  SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
265  SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
266  SDValue PromoteIntBinOp(SDValue Op);
267  SDValue PromoteIntShiftOp(SDValue Op);
268  SDValue PromoteExtend(SDValue Op);
269  bool PromoteLoad(SDValue Op);
270 
271  /// Call the node-specific routine that knows how to fold each
272  /// particular type of node. If that doesn't do anything, try the
273  /// target-specific DAG combines.
274  SDValue combine(SDNode *N);
275 
276  // Visitation implementation - Implement dag node combining for different
277  // node types. The semantics are as follows:
278  // Return Value:
279  // SDValue.getNode() == 0 - No change was made
280  // SDValue.getNode() == N - N was replaced, is dead and has been handled.
281  // otherwise - N should be replaced by the returned Operand.
282  //
283  SDValue visitTokenFactor(SDNode *N);
284  SDValue visitMERGE_VALUES(SDNode *N);
285  SDValue visitADD(SDNode *N);
286  SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
287  SDValue visitSUB(SDNode *N);
288  SDValue visitADDC(SDNode *N);
289  SDValue visitUADDO(SDNode *N);
290  SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
291  SDValue visitSUBC(SDNode *N);
292  SDValue visitUSUBO(SDNode *N);
293  SDValue visitADDE(SDNode *N);
294  SDValue visitADDCARRY(SDNode *N);
295  SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
296  SDValue visitSUBE(SDNode *N);
297  SDValue visitSUBCARRY(SDNode *N);
298  SDValue visitMUL(SDNode *N);
299  SDValue useDivRem(SDNode *N);
300  SDValue visitSDIV(SDNode *N);
301  SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
302  SDValue visitUDIV(SDNode *N);
303  SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
304  SDValue visitREM(SDNode *N);
305  SDValue visitMULHU(SDNode *N);
306  SDValue visitMULHS(SDNode *N);
307  SDValue visitSMUL_LOHI(SDNode *N);
308  SDValue visitUMUL_LOHI(SDNode *N);
309  SDValue visitSMULO(SDNode *N);
310  SDValue visitUMULO(SDNode *N);
311  SDValue visitIMINMAX(SDNode *N);
312  SDValue visitAND(SDNode *N);
313  SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
314  SDValue visitOR(SDNode *N);
315  SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
316  SDValue visitXOR(SDNode *N);
317  SDValue SimplifyVBinOp(SDNode *N);
318  SDValue visitSHL(SDNode *N);
319  SDValue visitSRA(SDNode *N);
320  SDValue visitSRL(SDNode *N);
321  SDValue visitRotate(SDNode *N);
322  SDValue visitABS(SDNode *N);
323  SDValue visitBSWAP(SDNode *N);
324  SDValue visitBITREVERSE(SDNode *N);
325  SDValue visitCTLZ(SDNode *N);
326  SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
327  SDValue visitCTTZ(SDNode *N);
328  SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
329  SDValue visitCTPOP(SDNode *N);
330  SDValue visitSELECT(SDNode *N);
331  SDValue visitVSELECT(SDNode *N);
332  SDValue visitSELECT_CC(SDNode *N);
333  SDValue visitSETCC(SDNode *N);
334  SDValue visitSETCCCARRY(SDNode *N);
335  SDValue visitSIGN_EXTEND(SDNode *N);
336  SDValue visitZERO_EXTEND(SDNode *N);
337  SDValue visitANY_EXTEND(SDNode *N);
338  SDValue visitAssertExt(SDNode *N);
339  SDValue visitSIGN_EXTEND_INREG(SDNode *N);
340  SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
341  SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
342  SDValue visitTRUNCATE(SDNode *N);
343  SDValue visitBITCAST(SDNode *N);
344  SDValue visitBUILD_PAIR(SDNode *N);
345  SDValue visitFADD(SDNode *N);
346  SDValue visitFSUB(SDNode *N);
347  SDValue visitFMUL(SDNode *N);
348  SDValue visitFMA(SDNode *N);
349  SDValue visitFDIV(SDNode *N);
350  SDValue visitFREM(SDNode *N);
351  SDValue visitFSQRT(SDNode *N);
352  SDValue visitFCOPYSIGN(SDNode *N);
353  SDValue visitSINT_TO_FP(SDNode *N);
354  SDValue visitUINT_TO_FP(SDNode *N);
355  SDValue visitFP_TO_SINT(SDNode *N);
356  SDValue visitFP_TO_UINT(SDNode *N);
357  SDValue visitFP_ROUND(SDNode *N);
358  SDValue visitFP_ROUND_INREG(SDNode *N);
359  SDValue visitFP_EXTEND(SDNode *N);
360  SDValue visitFNEG(SDNode *N);
361  SDValue visitFABS(SDNode *N);
362  SDValue visitFCEIL(SDNode *N);
363  SDValue visitFTRUNC(SDNode *N);
364  SDValue visitFFLOOR(SDNode *N);
365  SDValue visitFMINNUM(SDNode *N);
366  SDValue visitFMAXNUM(SDNode *N);
367  SDValue visitBRCOND(SDNode *N);
368  SDValue visitBR_CC(SDNode *N);
369  SDValue visitLOAD(SDNode *N);
370 
371  SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
372  SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
373 
374  SDValue visitSTORE(SDNode *N);
375  SDValue visitINSERT_VECTOR_ELT(SDNode *N);
376  SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
377  SDValue visitBUILD_VECTOR(SDNode *N);
378  SDValue visitCONCAT_VECTORS(SDNode *N);
379  SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
380  SDValue visitVECTOR_SHUFFLE(SDNode *N);
381  SDValue visitSCALAR_TO_VECTOR(SDNode *N);
382  SDValue visitINSERT_SUBVECTOR(SDNode *N);
383  SDValue visitMLOAD(SDNode *N);
384  SDValue visitMSTORE(SDNode *N);
385  SDValue visitMGATHER(SDNode *N);
386  SDValue visitMSCATTER(SDNode *N);
387  SDValue visitFP_TO_FP16(SDNode *N);
388  SDValue visitFP16_TO_FP(SDNode *N);
389 
390  SDValue visitFADDForFMACombine(SDNode *N);
391  SDValue visitFSUBForFMACombine(SDNode *N);
392  SDValue visitFMULForFMADistributiveCombine(SDNode *N);
393 
394  SDValue XformToShuffleWithZero(SDNode *N);
395  SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
396  SDValue N1, SDNodeFlags Flags);
397 
398  SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
399 
400  SDValue foldSelectOfConstants(SDNode *N);
401  SDValue foldVSelectOfConstants(SDNode *N);
402  SDValue foldBinOpIntoSelect(SDNode *BO);
403  bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
404  SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
405  SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
406  SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
407  SDValue N2, SDValue N3, ISD::CondCode CC,
408  bool NotExtCompare = false);
409  SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
410  SDValue N2, SDValue N3, ISD::CondCode CC);
411  SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
412  const SDLoc &DL);
413  SDValue unfoldMaskedMerge(SDNode *N);
414  SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
415  SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
416  const SDLoc &DL, bool foldBooleans);
417  SDValue rebuildSetCC(SDValue N);
418 
419  bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
420  SDValue &CC) const;
421  bool isOneUseSetCC(SDValue N) const;
422 
423  SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
424  unsigned HiOp);
425  SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
426  SDValue CombineExtLoad(SDNode *N);
427  SDValue CombineZExtLogicopShiftLoad(SDNode *N);
428  SDValue combineRepeatedFPDivisors(SDNode *N);
429  SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
430  SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
431  SDValue BuildSDIV(SDNode *N);
432  SDValue BuildSDIVPow2(SDNode *N);
433  SDValue BuildUDIV(SDNode *N);
434  SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
435  SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
436  SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
437  SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
438  SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
439  SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
440  SDNodeFlags Flags, bool Reciprocal);
441  SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
442  SDNodeFlags Flags, bool Reciprocal);
443  SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
444  bool DemandHighBits = true);
445  SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
446  SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
447  SDValue InnerPos, SDValue InnerNeg,
448  unsigned PosOpcode, unsigned NegOpcode,
449  const SDLoc &DL);
450  SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
451  SDValue MatchLoadCombine(SDNode *N);
452  SDValue ReduceLoadWidth(SDNode *N);
453  SDValue ReduceLoadOpStoreWidth(SDNode *N);
455  SDValue TransformFPLoadStorePair(SDNode *N);
456  SDValue convertBuildVecZextToZext(SDNode *N);
457  SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
458  SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
459  SDValue reduceBuildVecToShuffle(SDNode *N);
460  SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
461  ArrayRef<int> VectorMask, SDValue VecIn1,
462  SDValue VecIn2, unsigned LeftIdx);
463  SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
464 
465  /// Walk up chain skipping non-aliasing memory nodes,
466  /// looking for aliasing nodes and adding them to the Aliases vector.
467  void GatherAllAliases(SDNode *N, SDValue OriginalChain,
468  SmallVectorImpl<SDValue> &Aliases);
469 
470  /// Return true if there is any possibility that the two addresses overlap.
471  bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
472 
473  /// Walk up chain skipping non-aliasing memory nodes, looking for a better
474  /// chain (aliasing node.)
475  SDValue FindBetterChain(SDNode *N, SDValue Chain);
476 
477  /// Try to replace a store and any possibly adjacent stores on
478  /// consecutive chains with better chains. Return true only if St is
479  /// replaced.
480  ///
481  /// Notice that other chains may still be replaced even if the function
482  /// returns false.
483  bool findBetterNeighborChains(StoreSDNode *St);
484 
485  /// Holds a pointer to an LSBaseSDNode as well as information on where it
486  /// is located in a sequence of memory operations connected by a chain.
487  struct MemOpLink {
488  // Ptr to the mem node.
489  LSBaseSDNode *MemNode;
490 
491  // Offset from the base ptr.
492  int64_t OffsetFromBase;
493 
494  MemOpLink(LSBaseSDNode *N, int64_t Offset)
495  : MemNode(N), OffsetFromBase(Offset) {}
496  };
497 
498  /// This is a helper function for visitMUL to check the profitability
499  /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
500  /// MulNode is the original multiply, AddNode is (add x, c1),
501  /// and ConstNode is c2.
502  bool isMulAddWithConstProfitable(SDNode *MulNode,
503  SDValue &AddNode,
504  SDValue &ConstNode);
505 
506  /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
507  /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
508  /// the type of the loaded value to be extended.
509  bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
510  EVT LoadResultTy, EVT &ExtVT);
511 
512  /// Helper function to calculate whether the given Load/Store can have its
513  /// width reduced to ExtVT.
514  bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
515  EVT &MemVT, unsigned ShAmt = 0);
516 
517  /// Used by BackwardsPropagateMask to find suitable loads.
518  bool SearchForAndLoads(SDNode *N, SmallPtrSetImpl<LoadSDNode*> &Loads,
519  SmallPtrSetImpl<SDNode*> &NodesWithConsts,
520  ConstantSDNode *Mask, SDNode *&NodeToMask);
521  /// Attempt to propagate a given AND node back to load leaves so that they
522  /// can be combined into narrow loads.
523  bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
524 
525  /// Helper function for MergeConsecutiveStores which merges the
526  /// component store chains.
527  SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
528  unsigned NumStores);
529 
530  /// This is a helper function for MergeConsecutiveStores. When the
531  /// source elements of the consecutive stores are all constants or
532  /// all extracted vector elements, try to merge them into one
533  /// larger store introducing bitcasts if necessary. \return True
534  /// if a merged store was created.
535  bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
536  EVT MemVT, unsigned NumStores,
537  bool IsConstantSrc, bool UseVector,
538  bool UseTrunc);
539 
540  /// This is a helper function for MergeConsecutiveStores. Stores
541  /// that potentially may be merged with St are placed in
542  /// StoreNodes. RootNode is a chain predecessor to all store
543  /// candidates.
544  void getStoreMergeCandidates(StoreSDNode *St,
545  SmallVectorImpl<MemOpLink> &StoreNodes,
546  SDNode *&Root);
547 
548  /// Helper function for MergeConsecutiveStores. Checks if
549  /// candidate stores have indirect dependency through their
550  /// operands. RootNode is the predecessor to all stores calculated
551  /// by getStoreMergeCandidates and is used to prune the dependency check.
552  /// \return True if safe to merge.
553  bool checkMergeStoreCandidatesForDependencies(
554  SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
555  SDNode *RootNode);
556 
557  /// Merge consecutive store operations into a wide store.
558  /// This optimization uses wide integers or vectors when possible.
559  /// \return number of stores that were merged into a merged store (the
560  /// affected nodes are stored as a prefix in \p StoreNodes).
561  bool MergeConsecutiveStores(StoreSDNode *St);
562 
563  /// Try to transform a truncation where C is a constant:
564  /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
565  ///
566  /// \p N needs to be a truncation and its first operand an AND. Other
567  /// requirements are checked by the function (e.g. that trunc is
568  /// single-use) and if missed an empty SDValue is returned.
569  SDValue distributeTruncateThroughAnd(SDNode *N);
570 
571  /// Helper function to determine whether the target supports operation
572  /// given by \p Opcode for type \p VT, that is, whether the operation
573  /// is legal or custom before legalizing operations, and whether is
574  /// legal (but not custom) after legalization.
575  bool hasOperation(unsigned Opcode, EVT VT) {
576  if (LegalOperations)
577  return TLI.isOperationLegal(Opcode, VT);
578  return TLI.isOperationLegalOrCustom(Opcode, VT);
579  }
580 
581  public:
582  /// Runs the dag combiner on all nodes in the work list
583  void Run(CombineLevel AtLevel);
584 
585  SelectionDAG &getDAG() const { return DAG; }
586 
587  /// Returns a type large enough to hold any valid shift amount - before type
588  /// legalization these can be huge.
589  EVT getShiftAmountTy(EVT LHSTy) {
590  assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
591  return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
592  }
593 
594  /// This method returns true if we are running before type legalization or
595  /// if the specified VT is legal.
596  bool isTypeLegal(const EVT &VT) {
597  if (!LegalTypes) return true;
598  return TLI.isTypeLegal(VT);
599  }
600 
601  /// Convenience wrapper around TargetLowering::getSetCCResultType
602  EVT getSetCCResultType(EVT VT) const {
603  return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
604  }
605 
606  void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
607  SDValue OrigLoad, SDValue ExtLoad,
609  };
610 
611 /// This class is a DAGUpdateListener that removes any deleted
612 /// nodes from the worklist.
613 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
614  DAGCombiner &DC;
615 
616 public:
617  explicit WorklistRemover(DAGCombiner &dc)
618  : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
619 
620  void NodeDeleted(SDNode *N, SDNode *E) override {
621  DC.removeFromWorklist(N);
622  }
623 };
624 
625 } // end anonymous namespace
626 
627 //===----------------------------------------------------------------------===//
628 // TargetLowering::DAGCombinerInfo implementation
629 //===----------------------------------------------------------------------===//
630 
632  ((DAGCombiner*)DC)->AddToWorklist(N);
633 }
634 
636 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
637  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
638 }
639 
641 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
642  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
643 }
644 
646 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
647  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
648 }
649 
652  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
653 }
654 
655 //===----------------------------------------------------------------------===//
656 // Helper Functions
657 //===----------------------------------------------------------------------===//
658 
659 void DAGCombiner::deleteAndRecombine(SDNode *N) {
660  removeFromWorklist(N);
661 
662  // If the operands of this node are only used by the node, they will now be
663  // dead. Make sure to re-visit them and recursively delete dead nodes.
664  for (const SDValue &Op : N->ops())
665  // For an operand generating multiple values, one of the values may
666  // become dead allowing further simplification (e.g. split index
667  // arithmetic from an indexed load).
668  if (Op->hasOneUse() || Op->getNumValues() > 1)
669  AddToWorklist(Op.getNode());
670 
671  DAG.DeleteNode(N);
672 }
673 
674 /// Return 1 if we can compute the negated form of the specified expression for
675 /// the same cost as the expression itself, or 2 if we can compute the negated
676 /// form more cheaply than the expression itself.
677 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
678  const TargetLowering &TLI,
679  const TargetOptions *Options,
680  unsigned Depth = 0) {
681  // fneg is removable even if it has multiple uses.
682  if (Op.getOpcode() == ISD::FNEG) return 2;
683 
684  // Don't allow anything with multiple uses unless we know it is free.
685  EVT VT = Op.getValueType();
686  const SDNodeFlags Flags = Op->getFlags();
687  if (!Op.hasOneUse())
688  if (!(Op.getOpcode() == ISD::FP_EXTEND &&
689  TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
690  return 0;
691 
692  // Don't recurse exponentially.
693  if (Depth > 6) return 0;
694 
695  switch (Op.getOpcode()) {
696  default: return false;
697  case ISD::ConstantFP: {
698  if (!LegalOperations)
699  return 1;
700 
701  // Don't invert constant FP values after legalization unless the target says
702  // the negated constant is legal.
703  return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
704  TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
705  }
706  case ISD::FADD:
707  if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
708  return 0;
709 
710  // After operation legalization, it might not be legal to create new FSUBs.
711  if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
712  return 0;
713 
714  // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
715  if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
716  Options, Depth + 1))
717  return V;
718  // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
719  return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
720  Depth + 1);
721  case ISD::FSUB:
722  // We can't turn -(A-B) into B-A when we honor signed zeros.
723  if (!Options->NoSignedZerosFPMath &&
724  !Flags.hasNoSignedZeros())
725  return 0;
726 
727  // fold (fneg (fsub A, B)) -> (fsub B, A)
728  return 1;
729 
730  case ISD::FMUL:
731  case ISD::FDIV:
732  // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
733  if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
734  Options, Depth + 1))
735  return V;
736 
737  return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
738  Depth + 1);
739 
740  case ISD::FP_EXTEND:
741  case ISD::FP_ROUND:
742  case ISD::FSIN:
743  return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
744  Depth + 1);
745  }
746 }
747 
748 /// If isNegatibleForFree returns true, return the newly negated expression.
750  bool LegalOperations, unsigned Depth = 0) {
751  const TargetOptions &Options = DAG.getTarget().Options;
752  // fneg is removable even if it has multiple uses.
753  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
754 
755  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
756 
757  const SDNodeFlags Flags = Op.getNode()->getFlags();
758 
759  switch (Op.getOpcode()) {
760  default: llvm_unreachable("Unknown code");
761  case ISD::ConstantFP: {
762  APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
763  V.changeSign();
764  return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
765  }
766  case ISD::FADD:
767  assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());
768 
769  // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
770  if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
771  DAG.getTargetLoweringInfo(), &Options, Depth+1))
772  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
773  GetNegatedExpression(Op.getOperand(0), DAG,
774  LegalOperations, Depth+1),
775  Op.getOperand(1), Flags);
776  // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
777  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
778  GetNegatedExpression(Op.getOperand(1), DAG,
779  LegalOperations, Depth+1),
780  Op.getOperand(0), Flags);
781  case ISD::FSUB:
782  // fold (fneg (fsub 0, B)) -> B
783  if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
784  if (N0CFP->isZero())
785  return Op.getOperand(1);
786 
787  // fold (fneg (fsub A, B)) -> (fsub B, A)
788  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
789  Op.getOperand(1), Op.getOperand(0), Flags);
790 
791  case ISD::FMUL:
792  case ISD::FDIV:
793  // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
794  if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
795  DAG.getTargetLoweringInfo(), &Options, Depth+1))
796  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
797  GetNegatedExpression(Op.getOperand(0), DAG,
798  LegalOperations, Depth+1),
799  Op.getOperand(1), Flags);
800 
801  // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
802  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
803  Op.getOperand(0),
804  GetNegatedExpression(Op.getOperand(1), DAG,
805  LegalOperations, Depth+1), Flags);
806 
807  case ISD::FP_EXTEND:
808  case ISD::FSIN:
809  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
810  GetNegatedExpression(Op.getOperand(0), DAG,
811  LegalOperations, Depth+1));
812  case ISD::FP_ROUND:
813  return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
814  GetNegatedExpression(Op.getOperand(0), DAG,
815  LegalOperations, Depth+1),
816  Op.getOperand(1));
817  }
818 }
819 
820 // APInts must be the same size for most operations, this helper
821 // function zero extends the shorter of the pair so that they match.
822 // We provide an Offset so that we can create bitwidths that won't overflow.
823 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
824  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
825  LHS = LHS.zextOrSelf(Bits);
826  RHS = RHS.zextOrSelf(Bits);
827 }
828 
// Return true if this node is a setcc, or is a select_cc
// that selects between the target values used for true and false, making it
// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
// the appropriate nodes based on the type of node we are checking. This
// simplifies life a bit for the callers.
bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                                    SDValue &CC) const {
  // A plain SETCC trivially qualifies: (setcc LHS, RHS, CC).
  if (N.getOpcode() == ISD::SETCC) {
    LHS = N.getOperand(0);
    RHS = N.getOperand(1);
    CC = N.getOperand(2);
    return true;
  }

  // Otherwise only (select_cc LHS, RHS, True, False, CC) qualifies, and only
  // when the selected values are the target's canonical true/false constants.
  if (N.getOpcode() != ISD::SELECT_CC ||
      !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
      !TLI.isConstFalseVal(N.getOperand(3).getNode()))
    return false;

  // NOTE(review): the right-hand side of this comparison was lost in
  // extraction -- presumably TargetLowering::UndefinedBooleanContent, i.e.
  // bail out when the target leaves the boolean contents undefined. TODO
  // confirm against the upstream source.
  if (TLI.getBooleanContents(N.getValueType()) ==
    return false;

  LHS = N.getOperand(0);
  RHS = N.getOperand(1);
  // Operand 4 of SELECT_CC is the condition code.
  CC = N.getOperand(4);
  return true;
}
857 
858 /// Return true if this is a SetCC-equivalent operation with only one use.
859 /// If this is true, it allows the users to invert the operation for free when
860 /// it is profitable to do so.
861 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
862  SDValue N0, N1, N2;
863  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
864  return true;
865  return false;
866 }
867 
  // NOTE(review): the opening signature line of this helper was lost in
  // extraction (upstream: `static SDValue peekThroughBitcast(SDValue V) {`)
  // -- TODO confirm. The loop strips any chain of BITCAST nodes and returns
  // the first non-bitcast value.
  while (V.getOpcode() == ISD::BITCAST)
    V = V.getOperand(0);
  return V;
}
873 
// Returns the SDNode if it is a constant float BuildVector
// or constant float.
// NOTE(review): the signature line and the condition guarding the second
// return were lost in extraction (upstream the guard checks for a build
// vector of ConstantFP nodes) -- TODO restore from the upstream source.
  if (isa<ConstantFPSDNode>(N))
    return N.getNode();
    // Second return: reached only under the lost build-vector-of-FP check.
    return N.getNode();
  return nullptr;
}
883 
// Determines if it is a constant integer or a build vector of constant
// integers (and undefs).
// Do not permit build vector implicit truncation.
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
  // Scalar case: any constant qualifies unless it is opaque and opaques are
  // disallowed.
  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
    return !(Const->isOpaque() && NoOpaques);
  if (N.getOpcode() != ISD::BUILD_VECTOR)
    return false;
  // Every defined element must be a constant of exactly the element width
  // (rejecting implicit truncation) and must satisfy the opaque restriction.
  unsigned BitWidth = N.getScalarValueSizeInBits();
  for (const SDValue &Op : N->op_values()) {
    if (Op.isUndef())
      continue;
    // NOTE(review): the per-element declaration of `Const` was lost in
    // extraction (upstream: `ConstantSDNode *Const =
    // dyn_cast<ConstantSDNode>(Op);`) -- TODO restore.
    if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
        (Const->isOpaque() && NoOpaques))
      return false;
  }
  return true;
}
903 
// Determines if it is a constant null integer or a splatted vector of a
// constant null integer (with no undefs).
// Build vector implicit truncation is not an issue for null values.
// NOTE(review): the signature line was lost in extraction (upstream:
// `static bool isNullConstantOrNullSplatConstant(SDValue N) {`) -- TODO
// confirm.
  // TODO: may want to use peekThroughBitcast() here.
  if (ConstantSDNode *Splat = isConstOrConstSplat(N))
    return Splat->isNullValue();
  return false;
}
913 
// Determines if it is a constant integer of one or a splatted vector of a
// constant integer of one (with no undefs).
// Do not permit build vector implicit truncation.
// NOTE(review): the signature line was lost in extraction (upstream:
// `static bool isOneConstantOrOneSplatConstant(SDValue N) {`) -- TODO
// confirm.
  // TODO: may want to use peekThroughBitcast() here.
  unsigned BitWidth = N.getScalarValueSizeInBits();
  if (ConstantSDNode *Splat = isConstOrConstSplat(N))
    // The width check rejects implicitly-truncated splat constants.
    return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
  return false;
}
924 
// Determines if it is a constant integer of all ones or a splatted vector of a
// constant integer of all ones (with no undefs).
// Do not permit build vector implicit truncation.
// NOTE(review): the signature line was lost in extraction (upstream:
// `static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {`) --
// TODO confirm.
  N = peekThroughBitcast(N);
  unsigned BitWidth = N.getScalarValueSizeInBits();
  if (ConstantSDNode *Splat = isConstOrConstSplat(N))
    // The width check rejects implicitly-truncated splat constants.
    return Splat->isAllOnesValue() &&
           Splat->getAPIntValue().getBitWidth() == BitWidth;
  return false;
}
936 
// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
// undef's.
static bool isAnyConstantBuildVector(const SDNode *N) {
  // NOTE(review): the body was lost in extraction (upstream it returns
  // `ISD::isBuildVectorOfConstantSDNodes(N) ||
  //  ISD::isBuildVectorOfConstantFPSDNodes(N)`) -- TODO restore.
}
943 
// Reassociate commutative/associative expressions so constants can be folded
// together: handles both (op (op x, c1), c2) and (op c2, (op x, c1)) shapes.
// Returns a null SDValue when no reassociation applies.
SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                                    SDValue N1, SDNodeFlags Flags) {
  // Don't reassociate reductions.
  if (Flags.hasVectorReduction())
    return SDValue();

  EVT VT = N0.getValueType();
  if (N0.getOpcode() == Opc && !N0->getFlags().hasVectorReduction()) {
    // NOTE(review): two lines were lost in extraction here -- upstream they
    // are nested checks binding N0.getOperand(1) and N1 to L and R when both
    // are constant ints / constant-int build vectors. TODO restore.
        // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
          return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
        return SDValue();
      }
      if (N0.hasOneUse()) {
        // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
        // use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
      }
    }
  }

  if (N1.getOpcode() == Opc && !N1->getFlags().hasVectorReduction()) {
    // NOTE(review): two lines were lost in extraction here as well --
    // upstream they mirror the checks above for the (op c2, (op x, c1))
    // shape, binding L and R. TODO restore.
        // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
          return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
        return SDValue();
      }
      if (N1.hasOneUse()) {
        // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
        // use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
      }
    }
  }

  return SDValue();
}
993 
994 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
995  bool AddTo) {
996  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
997  ++NodesCombined;
998  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
999  To[0].getNode()->dump(&DAG);
1000  dbgs() << " and " << NumTo - 1 << " other values\n");
1001  for (unsigned i = 0, e = NumTo; i != e; ++i)
1002  assert((!To[i].getNode() ||
1003  N->getValueType(i) == To[i].getValueType()) &&
1004  "Cannot combine value to value of different type!");
1005 
1006  WorklistRemover DeadNodes(*this);
1007  DAG.ReplaceAllUsesWith(N, To);
1008  if (AddTo) {
1009  // Push the new nodes and any users onto the worklist
1010  for (unsigned i = 0, e = NumTo; i != e; ++i) {
1011  if (To[i].getNode()) {
1012  AddToWorklist(To[i].getNode());
1013  AddUsersToWorklist(To[i].getNode());
1014  }
1015  }
1016  }
1017 
1018  // Finally, if the node is now dead, remove it from the graph. The node
1019  // may not be dead if the replacement process recursively simplified to
1020  // something else needing this node.
1021  if (N->use_empty())
1022  deleteAndRecombine(N);
1023  return SDValue(N, 0);
1024 }
1025 
1026 void DAGCombiner::
1027 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1028  // Replace all uses. If any nodes become isomorphic to other nodes and
1029  // are deleted, make sure to remove them from our worklist.
1030  WorklistRemover DeadNodes(*this);
1031  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1032 
1033  // Push the new node and any (possibly new) users onto the worklist.
1034  AddToWorklist(TLO.New.getNode());
1035  AddUsersToWorklist(TLO.New.getNode());
1036 
1037  // Finally, if the node is now dead, remove it from the graph. The node
1038  // may not be dead if the replacement process recursively simplified to
1039  // something else needing this node.
1040  if (TLO.Old.getNode()->use_empty())
1041  deleteAndRecombine(TLO.Old.getNode());
1042 }
1043 
1044 /// Check the specified integer node value to see if it can be simplified or if
1045 /// things it uses can be simplified by bit propagation. If so, return true.
1046 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
1047  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1048  KnownBits Known;
1049  if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
1050  return false;
1051 
1052  // Revisit the node.
1053  AddToWorklist(Op.getNode());
1054 
1055  // Replace the old value with the new one.
1056  ++NodesCombined;
1057  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1058  dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1059  dbgs() << '\n');
1060 
1061  CommitTargetLoweringOpt(TLO);
1062  return true;
1063 }
1064 
1065 /// Check the specified vector node value to see if it can be simplified or
1066 /// if things it uses can be simplified as it only uses some of the elements.
1067 /// If so, return true.
1068 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
1069  bool AssumeSingleUse) {
1070  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1071  APInt KnownUndef, KnownZero;
1072  if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO,
1073  0, AssumeSingleUse))
1074  return false;
1075 
1076  // Revisit the node.
1077  AddToWorklist(Op.getNode());
1078 
1079  // Replace the old value with the new one.
1080  ++NodesCombined;
1081  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1082  dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1083  dbgs() << '\n');
1084 
1085  CommitTargetLoweringOpt(TLO);
1086  return true;
1087 }
1088 
1089 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1090  SDLoc DL(Load);
1091  EVT VT = Load->getValueType(0);
1092  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1093 
1094  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1095  Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1096  WorklistRemover DeadNodes(*this);
1097  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1098  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1099  deleteAndRecombine(Load);
1100  AddToWorklist(Trunc.getNode());
1101 }
1102 
/// Produce a copy of Op whose value is extended to type PVT. Sets Replace to
/// true when the result is a new extending load whose users must replace the
/// original load's. Returns a null SDValue when promotion is not possible.
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  Replace = false;
  SDLoc DL(Op);
  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
    LoadSDNode *LD = cast<LoadSDNode>(Op);
    EVT MemVT = LD->getMemoryVT();
    // NOTE(review): the first half of this initializer was lost in
    // extraction (upstream it selects EXTLOAD for a non-extending load, or
    // the load's existing extension type otherwise) -- TODO restore.
                                  : LD->getExtensionType();
    Replace = true;
    // Re-emit the load at the promoted type; callers fix up the old users.
    return DAG.getExtLoad(ExtType, DL, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
  }

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default: break;
  case ISD::AssertSext:
    // Promote the asserted value and re-assert at the wider type.
    if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::AssertZext:
    if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::Constant: {
    // NOTE(review): the initializer for ExtOpc was lost in extraction
    // (upstream it chooses sign- vs zero-extension based on the value
    // type) -- TODO restore.
    unsigned ExtOpc =
    return DAG.getNode(ExtOpc, DL, PVT, Op);
  }
  }

  // Fall back to an any-extend when the target supports it at PVT.
  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
}
1139 
/// Promote Op to PVT and re-establish its sign bits via SIGN_EXTEND_INREG.
/// Returns a null SDValue when promotion is not possible.
SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
  // NOTE(review): the guard condition was lost in extraction (upstream:
  // `if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))`) -- TODO
  // restore.
    return SDValue();
  // Remember the pre-promotion type for the SIGN_EXTEND_INREG below.
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  // If promotion created a new extending load, reroute the old load's users.
  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  // Re-assert the original value's sign within the promoted type.
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
                     DAG.getValueType(OldVT));
}
1156 
1157 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1158  EVT OldVT = Op.getValueType();
1159  SDLoc DL(Op);
1160  bool Replace = false;
1161  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1162  if (!NewOp.getNode())
1163  return SDValue();
1164  AddToWorklist(NewOp.getNode());
1165 
1166  if (Replace)
1167  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1168  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1169 }
1170 
/// Promote the specified integer binary operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  // Promotion only applies once operations have been legalized.
  if (!LegalOperations)
    return SDValue();

  // Only scalar integers are candidates.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    // Promote each operand to the wider type; ReplaceN records whether the
    // operand was rebuilt as a new extending load.
    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
    SDLoc DL(Op);

    // Perform the operation at the wider type, then truncate back.
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));

    // We are always replacing N0/N1's use in N and only need
    // additional replacements if there are additional uses.
    Replace0 &= !N0->hasOneUse();
    Replace1 &= (N0 != N1) && !N1->hasOneUse();

    // Combine Op here so it is preserved past replacements.
    CombineTo(Op.getNode(), RV);

    // If operands have a use ordering, make sure we deal with
    // predecessor first.
    if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
      std::swap(N0, N1);
      std::swap(NN0, NN1);
    }

    if (Replace0) {
      AddToWorklist(NN0.getNode());
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    }
    if (Replace1) {
      AddToWorklist(NN1.getNode());
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
    }
    // Returning Op (not RV) signals CombineTo already handled the rewrite.
    return Op;
  }
  return SDValue();
}
1235 
1236 /// Promote the specified integer shift operation if the target indicates it is
1237 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1238 /// i32 since i16 instructions are longer.
1239 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1240  if (!LegalOperations)
1241  return SDValue();
1242 
1243  EVT VT = Op.getValueType();
1244  if (VT.isVector() || !VT.isInteger())
1245  return SDValue();
1246 
1247  // If operation type is 'undesirable', e.g. i16 on x86, consider
1248  // promoting it.
1249  unsigned Opc = Op.getOpcode();
1250  if (TLI.isTypeDesirableForOp(Opc, VT))
1251  return SDValue();
1252 
1253  EVT PVT = VT;
1254  // Consult target whether it is a good idea to promote this operation and
1255  // what's the right type to promote it to.
1256  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1257  assert(PVT != VT && "Don't know what type to promote to!");
1258 
1259  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1260 
1261  bool Replace = false;
1262  SDValue N0 = Op.getOperand(0);
1263  SDValue N1 = Op.getOperand(1);
1264  if (Opc == ISD::SRA)
1265  N0 = SExtPromoteOperand(N0, PVT);
1266  else if (Opc == ISD::SRL)
1267  N0 = ZExtPromoteOperand(N0, PVT);
1268  else
1269  N0 = PromoteOperand(N0, PVT, Replace);
1270 
1271  if (!N0.getNode())
1272  return SDValue();
1273 
1274  SDLoc DL(Op);
1275  SDValue RV =
1276  DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1277 
1278  AddToWorklist(N0.getNode());
1279  if (Replace)
1280  ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1281 
1282  // Deal with Op being deleted.
1283  if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1284  return RV;
1285  }
1286  return SDValue();
1287 }
1288 
1289 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1290  if (!LegalOperations)
1291  return SDValue();
1292 
1293  EVT VT = Op.getValueType();
1294  if (VT.isVector() || !VT.isInteger())
1295  return SDValue();
1296 
1297  // If operation type is 'undesirable', e.g. i16 on x86, consider
1298  // promoting it.
1299  unsigned Opc = Op.getOpcode();
1300  if (TLI.isTypeDesirableForOp(Opc, VT))
1301  return SDValue();
1302 
1303  EVT PVT = VT;
1304  // Consult target whether it is a good idea to promote this operation and
1305  // what's the right type to promote it to.
1306  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1307  assert(PVT != VT && "Don't know what type to promote to!");
1308  // fold (aext (aext x)) -> (aext x)
1309  // fold (aext (zext x)) -> (zext x)
1310  // fold (aext (sext x)) -> (sext x)
1311  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1312  return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1313  }
1314  return SDValue();
1315 }
1316 
/// Promote an unindexed integer load to a wider, target-preferred type,
/// replacing its users with a truncate of the new load. Returns true if the
/// replacement was performed.
bool DAGCombiner::PromoteLoad(SDValue Op) {
  if (!LegalOperations)
    return false;

  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    SDLoc DL(Op);
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    // NOTE(review): the first half of this initializer was lost in
    // extraction (upstream it selects EXTLOAD for a non-extending load, or
    // the load's existing extension type otherwise) -- TODO restore.
                                  : LD->getExtensionType();
    // Load at the wider type, then truncate for the existing users.
    SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   MemVT, LD->getMemOperand());
    SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);

    LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
               Result.getNode()->dump(&DAG); dbgs() << '\n');
    WorklistRemover DeadNodes(*this);
    // Result 0 is the loaded value; result 1 is the chain.
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    deleteAndRecombine(N);
    AddToWorklist(Result.getNode());
    return true;
  }
  return false;
}
1362 
/// Recursively delete a node which has no uses and any operands for
/// which it is the only use.
///
/// Note that this both deletes the nodes and removes them from the worklist.
/// It also adds any nodes who have had a user deleted to the worklist as they
/// may now have only one use and subject to other combines.
bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
  if (!N->use_empty())
    return false;

  // NOTE(review): the worklist declaration was lost in extraction (upstream:
  // `SmallSetVector<SDNode *, 16> Nodes;`) -- TODO restore.
  Nodes.insert(N);
  do {
    N = Nodes.pop_back_val();
    if (!N)
      continue;

    if (N->use_empty()) {
      // Dead: queue its operands (each just lost a user) and delete it.
      for (const SDValue &ChildN : N->op_values())
        Nodes.insert(ChildN.getNode());

      removeFromWorklist(N);
      DAG.DeleteNode(N);
    } else {
      // Still live: a user was deleted, so revisit it for new combines.
      AddToWorklist(N);
    }
  } while (!Nodes.empty());
  return true;
}
1392 
1393 //===----------------------------------------------------------------------===//
1394 // Main DAG Combiner implementation
1395 //===----------------------------------------------------------------------===//
1396 
/// Main driver: repeatedly pop nodes off the worklist, combine each one, and
/// propagate replacements until the worklist is empty.
void DAGCombiner::Run(CombineLevel AtLevel) {
  // set the instance variables, so that the various visit routines may use it.
  Level = AtLevel;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  // Add all the dag nodes to the worklist.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While the worklist isn't empty, find a node and try to combine it.
  while (!WorklistMap.empty()) {
    SDNode *N;
    // The Worklist holds the SDNodes in order, but it may contain null entries.
    do {
      N = Worklist.pop_back_val();
    } while (!N);

    bool GoodWorklistEntry = WorklistMap.erase(N);
    (void)GoodWorklistEntry;
    assert(GoodWorklistEntry &&
           "Found a worklist entry without a corresponding map entry!");

    // If N has no uses, it is dead. Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (Level == AfterLegalizeDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      // Anything legalization touched needs another combine pass.
      for (SDNode *LN : UpdatedNodes) {
        AddToWorklist(LN);
        AddUsersToWorklist(LN);
      }
      if (!NIsValid)
        continue;
    }

    LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    CombinedNodes.insert(N);
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    SDValue RV = combine(N);

    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used. Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    // NOTE(review): the first line of this assert was lost in extraction
    // (upstream: `assert(N->getOpcode() != ISD::DELETED_NODE &&`) -- TODO
    // restore.
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));

    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      // Single-value replacement of a single-value node.
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      DAG.ReplaceAllUsesWith(N, &RV);
    }

    // Push the new node and any users onto the worklist
    AddToWorklist(RV.getNode());
    AddUsersToWorklist(RV.getNode());

    // Finally, if the node is now dead, remove it from the graph. The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node. This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load, update the root).
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}
1500 
/// Dispatch N to the opcode-specific visitXXX combine routine. Returns the
/// replacement value, or a null SDValue when no combine applies to this
/// opcode.
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::UADDO:              return visitUADDO(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::USUBO:              return visitUSUBO(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::ADDCARRY:           return visitADDCARRY(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::SUBCARRY:           return visitSUBCARRY(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  case ISD::SREM:
  case ISD::UREM:               return visitREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:              return visitSMULO(N);
  case ISD::UMULO:              return visitUMULO(N);
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:               return visitIMINMAX(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  case ISD::ROTR:
  case ISD::ROTL:               return visitRotate(N);
  case ISD::ABS:                return visitABS(N);
  case ISD::BSWAP:              return visitBSWAP(N);
  case ISD::BITREVERSE:         return visitBITREVERSE(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::VSELECT:            return visitVSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  case ISD::AssertSext:
  case ISD::AssertZext:         return visitAssertExt(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FSQRT:              return visitFSQRT(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::FFLOOR:             return visitFFLOOR(N);
  case ISD::FMINNUM:            return visitFMINNUM(N);
  case ISD::FMAXNUM:            return visitFMAXNUM(N);
  case ISD::FCEIL:              return visitFCEIL(N);
  case ISD::FTRUNC:             return visitFTRUNC(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
  case ISD::MGATHER:            return visitMGATHER(N);
  case ISD::MLOAD:              return visitMLOAD(N);
  case ISD::MSCATTER:           return visitMSCATTER(N);
  case ISD::MSTORE:             return visitMSTORE(N);
  case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
  case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
  }
  return SDValue();
}
1606 
/// Try all combine strategies on N in order: generic visit routines, then a
/// target-specific combine, then type promotion, then commuted-operand CSE.
/// Returns the replacement value, or a null SDValue if nothing applied.
SDValue DAGCombiner::combine(SDNode *N) {
  SDValue RV = visit(N);

  // If nothing happened, try a target-specific DAG combine.
  if (!RV.getNode()) {
    // NOTE(review): the first line of this assert was lost in extraction
    // (upstream: `assert(N->getOpcode() != ISD::DELETED_NODE &&`) -- TODO
    // restore.
           "Node was deleted but visit returned NULL!");

    // NOTE(review): the second half of this condition was lost in
    // extraction (upstream it checks that the target registered a combine
    // for this opcode via TLI.hasTargetDAGCombine) -- TODO restore.
    if (N->getOpcode() >= ISD::BUILTIN_OP_END ||

      // Expose the DAG combiner to the target combiner impls.
      // NOTE(review): the type of DagCombineInfo was lost in extraction
      // (upstream: `TargetLowering::DAGCombinerInfo`) -- TODO restore.
          DagCombineInfo(DAG, Level, false, this);

      RV = TLI.PerformDAGCombine(N, DagCombineInfo);
    }
  }

  // If nothing happened still, try promoting the operation.
  if (!RV.getNode()) {
    switch (N->getOpcode()) {
    default: break;
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
      RV = PromoteIntBinOp(SDValue(N, 0));
      break;
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:
      RV = PromoteIntShiftOp(SDValue(N, 0));
      break;
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::ANY_EXTEND:
      RV = PromoteExtend(SDValue(N, 0));
      break;
    case ISD::LOAD:
      if (PromoteLoad(SDValue(N, 0)))
        RV = SDValue(N, 0);
      break;
    }
  }

  // If N is a commutative binary node, try eliminate it if the commuted
  // version is already present in the DAG.
  if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
      N->getNumValues() == 1) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);

    // Constant operands are canonicalized to RHS.
    if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
      SDValue Ops[] = {N1, N0};
      SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
                                            N->getFlags());
      if (CSENode)
        return SDValue(CSENode, 0);
    }
  }

  return RV;
}
1674 
/// Given a node, return its input chain if it has one, otherwise return a null
/// sd operand.
// NOTE(review): the signature line was lost in extraction (upstream:
// `static SDValue getInputChainForNode(SDNode *N) {`) -- TODO confirm.
  if (unsigned NumOps = N->getNumOperands()) {
    // Chains are conventionally the first or last operand; check those fast
    // paths before scanning the middle operands.
    if (N->getOperand(0).getValueType() == MVT::Other)
      return N->getOperand(0);
    if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
      return N->getOperand(NumOps-1);
    for (unsigned i = 1; i < NumOps-1; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other)
        return N->getOperand(i);
  }
  return SDValue();
}
1689 
1690 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1691  // If N has two operands, where one has an input chain equal to the other,
1692  // the 'other' chain is redundant.
1693  if (N->getNumOperands() == 2) {
1694  if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1695  return N->getOperand(0);
1696  if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1697  return N->getOperand(1);
1698  }
1699 
1700  // Don't simplify token factors if optnone.
1701  if (OptLevel == CodeGenOpt::None)
1702  return SDValue();
1703 
1704  SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1705  SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1706  SmallPtrSet<SDNode*, 16> SeenOps;
1707  bool Changed = false; // If we should replace this token factor.
1708 
1709  // Start out with this token factor.
1710  TFs.push_back(N);
1711 
1712  // Iterate through token factors. The TFs grows when new token factors are
1713  // encountered.
1714  for (unsigned i = 0; i < TFs.size(); ++i) {
1715  SDNode *TF = TFs[i];
1716 
1717  // Check each of the operands.
1718  for (const SDValue &Op : TF->op_values()) {
1719  switch (Op.getOpcode()) {
1720  case ISD::EntryToken:
1721  // Entry tokens don't need to be added to the list. They are
1722  // redundant.
1723  Changed = true;
1724  break;
1725 
1726  case ISD::TokenFactor:
1727  if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1728  // Queue up for processing.
1729  TFs.push_back(Op.getNode());
1730  // Clean up in case the token factor is removed.
1731  AddToWorklist(Op.getNode());
1732  Changed = true;
1733  break;
1734  }
1736 
1737  default:
1738  // Only add if it isn't already in the list.
1739  if (SeenOps.insert(Op.getNode()).second)
1740  Ops.push_back(Op);
1741  else
1742  Changed = true;
1743  break;
1744  }
1745  }
1746  }
1747 
1748  // Remove Nodes that are chained to another node in the list. Do so
1749  // by walking up chains breath-first stopping when we've seen
1750  // another operand. In general we must climb to the EntryNode, but we can exit
1751  // early if we find all remaining work is associated with just one operand as
1752  // no further pruning is possible.
1753 
1754  // List of nodes to search through and original Ops from which they originate.
1756  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1757  SmallPtrSet<SDNode *, 16> SeenChains;
1758  bool DidPruneOps = false;
1759 
1760  unsigned NumLeftToConsider = 0;
1761  for (const SDValue &Op : Ops) {
1762  Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1763  OpWorkCount.push_back(1);
1764  }
1765 
1766  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1767  // If this is an Op, we can remove the op from the list. Remark any
1768  // search associated with it as from the current OpNumber.
1769  if (SeenOps.count(Op) != 0) {
1770  Changed = true;
1771  DidPruneOps = true;
1772  unsigned OrigOpNumber = 0;
1773  while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1774  OrigOpNumber++;
1775  assert((OrigOpNumber != Ops.size()) &&
1776  "expected to find TokenFactor Operand");
1777  // Re-mark worklist from OrigOpNumber to OpNumber
1778  for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1779  if (Worklist[i].second == OrigOpNumber) {
1780  Worklist[i].second = OpNumber;
1781  }
1782  }
1783  OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1784  OpWorkCount[OrigOpNumber] = 0;
1785  NumLeftToConsider--;
1786  }
1787  // Add if it's a new chain
1788  if (SeenChains.insert(Op).second) {
1789  OpWorkCount[OpNumber]++;
1790  Worklist.push_back(std::make_pair(Op, OpNumber));
1791  }
1792  };
1793 
1794  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1795  // We need at least be consider at least 2 Ops to prune.
1796  if (NumLeftToConsider <= 1)
1797  break;
1798  auto CurNode = Worklist[i].first;
1799  auto CurOpNumber = Worklist[i].second;
1800  assert((OpWorkCount[CurOpNumber] > 0) &&
1801  "Node should not appear in worklist");
1802  switch (CurNode->getOpcode()) {
1803  case ISD::EntryToken:
1804  // Hitting EntryToken is the only way for the search to terminate without
1805  // hitting
1806  // another operand's search. Prevent us from marking this operand
1807  // considered.
1808  NumLeftToConsider++;
1809  break;
1810  case ISD::TokenFactor:
1811  for (const SDValue &Op : CurNode->op_values())
1812  AddToWorklist(i, Op.getNode(), CurOpNumber);
1813  break;
1814  case ISD::CopyFromReg:
1815  case ISD::CopyToReg:
1816  AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1817  break;
1818  default:
1819  if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1820  AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1821  break;
1822  }
1823  OpWorkCount[CurOpNumber]--;
1824  if (OpWorkCount[CurOpNumber] == 0)
1825  NumLeftToConsider--;
1826  }
1827 
1828  // If we've changed things around then replace token factor.
1829  if (Changed) {
1830  SDValue Result;
1831  if (Ops.empty()) {
1832  // The entry token is the only possible outcome.
1833  Result = DAG.getEntryNode();
1834  } else {
1835  if (DidPruneOps) {
1836  SmallVector<SDValue, 8> PrunedOps;
1837  //
1838  for (const SDValue &Op : Ops) {
1839  if (SeenChains.count(Op.getNode()) == 0)
1840  PrunedOps.push_back(Op);
1841  }
1842  Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
1843  } else {
1844  Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
1845  }
1846  }
1847  return Result;
1848  }
1849  return SDValue();
1850 }
1851 
1852 /// MERGE_VALUES can always be eliminated.
1853 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1854  WorklistRemover DeadNodes(*this);
1855  // Replacing results may cause a different MERGE_VALUES to suddenly
1856  // be CSE'd with N, and carry its uses with it. Iterate until no
1857  // uses remain, to ensure that the node can be safely deleted.
1858  // First add the users of this node to the work list so that they
1859  // can be tried again once they have new operands.
1860  AddUsersToWorklist(N);
1861  do {
1862  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1863  DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
1864  } while (!N->use_empty());
1865  deleteAndRecombine(N);
1866  return SDValue(N, 0); // Return N so it doesn't get rechecked!
1867 }
1868 
1869 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
1870 /// ConstantSDNode pointer else nullptr.
1873  return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1874 }
1875 
1876 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
1877  auto BinOpcode = BO->getOpcode();
1878  assert((BinOpcode == ISD::ADD || BinOpcode == ISD::SUB ||
1879  BinOpcode == ISD::MUL || BinOpcode == ISD::SDIV ||
1880  BinOpcode == ISD::UDIV || BinOpcode == ISD::SREM ||
1881  BinOpcode == ISD::UREM || BinOpcode == ISD::AND ||
1882  BinOpcode == ISD::OR || BinOpcode == ISD::XOR ||
1883  BinOpcode == ISD::SHL || BinOpcode == ISD::SRL ||
1884  BinOpcode == ISD::SRA || BinOpcode == ISD::FADD ||
1885  BinOpcode == ISD::FSUB || BinOpcode == ISD::FMUL ||
1886  BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
1887  "Unexpected binary operator");
1888 
1889  // Don't do this unless the old select is going away. We want to eliminate the
1890  // binary operator, not replace a binop with a select.
1891  // TODO: Handle ISD::SELECT_CC.
1892  unsigned SelOpNo = 0;
1893  SDValue Sel = BO->getOperand(0);
1894  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
1895  SelOpNo = 1;
1896  Sel = BO->getOperand(1);
1897  }
1898 
1899  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1900  return SDValue();
1901 
1902  SDValue CT = Sel.getOperand(1);
1903  if (!isConstantOrConstantVector(CT, true) &&
1905  return SDValue();
1906 
1907  SDValue CF = Sel.getOperand(2);
1908  if (!isConstantOrConstantVector(CF, true) &&
1910  return SDValue();
1911 
1912  // Bail out if any constants are opaque because we can't constant fold those.
1913  // The exception is "and" and "or" with either 0 or -1 in which case we can
1914  // propagate non constant operands into select. I.e.:
1915  // and (select Cond, 0, -1), X --> select Cond, 0, X
1916  // or X, (select Cond, -1, 0) --> select Cond, -1, X
1917  bool CanFoldNonConst = (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
1922 
1923  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
1924  if (!CanFoldNonConst &&
1925  !isConstantOrConstantVector(CBO, true) &&
1927  return SDValue();
1928 
1929  EVT VT = Sel.getValueType();
1930 
1931  // In case of shift value and shift amount may have different VT. For instance
1932  // on x86 shift amount is i8 regardles of LHS type. Bail out if we have
1933  // swapped operands and value types do not match. NB: x86 is fine if operands
1934  // are not swapped with shift amount VT being not bigger than shifted value.
1935  // TODO: that is possible to check for a shift operation, correct VTs and
1936  // still perform optimization on x86 if needed.
1937  if (SelOpNo && VT != CBO.getValueType())
1938  return SDValue();
1939 
1940  // We have a select-of-constants followed by a binary operator with a
1941  // constant. Eliminate the binop by pulling the constant math into the select.
1942  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
1943  SDLoc DL(Sel);
1944  SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
1945  : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
1946  if (!CanFoldNonConst && !NewCT.isUndef() &&
1947  !isConstantOrConstantVector(NewCT, true) &&
1949  return SDValue();
1950 
1951  SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
1952  : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
1953  if (!CanFoldNonConst && !NewCF.isUndef() &&
1954  !isConstantOrConstantVector(NewCF, true) &&
1956  return SDValue();
1957 
1958  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
1959 }
1960 
1962  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
1963  "Expecting add or sub");
1964 
1965  // Match a constant operand and a zext operand for the math instruction:
1966  // add Z, C
1967  // sub C, Z
1968  bool IsAdd = N->getOpcode() == ISD::ADD;
1969  SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
1970  SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
1971  auto *CN = dyn_cast<ConstantSDNode>(C);
1972  if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
1973  return SDValue();
1974 
1975  // Match the zext operand as a setcc of a boolean.
1976  if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
1977  Z.getOperand(0).getValueType() != MVT::i1)
1978  return SDValue();
1979 
1980  // Match the compare as: setcc (X & 1), 0, eq.
1981  SDValue SetCC = Z.getOperand(0);
1982  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
1983  if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
1984  SetCC.getOperand(0).getOpcode() != ISD::AND ||
1985  !isOneConstant(SetCC.getOperand(0).getOperand(1)))
1986  return SDValue();
1987 
1988  // We are adding/subtracting a constant and an inverted low bit. Turn that
1989  // into a subtract/add of the low bit with incremented/decremented constant:
1990  // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
1991  // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
1992  EVT VT = C.getValueType();
1993  SDLoc DL(N);
1994  SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
1995  SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
1996  DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
1997  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
1998 }
1999 
2000 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2001 /// a shift and add with a different constant.
2003  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2004  "Expecting add or sub");
2005 
2006  // We need a constant operand for the add/sub, and the other operand is a
2007  // logical shift right: add (srl), C or sub C, (srl).
2008  bool IsAdd = N->getOpcode() == ISD::ADD;
2009  SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2010  SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2011  ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
2012  if (!C || ShiftOp.getOpcode() != ISD::SRL)
2013  return SDValue();
2014 
2015  // The shift must be of a 'not' value.
2016  // TODO: Use isBitwiseNot() if it works with vectors.
2017  SDValue Not = ShiftOp.getOperand(0);
2018  if (!Not.hasOneUse() || Not.getOpcode() != ISD::XOR ||
2020  return SDValue();
2021 
2022  // The shift must be moving the sign bit to the least-significant-bit.
2023  EVT VT = ShiftOp.getValueType();
2024  SDValue ShAmt = ShiftOp.getOperand(1);
2025  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2026  if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1)
2027  return SDValue();
2028 
2029  // Eliminate the 'not' by adjusting the shift and add/sub constant:
2030  // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2031  // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2032  SDLoc DL(N);
2033  auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2034  SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2035  APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
2036  return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
2037 }
2038 
2039 SDValue DAGCombiner::visitADD(SDNode *N) {
2040  SDValue N0 = N->getOperand(0);
2041  SDValue N1 = N->getOperand(1);
2042  EVT VT = N0.getValueType();
2043  SDLoc DL(N);
2044 
2045  // fold vector ops
2046  if (VT.isVector()) {
2047  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2048  return FoldedVOp;
2049 
2050  // fold (add x, 0) -> x, vector edition
2052  return N0;
2054  return N1;
2055  }
2056 
2057  // fold (add x, undef) -> undef
2058  if (N0.isUndef())
2059  return N0;
2060 
2061  if (N1.isUndef())
2062  return N1;
2063 
2065  // canonicalize constant to RHS
2067  return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2068  // fold (add c1, c2) -> c1+c2
2069  return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
2070  N1.getNode());
2071  }
2072 
2073  // fold (add x, 0) -> x
2074  if (isNullConstant(N1))
2075  return N0;
2076 
2077  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2078  // fold ((c1-A)+c2) -> (c1+c2)-A
2079  if (N0.getOpcode() == ISD::SUB &&
2080  isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2081  // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
2082  return DAG.getNode(ISD::SUB, DL, VT,
2083  DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2084  N0.getOperand(1));
2085  }
2086 
2087  // add (sext i1 X), 1 -> zext (not i1 X)
2088  // We don't transform this pattern:
2089  // add (zext i1 X), -1 -> sext (not i1 X)
2090  // because most (?) targets generate better code for the zext form.
2091  if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2093  SDValue X = N0.getOperand(0);
2094  if ((!LegalOperations ||
2095  (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2096  TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2097  X.getScalarValueSizeInBits() == 1) {
2098  SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2099  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2100  }
2101  }
2102 
2103  // Undo the add -> or combine to merge constant offsets from a frame index.
2104  if (N0.getOpcode() == ISD::OR &&
2105  isa<FrameIndexSDNode>(N0.getOperand(0)) &&
2106  isa<ConstantSDNode>(N0.getOperand(1)) &&
2107  DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2108  SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
2109  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2110  }
2111  }
2112 
2113  if (SDValue NewSel = foldBinOpIntoSelect(N))
2114  return NewSel;
2115 
2116  // reassociate add
2117  if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2118  return RADD;
2119 
2120  // fold ((0-A) + B) -> B-A
2121  if (N0.getOpcode() == ISD::SUB &&
2123  return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2124 
2125  // fold (A + (0-B)) -> A-B
2126  if (N1.getOpcode() == ISD::SUB &&
2128  return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2129 
2130  // fold (A+(B-A)) -> B
2131  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2132  return N1.getOperand(0);
2133 
2134  // fold ((B-A)+A) -> B
2135  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2136  return N0.getOperand(0);
2137 
2138  // fold (A+(B-(A+C))) to (B-C)
2139  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2140  N0 == N1.getOperand(1).getOperand(0))
2141  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2142  N1.getOperand(1).getOperand(1));
2143 
2144  // fold (A+(B-(C+A))) to (B-C)
2145  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2146  N0 == N1.getOperand(1).getOperand(1))
2147  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2148  N1.getOperand(1).getOperand(0));
2149 
2150  // fold (A+((B-A)+or-C)) to (B+or-C)
2151  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2152  N1.getOperand(0).getOpcode() == ISD::SUB &&
2153  N0 == N1.getOperand(0).getOperand(1))
2154  return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2155  N1.getOperand(1));
2156 
2157  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2158  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2159  SDValue N00 = N0.getOperand(0);
2160  SDValue N01 = N0.getOperand(1);
2161  SDValue N10 = N1.getOperand(0);
2162  SDValue N11 = N1.getOperand(1);
2163 
2165  return DAG.getNode(ISD::SUB, DL, VT,
2166  DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2167  DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2168  }
2169 
2170  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2171  return V;
2172 
2173  if (SDValue V = foldAddSubOfSignBit(N, DAG))
2174  return V;
2175 
2176  if (SimplifyDemandedBits(SDValue(N, 0)))
2177  return SDValue(N, 0);
2178 
2179  // fold (a+b) -> (a|b) iff a and b share no bits.
2180  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2181  DAG.haveNoCommonBitsSet(N0, N1))
2182  return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2183 
2184  // fold (add (xor a, -1), 1) -> (sub 0, a)
2186  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2187  N0.getOperand(0));
2188 
2189  if (SDValue Combined = visitADDLike(N0, N1, N))
2190  return Combined;
2191 
2192  if (SDValue Combined = visitADDLike(N1, N0, N))
2193  return Combined;
2194 
2195  return SDValue();
2196 }
2197 
2198 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2199  bool Masked = false;
2200 
2201  // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2202  while (true) {
2203  if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2204  V = V.getOperand(0);
2205  continue;
2206  }
2207 
2208  if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2209  Masked = true;
2210  V = V.getOperand(0);
2211  continue;
2212  }
2213 
2214  break;
2215  }
2216 
2217  // If this is not a carry, return.
2218  if (V.getResNo() != 1)
2219  return SDValue();
2220 
2221  if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2222  V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2223  return SDValue();
2224 
2225  // If the result is masked, then no matter what kind of bool it is we can
2226  // return. If it isn't, then we need to make sure the bool type is either 0 or
2227  // 1 and not other values.
2228  if (Masked ||
2229  TLI.getBooleanContents(V.getValueType()) ==
2231  return V;
2232 
2233  return SDValue();
2234 }
2235 
2236 SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
2237  EVT VT = N0.getValueType();
2238  SDLoc DL(LocReference);
2239 
2240  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2241  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2243  return DAG.getNode(ISD::SUB, DL, VT, N0,
2244  DAG.getNode(ISD::SHL, DL, VT,
2245  N1.getOperand(0).getOperand(1),
2246  N1.getOperand(1)));
2247 
2248  if (N1.getOpcode() == ISD::AND) {
2249  SDValue AndOp0 = N1.getOperand(0);
2250  unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
2251  unsigned DestBits = VT.getScalarSizeInBits();
2252 
2253  // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
2254  // and similar xforms where the inner op is either ~0 or 0.
2255  if (NumSignBits == DestBits &&
2257  return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
2258  }
2259 
2260  // add (sext i1), X -> sub X, (zext i1)
2261  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2262  N0.getOperand(0).getValueType() == MVT::i1 &&
2264  SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2265  return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2266  }
2267 
2268  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2269  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2270  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2271  if (TN->getVT() == MVT::i1) {
2272  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2273  DAG.getConstant(1, DL, VT));
2274  return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2275  }
2276  }
2277 
2278  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2279  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2280  N1.getResNo() == 0)
2281  return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2282  N0, N1.getOperand(0), N1.getOperand(2));
2283 
2284  // (add X, Carry) -> (addcarry X, 0, Carry)
2286  if (SDValue Carry = getAsCarry(TLI, N1))
2287  return DAG.getNode(ISD::ADDCARRY, DL,
2288  DAG.getVTList(VT, Carry.getValueType()), N0,
2289  DAG.getConstant(0, DL, VT), Carry);
2290 
2291  return SDValue();
2292 }
2293 
2294 SDValue DAGCombiner::visitADDC(SDNode *N) {
2295  SDValue N0 = N->getOperand(0);
2296  SDValue N1 = N->getOperand(1);
2297  EVT VT = N0.getValueType();
2298  SDLoc DL(N);
2299 
2300  // If the flag result is dead, turn this into an ADD.
2301  if (!N->hasAnyUseOfValue(1))
2302  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2303  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2304 
2305  // canonicalize constant to RHS.
2308  if (N0C && !N1C)
2309  return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2310 
2311  // fold (addc x, 0) -> x + no carry out
2312  if (isNullConstant(N1))
2313  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2314  DL, MVT::Glue));
2315 
2316  // If it cannot overflow, transform into an add.
2317  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2318  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2319  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2320 
2321  return SDValue();
2322 }
2323 
2324 static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT,
2325  SelectionDAG &DAG, const TargetLowering &TLI) {
2326  SDValue Cst;
2327  switch (TLI.getBooleanContents(VT)) {
2330  Cst = DAG.getConstant(1, DL, VT);
2331  break;
2333  Cst = DAG.getConstant(-1, DL, VT);
2334  break;
2335  }
2336 
2337  return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
2338 }
2339 
2340 static bool isBooleanFlip(SDValue V, EVT VT, const TargetLowering &TLI) {
2341  if (V.getOpcode() != ISD::XOR) return false;
2343  if (!Const) return false;
2344 
2345  switch(TLI.getBooleanContents(VT)) {
2347  return Const->isOne();
2349  return Const->isAllOnesValue();
2351  return (Const->getAPIntValue() & 0x01) == 1;
2352  }
2353  llvm_unreachable("Unsupported boolean content");
2354 }
2355 
2356 SDValue DAGCombiner::visitUADDO(SDNode *N) {
2357  SDValue N0 = N->getOperand(0);
2358  SDValue N1 = N->getOperand(1);
2359  EVT VT = N0.getValueType();
2360  if (VT.isVector())
2361  return SDValue();
2362 
2363  EVT CarryVT = N->getValueType(1);
2364  SDLoc DL(N);
2365 
2366  // If the flag result is dead, turn this into an ADD.
2367  if (!N->hasAnyUseOfValue(1))
2368  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2369  DAG.getUNDEF(CarryVT));
2370 
2371  // canonicalize constant to RHS.
2374  if (N0C && !N1C)
2375  return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
2376 
2377  // fold (uaddo x, 0) -> x + no carry out
2378  if (isNullConstant(N1))
2379  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2380 
2381  // If it cannot overflow, transform into an add.
2382  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2383  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2384  DAG.getConstant(0, DL, CarryVT));
2385 
2386  // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2388  SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2389  DAG.getConstant(0, DL, VT),
2390  N0.getOperand(0));
2391  return CombineTo(N, Sub,
2392  flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
2393  }
2394 
2395  if (SDValue Combined = visitUADDOLike(N0, N1, N))
2396  return Combined;
2397 
2398  if (SDValue Combined = visitUADDOLike(N1, N0, N))
2399  return Combined;
2400 
2401  return SDValue();
2402 }
2403 
2404 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2405  auto VT = N0.getValueType();
2406 
2407  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2408  // If Y + 1 cannot overflow.
2409  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2410  SDValue Y = N1.getOperand(0);
2411  SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2412  if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2413  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2414  N1.getOperand(2));
2415  }
2416 
2417  // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2419  if (SDValue Carry = getAsCarry(TLI, N1))
2420  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2421  DAG.getConstant(0, SDLoc(N), VT), Carry);
2422 
2423  return SDValue();
2424 }
2425 
2426 SDValue DAGCombiner::visitADDE(SDNode *N) {
2427  SDValue N0 = N->getOperand(0);
2428  SDValue N1 = N->getOperand(1);
2429  SDValue CarryIn = N->getOperand(2);
2430 
2431  // canonicalize constant to RHS
2434  if (N0C && !N1C)
2435  return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2436  N1, N0, CarryIn);
2437 
2438  // fold (adde x, y, false) -> (addc x, y)
2439  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2440  return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2441 
2442  return SDValue();
2443 }
2444 
2445 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2446  SDValue N0 = N->getOperand(0);
2447  SDValue N1 = N->getOperand(1);
2448  SDValue CarryIn = N->getOperand(2);
2449  SDLoc DL(N);
2450 
2451  // canonicalize constant to RHS
2454  if (N0C && !N1C)
2455  return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2456 
2457  // fold (addcarry x, y, false) -> (uaddo x, y)
2458  if (isNullConstant(CarryIn)) {
2459  if (!LegalOperations ||
2461  return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2462  }
2463 
2464  EVT CarryVT = CarryIn.getValueType();
2465 
2466  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2467  if (isNullConstant(N0) && isNullConstant(N1)) {
2468  EVT VT = N0.getValueType();
2469  SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2470  AddToWorklist(CarryExt.getNode());
2471  return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2472  DAG.getConstant(1, DL, VT)),
2473  DAG.getConstant(0, DL, CarryVT));
2474  }
2475 
2476  // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
2477  if (isBitwiseNot(N0) && isNullConstant(N1) &&
2478  isBooleanFlip(CarryIn, CarryVT, TLI)) {
2479  SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
2480  DAG.getConstant(0, DL, N0.getValueType()),
2481  N0.getOperand(0), CarryIn.getOperand(0));
2482  return CombineTo(N, Sub,
2483  flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
2484  }
2485 
2486  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2487  return Combined;
2488 
2489  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2490  return Combined;
2491 
2492  return SDValue();
2493 }
2494 
2495 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2496  SDNode *N) {
2497  // Iff the flag result is dead:
2498  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
2499  if ((N0.getOpcode() == ISD::ADD ||
2500  (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
2501  isNullConstant(N1) && !N->hasAnyUseOfValue(1))
2502  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2503  N0.getOperand(0), N0.getOperand(1), CarryIn);
2504 
2505  /**
2506  * When one of the addcarry argument is itself a carry, we may be facing
2507  * a diamond carry propagation. In which case we try to transform the DAG
2508  * to ensure linear carry propagation if that is possible.
2509  *
2510  * We are trying to get:
2511  * (addcarry X, 0, (addcarry A, B, Z):Carry)
2512  */
2513  if (auto Y = getAsCarry(TLI, N1)) {
2514  /**
2515  * (uaddo A, B)
2516  * / \
2517  * Carry Sum
2518  * | \
2519  * | (addcarry *, 0, Z)
2520  * | /
2521  * \ Carry
2522  * | /
2523  * (addcarry X, *, *)
2524  */
2525  if (Y.getOpcode() == ISD::UADDO &&
2526  CarryIn.getResNo() == 1 &&
2527  CarryIn.getOpcode() == ISD::ADDCARRY &&
2528  isNullConstant(CarryIn.getOperand(1)) &&
2529  CarryIn.getOperand(0) == Y.getValue(0)) {
2530  auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
2531  Y.getOperand(0), Y.getOperand(1),
2532  CarryIn.getOperand(2));
2533  AddToWorklist(NewY.getNode());
2534  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2535  DAG.getConstant(0, SDLoc(N), N0.getValueType()),
2536  NewY.getValue(1));
2537  }
2538  }
2539 
2540  return SDValue();
2541 }
2542 
2543 // Since it may not be valid to emit a fold to zero for vector initializers
2544 // check if we can before folding.
2545 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2546  SelectionDAG &DAG, bool LegalOperations,
2547  bool LegalTypes) {
2548  if (!VT.isVector())
2549  return DAG.getConstant(0, DL, VT);
2550  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2551  return DAG.getConstant(0, DL, VT);
2552  return SDValue();
2553 }
2554 
2555 SDValue DAGCombiner::visitSUB(SDNode *N) {
2556  SDValue N0 = N->getOperand(0);
2557  SDValue N1 = N->getOperand(1);
2558  EVT VT = N0.getValueType();
2559  SDLoc DL(N);
2560 
2561  // fold vector ops
2562  if (VT.isVector()) {
2563  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2564  return FoldedVOp;
2565 
2566  // fold (sub x, 0) -> x, vector edition
2568  return N0;
2569  }
2570 
2571  // fold (sub x, x) -> 0
2572  // FIXME: Refactor this and xor and other similar operations together.
2573  if (N0 == N1)
2574  return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes);
2577  // fold (sub c1, c2) -> c1-c2
2578  return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
2579  N1.getNode());
2580  }
2581 
2582  if (SDValue NewSel = foldBinOpIntoSelect(N))
2583  return NewSel;
2584 
2586 
2587  // fold (sub x, c) -> (add x, -c)
2588  if (N1C) {
2589  return DAG.getNode(ISD::ADD, DL, VT, N0,
2590  DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
2591  }
2592 
2594  unsigned BitWidth = VT.getScalarSizeInBits();
2595  // Right-shifting everything out but the sign bit followed by negation is
2596  // the same as flipping arithmetic/logical shift type without the negation:
2597  // -(X >>u 31) -> (X >>s 31)
2598  // -(X >>s 31) -> (X >>u 31)
2599  if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
2600  ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
2601  if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
2602  auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
2603  if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
2604  return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
2605  }
2606  }
2607 
2608  // 0 - X --> 0 if the sub is NUW.
2609  if (N->getFlags().hasNoUnsignedWrap())
2610  return N0;
2611 
2612  if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
2613  // N1 is either 0 or the minimum signed value. If the sub is NSW, then
2614  // N1 must be 0 because negating the minimum signed value is undefined.
2615  if (N->getFlags().hasNoSignedWrap())
2616  return N0;
2617 
2618  // 0 - X --> X if X is 0 or the minimum signed value.
2619  return N1;
2620  }
2621  }
2622 
2623  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
2625  return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
2626 
2627  // fold (A - (0-B)) -> A+B
2628  if (N1.getOpcode() == ISD::SUB &&
2630  return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
2631 
2632  // fold A-(A-B) -> B
2633  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
2634  return N1.getOperand(1);
2635 
2636  // fold (A+B)-A -> B
2637  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
2638  return N0.getOperand(1);
2639 
2640  // fold (A+B)-B -> A
2641  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
2642  return N0.getOperand(0);
2643 
2644  // fold C2-(A+C1) -> (C2-C1)-A
2645  if (N1.getOpcode() == ISD::ADD) {
2646  SDValue N11 = N1.getOperand(1);
2647  if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
2648  isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
2649  SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
2650  return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
2651  }
2652  }
2653 
2654  // fold ((A+(B+or-C))-B) -> A+or-C
2655  if (N0.getOpcode() == ISD::ADD &&
2656  (N0.getOperand(1).getOpcode() == ISD::SUB ||
2657  N0.getOperand(1).getOpcode() == ISD::ADD) &&
2658  N0.getOperand(1).getOperand(0) == N1)
2659  return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
2660  N0.getOperand(1).getOperand(1));
2661 
2662  // fold ((A+(C+B))-B) -> A+C
2663  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
2664  N0.getOperand(1).getOperand(1) == N1)
2665  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
2666  N0.getOperand(1).getOperand(0));
2667 
2668  // fold ((A-(B-C))-C) -> A-B
2669  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
2670  N0.getOperand(1).getOperand(1) == N1)
2671  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2672  N0.getOperand(1).getOperand(0));
2673 
2674  // fold (A-(B-C)) -> A+(C-B)
2675  if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
2676  return DAG.getNode(ISD::ADD, DL, VT, N0,
2677  DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
2678  N1.getOperand(0)));
2679 
2680  // fold (X - (-Y * Z)) -> (X + (Y * Z))
2681  if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
2682  if (N1.getOperand(0).getOpcode() == ISD::SUB &&
2684  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
2685  N1.getOperand(0).getOperand(1),
2686  N1.getOperand(1));
2687  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
2688  }
2689  if (N1.getOperand(1).getOpcode() == ISD::SUB &&
2691  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
2692  N1.getOperand(0),
2693  N1.getOperand(1).getOperand(1));
2694  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
2695  }
2696  }
2697 
2698  // If either operand of a sub is undef, the result is undef
2699  if (N0.isUndef())
2700  return N0;
2701  if (N1.isUndef())
2702  return N1;
2703 
2704  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2705  return V;
2706 
2707  if (SDValue V = foldAddSubOfSignBit(N, DAG))
2708  return V;
2709 
2710  // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
2711  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
2712  if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
2713  SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
2714  SDValue S0 = N1.getOperand(0);
2715  if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
2716  unsigned OpSizeInBits = VT.getScalarSizeInBits();
2718  if (C->getAPIntValue() == (OpSizeInBits - 1))
2719  return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
2720  }
2721  }
2722  }
2723 
2724  // If the relocation model supports it, consider symbol offsets.
2725  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
2726  if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2727  // fold (sub Sym, c) -> Sym-c
2728  if (N1C && GA->getOpcode() == ISD::GlobalAddress)
2729  return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
2730  GA->getOffset() -
2731  (uint64_t)N1C->getSExtValue());
2732  // fold (sub Sym+c1, Sym+c2) -> c1-c2
2733  if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
2734  if (GA->getGlobal() == GB->getGlobal())
2735  return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
2736  DL, VT);
2737  }
2738 
2739  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
2740  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2741  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2742  if (TN->getVT() == MVT::i1) {
2743  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2744  DAG.getConstant(1, DL, VT));
2745  return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
2746  }
2747  }
2748 
2749  // Prefer an add for more folding potential and possibly better codegen:
2750  // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
2751  if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
2752  SDValue ShAmt = N1.getOperand(1);
2753  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2754  if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) {
2755  SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
2756  return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
2757  }
2758  }
2759 
2760  return SDValue();
2761 }
2762 
2763 SDValue DAGCombiner::visitSUBC(SDNode *N) {
2764  SDValue N0 = N->getOperand(0);
2765  SDValue N1 = N->getOperand(1);
2766  EVT VT = N0.getValueType();
2767  SDLoc DL(N);
2768 
2769  // If the flag result is dead, turn this into an SUB.
2770  if (!N->hasAnyUseOfValue(1))
2771  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2772  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2773 
2774  // fold (subc x, x) -> 0 + no borrow
2775  if (N0 == N1)
2776  return CombineTo(N, DAG.getConstant(0, DL, VT),
2777  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2778 
2779  // fold (subc x, 0) -> x + no borrow
2780  if (isNullConstant(N1))
2781  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2782 
2783  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2784  if (isAllOnesConstant(N0))
2785  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2786  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2787 
2788  return SDValue();
2789 }
2790 
2791 SDValue DAGCombiner::visitUSUBO(SDNode *N) {
2792  SDValue N0 = N->getOperand(0);
2793  SDValue N1 = N->getOperand(1);
2794  EVT VT = N0.getValueType();
2795  if (VT.isVector())
2796  return SDValue();
2797 
2798  EVT CarryVT = N->getValueType(1);
2799  SDLoc DL(N);
2800 
2801  // If the flag result is dead, turn this into an SUB.
2802  if (!N->hasAnyUseOfValue(1))
2803  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2804  DAG.getUNDEF(CarryVT));
2805 
2806  // fold (usubo x, x) -> 0 + no borrow
2807  if (N0 == N1)
2808  return CombineTo(N, DAG.getConstant(0, DL, VT),
2809  DAG.getConstant(0, DL, CarryVT));
2810 
2811  // fold (usubo x, 0) -> x + no borrow
2812  if (isNullConstant(N1))
2813  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2814 
2815  // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2816  if (isAllOnesConstant(N0))
2817  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2818  DAG.getConstant(0, DL, CarryVT));
2819 
2820  return SDValue();
2821 }
2822 
2823 SDValue DAGCombiner::visitSUBE(SDNode *N) {
2824  SDValue N0 = N->getOperand(0);
2825  SDValue N1 = N->getOperand(1);
2826  SDValue CarryIn = N->getOperand(2);
2827 
2828  // fold (sube x, y, false) -> (subc x, y)
2829  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2830  return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
2831 
2832  return SDValue();
2833 }
2834 
// Combine an ISD::SUBCARRY node: (subcarry x, y, borrow_in).
SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // fold (subcarry x, y, false) -> (usubo x, y)
  // A constant-zero borrow-in means the carrying subtract degenerates to an
  // overflow-flagged subtract.
  if (isNullConstant(CarryIn)) {
    // NOTE(review): the second half of this condition was dropped by the
    // rendering this file was recovered from (presumably a legality check for
    // ISD::USUBO on the result type) -- verify against upstream LLVM before
    // relying on this code.
    if (!LegalOperations ||
      return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
  }

  return SDValue();
}
2849 
/// Combine an ISD::MUL node: constant folding, canonicalization, and strength
/// reduction of multiplies by (negated) powers of two.
///
/// NOTE(review): several guard lines in this function were dropped by the
/// rendering this file was recovered from; they are flagged inline below and
/// must be checked against upstream LLVM.
SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold (mul x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  // Classify both operands: a constant splat (vector case) or a scalar
  // constant, remembering whether the constant is "opaque" (must not fold).
  bool N0IsConst = false;
  bool N1IsConst = false;
  bool N1IsOpaqueConst = false;
  bool N0IsOpaqueConst = false;
  APInt ConstValue0, ConstValue1;
  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
    N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
    assert((!N0IsConst ||
            ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
           "Splat APInt should be element width");
    assert((!N1IsConst ||
            ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
           "Splat APInt should be element width");
  } else {
    N0IsConst = isa<ConstantSDNode>(N0);
    if (N0IsConst) {
      ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
      N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
    }
    N1IsConst = isa<ConstantSDNode>(N1);
    if (N1IsConst) {
      ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
      N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
    }
  }

  // fold (mul c1, c2) -> c1*c2
  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
    return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
                                      N0.getNode(), N1.getNode());

  // canonicalize constant to RHS (vector doesn't have to splat)
  // NOTE(review): the guarding "if" for this return was dropped by the
  // rendering -- verify against upstream.
    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
  // fold (mul x, 0) -> 0
  if (N1IsConst && ConstValue1.isNullValue())
    return N1;
  // fold (mul x, 1) -> x
  if (N1IsConst && ConstValue1.isOneValue())
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (mul x, -1) -> 0-x
  if (N1IsConst && ConstValue1.isAllOnesValue()) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT), N0);
  }
  // fold (mul x, (1 << c)) -> x << c
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1) &&
      (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
    SDLoc DL(N);
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
  }
  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
    unsigned Log2Val = (-ConstValue1).logBase2();
    SDLoc DL(N);
    // FIXME: If the input is something that is easily negated (e.g. a
    // single-use add), we should put the negate there.
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT),
                       DAG.getNode(ISD::SHL, DL, VT, N0,
                                   DAG.getConstant(Log2Val, DL,
                                   getShiftAmountTy(N0.getValueType()))));
  }

  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
  if (N0.getOpcode() == ISD::SHL &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
    SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
    // NOTE(review): a guard line (likely a constant check on C3) was dropped
    // by the rendering here -- verify against upstream.
    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
  }

  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
  // use.
  {
    SDValue Sh(nullptr, 0), Y(nullptr, 0);

    // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
    if (N0.getOpcode() == ISD::SHL &&
        // NOTE(review): a condition line was dropped by the rendering here.
        N0.getNode()->hasOneUse()) {
      Sh = N0; Y = N1;
    } else if (N1.getOpcode() == ISD::SHL &&
               // NOTE(review): a condition line was dropped here as well.
               N1.getNode()->hasOneUse()) {
      Sh = N1; Y = N0;
    }

    if (Sh.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
    }
  }

  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  // NOTE(review): the opening "if (" and leading constant guards of this
  // condition were dropped by the rendering -- verify against upstream.
  N0.getOpcode() == ISD::ADD &&
      // NOTE(review): another guard line was dropped here.
      isMulAddWithConstProfitable(N, N0, N1))
    return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                       DAG.getNode(ISD::MUL, SDLoc(N0), VT,
                                   N0.getOperand(0), N1),
                       DAG.getNode(ISD::MUL, SDLoc(N1), VT,
                                   N0.getOperand(1), N1));

  // reassociate mul
  if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
    return RMUL;

  return SDValue();
}
2986 
2987 /// Return true if divmod libcall is available.
2988 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
2989  const TargetLowering &TLI) {
2990  RTLIB::Libcall LC;
2991  EVT NodeType = Node->getValueType(0);
2992  if (!NodeType.isSimple())
2993  return false;
2994  switch (NodeType.getSimpleVT().SimpleTy) {
2995  default: return false; // No libcall for vector types.
2996  case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
2997  case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
2998  case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
2999  case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3000  case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3001  }
3002 
3003  return TLI.getLibcallName(LC) != nullptr;
3004 }
3005 
/// Issue divrem if both quotient and remainder are needed.
///
/// Looks through the uses of this node's first operand for sibling DIV/REM
/// nodes with the same operands, and rewrites the whole group to share a
/// single SDIVREM/UDIVREM node. Returns the combined node's first result (or
/// an empty SDValue if no combine was done).
SDValue DAGCombiner::useDivRem(SDNode *Node) {
  if (Node->use_empty())
    return SDValue(); // This is a dead node, leave it alone.

  unsigned Opcode = Node->getOpcode();
  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;

  // DivMod lib calls can still work on non-legal types if using lib-calls.
  EVT VT = Node->getValueType(0);
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
    return SDValue();

  // If DIVREM is going to get expanded into a libcall,
  // but there is no libcall available, then don't combine.
  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
      !isDivRemLibcallAvailable(Node, isSigned, TLI))
    return SDValue();

  // If div is legal, it's better to do the normal expansion
  // OtherOpcode is the "partner" operation: REM for a DIV node, DIV for a
  // REM node.
  unsigned OtherOpcode = 0;
  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
    OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
    if (TLI.isOperationLegalOrCustom(Opcode, VT))
      return SDValue();
  } else {
    OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
      return SDValue();
  }

  // Walk all users of the dividend looking for matching DIV/REM/DIVREM nodes
  // over the same (Op0, Op1) pair and fold them into one DIVREM.
  // NOTE(review): CombineTo is called while iterating Op0's use list; this
  // relies on the DAG's use-list iteration tolerating those updates.
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue combined;
  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
         UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
    SDNode *User = *UI;
    // Skip the node itself, deleted nodes, and dead users.
    if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
        User->use_empty())
      continue;
    // Convert the other matching node(s), too;
    // otherwise, the DIVREM may get target-legalized into something
    // target-specific that we won't be able to recognize.
    unsigned UserOpc = User->getOpcode();
    if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
        User->getOperand(0) == Op0 &&
        User->getOperand(1) == Op1) {
      if (!combined) {
        if (UserOpc == OtherOpcode) {
          // First partner found: create the fused DIVREM node.
          SDVTList VTs = DAG.getVTList(VT, VT);
          combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
        } else if (UserOpc == DivRemOpc) {
          // A DIVREM over these operands already exists; reuse it.
          combined = SDValue(User, 0);
        } else {
          assert(UserOpc == Opcode);
          continue;
        }
      }
      // Redirect DIV users to result 0 and REM users to result 1.
      if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
        CombineTo(User, combined);
      else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
        CombineTo(User, combined.getValue(1));
    }
  }
  return combined;
}
3076 
  // NOTE(review): the signature line of this helper was dropped by the
  // rendering this file was recovered from. It is the shared undef/zero
  // early-out used by the SDIV/UDIV/SREM/UREM visitors below -- verify the
  // exact signature against upstream LLVM.
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // X / undef -> undef
  // X % undef -> undef
  // X / 0 -> undef
  // X % 0 -> undef
  // NOTE: This includes vectors where any divisor element is zero/undef.
  if (DAG.isUndef(N->getOpcode(), {N0, N1}))
    return DAG.getUNDEF(VT);

  // undef / X -> 0
  // undef % X -> 0
  if (N0.isUndef())
    return DAG.getConstant(0, DL, VT);

  return SDValue();
}
3098 
/// Combine an ISD::SDIV node: constant folds, trivial divisors, strength
/// reduction to UDIV, and the SDIVLike/DIVREM combines.
SDValue DAGCombiner::visitSDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (sdiv c1, c2) -> c1/c2
  // NOTE(review): the declarations of N0C/N1C (the operands viewed as
  // ConstantSDNodes) were dropped by the rendering this file was recovered
  // from -- verify against upstream.
  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
  // fold (sdiv X, 1) -> X
  if (N1C && N1C->isOne())
    return N0;
  // fold (sdiv X, -1) -> 0-X
  if (N1C && N1C->isAllOnesValue())
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
  // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
  if (N1C && N1C->getAPIntValue().isMinSignedValue())
    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
                         DAG.getConstant(1, DL, VT),
                         DAG.getConstant(0, DL, VT));

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);

  if (SDValue V = visitSDIVLike(N0, N1, N))
    return V;

  // sdiv, srem -> sdivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true. Otherwise, we break the simplification logic in visitREM().
  // NOTE(review): the declaration of Attr was dropped by the rendering here
  // -- verify against upstream.
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
      return DivRem;

  return SDValue();
}
3153 
/// Shared SDIV combines used by both visitSDIV and visitREM: expand a signed
/// divide by a (possibly negated) power of two into shifts/selects, or emit
/// the multiply-by-magic-constant sequence via BuildSDIV.
SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);
  unsigned BitWidth = VT.getScalarSizeInBits();

  // Helper for determining whether a value is a power-2 constant scalar or a
  // vector of such elements.
  auto IsPowerOfTwo = [](ConstantSDNode *C) {
    if (C->isNullValue() || C->isOpaque())
      return false;
    if (C->getAPIntValue().isPowerOf2())
      return true;
    if ((-C->getAPIntValue()).isPowerOf2())
      return true;
    return false;
  };

  // fold (sdiv X, pow2) -> simple ops after legalize
  // FIXME: We check for the exact bit here because the generic lowering gives
  // better results in that case. The target-specific lowering should learn how
  // to handle exact sdivs efficiently.
  if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
    // Target-specific implementation of sdiv x, pow2.
    if (SDValue Res = BuildSDIVPow2(N))
      return Res;

    // Create constants that are functions of the shift amount value.
    EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
    SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
    SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
    C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
    SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
    // The rest of the expansion requires the shift amounts to have folded to
    // constants; bail out otherwise.
    if (!isConstantOrConstantVector(Inexact))
      return SDValue();

    // Splat the sign bit into the register
    SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
                               DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
    AddToWorklist(Sign.getNode());

    // Add (N0 < 0) ? abs2 - 1 : 0;
    SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
    AddToWorklist(Srl.getNode());
    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
    AddToWorklist(Add.getNode());
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
    AddToWorklist(Sra.getNode());

    // Special case: (sdiv X, 1) -> X
    // Special Case: (sdiv X, -1) -> 0-X
    SDValue One = DAG.getConstant(1, DL, VT);
    SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
    SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
    SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
    SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
    Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);

    // If dividing by a positive value, we're done. Otherwise, the result must
    // be negated.
    SDValue Zero = DAG.getConstant(0, DL, VT);
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);

    // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
    SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
    SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
    return Res;
  }

  // If integer divide is expensive and we satisfy the requirements, emit an
  // alternate sequence. Targets may check function attributes for size/speed
  // trade-offs.
  // NOTE(review): the declaration of Attr was dropped by the rendering this
  // file was recovered from -- verify against upstream.
  if (isConstantOrConstantVector(N1) &&
      !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildSDIV(N))
      return Op;

  return SDValue();
}
3234 
/// Combine an ISD::UDIV node: constant folds, trivial divisors, and the
/// UDIVLike/DIVREM combines.
SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (udiv c1, c2) -> c1/c2
  // NOTE(review): the declarations of N0C/N1C (the operands viewed as
  // ConstantSDNodes) were dropped by the rendering this file was recovered
  // from -- verify against upstream.
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
                                                    N0C, N1C))
      return Folded;
  // fold (udiv X, 1) -> X
  if (N1C && N1C->isOne())
    return N0;
  // fold (udiv X, -1) -> select(X == -1, 1, 0)
  if (N1C && N1C->getAPIntValue().isAllOnesValue())
    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
                         DAG.getConstant(1, DL, VT),
                         DAG.getConstant(0, DL, VT));

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (SDValue V = visitUDIVLike(N0, N1, N))
    return V;

  // sdiv, srem -> sdivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true. Otherwise, we break the simplification logic in visitREM().
  // NOTE(review): the declaration of Attr was dropped by the rendering here
  // -- verify against upstream.
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
      return DivRem;

  return SDValue();
}
3283 
/// Shared UDIV combines used by both visitUDIV and visitREM: strength-reduce
/// divides by power-of-two (or shifted power-of-two) divisors to shifts, or
/// emit the multiply-by-magic-constant sequence via BuildUDIV.
SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // fold (udiv x, (1 << c)) -> x >>u c
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1)) {
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    AddToWorklist(LogBase2.getNode());

    // Shift amounts use the target's shift-amount type, so extend/truncate.
    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    AddToWorklist(Trunc.getNode());
    return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
  }

  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
  if (N1.getOpcode() == ISD::SHL) {
    SDValue N10 = N1.getOperand(0);
    if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
        DAG.isKnownToBeAPowerOfTwo(N10)) {
      SDValue LogBase2 = BuildLogBase2(N10, DL);
      AddToWorklist(LogBase2.getNode());

      // The addition is done in the type of the inner shift amount.
      EVT ADDVT = N1.getOperand(1).getValueType();
      SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
      AddToWorklist(Trunc.getNode());
      SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
    }
  }

  // fold (udiv x, c) -> alternate
  // NOTE(review): the declaration of Attr was dropped by the rendering this
  // file was recovered from -- verify against upstream.
  if (isConstantOrConstantVector(N1) &&
      !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildUDIV(N))
      return Op;

  return SDValue();
}
3326 
// handles ISD::SREM and ISD::UREM
// Combines a remainder node: constant folds, power-of-two masking (urem), and
// lowering X % C to X - (X/C)*C when the division-by-constant logic applies.
SDValue DAGCombiner::visitREM(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);

  bool isSigned = (Opcode == ISD::SREM);
  SDLoc DL(N);

  // fold (rem c1, c2) -> c1%c2
  // NOTE(review): the declarations of N0C/N1C (the operands viewed as
  // ConstantSDNodes) were dropped by the rendering this file was recovered
  // from -- verify against upstream.
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
      return Folded;
  // fold (urem X, -1) -> select(X == -1, 0, x)
  if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
                         DAG.getConstant(0, DL, VT), N0);

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (isSigned) {
    // If we know the sign bits of both operands are zero, strength reduce to a
    // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
  } else {
    SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
    if (DAG.isKnownToBeAPowerOfTwo(N1)) {
      // fold (urem x, pow2) -> (and x, pow2-1)
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
    if (N1.getOpcode() == ISD::SHL &&
        DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
      // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
  }

  // NOTE(review): the declaration of Attr was dropped by the rendering this
  // file was recovered from -- verify against upstream.

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
  // speculative DIV must not cause a DIVREM conversion. We guard against this
  // by skipping the simplification if isIntDivCheap(). When div is not cheap,
  // combine will not return a DIVREM. Regardless, checking cheapness here
  // makes sense since the simplification results in fatter code.
  if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
    SDValue OptimizedDiv =
        isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
    if (OptimizedDiv.getNode() && OptimizedDiv.getOpcode() != ISD::UDIVREM &&
        OptimizedDiv.getOpcode() != ISD::SDIVREM) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(OptimizedDiv.getNode());
      AddToWorklist(Mul.getNode());
      return Sub;
    }
  }

  // sdiv, srem -> sdivrem
  if (SDValue DivRem = useDivRem(N))
    return DivRem.getValue(1);

  return SDValue();
}
3405 
/// Combine an ISD::MULHS node (signed multiply returning the high half).
SDValue DAGCombiner::visitMULHS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (VT.isVector()) {
    // fold (mulhs x, 0) -> 0
    // NOTE(review): the build-vector-all-zeros guards for these two returns
    // were dropped by the rendering this file was recovered from -- verify
    // against upstream.
    return N1;
    return N0;
  }

  // fold (mulhs x, 0) -> 0
  if (isNullConstant(N1))
    return N1;
  // fold (mulhs x, 1) -> (sra x, size(x)-1)
  // The high half of x*1 is just the sign of x replicated.
  if (isOneConstant(N1))
    return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
                       DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
                                       getShiftAmountTy(N0.getValueType())));

  // fold (mulhs x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  // If the type twice as wide is legal, transform the mulhs to a wider multiply
  // plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
      N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
                       DAG.getConstant(SimpleSize, DL,
                                       getShiftAmountTy(N1.getValueType())));
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    }
  }

  return SDValue();
}
3452 
/// Combine an ISD::MULHU node (unsigned multiply returning the high half).
SDValue DAGCombiner::visitMULHU(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (VT.isVector()) {
    // fold (mulhu x, 0) -> 0
    // NOTE(review): the build-vector-all-zeros guards for these two returns
    // were dropped by the rendering this file was recovered from -- verify
    // against upstream.
    return N1;
    return N0;
  }

  // fold (mulhu x, 0) -> 0
  if (isNullConstant(N1))
    return N1;
  // fold (mulhu x, 1) -> 0
  // The high half of an unsigned x*1 is always zero.
  if (isOneConstant(N1))
    return DAG.getConstant(0, DL, N0.getValueType());
  // fold (mulhu x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
    SDLoc DL(N);
    unsigned NumEltBits = VT.getScalarSizeInBits();
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    SDValue SRLAmt = DAG.getNode(
        ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
    return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
  }

  // If the type twice as wide is legal, transform the mulhu to a wider multiply
  // plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
      N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
                       DAG.getConstant(SimpleSize, DL,
                                       getShiftAmountTy(N1.getValueType())));
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    }
  }

  return SDValue();
}
3509 
/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
/// give the opcodes for the two computations that are being performed. Return
/// the simplified value if a simplification was made, otherwise a null SDValue.
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                                unsigned HiOp) {
  // If the high half is not needed, just compute the low half.
  bool HiExists = N->hasAnyUseOfValue(1);
  if (!HiExists &&
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
    SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    // Replace both results with Res; the high result has no uses anyway.
    return CombineTo(N, Res, Res);
  }

  // If the low half is not needed, just compute the high half.
  bool LoExists = N->hasAnyUseOfValue(0);
  if (!LoExists &&
      (!LegalOperations ||
       TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
    SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If both halves are used, return as it is.
  if (LoExists && HiExists)
    return SDValue();

  // If the two computed results can be simplified separately, separate them.
  if (LoExists) {
    SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    AddToWorklist(Lo.getNode());
    SDValue LoOpt = combine(Lo.getNode());
    // Only accept the recombined node if it actually changed (and is legal
    // post-legalization); otherwise we would loop.
    if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
        (!LegalOperations ||
         TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
      return CombineTo(N, LoOpt, LoOpt);
  }

  if (HiExists) {
    SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    AddToWorklist(Hi.getNode());
    SDValue HiOpt = combine(Hi.getNode());
    if (HiOpt.getNode() && HiOpt != Hi &&
        (!LegalOperations ||
         TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
      return CombineTo(N, HiOpt, HiOpt);
  }

  return SDValue();
}
3560 
SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
  // First try the simplifications shared with UMUL_LOHI: drop an unused
  // half or simplify each half independently.
  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
    return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // If the type twice as wide is legal, transform the mulhs to a wider
  // multiply plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      // Sign-extend both operands and multiply in the double-width type.
      SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part as N1.
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
                       DAG.getConstant(SimpleSize, DL,
                                       getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part as N0.
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}
3591 
SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
  // First try the simplifications shared with SMUL_LOHI: drop an unused
  // half or simplify each half independently.
  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
    return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // If the type twice as wide is legal, transform the mulhu to a wider
  // multiply plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      // Zero-extend both operands and multiply in the double-width type.
      SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part as N1.
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
                       DAG.getConstant(SimpleSize, DL,
                                       getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part as N0.
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}
3622 
3623 SDValue DAGCombiner::visitSMULO(SDNode *N) {
3624  // (smulo x, 2) -> (saddo x, x)
3625  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3626  if (C2->getAPIntValue() == 2)
3627  return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
3628  N->getOperand(0), N->getOperand(0));
3629 
3630  return SDValue();
3631 }
3632 
3633 SDValue DAGCombiner::visitUMULO(SDNode *N) {
3634  // (umulo x, 2) -> (uaddo x, x)
3635  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3636  if (C2->getAPIntValue() == 2)
3637  return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
3638  N->getOperand(0), N->getOperand(0));
3639 
3640  return SDValue();
3641 }
3642 
SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold operation with constant operands.
  // NOTE(review): the declarations of N0C/N1C (presumably constant-node
  // queries on N0/N1) appear to have been lost in extraction here; verify
  // against the original file.
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);

  // canonicalize constant to RHS
  // NOTE(review): the guarding condition for this operand swap appears to
  // have been lost in extraction (as written, this return is unconditional);
  // verify against the original file.
    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);

  // If sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
  // Only do this if the current op isn't legal and the flipped is.
  unsigned Opcode = N->getOpcode();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isOperationLegal(Opcode, VT) &&
      (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
      (N1.isUndef() || DAG.SignBitIsZero(N1))) {
    // With the sign bits known clear, signed and unsigned min/max agree.
    unsigned AltOpcode;
    switch (Opcode) {
    case ISD::SMIN: AltOpcode = ISD::UMIN; break;
    case ISD::SMAX: AltOpcode = ISD::UMAX; break;
    case ISD::UMIN: AltOpcode = ISD::SMIN; break;
    case ISD::UMAX: AltOpcode = ISD::SMAX; break;
    default: llvm_unreachable("Unknown MINMAX opcode");
    }
    if (TLI.isOperationLegal(AltOpcode, VT))
      return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
  }

  return SDValue();
}
3685 
/// If this is a binary operator with two operands of the same opcode, try to
/// simplify it by hoisting the shared opcode above the logic op.
SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");

  // Bail early if none of these transforms apply.
  if (N0.getNumOperands() == 0) return SDValue();

  // For each of OP in AND/OR/XOR:
  // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
  // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
  // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
  // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
  //
  // do not sink logical op inside of a vector extend, since it may combine
  // into a vsetcc.
  EVT Op0VT = N0.getOperand(0).getValueType();
  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
       N0.getOpcode() == ISD::SIGN_EXTEND ||
       N0.getOpcode() == ISD::BSWAP ||
       // Avoid infinite looping with PromoteIntBinOp.
       (N0.getOpcode() == ISD::ANY_EXTEND &&
        (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
       (N0.getOpcode() == ISD::TRUNCATE &&
        // Only sink through a truncate if the truncate isn't free, so we do
        // not undo a cheap truncation.
        (!TLI.isZExtFree(VT, Op0VT) ||
         !TLI.isTruncateFree(Op0VT, VT)) &&
        TLI.isTypeLegal(Op0VT))) &&
      !VT.isVector() &&
      Op0VT == N1.getOperand(0).getValueType() &&
      (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
    // Build the logic op in the narrower/pre-conversion type, then reapply
    // the shared conversion on top.
    SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorklist(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
  }

  // For each of OP in SHL/SRL/SRA/AND...
  // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
  // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z)
  // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
       N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
      N0.getOperand(1) == N1.getOperand(1)) {
    SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorklist(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                       ORNode, N0.getOperand(1));
  }

  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
  // Only perform this optimization up until type legalization, before
  // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
  // we don't want to undo this promotion.
  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
  // on scalars.
  if ((N0.getOpcode() == ISD::BITCAST ||
       N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
      Level <= AfterLegalizeTypes) {
    SDValue In0 = N0.getOperand(0);
    SDValue In1 = N1.getOperand(0);
    EVT In0Ty = In0.getValueType();
    EVT In1Ty = In1.getValueType();
    SDLoc DL(N);
    // If both incoming values are integers, and the original types are the
    // same.
    if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
      SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
      SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
      AddToWorklist(Op.getNode());
      return BC;
    }
  }

  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
  // If both shuffles use the same mask, and both shuffle within a single
  // vector, then it is worthwhile to move the swizzle after the operation.
  // The type-legalizer generates this pattern when loading illegal
  // vector types from memory. In many cases this allows additional shuffle
  // optimizations.
  // There are other cases where moving the shuffle after the xor/and/or
  // is profitable even if shuffles don't perform a swizzle.
  // If both shuffles use the same mask, and both shuffles have the same first
  // or second operand, then it might still be profitable to move the shuffle
  // after the xor/and/or operation.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);

    assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
           "Inputs to shuffles are not the same type");

    // Check that both shuffles use the same mask. The masks are known to be of
    // the same length because the result vector type is the same.
    // Check also that shuffles have only one use to avoid introducing extra
    // instructions.
    if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
        SVN0->getMask().equals(SVN1->getMask())) {
      SDValue ShOp = N0->getOperand(1);

      // Don't try to fold this node if it requires introducing a
      // build vector of all zeros that might be illegal at this stage.
      if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
        if (!LegalTypes)
          ShOp = DAG.getConstant(0, SDLoc(N), VT);
        else
          ShOp = SDValue();
      }

      // (AND (shuf (A, C), shuf (B, C))) -> shuf (AND (A, B), C)
      // (OR (shuf (A, C), shuf (B, C))) -> shuf (OR (A, B), C)
      // (XOR (shuf (A, C), shuf (B, C))) -> shuf (XOR (A, B), V_0)
      if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                      N0->getOperand(0), N1->getOperand(0));
        AddToWorklist(NewNode.getNode());
        return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
                                    SVN0->getMask());
      }

      // Don't try to fold this node if it requires introducing a
      // build vector of all zeros that might be illegal at this stage.
      ShOp = N0->getOperand(0);
      if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
        if (!LegalTypes)
          ShOp = DAG.getConstant(0, SDLoc(N), VT);
        else
          ShOp = SDValue();
      }

      // (AND (shuf (C, A), shuf (C, B))) -> shuf (C, AND (A, B))
      // (OR (shuf (C, A), shuf (C, B))) -> shuf (C, OR (A, B))
      // (XOR (shuf (C, A), shuf (C, B))) -> shuf (V_0, XOR (A, B))
      if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                      N0->getOperand(1), N1->getOperand(1));
        AddToWorklist(NewNode.getNode());
        return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
                                    SVN0->getMask());
      }
    }
  }

  return SDValue();
}
3838 
/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
/// N0/N1 are the two setcc-equivalent operands; IsAnd selects between the
/// 'and' and 'or' variants of each fold.
SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                                       const SDLoc &DL) {
  // Decompose each operand into (LHS, RHS, condition-code).
  SDValue LL, LR, RL, RR, N0CC, N1CC;
  if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
      !isSetCCEquivalent(N1, RL, RR, N1CC))
    return SDValue();

  assert(N0.getValueType() == N1.getValueType() &&
         "Unexpected operand types for bitwise logic op");
  assert(LL.getValueType() == LR.getValueType() &&
         RL.getValueType() == RR.getValueType() &&
         "Unexpected operand types for setcc");

  // If we're here post-legalization or the logic op type is not i1, the logic
  // op type must match a setcc result type. Also, all folds require new
  // operations on the left and right operands, so those types must match.
  EVT VT = N0.getValueType();
  EVT OpVT = LL.getValueType();
  if (LegalOperations || VT.getScalarType() != MVT::i1)
    if (VT != getSetCCResultType(OpVT))
      return SDValue();
  if (OpVT != RL.getValueType())
    return SDValue();

  ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
  ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
  bool IsInteger = OpVT.isInteger();
  // Both compares against the same constant with the same predicate: the
  // pair can collapse into one compare of a combined value.
  if (LR == RR && CC0 == CC1 && IsInteger) {
    bool IsZero = isNullConstantOrNullSplatConstant(LR);
    bool IsNeg1 = isAllOnesConstantOrAllOnesSplatConstant(LR);

    // All bits clear?
    bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
    // All sign bits clear?
    bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
    // Any bits set?
    bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
    // Any sign bits set?
    bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;

    // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
    // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
    // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
    // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
    if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
      SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(Or.getNode());
      return DAG.getSetCC(DL, VT, Or, LR, CC1);
    }

    // All bits set?
    bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
    // All sign bits set?
    bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
    // Any bits clear?
    bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
    // Any sign bits clear?
    bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;

    // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
    // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
    // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
    // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
    if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
      SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(And.getNode());
      return DAG.getSetCC(DL, VT, And, LR, CC1);
    }
  }

  // TODO: What is the 'or' equivalent of this fold?
  // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
  if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
      IsInteger && CC0 == ISD::SETNE &&
      ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
       (isAllOnesConstant(LR) && isNullConstant(RR)))) {
    SDValue One = DAG.getConstant(1, DL, OpVT);
    SDValue Two = DAG.getConstant(2, DL, OpVT);
    SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
    AddToWorklist(Add.getNode());
    return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
  }

  // Try more general transforms if the predicates match and the only user of
  // the compares is the 'and' or 'or'.
  if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
      N0.hasOneUse() && N1.hasOneUse()) {
    // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
    // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
    if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
      SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
      SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
      SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
      SDValue Zero = DAG.getConstant(0, DL, OpVT);
      return DAG.getSetCC(DL, VT, Or, Zero, CC1);
    }
  }

  // Canonicalize equivalent operands to LL == RL.
  if (LL == RR && LR == RL) {
    CC1 = ISD::getSetCCSwappedOperands(CC1);
    std::swap(RL, RR);
  }

  // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  if (LL == RL && LR == RR) {
    ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
                                : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
    if (NewCC != ISD::SETCC_INVALID &&
        (!LegalOperations ||
         (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
          TLI.isOperationLegal(ISD::SETCC, OpVT))))
      return DAG.getSetCC(DL, VT, LL, LR, NewCC);
  }

  return SDValue();
}
3958 
/// This contains all DAGCombine rules which reduce two values combined by
/// an And operation to a single value. This makes them reusable in the context
/// of visitSELECT(). Rules involving constants are not included as
/// visitSELECT() already handles those cases.
SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
  EVT VT = N1.getValueType();
  SDLoc DL(N);

  // fold (and x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  // Try the setcc-pair folds (this is the 'and' variant).
  if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
    return V;

  if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
      VT.getSizeInBits() <= 64) {
    if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
        // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
        // immediate for an add, but it is legal if its top c2 bits are set,
        // transform the ADD so the immediate doesn't need to be materialized
        // in a register.
        APInt ADDC = ADDI->getAPIntValue();
        APInt SRLC = SRLI->getAPIntValue();
        if (ADDC.getMinSignedBits() <= 64 &&
            SRLC.ult(VT.getSizeInBits()) &&
            !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
          // NOTE(review): the declaration of `Mask` (presumably a high-bits
          // mask of width SRLC) appears to have been lost in extraction
          // here; verify against the original file.
                                             SRLC.getZExtValue());
          if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
            ADDC |= Mask;
            if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
              SDLoc DL0(N0);
              SDValue NewAdd =
                DAG.getNode(ISD::ADD, DL0, VT,
                            N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
              CombineTo(N0.getNode(), NewAdd);
              // Return N so it doesn't get rechecked!
              return SDValue(N, 0);
            }
          }
        }
      }
    }
  }

  // Reduce bit extract of low half of an integer to the narrower type.
  // (and (srl i64:x, K), KMask) ->
  //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
      if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
        unsigned Size = VT.getSizeInBits();
        const APInt &AndMask = CAnd->getAPIntValue();
        unsigned ShiftBits = CShift->getZExtValue();

        // Bail out, this node will probably disappear anyway.
        if (ShiftBits == 0)
          return SDValue();

        unsigned MaskBits = AndMask.countTrailingOnes();
        EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);

        if (AndMask.isMask() &&
            // Required bits must not span the two halves of the integer and
            // must fit in the half size type.
            (ShiftBits + MaskBits <= Size / 2) &&
            TLI.isNarrowingProfitable(VT, HalfVT) &&
            TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
            TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
            TLI.isTruncateFree(VT, HalfVT) &&
            TLI.isZExtFree(HalfVT, VT)) {
          // The isNarrowingProfitable is to avoid regressions on PPC and
          // AArch64 which match a few 64-bit bit insert / bit extract patterns
          // on downstream users of this. Those patterns could probably be
          // extended to handle extensions mixed in.

          // NOTE(review): `SL` is used below as a debug location; this line
          // likely read `SDLoc SL(N0);` originally — verify against the
          // original file.
          SDValue SL(N0);
          assert(MaskBits <= Size);

          // Extracting the highest bit of the low half.
          EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
                                      N0.getOperand(0));

          SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
          SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
          SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
          SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
          return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
        }
      }
    }
  }

  return SDValue();
}
4057 
4058 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
4059  EVT LoadResultTy, EVT &ExtVT) {
4060  if (!AndC->getAPIntValue().isMask())
4061  return false;
4062 
4063  unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
4064 
4065  ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4066  EVT LoadedVT = LoadN->getMemoryVT();
4067 
4068  if (ExtVT == LoadedVT &&
4069  (!LegalOperations ||
4070  TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
4071  // ZEXTLOAD will match without needing to change the size of the value being
4072  // loaded.
4073  return true;
4074  }
4075 
4076  // Do not change the width of a volatile load.
4077  if (LoadN->isVolatile())
4078  return false;
4079 
4080  // Do not generate loads of non-round integer types since these can
4081  // be expensive (and would be wrong if the type is not byte sized).
4082  if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
4083  return false;
4084 
4085  if (LegalOperations &&
4086  !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
4087  return false;
4088 
4089  if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
4090  return false;
4091 
4092  return true;
4093 }
4094 
/// Check whether the load or store \p LDST may legally be narrowed to access
/// only \p MemVT at a bit offset of \p ShAmt (must be byte-aligned).
/// \p ExtType describes the extension kind the narrowed load would use.
bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
                                    ISD::LoadExtType ExtType, EVT &MemVT,
                                    unsigned ShAmt) {
  if (!LDST)
    return false;
  // Only allow byte offsets.
  if (ShAmt % 8)
    return false;

  // Do not generate loads of non-round integer types since these can
  // be expensive (and would be wrong if the type is not byte sized).
  if (!MemVT.isRound())
    return false;

  // Don't change the width of a volatile load.
  if (LDST->isVolatile())
    return false;

  // Verify that we are actually reducing a load width here.
  if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
    return false;

  // Ensure that this isn't going to produce an unsupported unaligned access.
  if (ShAmt &&
      !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
                              LDST->getAddressSpace(), ShAmt / 8))
    return false;

  // It's not possible to generate a constant of extended or untyped type.
  EVT PtrType = LDST->getBasePtr().getValueType();
  if (PtrType == MVT::Untyped || PtrType.isExtended())
    return false;

  if (isa<LoadSDNode>(LDST)) {
    LoadSDNode *Load = cast<LoadSDNode>(LDST);
    // Don't transform one with multiple uses, this would require adding a new
    // load.
    if (!SDValue(Load, 0).hasOneUse())
      return false;

    if (LegalOperations &&
        !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
      return false;

    // For the transform to be legal, the load must produce only two values
    // (the value loaded and the chain). Don't transform a pre-increment
    // load, for example, which produces an extra value. Otherwise the
    // transformation is not equivalent, and the downstream logic to replace
    // uses gets things wrong.
    if (Load->getNumValues() > 2)
      return false;

    // If the load that we're shrinking is an extload and we're not just
    // discarding the extension we can't simply shrink the load. Bail.
    // TODO: It would be possible to merge the extensions in some cases.
    if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
        Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
      return false;

    // Let the target veto the narrowing even when it is legal.
    if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
      return false;
  } else {
    assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
    StoreSDNode *Store = cast<StoreSDNode>(LDST);
    // Can't write outside the original store
    if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
      return false;

    if (LegalOperations &&
        !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
      return false;
  }
  return true;
}
4169 
// Recursively walk the operand tree below an AND, collecting loads that can
// be narrowed to the width of the AND mask, constant-bearing logic nodes
// that need their constants narrowed, and at most one other node that must
// itself be masked.
// NOTE(review): two parameter declarations (the set of candidate loads,
// `Loads`, and the mask constant, `Mask`, both used below) appear to have
// been lost in extraction from this signature; verify against the original
// file.
bool DAGCombiner::SearchForAndLoads(SDNode *N,
                                    SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                                    SDNode *&NodeToMask) {
  // Recursively search for the operands, looking for loads which can be
  // narrowed.
  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
    SDValue Op = N->getOperand(i);

    // Vector operands are not handled by this search.
    if (Op.getValueType().isVector())
      return false;

    // Some constants may need fixing up later if they are too large.
    if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
      if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
          (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
        NodesWithConsts.insert(N);
      continue;
    }

    // Every non-constant interior node must be single-use, or masking it
    // would affect other users.
    if (!Op.hasOneUse())
      return false;

    switch(Op.getOpcode()) {
    case ISD::LOAD: {
      auto *Load = cast<LoadSDNode>(Op);
      EVT ExtVT;
      if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
          isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {

        // ZEXTLOAD is already small enough.
        if (Load->getExtensionType() == ISD::ZEXTLOAD &&
            ExtVT.bitsGE(Load->getMemoryVT()))
          continue;

        // Use LE to convert equal sized loads to zext.
        if (ExtVT.bitsLE(Load->getMemoryVT()))
          Loads.insert(Load);

        continue;
      }
      return false;
    }
    case ISD::ZERO_EXTEND:
    case ISD::AssertZext: {
      unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
      EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
      EVT VT = Op.getOpcode() == ISD::AssertZext ?
        cast<VTSDNode>(Op.getOperand(1))->getVT() :
        Op.getOperand(0).getValueType();

      // We can accept extending nodes if the mask is wider or an equal
      // width to the original type.
      if (ExtVT.bitsGE(VT))
        continue;
      break;
    }
    case ISD::OR:
    case ISD::XOR:
    case ISD::AND:
      // Recurse through further bitwise logic.
      if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
                             NodeToMask))
        return false;
      continue;
    }

    // Allow one node which will masked along with any loads found.
    if (NodeToMask)
      return false;

    // Also ensure that the node to be masked only produces one data result.
    NodeToMask = Op.getNode();
    if (NodeToMask->getNumValues() > 1) {
      bool HasValue = false;
      for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
        MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
        // Glue/chain results don't count as data results.
        if (VT != MVT::Glue && VT != MVT::Other) {
          if (HasValue) {
            NodeToMask = nullptr;
            return false;
          }
          HasValue = true;
        }
      }
      assert(HasValue && "Node to be masked has no data result?");
    }
  }
  return true;
}
4260 
// Given an AND node \p N whose RHS is a low-bits mask constant, try to push
// the mask back through the operand tree so the loads feeding it can be
// narrowed (via SearchForAndLoads), then rewrite the DAG accordingly.
// Returns true if the transformation was performed.
bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
  auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!Mask)
    return false;

  // Only a contiguous low-bits mask can become a zero-extension.
  if (!Mask->getAPIntValue().isMask())
    return false;

  // No need to do anything if the and directly uses a load.
  if (isa<LoadSDNode>(N->getOperand(0)))
    return false;

  // NOTE(review): the declaration of the `Loads` set (used below and passed
  // to SearchForAndLoads) appears to have been lost in extraction here;
  // verify against the original file.
  SmallPtrSet<SDNode*, 2> NodesWithConsts;
  SDNode *FixupNode = nullptr;
  if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
    if (Loads.size() == 0)
      return false;

    LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
    SDValue MaskOp = N->getOperand(1);

    // If it exists, fixup the single node we allow in the tree that needs
    // masking.
    if (FixupNode) {
      LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
      SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
                                FixupNode->getValueType(0),
                                SDValue(FixupNode, 0), MaskOp);
      DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
      // RAUW above also rewrote the new AND's own operand; restore it so the
      // AND still masks the original fixup node.
      if (And.getOpcode() == ISD ::AND)
        DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
    }

    // Narrow any constants that need it.
    for (auto *LogicN : NodesWithConsts) {
      SDValue Op0 = LogicN->getOperand(0);
      SDValue Op1 = LogicN->getOperand(1);

      if (isa<ConstantSDNode>(Op0))
        std::swap(Op0, Op1);

      SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
                                Op1, MaskOp);

      DAG.UpdateNodeOperands(LogicN, Op0, And);
    }

    // Create narrow loads.
    for (auto *Load : Loads) {
      LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
      SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
                                SDValue(Load, 0), MaskOp);
      DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
      // As above: re-point the AND at the original load after the RAUW.
      if (And.getOpcode() == ISD ::AND)
        And = SDValue(
            DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
      SDValue NewLoad = ReduceLoadWidth(And.getNode());
      assert(NewLoad &&
             "Shouldn't be masking the load if it can't be narrowed");
      CombineTo(Load, NewLoad, NewLoad.getValue(1));
    }
    // The mask is now redundant; replace the AND with its un-masked operand.
    DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
    return true;
  }
  return false;
}
4328 
4329 // Unfold
4330 // x & (-1 'logical shift' y)
4331 // To
4332 // (x 'opposite logical shift' y) 'logical shift' y
4333 // if it is better for performance.
4334 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
4335  assert(N->getOpcode() == ISD::AND);
4336 
4337  SDValue N0 = N->getOperand(0);
4338  SDValue N1 = N->getOperand(1);
4339 
4340  // Do we actually prefer shifts over mask?
4341  if (!TLI.preferShiftsToClearExtremeBits(N0))
4342  return SDValue();
4343 
4344  // Try to match (-1 '[outer] logical shift' y)
4345  unsigned OuterShift;
4346  unsigned InnerShift; // The opposite direction to the OuterShift.
4347  SDValue Y; // Shift amount.
4348  auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
4349  if (!M.hasOneUse())
4350  return false;
4351  OuterShift = M->getOpcode();
4352  if (OuterShift == ISD::SHL)
4353  InnerShift = ISD::SRL;
4354  else if (OuterShift == ISD::SRL)
4355  InnerShift = ISD::SHL;
4356  else
4357  return false;
4358  if (!isAllOnesConstant(M->getOperand(0)))
4359  return false;
4360  Y = M->getOperand(1);
4361  return true;
4362  };
4363 
4364  SDValue X;
4365  if (matchMask(N1))
4366  X = N0;
4367  else if (matchMask(N0))
4368  X = N1;
4369  else
4370  return SDValue();
4371 
4372  SDLoc DL(N);
4373  EVT VT = N->getValueType(0);
4374 
4375  // tmp = x 'opposite logical shift' y
4376  SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
4377  // ret = tmp 'logical shift' y
4378  SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
4379 
4380  return T1;
4381 }
4382 
// visitAND - try the AND-specific combines on node N and return the
// replacement value, or a null SDValue when nothing folded.
// NOTE(review): this listing is a doxygen rendering; several original source
// lines (those made of hyperlinked identifiers, e.g. the declarations of
// N0C/N1C and 'Constant') were dropped by the extractor, so a few statements
// below appear truncated -- confirm against the upstream file before relying
// on exact control flow here.
4383 SDValue DAGCombiner::visitAND(SDNode *N) {
4384  SDValue N0 = N->getOperand(0);
4385  SDValue N1 = N->getOperand(1);
4386  EVT VT = N1.getValueType();
4387 
4388  // x & x --> x
4389  if (N0 == N1)
4390  return N0;
4391 
4392  // fold vector ops
4393  if (VT.isVector()) {
4394  if (SDValue FoldedVOp = SimplifyVBinOp(N))
4395  return FoldedVOp;
4396 
4397  // fold (and x, 0) -> 0, vector edition
  // NOTE(review): the build-vector all-zeros / all-ones guard conditions for
  // the four returns below were elided by the rendering.
4399  // do not return N0, because undef node may exist in N0
4401  SDLoc(N), N0.getValueType());
4403  // do not return N1, because undef node may exist in N1
4405  SDLoc(N), N1.getValueType());
4406 
4407  // fold (and x, -1) -> x, vector edition
4409  return N1;
4411  return N0;
4412  }
4413 
4414  // fold (and c1, c2) -> c1&c2
  // N0C/N1C are the operands viewed as constant nodes (declarations elided in
  // this rendering; presumably obtained via the usual const-or-splat helpers
  // -- TODO confirm against the upstream file).
4417  if (N0C && N1C && !N1C->isOpaque())
4418  return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
4419  // canonicalize constant to RHS
4422  return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
4423  // fold (and x, -1) -> x
4424  if (isAllOnesConstant(N1))
4425  return N0;
4426  // if (and x, c) is known to be zero, return 0
4427  unsigned BitWidth = VT.getScalarSizeInBits();
4428  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
4429  APInt::getAllOnesValue(BitWidth)))
4430  return DAG.getConstant(0, SDLoc(N), VT);
4431 
4432  if (SDValue NewSel = foldBinOpIntoSelect(N))
4433  return NewSel;
4434 
4435  // reassociate and
4436  if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
4437  return RAND;
4438 
4439  // Try to convert a constant mask AND into a shuffle clear mask.
4440  if (VT.isVector())
4441  if (SDValue Shuffle = XformToShuffleWithZero(N))
4442  return Shuffle;
4443 
4444  // fold (and (or x, C), D) -> D if (C & D) == D
4445  auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
4446  return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
4447  };
4448  if (N0.getOpcode() == ISD::OR &&
4449  ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
4450  return N1;
4451  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
4452  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
4453  SDValue N0Op0 = N0.getOperand(0);
  // Mask holds the bits the AND clears, truncated to the pre-extension width;
  // if V already has those bits zero, the AND+any_ext is just a zero_ext.
4454  APInt Mask = ~N1C->getAPIntValue();
4455  Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
4456  if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
4457  SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
4458  N0.getValueType(), N0Op0);
4459 
4460  // Replace uses of the AND with uses of the Zero extend node.
4461  CombineTo(N, Zext);
4462 
4463  // We actually want to replace all uses of the any_extend with the
4464  // zero_extend, to avoid duplicating things. This will later cause this
4465  // AND to be folded.
4466  CombineTo(N0.getNode(), Zext);
4467  return SDValue(N, 0); // Return N so it doesn't get rechecked!
4468  }
4469  }
4470  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
4471  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
4472  // already be zero by virtue of the width of the base type of the load.
4473  //
4474  // the 'X' node here can either be nothing or an extract_vector_elt to catch
4475  // more cases.
4476  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4478  N0.getOperand(0).getOpcode() == ISD::LOAD &&
4479  N0.getOperand(0).getResNo() == 0) ||
4480  (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
4481  LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
4482  N0 : N0.getOperand(0) );
4483 
4484  // Get the constant (if applicable) the zero'th operand is being ANDed with.
4485  // This can be a pure constant or a vector splat, in which case we treat the
4486  // vector as a scalar and use the splat value.
  // 'Constant' (an APInt accumulator; its declaration was elided in this
  // rendering) collects the effective scalar mask.
4488  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
4489  Constant = C->getAPIntValue();
4490  } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
4491  APInt SplatValue, SplatUndef;
4492  unsigned SplatBitSize;
4493  bool HasAnyUndefs;
4494  bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
4495  SplatBitSize, HasAnyUndefs);
4496  if (IsSplat) {
4497  // Undef bits can contribute to a possible optimisation if set, so
4498  // set them.
4499  SplatValue |= SplatUndef;
4500 
4501  // The splat value may be something like "0x00FFFFFF", which means 0 for
4502  // the first vector value and FF for the rest, repeating. We need a mask
4503  // that will apply equally to all members of the vector, so AND all the
4504  // lanes of the constant together.
4505  EVT VT = Vector->getValueType(0);
4506  unsigned BitWidth = VT.getScalarSizeInBits();
4507 
4508  // If the splat value has been compressed to a bitlength lower
4509  // than the size of the vector lane, we need to re-expand it to
4510  // the lane size.
4511  if (BitWidth > SplatBitSize)
4512  for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
4513  SplatBitSize < BitWidth;
4514  SplatBitSize = SplatBitSize * 2)
4515  SplatValue |= SplatValue.shl(SplatBitSize);
4516 
4517  // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
4518  // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
4519  if (SplatBitSize % BitWidth == 0) {
4520  Constant = APInt::getAllOnesValue(BitWidth);
4521  for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
4522  Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
4523  }
4524  }
4525  }
4526 
4527  // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
4528  // actually legal and isn't going to get expanded, else this is a false
4529  // optimisation.
4530  bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
4531  Load->getValueType(0),
4532  Load->getMemoryVT());
4533 
4534  // Resize the constant to the same size as the original memory access before
4535  // extension. If it is still the AllOnesValue then this AND is completely
4536  // unneeded.
4537  Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
4538 
  // B records whether, for this load's extension kind, the AND can be
  // absorbed into the load (EXTLOAD only when converting to ZEXTLOAD pays).
4539  bool B;
4540  switch (Load->getExtensionType()) {
4541  default: B = false; break;
4542  case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
4543  case ISD::ZEXTLOAD:
4544  case ISD::NON_EXTLOAD: B = true; break;
4545  }
4546 
4547  if (B && Constant.isAllOnesValue()) {
4548  // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
4549  // preserve semantics once we get rid of the AND.
4550  SDValue NewLoad(Load, 0);
4551 
4552  // Fold the AND away. NewLoad may get replaced immediately.
4553  CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
4554 
4555  if (Load->getExtensionType() == ISD::EXTLOAD) {
4556  NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
4557  Load->getValueType(0), SDLoc(Load),
4558  Load->getChain(), Load->getBasePtr(),
4559  Load->getOffset(), Load->getMemoryVT(),
4560  Load->getMemOperand());
4561  // Replace uses of the EXTLOAD with the new ZEXTLOAD.
4562  if (Load->getNumValues() == 3) {
4563  // PRE/POST_INC loads have 3 values.
4564  SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
4565  NewLoad.getValue(2) };
4566  CombineTo(Load, To, 3, true);
4567  } else {
4568  CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
4569  }
4570  }
4571 
4572  return SDValue(N, 0); // Return N so it doesn't get rechecked!
4573  }
4574  }
4575 
4576  // fold (and (load x), 255) -> (zextload x, i8)
4577  // fold (and (extload x, i16), 255) -> (zextload x, i8)
4578  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
4579  if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
4580  (N0.getOpcode() == ISD::ANY_EXTEND &&
4581  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
4582  if (SDValue Res = ReduceLoadWidth(N)) {
4583  LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
4584  ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
4585 
4586  AddToWorklist(N);
4587  CombineTo(LN0, Res, Res.getValue(1));
4588  return SDValue(N, 0);
4589  }
4590  }
4591 
4592  if (Level >= AfterLegalizeTypes) {
4593  // Attempt to propagate the AND back up to the leaves which, if they're
4594  // loads, can be combined to narrow loads and the AND node can be removed.
4595  // Perform after legalization so that extend nodes will already be
4596  // combined into the loads.
4597  if (BackwardsPropagateMask(N, DAG)) {
4598  return SDValue(N, 0);
4599  }
4600  }
4601 
4602  if (SDValue Combined = visitANDLike(N0, N1, N))
4603  return Combined;
4604 
4605  // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
4606  if (N0.getOpcode() == N1.getOpcode())
4607  if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
4608  return Tmp;
4609 
4610  // Masking the negated extension of a boolean is just the zero-extended
4611  // boolean:
4612  // and (sub 0, zext(bool X)), 1 --> zext(bool X)
4613  // and (sub 0, sext(bool X)), 1 --> zext(bool X)
4614  //
4615  // Note: the SimplifyDemandedBits fold below can make an information-losing
4616  // transform, and then we have no way to find this better fold.
4617  if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
  // NOTE(review): the guard checking that the SUB's LHS is the zero constant
  // was elided by this rendering (original line 4618).
4619  SDValue SubRHS = N0.getOperand(1);
4620  if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
4621  SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
4622  return SubRHS;
4623  if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
4624  SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
4625  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
4626  }
4627  }
4628 
4629  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
4630  // fold (and (sra)) -> (and (srl)) when possible.
4631  if (SimplifyDemandedBits(SDValue(N, 0)))
4632  return SDValue(N, 0);
4633 
4634  // fold (zext_inreg (extload x)) -> (zextload x)
4635  if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
4636  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4637  EVT MemVT = LN0->getMemoryVT();
4638  // If we zero all the possible extended bits, then we can turn this into
4639  // a zextload if we are running before legalize or the operation is legal.
4640  unsigned BitWidth = N1.getScalarValueSizeInBits();
4641  if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
4642  BitWidth - MemVT.getScalarSizeInBits())) &&
4643  ((!LegalOperations && !LN0->isVolatile()) ||
4644  TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
4645  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
4646  LN0->getChain(), LN0->getBasePtr(),
4647  MemVT, LN0->getMemOperand());
4648  AddToWorklist(N);
4649  CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
4650  return SDValue(N, 0); // Return N so it doesn't get rechecked!
4651  }
4652  }
4653  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
4654  if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
4655  N0.hasOneUse()) {
4656  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4657  EVT MemVT = LN0->getMemoryVT();
4658  // If we zero all the possible extended bits, then we can turn this into
4659  // a zextload if we are running before legalize or the operation is legal.
4660  unsigned BitWidth = N1.getScalarValueSizeInBits();
4661  if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
4662  BitWidth - MemVT.getScalarSizeInBits())) &&
4663  ((!LegalOperations && !LN0->isVolatile()) ||
4664  TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
4665  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
4666  LN0->getChain(), LN0->getBasePtr(),
4667  MemVT, LN0->getMemOperand());
4668  AddToWorklist(N);
4669  CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
4670  return SDValue(N, 0); // Return N so it doesn't get rechecked!
4671  }
4672  }
4673  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
4674  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
4675  if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
4676  N0.getOperand(1), false))
4677  return BSwap;
4678  }
4679 
  // Last resort: turn the extreme-bit-clearing mask into a shift pair when
  // the target prefers that (see unfoldExtremeBitClearingToShifts above).
4680  if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
4681  return Shifts;
4682 
4683  return SDValue();
4684 }
4685 
4686 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
4687 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
4688  bool DemandHighBits) {
  // NOTE(review): this listing is a doxygen rendering; the declarations of
  // the local ConstantSDNode pointers (N01C, N11C, N001C, N101C) were elided
  // by the extractor, so several guards below appear truncated -- confirm
  // against the upstream file.
  // Only runs post-legalization, on i16/i32/i64, and only when BSWAP is
  // available on the target.
4689  if (!LegalOperations)
4690  return SDValue();
4691 
4692  EVT VT = N->getValueType(0);
4693  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
4694  return SDValue();
4695  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
4696  return SDValue();
4697 
4698  // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
4699  bool LookPassAnd0 = false;
4700  bool LookPassAnd1 = false;
  // Canonicalize so the SHL half lands in N0 and the SRL half in N1.
4701  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
4702  std::swap(N0, N1);
4703  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
4704  std::swap(N0, N1);
4705  if (N0.getOpcode() == ISD::AND) {
4706  if (!N0.getNode()->hasOneUse())
4707  return SDValue();
  // N01C is N0's mask constant (declaration elided in this rendering).
4709  // Also handle 0xffff since the LHS is guaranteed to have zeros there.
4710  // This is needed for X86.
4711  if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
4712  N01C->getZExtValue() != 0xFFFF))
4713  return SDValue();
4714  N0 = N0.getOperand(0);
4715  LookPassAnd0 = true;
4716  }
4717 
4718  if (N1.getOpcode() == ISD::AND) {
4719  if (!N1.getNode()->hasOneUse())
4720  return SDValue();
  // N11C is N1's mask constant (declaration elided in this rendering).
4722  if (!N11C || N11C->getZExtValue() != 0xFF)
4723  return SDValue();
4724  N1 = N1.getOperand(0);
4725  LookPassAnd1 = true;
4726  }
4727 
4728  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
4729  std::swap(N0, N1);
4730  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
4731  return SDValue();
4732  if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
4733  return SDValue();
4734 
  // Both shifts must be by a constant 8 (shift-amount constant declarations
  // elided here by the rendering).
4737  if (!N01C || !N11C)
4738  return SDValue();
4739  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
4740  return SDValue();
4741 
4742  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
4743  SDValue N00 = N0->getOperand(0);
4744  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
4745  if (!N00.getNode()->hasOneUse())
4746  return SDValue();
  // N001C is the inner mask constant (declaration elided in this rendering).
4748  if (!N001C || N001C->getZExtValue() != 0xFF)
4749  return SDValue();
4750  N00 = N00.getOperand(0);
4751  LookPassAnd0 = true;
4752  }
4753 
4754  SDValue N10 = N1->getOperand(0);
4755  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
4756  if (!N10.getNode()->hasOneUse())
4757  return SDValue();
  // N101C is the inner mask constant (declaration elided in this rendering).
4759  // Also allow 0xFFFF since the bits will be shifted out. This is needed
4760  // for X86.
4761  if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
4762  N101C->getZExtValue() != 0xFFFF))
4763  return SDValue();
4764  N10 = N10.getOperand(0);
4765  LookPassAnd1 = true;
4766  }
4767 
  // Both halves must ultimately shift the same source value 'a'.
4768  if (N00 != N10)
4769  return SDValue();
4770 
4771  // Make sure everything beyond the low halfword gets set to zero since the SRL
4772  // 16 will clear the top bits.
4773  unsigned OpSizeInBits = VT.getSizeInBits();
4774  if (DemandHighBits && OpSizeInBits > 16) {
4775  // If the left-shift isn't masked out then the only way this is a bswap is
4776  // if all bits beyond the low 8 are 0. In that case the entire pattern
4777  // reduces to a left shift anyway: leave it for other parts of the combiner.
4778  if (!LookPassAnd0)
4779  return SDValue();
4780 
4781  // However, if the right shift isn't masked out then it might be because
4782  // it's not needed. See if we can spot that too.
4783  if (!LookPassAnd1 &&
4784  !DAG.MaskedValueIsZero(
4785  N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
4786  return SDValue();
4787  }
4788 
  // Build (bswap a), then shift the swapped halfword down into the low bits
  // for types wider than i16.
4789  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
4790  if (OpSizeInBits > 16) {
4791  SDLoc DL(N);
4792  Res = DAG.getNode(ISD::SRL, DL, VT, Res,
4793  DAG.getConstant(OpSizeInBits - 16, DL,
4794  getShiftAmountTy(VT)));
4795  }
4796  return Res;
4797 }
4798 
4799 /// Return true if the specified node is an element that makes up a 32-bit
4800 /// packed halfword byteswap.
4801 /// ((x & 0x000000ff) << 8) |
4802 /// ((x & 0x0000ff00) >> 8) |
4803 /// ((x & 0x00ff0000) << 8) |
4804 /// ((x & 0xff000000) >> 8)
4806  if (!N.getNode()->hasOneUse())
4807  return false;
4808 
4809  unsigned Opc = N.getOpcode();
4810  if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
4811  return false;
4812 
4813  SDValue N0 = N.getOperand(0);
4814  unsigned Opc0 = N0.getOpcode();
4815  if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
4816  return false;
4817 
4818  ConstantSDNode *N1C = nullptr;
4819  // SHL or SRL: look upstream for AND mask operand
4820  if (Opc == ISD::AND)
4821  N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4822  else if (Opc0 == ISD::AND)
4823  N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4824  if (!N1C)
4825  return false;
4826 
4827  unsigned MaskByteOffset;
4828  switch (N1C->getZExtValue()) {
4829  default:
4830  return false;
4831  case 0xFF: MaskByteOffset = 0; break;
4832  case 0xFF00: MaskByteOffset = 1; break;
4833  case 0xFFFF:
4834  // In case demanded bits didn't clear the bits that will be shifted out.
4835  // This is needed for X86.
4836  if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
4837  MaskByteOffset = 1;
4838  break;
4839  }
4840  return false;
4841  case 0xFF0000: MaskByteOffset = 2; break;
4842  case 0xFF000000: MaskByteOffset = 3; break;
4843  }
4844 
4845  // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
4846  if (Opc == ISD::AND