LLVM  8.0.0svn
DAGCombiner.cpp
Go to the documentation of this file.
1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
11 // both before and after the DAG is legalized.
12 //
13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
14 // primarily intended to handle simplification opportunities that are implicit
15 // in the LLVM IR and exposed by the various codegen lowering phases.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/None.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SetVector.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/ADT/SmallSet.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/ADT/Statistic.h"
48 #include "llvm/IR/Attributes.h"
49 #include "llvm/IR/Constant.h"
50 #include "llvm/IR/DataLayout.h"
51 #include "llvm/IR/DerivedTypes.h"
52 #include "llvm/IR/Function.h"
53 #include "llvm/IR/LLVMContext.h"
54 #include "llvm/IR/Metadata.h"
55 #include "llvm/Support/Casting.h"
56 #include "llvm/Support/CodeGen.h"
58 #include "llvm/Support/Compiler.h"
59 #include "llvm/Support/Debug.h"
61 #include "llvm/Support/KnownBits.h"
67 #include <algorithm>
68 #include <cassert>
69 #include <cstdint>
70 #include <functional>
71 #include <iterator>
72 #include <string>
73 #include <tuple>
74 #include <utility>
75 
using namespace llvm;

#define DEBUG_TYPE "dagcombine"

// Counters reported with -stats; each tracks one class of combine performed
// by this pass.
STATISTIC(NodesCombined   , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of load sliced");
STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
87 
// Command-line switches controlling the combiner's alias-analysis behavior.
static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
                 cl::desc("Enable DAG combiner's use of IR alias analysis"));

static cl::opt<bool>
UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
        cl::desc("Enable DAG combiner's use of TBAA"));
95 
96 #ifndef NDEBUG
98 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
99  cl::desc("Only use DAG-combiner alias analysis in this"
100  " function"));
101 #endif
102 
/// Hidden option to stress test load slicing, i.e., when this option
/// is enabled, load slicing bypasses most of its profitability guards.
static cl::opt<bool>
StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                  cl::desc("Bypass the profitability model of load slicing"),
                  cl::init(false));

// When true, the combiner may split index arithmetic out of indexed loads.
static cl::opt<bool>
  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                    cl::desc("DAG combiner may split indexing from loads"));
113 
114 namespace {
115 
116  class DAGCombiner {
117  SelectionDAG &DAG;
118  const TargetLowering &TLI;
120  CodeGenOpt::Level OptLevel;
121  bool LegalOperations = false;
122  bool LegalTypes = false;
123  bool ForCodeSize;
124 
125  /// Worklist of all of the nodes that need to be simplified.
126  ///
127  /// This must behave as a stack -- new nodes to process are pushed onto the
128  /// back and when processing we pop off of the back.
129  ///
130  /// The worklist will not contain duplicates but may contain null entries
131  /// due to nodes being deleted from the underlying DAG.
132  SmallVector<SDNode *, 64> Worklist;
133 
134  /// Mapping from an SDNode to its position on the worklist.
135  ///
136  /// This is used to find and remove nodes from the worklist (by nulling
137  /// them) when they are deleted from the underlying DAG. It relies on
138  /// stable indices of nodes within the worklist.
139  DenseMap<SDNode *, unsigned> WorklistMap;
140 
141  /// Set of nodes which have been combined (at least once).
142  ///
143  /// This is used to allow us to reliably add any operands of a DAG node
144  /// which have not yet been combined to the worklist.
145  SmallPtrSet<SDNode *, 32> CombinedNodes;
146 
147  // AA - Used for DAG load/store alias analysis.
148  AliasAnalysis *AA;
149 
150  /// When an instruction is simplified, add all users of the instruction to
151  /// the work lists because they might get more simplified now.
152  void AddUsersToWorklist(SDNode *N) {
153  for (SDNode *Node : N->uses())
154  AddToWorklist(Node);
155  }
156 
    /// Call the node-specific routine that folds each particular type of node.
    /// Returns a null SDValue when no fold applies.
    SDValue visit(SDNode *N);
159 
160  public:
161  DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
162  : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
163  OptLevel(OL), AA(AA) {
164  ForCodeSize = DAG.getMachineFunction().getFunction().optForSize();
165 
166  MaximumLegalStoreInBits = 0;
167  for (MVT VT : MVT::all_valuetypes())
168  if (EVT(VT).isSimple() && VT != MVT::Other &&
169  TLI.isTypeLegal(EVT(VT)) &&
170  VT.getSizeInBits() >= MaximumLegalStoreInBits)
171  MaximumLegalStoreInBits = VT.getSizeInBits();
172  }
173 
174  /// Add to the worklist making sure its instance is at the back (next to be
175  /// processed.)
176  void AddToWorklist(SDNode *N) {
178  "Deleted Node added to Worklist");
179 
180  // Skip handle nodes as they can't usefully be combined and confuse the
181  // zero-use deletion strategy.
182  if (N->getOpcode() == ISD::HANDLENODE)
183  return;
184 
185  if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
186  Worklist.push_back(N);
187  }
188 
189  /// Remove all instances of N from the worklist.
190  void removeFromWorklist(SDNode *N) {
191  CombinedNodes.erase(N);
192 
193  auto It = WorklistMap.find(N);
194  if (It == WorklistMap.end())
195  return; // Not in the worklist.
196 
197  // Null out the entry rather than erasing it to avoid a linear operation.
198  Worklist[It->second] = nullptr;
199  WorklistMap.erase(It);
200  }
201 
    // Deletion helpers: remove a node and queue anything that may now fold.
    void deleteAndRecombine(SDNode *N);
    bool recursivelyDeleteUnusedNodes(SDNode *N);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                      bool AddTo = true);
208 
    /// Replaces all uses of the results of one DAG node with new values.
    /// Convenience overload for a single replacement value.
    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
      return CombineTo(N, &Res, 1, AddTo);
    }
213 
214  /// Replaces all uses of the results of one DAG node with new values.
215  SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
216  bool AddTo = true) {
217  SDValue To[] = { Res0, Res1 };
218  return CombineTo(N, To, 2, AddTo);
219  }
220 
    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

  private:
    // Widest store width (in bits) legal for the target; computed once in
    // the constructor.
    unsigned MaximumLegalStoreInBits;
225 
226  /// Check the specified integer node value to see if it can be simplified or
227  /// if things it uses can be simplified by bit propagation.
228  /// If so, return true.
229  bool SimplifyDemandedBits(SDValue Op) {
230  unsigned BitWidth = Op.getScalarValueSizeInBits();
231  APInt Demanded = APInt::getAllOnesValue(BitWidth);
232  return SimplifyDemandedBits(Op, Demanded);
233  }
234 
235  /// Check the specified vector node value to see if it can be simplified or
236  /// if things it uses can be simplified as it only uses some of the
237  /// elements. If so, return true.
238  bool SimplifyDemandedVectorElts(SDValue Op) {
239  unsigned NumElts = Op.getValueType().getVectorNumElements();
240  APInt Demanded = APInt::getAllOnesValue(NumElts);
241  return SimplifyDemandedVectorElts(Op, Demanded);
242  }
243 
    // Demanded-bits / demanded-elements workhorses (defined later in the
    // file).
    bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
    bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
                                    bool AssumeSingleUse = false);

    // Indexed load/store formation and load slicing.
    bool CombineToPreIndexedLoadStore(SDNode *N);
    bool CombineToPostIndexedLoadStore(SDNode *N);
    SDValue SplitIndexingFromLoad(LoadSDNode *LD);
    bool SliceUpLoad(SDNode *N);

    // Scalars have size 0 to distinguish from singleton vectors.
    SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
    bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
    bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);

    /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
    /// load.
    ///
    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
    /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
    /// \param EltNo index of the vector element to load.
    /// \param OriginalLoad load that EVE came from to be replaced.
    /// \returns EVE on success SDValue() on failure.
    SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
        SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);

    // Integer-promotion helpers used when widening illegal integer ops.
    void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
    SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
    SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue PromoteIntBinOp(SDValue Op);
    SDValue PromoteIntShiftOp(SDValue Op);
    SDValue PromoteExtend(SDValue Op);
    bool PromoteLoad(SDValue Op);

    /// Call the node-specific routine that knows how to fold each
    /// particular type of node. If that doesn't do anything, try the
    /// target-specific DAG combines.
    SDValue combine(SDNode *N);
281 
    // Visitation implementation - Implement dag node combining for different
    // node types. The semantics are as follows:
    // Return Value:
    //   SDValue.getNode() == 0 - No change was made
    //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
    //   otherwise              - N should be replaced by the returned Operand.
    //
    SDValue visitTokenFactor(SDNode *N);
    SDValue visitMERGE_VALUES(SDNode *N);
    SDValue visitADD(SDNode *N);
    SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitSUB(SDNode *N);
    SDValue visitADDC(SDNode *N);
    SDValue visitUADDO(SDNode *N);
    SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitSUBC(SDNode *N);
    SDValue visitUSUBO(SDNode *N);
    SDValue visitADDE(SDNode *N);
    SDValue visitADDCARRY(SDNode *N);
    SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
    SDValue visitSUBE(SDNode *N);
    SDValue visitSUBCARRY(SDNode *N);
    SDValue visitMUL(SDNode *N);
    SDValue useDivRem(SDNode *N);
    SDValue visitSDIV(SDNode *N);
    SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitUDIV(SDNode *N);
    SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitREM(SDNode *N);
    SDValue visitMULHU(SDNode *N);
    SDValue visitMULHS(SDNode *N);
    SDValue visitSMUL_LOHI(SDNode *N);
    SDValue visitUMUL_LOHI(SDNode *N);
    SDValue visitSMULO(SDNode *N);
    SDValue visitUMULO(SDNode *N);
    SDValue visitIMINMAX(SDNode *N);
    SDValue visitAND(SDNode *N);
    SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitOR(SDNode *N);
    SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitXOR(SDNode *N);
    SDValue SimplifyVBinOp(SDNode *N);
    SDValue visitSHL(SDNode *N);
    SDValue visitSRA(SDNode *N);
    SDValue visitSRL(SDNode *N);
    SDValue visitRotate(SDNode *N);
    SDValue visitABS(SDNode *N);
    SDValue visitBSWAP(SDNode *N);
    SDValue visitBITREVERSE(SDNode *N);
    SDValue visitCTLZ(SDNode *N);
    SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTTZ(SDNode *N);
    SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTPOP(SDNode *N);
    SDValue visitSELECT(SDNode *N);
    SDValue visitVSELECT(SDNode *N);
    SDValue visitSELECT_CC(SDNode *N);
    SDValue visitSETCC(SDNode *N);
    SDValue visitSETCCCARRY(SDNode *N);
    SDValue visitSIGN_EXTEND(SDNode *N);
    SDValue visitZERO_EXTEND(SDNode *N);
    SDValue visitANY_EXTEND(SDNode *N);
    SDValue visitAssertExt(SDNode *N);
    SDValue visitSIGN_EXTEND_INREG(SDNode *N);
    SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitTRUNCATE(SDNode *N);
    SDValue visitBITCAST(SDNode *N);
    SDValue visitBUILD_PAIR(SDNode *N);
    SDValue visitFADD(SDNode *N);
    SDValue visitFSUB(SDNode *N);
    SDValue visitFMUL(SDNode *N);
    SDValue visitFMA(SDNode *N);
    SDValue visitFDIV(SDNode *N);
    SDValue visitFREM(SDNode *N);
    SDValue visitFSQRT(SDNode *N);
    SDValue visitFCOPYSIGN(SDNode *N);
    SDValue visitFPOW(SDNode *N);
    SDValue visitSINT_TO_FP(SDNode *N);
    SDValue visitUINT_TO_FP(SDNode *N);
    SDValue visitFP_TO_SINT(SDNode *N);
    SDValue visitFP_TO_UINT(SDNode *N);
    SDValue visitFP_ROUND(SDNode *N);
    SDValue visitFP_ROUND_INREG(SDNode *N);
    SDValue visitFP_EXTEND(SDNode *N);
    SDValue visitFNEG(SDNode *N);
    SDValue visitFABS(SDNode *N);
    SDValue visitFCEIL(SDNode *N);
    SDValue visitFTRUNC(SDNode *N);
    SDValue visitFFLOOR(SDNode *N);
    SDValue visitFMINNUM(SDNode *N);
    SDValue visitFMAXNUM(SDNode *N);
    SDValue visitBRCOND(SDNode *N);
    SDValue visitBR_CC(SDNode *N);
    SDValue visitLOAD(SDNode *N);

    SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
    SDValue replaceStoreOfFPConstant(StoreSDNode *ST);

    SDValue visitSTORE(SDNode *N);
    SDValue visitINSERT_VECTOR_ELT(SDNode *N);
    SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
    SDValue visitBUILD_VECTOR(SDNode *N);
    SDValue visitCONCAT_VECTORS(SDNode *N);
    SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
    SDValue visitVECTOR_SHUFFLE(SDNode *N);
    SDValue visitSCALAR_TO_VECTOR(SDNode *N);
    SDValue visitINSERT_SUBVECTOR(SDNode *N);
    SDValue visitMLOAD(SDNode *N);
    SDValue visitMSTORE(SDNode *N);
    SDValue visitMGATHER(SDNode *N);
    SDValue visitMSCATTER(SDNode *N);
    SDValue visitFP_TO_FP16(SDNode *N);
    SDValue visitFP16_TO_FP(SDNode *N);

    // FMA formation from separate multiply/add(sub) nodes.
    SDValue visitFADDForFMACombine(SDNode *N);
    SDValue visitFSUBForFMACombine(SDNode *N);
    SDValue visitFMULForFMADistributiveCombine(SDNode *N);

    SDValue XformToShuffleWithZero(SDNode *N);
    SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                           SDValue N1, SDNodeFlags Flags);

    SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);

    // select / setcc folding helpers.
    SDValue foldSelectOfConstants(SDNode *N);
    SDValue foldVSelectOfConstants(SDNode *N);
    SDValue foldBinOpIntoSelect(SDNode *BO);
    bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
    SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
    SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
    SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                             SDValue N2, SDValue N3, ISD::CondCode CC,
                             bool NotExtCompare = false);
    SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
                                   SDValue N2, SDValue N3, ISD::CondCode CC);
    SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                              const SDLoc &DL);
    SDValue unfoldMaskedMerge(SDNode *N);
    SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
    SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                          const SDLoc &DL, bool foldBooleans);
    SDValue rebuildSetCC(SDValue N);

    bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                           SDValue &CC) const;
    bool isOneUseSetCC(SDValue N) const;

    SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                       unsigned HiOp);
    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
    SDValue CombineExtLoad(SDNode *N);
    SDValue CombineZExtLogicopShiftLoad(SDNode *N);
    SDValue combineRepeatedFPDivisors(SDNode *N);
    SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
    SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
    // Magic-number division expansion and Newton-Raphson estimates.
    SDValue BuildSDIV(SDNode *N);
    SDValue BuildSDIVPow2(SDNode *N);
    SDValue BuildUDIV(SDNode *N);
    SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
    SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
    SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                               bool DemandHighBits = true);
    SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
    SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg,
                              unsigned PosOpcode, unsigned NegOpcode,
                              const SDLoc &DL);
    SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
    SDValue MatchLoadCombine(SDNode *N);
    SDValue ReduceLoadWidth(SDNode *N);
    SDValue ReduceLoadOpStoreWidth(SDNode *N);
    // NOTE(review): the extraction dropped a declaration here (original line
    // 461) — TODO restore it from the upstream source.
    SDValue TransformFPLoadStorePair(SDNode *N);
    SDValue convertBuildVecZextToZext(SDNode *N);
    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
    SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
    SDValue reduceBuildVecToShuffle(SDNode *N);
    SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                  ArrayRef<int> VectorMask, SDValue VecIn1,
                                  SDValue VecIn2, unsigned LeftIdx);
    SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);

    /// Walk up chain skipping non-aliasing memory nodes,
    /// looking for aliasing nodes and adding them to the Aliases vector.
    void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                          SmallVectorImpl<SDValue> &Aliases);

    /// Return true if there is any possibility that the two addresses overlap.
    bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;

    /// Walk up chain skipping non-aliasing memory nodes, looking for a better
    /// chain (aliasing node.)
    SDValue FindBetterChain(SDNode *N, SDValue Chain);

    /// Try to replace a store and any possibly adjacent stores on
    /// consecutive chains with better chains. Return true only if St is
    /// replaced.
    ///
    /// Notice that other chains may still be replaced even if the function
    /// returns false.
    bool findBetterNeighborChains(StoreSDNode *St);
493  /// is located in a sequence of memory operations connected by a chain.
    /// Holds a pointer to an LSBaseSDNode as well as information on where it
    /// is located in a sequence of memory operations connected by a chain.
    struct MemOpLink {
      // Ptr to the mem node.
      LSBaseSDNode *MemNode;

      // Offset from the base ptr.
      int64_t OffsetFromBase;

      MemOpLink(LSBaseSDNode *N, int64_t Offset)
          : MemNode(N), OffsetFromBase(Offset) {}
    };
504 
    /// This is a helper function for visitMUL to check the profitability
    /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
    /// MulNode is the original multiply, AddNode is (add x, c1),
    /// and ConstNode is c2.
    bool isMulAddWithConstProfitable(SDNode *MulNode,
                                    SDValue &AddNode,
                                    SDValue &ConstNode);

    /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
    /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
    /// the type of the loaded value to be extended.
    bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                          EVT LoadResultTy, EVT &ExtVT);

    /// Helper function to calculate whether the given Load/Store can have its
    /// width reduced to ExtVT.
    bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
                           EVT &MemVT, unsigned ShAmt = 0);

    /// Used by BackwardsPropagateMask to find suitable loads.
    bool SearchForAndLoads(SDNode *N, SmallPtrSetImpl<LoadSDNode*> &Loads,
                           SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                           ConstantSDNode *Mask, SDNode *&NodeToMask);
    /// Attempt to propagate a given AND node back to load leaves so that they
    /// can be combined into narrow loads.
    bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);

    /// Helper function for MergeConsecutiveStores which merges the
    /// component store chains.
    SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
                                unsigned NumStores);

    /// This is a helper function for MergeConsecutiveStores. When the
    /// source elements of the consecutive stores are all constants or
    /// all extracted vector elements, try to merge them into one
    /// larger store introducing bitcasts if necessary. \return True
    /// if a merged store was created.
    bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                         EVT MemVT, unsigned NumStores,
                                         bool IsConstantSrc, bool UseVector,
                                         bool UseTrunc);

    /// This is a helper function for MergeConsecutiveStores. Stores
    /// that potentially may be merged with St are placed in
    /// StoreNodes. RootNode is a chain predecessor to all store
    /// candidates.
    void getStoreMergeCandidates(StoreSDNode *St,
                                 SmallVectorImpl<MemOpLink> &StoreNodes,
                                 SDNode *&Root);

    /// Helper function for MergeConsecutiveStores. Checks if
    /// candidate stores have indirect dependency through their
    /// operands. RootNode is the predecessor to all stores calculated
    /// by getStoreMergeCandidates and is used to prune the dependency check.
    /// \return True if safe to merge.
    bool checkMergeStoreCandidatesForDependencies(
        SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
        SDNode *RootNode);

    /// Merge consecutive store operations into a wide store.
    /// This optimization uses wide integers or vectors when possible.
    /// \return number of stores that were merged into a merged store (the
    /// affected nodes are stored as a prefix in \p StoreNodes).
    bool MergeConsecutiveStores(StoreSDNode *St);

    /// Try to transform a truncation where C is a constant:
    ///   (trunc (and X, C)) -> (and (trunc X), (trunc C))
    ///
    /// \p N needs to be a truncation and its first operand an AND. Other
    /// requirements are checked by the function (e.g. that trunc is
    /// single-use) and if missed an empty SDValue is returned.
    SDValue distributeTruncateThroughAnd(SDNode *N);
577 
578  /// Helper function to determine whether the target supports operation
579  /// given by \p Opcode for type \p VT, that is, whether the operation
580  /// is legal or custom before legalizing operations, and whether is
581  /// legal (but not custom) after legalization.
582  bool hasOperation(unsigned Opcode, EVT VT) {
583  if (LegalOperations)
584  return TLI.isOperationLegal(Opcode, VT);
585  return TLI.isOperationLegalOrCustom(Opcode, VT);
586  }
587 
  public:
    /// Runs the dag combiner on all nodes in the work list
    void Run(CombineLevel AtLevel);

    // Accessor used by the WorklistRemover listener below.
    SelectionDAG &getDAG() const { return DAG; }

    /// Returns a type large enough to hold any valid shift amount - before type
    /// legalization these can be huge.
    EVT getShiftAmountTy(EVT LHSTy) {
      assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
      return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
    }
600 
601  /// This method returns true if we are running before type legalization or
602  /// if the specified VT is legal.
603  bool isTypeLegal(const EVT &VT) {
604  if (!LegalTypes) return true;
605  return TLI.isTypeLegal(VT);
606  }
607 
    /// Convenience wrapper around TargetLowering::getSetCCResultType, using
    /// this DAG's data layout and context.
    EVT getSetCCResultType(EVT VT) const {
      return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    }
612 
613  void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
614  SDValue OrigLoad, SDValue ExtLoad,
616  };
617 
/// This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
class WorklistRemover : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistRemover(DAGCombiner &dc)
      : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  // Invoked by the DAG whenever a node is erased; keeps the combiner's
  // worklist free of dangling pointers.
  void NodeDeleted(SDNode *N, SDNode *E) override {
    DC.removeFromWorklist(N);
  }
};
631 
632 } // end anonymous namespace
633 
634 //===----------------------------------------------------------------------===//
635 // TargetLowering::DAGCombinerInfo implementation
636 //===----------------------------------------------------------------------===//
637 
639  ((DAGCombiner*)DC)->AddToWorklist(N);
640 }
641 
643 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
644  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
645 }
646 
648 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
649  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
650 }
651 
653 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
654  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
655 }
656 
659  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
660 }
661 
662 //===----------------------------------------------------------------------===//
663 // Helper Functions
664 //===----------------------------------------------------------------------===//
665 
666 void DAGCombiner::deleteAndRecombine(SDNode *N) {
667  removeFromWorklist(N);
668 
669  // If the operands of this node are only used by the node, they will now be
670  // dead. Make sure to re-visit them and recursively delete dead nodes.
671  for (const SDValue &Op : N->ops())
672  // For an operand generating multiple values, one of the values may
673  // become dead allowing further simplification (e.g. split index
674  // arithmetic from an indexed load).
675  if (Op->hasOneUse() || Op->getNumValues() > 1)
676  AddToWorklist(Op.getNode());
677 
678  DAG.DeleteNode(N);
679 }
680 
681 /// Return 1 if we can compute the negated form of the specified expression for
682 /// the same cost as the expression itself, or 2 if we can compute the negated
683 /// form more cheaply than the expression itself.
684 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
685  const TargetLowering &TLI,
686  const TargetOptions *Options,
687  unsigned Depth = 0) {
688  // fneg is removable even if it has multiple uses.
689  if (Op.getOpcode() == ISD::FNEG) return 2;
690 
691  // Don't allow anything with multiple uses unless we know it is free.
692  EVT VT = Op.getValueType();
693  const SDNodeFlags Flags = Op->getFlags();
694  if (!Op.hasOneUse())
695  if (!(Op.getOpcode() == ISD::FP_EXTEND &&
696  TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
697  return 0;
698 
699  // Don't recurse exponentially.
700  if (Depth > 6) return 0;
701 
702  switch (Op.getOpcode()) {
703  default: return false;
704  case ISD::ConstantFP: {
705  if (!LegalOperations)
706  return 1;
707 
708  // Don't invert constant FP values after legalization unless the target says
709  // the negated constant is legal.
710  return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
711  TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
712  }
713  case ISD::FADD:
714  if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
715  return 0;
716 
717  // After operation legalization, it might not be legal to create new FSUBs.
718  if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
719  return 0;
720 
721  // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
722  if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
723  Options, Depth + 1))
724  return V;
725  // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
726  return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
727  Depth + 1);
728  case ISD::FSUB:
729  // We can't turn -(A-B) into B-A when we honor signed zeros.
730  if (!Options->NoSignedZerosFPMath &&
731  !Flags.hasNoSignedZeros())
732  return 0;
733 
734  // fold (fneg (fsub A, B)) -> (fsub B, A)
735  return 1;
736 
737  case ISD::FMUL:
738  case ISD::FDIV:
739  // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
740  if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
741  Options, Depth + 1))
742  return V;
743 
744  return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
745  Depth + 1);
746 
747  case ISD::FP_EXTEND:
748  case ISD::FP_ROUND:
749  case ISD::FSIN:
750  return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
751  Depth + 1);
752  }
753 }
754 
755 /// If isNegatibleForFree returns true, return the newly negated expression.
757  bool LegalOperations, unsigned Depth = 0) {
758  const TargetOptions &Options = DAG.getTarget().Options;
759  // fneg is removable even if it has multiple uses.
760  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
761 
762  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
763 
764  const SDNodeFlags Flags = Op.getNode()->getFlags();
765 
766  switch (Op.getOpcode()) {
767  default: llvm_unreachable("Unknown code");
768  case ISD::ConstantFP: {
769  APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
770  V.changeSign();
771  return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
772  }
773  case ISD::FADD:
774  assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());
775 
776  // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
777  if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
778  DAG.getTargetLoweringInfo(), &Options, Depth+1))
779  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
780  GetNegatedExpression(Op.getOperand(0), DAG,
781  LegalOperations, Depth+1),
782  Op.getOperand(1), Flags);
783  // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
784  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
785  GetNegatedExpression(Op.getOperand(1), DAG,
786  LegalOperations, Depth+1),
787  Op.getOperand(0), Flags);
788  case ISD::FSUB:
789  // fold (fneg (fsub 0, B)) -> B
790  if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
791  if (N0CFP->isZero())
792  return Op.getOperand(1);
793 
794  // fold (fneg (fsub A, B)) -> (fsub B, A)
795  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
796  Op.getOperand(1), Op.getOperand(0), Flags);
797 
798  case ISD::FMUL:
799  case ISD::FDIV:
800  // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
801  if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
802  DAG.getTargetLoweringInfo(), &Options, Depth+1))
803  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
804  GetNegatedExpression(Op.getOperand(0), DAG,
805  LegalOperations, Depth+1),
806  Op.getOperand(1), Flags);
807 
808  // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
809  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
810  Op.getOperand(0),
811  GetNegatedExpression(Op.getOperand(1), DAG,
812  LegalOperations, Depth+1), Flags);
813 
814  case ISD::FP_EXTEND:
815  case ISD::FSIN:
816  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
817  GetNegatedExpression(Op.getOperand(0), DAG,
818  LegalOperations, Depth+1));
819  case ISD::FP_ROUND:
820  return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
821  GetNegatedExpression(Op.getOperand(0), DAG,
822  LegalOperations, Depth+1),
823  Op.getOperand(1));
824  }
825 }
826 
827 // APInts must be the same size for most operations, this helper
828 // function zero extends the shorter of the pair so that they match.
829 // We provide an Offset so that we can create bitwidths that won't overflow.
830 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
831  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
832  LHS = LHS.zextOrSelf(Bits);
833  RHS = RHS.zextOrSelf(Bits);
834 }
835 
// Return true if this node is a setcc, or is a select_cc
// that selects between the target values used for true and false, making it
// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
// the appropriate nodes based on the type of node we are checking. This
// simplifies life a bit for the callers.
bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                                    SDValue &CC) const {
  // A plain SETCC: operands are (lhs, rhs, condcode).
  if (N.getOpcode() == ISD::SETCC) {
    LHS = N.getOperand(0);
    RHS = N.getOperand(1);
    CC = N.getOperand(2);
    return true;
  }

  // Otherwise only a SELECT_CC whose true/false values are the target's
  // canonical boolean constants can stand in for a setcc.
  if (N.getOpcode() != ISD::SELECT_CC ||
      !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
      !TLI.isConstFalseVal(N.getOperand(3).getNode()))
    return false;

  // NOTE(review): the right-hand side of this comparison is missing from this
  // extraction (a dropped source line) — presumably it rejects targets whose
  // boolean contents are undefined; confirm against upstream.
  if (TLI.getBooleanContents(N.getValueType()) ==
    return false;

  // SELECT_CC operands are (lhs, rhs, truev, falsev, condcode).
  LHS = N.getOperand(0);
  RHS = N.getOperand(1);
  CC = N.getOperand(4);
  return true;
}
864 
865 /// Return true if this is a SetCC-equivalent operation with only one use.
866 /// If this is true, it allows the users to invert the operation for free when
867 /// it is profitable to do so.
868 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
869  SDValue N0, N1, N2;
870  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
871  return true;
872  return false;
873 }
874 
// Returns the SDNode if it is a constant float BuildVector
// or constant float.
// NOTE(review): the function signature line is missing from this extraction
// (dropped during conversion); the body below is otherwise unchanged.
  if (isa<ConstantFPSDNode>(N))
    return N.getNode();
  // NOTE(review): the guard for the build-vector-of-constant-FP case appears
  // to have been dropped here — this second return presumably sits under it.
    return N.getNode();
  return nullptr;
}
884 
// Determines if it is a constant integer or a build vector of constant
// integers (and undefs).
// Do not permit build vector implicit truncation.
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
  // Scalar case: any constant qualifies, unless opaque constants are excluded.
  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
    return !(Const->isOpaque() && NoOpaques);
  if (N.getOpcode() != ISD::BUILD_VECTOR)
    return false;
  // Every non-undef element must be a constant whose APInt has exactly the
  // element width — a wider APInt would mean the build vector implicitly
  // truncates.
  unsigned BitWidth = N.getScalarValueSizeInBits();
  for (const SDValue &Op : N->op_values()) {
    if (Op.isUndef())
      continue;
    // NOTE(review): the line defining 'Const' for this element (presumably a
    // dyn_cast of Op to ConstantSDNode) is missing from this extraction.
    if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
        (Const->isOpaque() && NoOpaques))
      return false;
  }
  return true;
}
904 
// Determines if it is a constant null integer or a splatted vector of a
// constant null integer (with no undefs).
// Build vector implicit truncation is not an issue for null values.
// NOTE(review): the function signature line is missing from this extraction;
// the body below is otherwise unchanged.
  // TODO: may want to use peekThroughBitcast() here.
  if (ConstantSDNode *Splat = isConstOrConstSplat(N))
    return Splat->isNullValue();
  return false;
}
914 
// Determines if it is a constant integer of one or a splatted vector of a
// constant integer of one (with no undefs).
// Do not permit build vector implicit truncation.
// NOTE(review): the function signature line is missing from this extraction;
// the body below is otherwise unchanged.
  // TODO: may want to use peekThroughBitcast() here.
  unsigned BitWidth = N.getScalarValueSizeInBits();
  if (ConstantSDNode *Splat = isConstOrConstSplat(N))
    // Require an exact-width APInt so an implicitly-truncating splat of a
    // wider constant is not accepted.
    return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
  return false;
}
925 
// Determines if it is a constant integer of all ones or a splatted vector of a
// constant integer of all ones (with no undefs).
// Do not permit build vector implicit truncation.
// NOTE(review): the function signature line is missing from this extraction;
// the body below is otherwise unchanged.
  // Look through any chain of bitcasts before checking for the splat.
  N = peekThroughBitcasts(N);
  unsigned BitWidth = N.getScalarValueSizeInBits();
  if (ConstantSDNode *Splat = isConstOrConstSplat(N))
    // Exact-width check rejects implicitly-truncating splats.
    return Splat->isAllOnesValue() &&
           Splat->getAPIntValue().getBitWidth() == BitWidth;
  return false;
}
937 
// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
// undef's.
static bool isAnyConstantBuildVector(const SDNode *N) {
// NOTE(review): the body of this function is missing from this extraction —
// presumably a return that tests for build vectors of constant integer or
// constant FP nodes. Confirm against upstream.
}
944 
SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                                    SDValue N1, SDNodeFlags Flags) {
  // Don't reassociate reductions.
  if (Flags.hasVectorReduction())
    return SDValue();

  EVT VT = N0.getValueType();
  if (N0.getOpcode() == Opc && !N0->getFlags().hasVectorReduction()) {
    // NOTE(review): two source lines are missing here in this extraction —
    // presumably nested ifs binding L (N0's constant operand) and R (N1 as a
    // constant) that the folds below rely on. Confirm against upstream.
      // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
      if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
        return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
      return SDValue();
    }
    if (N0.hasOneUse()) {
      // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
      // use
      SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
      if (!OpNode.getNode())
        return SDValue();
      AddToWorklist(OpNode.getNode());
      return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
    }
  }
  }

  if (N1.getOpcode() == Opc && !N1->getFlags().hasVectorReduction()) {
    // NOTE(review): two source lines are missing here as well — the mirrored
    // constant checks for the commuted case.
      // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
      if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
        return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
      return SDValue();
    }
    if (N1.hasOneUse()) {
      // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
      // use
      SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
      if (!OpNode.getNode())
        return SDValue();
      AddToWorklist(OpNode.getNode());
      return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
    }
  }
  }

  return SDValue();
}
994 
995 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
996  bool AddTo) {
997  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
998  ++NodesCombined;
999  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1000  To[0].getNode()->dump(&DAG);
1001  dbgs() << " and " << NumTo - 1 << " other values\n");
1002  for (unsigned i = 0, e = NumTo; i != e; ++i)
1003  assert((!To[i].getNode() ||
1004  N->getValueType(i) == To[i].getValueType()) &&
1005  "Cannot combine value to value of different type!");
1006 
1007  WorklistRemover DeadNodes(*this);
1008  DAG.ReplaceAllUsesWith(N, To);
1009  if (AddTo) {
1010  // Push the new nodes and any users onto the worklist
1011  for (unsigned i = 0, e = NumTo; i != e; ++i) {
1012  if (To[i].getNode()) {
1013  AddToWorklist(To[i].getNode());
1014  AddUsersToWorklist(To[i].getNode());
1015  }
1016  }
1017  }
1018 
1019  // Finally, if the node is now dead, remove it from the graph. The node
1020  // may not be dead if the replacement process recursively simplified to
1021  // something else needing this node.
1022  if (N->use_empty())
1023  deleteAndRecombine(N);
1024  return SDValue(N, 0);
1025 }
1026 
1027 void DAGCombiner::
1028 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1029  // Replace all uses. If any nodes become isomorphic to other nodes and
1030  // are deleted, make sure to remove them from our worklist.
1031  WorklistRemover DeadNodes(*this);
1032  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1033 
1034  // Push the new node and any (possibly new) users onto the worklist.
1035  AddToWorklist(TLO.New.getNode());
1036  AddUsersToWorklist(TLO.New.getNode());
1037 
1038  // Finally, if the node is now dead, remove it from the graph. The node
1039  // may not be dead if the replacement process recursively simplified to
1040  // something else needing this node.
1041  if (TLO.Old.getNode()->use_empty())
1042  deleteAndRecombine(TLO.Old.getNode());
1043 }
1044 
1045 /// Check the specified integer node value to see if it can be simplified or if
1046 /// things it uses can be simplified by bit propagation. If so, return true.
1047 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
1048  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1049  KnownBits Known;
1050  if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
1051  return false;
1052 
1053  // Revisit the node.
1054  AddToWorklist(Op.getNode());
1055 
1056  // Replace the old value with the new one.
1057  ++NodesCombined;
1058  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1059  dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1060  dbgs() << '\n');
1061 
1062  CommitTargetLoweringOpt(TLO);
1063  return true;
1064 }
1065 
1066 /// Check the specified vector node value to see if it can be simplified or
1067 /// if things it uses can be simplified as it only uses some of the elements.
1068 /// If so, return true.
1069 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
1070  bool AssumeSingleUse) {
1071  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1072  APInt KnownUndef, KnownZero;
1073  if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO,
1074  0, AssumeSingleUse))
1075  return false;
1076 
1077  // Revisit the node.
1078  AddToWorklist(Op.getNode());
1079 
1080  // Replace the old value with the new one.
1081  ++NodesCombined;
1082  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1083  dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1084  dbgs() << '\n');
1085 
1086  CommitTargetLoweringOpt(TLO);
1087  return true;
1088 }
1089 
1090 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1091  SDLoc DL(Load);
1092  EVT VT = Load->getValueType(0);
1093  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1094 
1095  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1096  Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1097  WorklistRemover DeadNodes(*this);
1098  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1099  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1100  deleteAndRecombine(Load);
1101  AddToWorklist(Trunc.getNode());
1102 }
1103 
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  // Produce Op's value in the wider type PVT. Replace is set when a load was
  // rebuilt, telling the caller to reroute the old load's users.
  Replace = false;
  SDLoc DL(Op);
  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
    LoadSDNode *LD = cast<LoadSDNode>(Op);
    EVT MemVT = LD->getMemoryVT();
    // NOTE(review): the line initializing ExtType is missing from this
    // extraction; the trailing ': LD->getExtensionType()' below is the else
    // arm of its conditional. Confirm against upstream.
                                     : LD->getExtensionType();
    Replace = true;
    return DAG.getExtLoad(ExtType, DL, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
  }

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default: break;
  case ISD::AssertSext:
    // Promote the asserted value with a sign extension so the assert stays
    // valid in the wider type.
    if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::AssertZext:
    // Same as above, but with a zero extension.
    if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::Constant: {
    // NOTE(review): the initializer selecting ExtOpc is missing from this
    // extraction (a dropped source line).
    unsigned ExtOpc =
    return DAG.getNode(ExtOpc, DL, PVT, Op);
  }
  }

  // Fall back to an any-extend when the target supports it at PVT.
  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
}
1140 
SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
  // NOTE(review): the guard that begins this function is missing from this
  // extraction (a dropped source line) — the orphaned 'return SDValue();'
  // below is its body; presumably it checks an operation-legality condition.
    return SDValue();
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  // If the promotion rebuilt a load, reroute the old load's users to it.
  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  // Re-establish the original narrow value's sign bits in the wide type.
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
                     DAG.getValueType(OldVT));
}
1157 
1158 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1159  EVT OldVT = Op.getValueType();
1160  SDLoc DL(Op);
1161  bool Replace = false;
1162  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1163  if (!NewOp.getNode())
1164  return SDValue();
1165  AddToWorklist(NewOp.getNode());
1166 
1167  if (Replace)
1168  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1169  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1170 }
1171 
1172 /// Promote the specified integer binary operation if the target indicates it is
1173 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1174 /// i32 since i16 instructions are longer.
1175 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1176  if (!LegalOperations)
1177  return SDValue();
1178 
1179  EVT VT = Op.getValueType();
1180  if (VT.isVector() || !VT.isInteger())
1181  return SDValue();
1182 
1183  // If operation type is 'undesirable', e.g. i16 on x86, consider
1184  // promoting it.
1185  unsigned Opc = Op.getOpcode();
1186  if (TLI.isTypeDesirableForOp(Opc, VT))
1187  return SDValue();
1188 
1189  EVT PVT = VT;
1190  // Consult target whether it is a good idea to promote this operation and
1191  // what's the right type to promote it to.
1192  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1193  assert(PVT != VT && "Don't know what type to promote to!");
1194 
1195  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1196 
1197  bool Replace0 = false;
1198  SDValue N0 = Op.getOperand(0);
1199  SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1200 
1201  bool Replace1 = false;
1202  SDValue N1 = Op.getOperand(1);
1203  SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1204  SDLoc DL(Op);
1205 
1206  SDValue RV =
1207  DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1208 
1209  // We are always replacing N0/N1's use in N and only need
1210  // additional replacements if there are additional uses.
1211  Replace0 &= !N0->hasOneUse();
1212  Replace1 &= (N0 != N1) && !N1->hasOneUse();
1213 
1214  // Combine Op here so it is preserved past replacements.
1215  CombineTo(Op.getNode(), RV);
1216 
1217  // If operands have a use ordering, make sure we deal with
1218  // predecessor first.
1219  if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1220  std::swap(N0, N1);
1221  std::swap(NN0, NN1);
1222  }
1223 
1224  if (Replace0) {
1225  AddToWorklist(NN0.getNode());
1226  ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1227  }
1228  if (Replace1) {
1229  AddToWorklist(NN1.getNode());
1230  ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1231  }
1232  return Op;
1233  }
1234  return SDValue();
1235 }
1236 
1237 /// Promote the specified integer shift operation if the target indicates it is
1238 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1239 /// i32 since i16 instructions are longer.
1240 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1241  if (!LegalOperations)
1242  return SDValue();
1243 
1244  EVT VT = Op.getValueType();
1245  if (VT.isVector() || !VT.isInteger())
1246  return SDValue();
1247 
1248  // If operation type is 'undesirable', e.g. i16 on x86, consider
1249  // promoting it.
1250  unsigned Opc = Op.getOpcode();
1251  if (TLI.isTypeDesirableForOp(Opc, VT))
1252  return SDValue();
1253 
1254  EVT PVT = VT;
1255  // Consult target whether it is a good idea to promote this operation and
1256  // what's the right type to promote it to.
1257  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1258  assert(PVT != VT && "Don't know what type to promote to!");
1259 
1260  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1261 
1262  bool Replace = false;
1263  SDValue N0 = Op.getOperand(0);
1264  SDValue N1 = Op.getOperand(1);
1265  if (Opc == ISD::SRA)
1266  N0 = SExtPromoteOperand(N0, PVT);
1267  else if (Opc == ISD::SRL)
1268  N0 = ZExtPromoteOperand(N0, PVT);
1269  else
1270  N0 = PromoteOperand(N0, PVT, Replace);
1271 
1272  if (!N0.getNode())
1273  return SDValue();
1274 
1275  SDLoc DL(Op);
1276  SDValue RV =
1277  DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1278 
1279  AddToWorklist(N0.getNode());
1280  if (Replace)
1281  ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1282 
1283  // Deal with Op being deleted.
1284  if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1285  return RV;
1286  }
1287  return SDValue();
1288 }
1289 
1290 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1291  if (!LegalOperations)
1292  return SDValue();
1293 
1294  EVT VT = Op.getValueType();
1295  if (VT.isVector() || !VT.isInteger())
1296  return SDValue();
1297 
1298  // If operation type is 'undesirable', e.g. i16 on x86, consider
1299  // promoting it.
1300  unsigned Opc = Op.getOpcode();
1301  if (TLI.isTypeDesirableForOp(Opc, VT))
1302  return SDValue();
1303 
1304  EVT PVT = VT;
1305  // Consult target whether it is a good idea to promote this operation and
1306  // what's the right type to promote it to.
1307  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1308  assert(PVT != VT && "Don't know what type to promote to!");
1309  // fold (aext (aext x)) -> (aext x)
1310  // fold (aext (zext x)) -> (zext x)
1311  // fold (aext (sext x)) -> (sext x)
1312  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1313  return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1314  }
1315  return SDValue();
1316 }
1317 
bool DAGCombiner::PromoteLoad(SDValue Op) {
  // Rebuild an undesirably-typed scalar integer load at a wider type the
  // target prefers; users get a truncate of the new load. Returns true if
  // the load was replaced.
  if (!LegalOperations)
    return false;

  // Only simple (unindexed) loads are handled.
  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    SDLoc DL(Op);
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    // NOTE(review): the line initializing ExtType is missing from this
    // extraction; the trailing ': LD->getExtensionType()' below is the else
    // arm of its conditional. Confirm against upstream.
                                     : LD->getExtensionType();
    // Rebuild the load at the wider type, then truncate back for users.
    SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   MemVT, LD->getMemOperand());
    SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);

    LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
               Result.getNode()->dump(&DAG); dbgs() << '\n');
    // Reroute both the loaded value and the chain, then delete the old load.
    WorklistRemover DeadNodes(*this);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    deleteAndRecombine(N);
    AddToWorklist(Result.getNode());
    return true;
  }
  return false;
}
1363 
/// Recursively delete a node which has no uses and any operands for
/// which it is the only use.
///
/// Note that this both deletes the nodes and removes them from the worklist.
/// It also adds any nodes who have had a user deleted to the worklist as they
/// may now have only one use and subject to other combines.
bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
  if (!N->use_empty())
    return false;

  // NOTE(review): the declaration of the 'Nodes' worklist container is
  // missing from this extraction (a dropped source line).
  Nodes.insert(N);
  do {
    N = Nodes.pop_back_val();
    if (!N)
      continue;

    if (N->use_empty()) {
      // Queue the operands first: deleting N may leave them unused too.
      for (const SDValue &ChildN : N->op_values())
        Nodes.insert(ChildN.getNode());

      removeFromWorklist(N);
      DAG.DeleteNode(N);
    } else {
      // Still-live node: one of its users was deleted, so revisit it.
      AddToWorklist(N);
    }
  } while (!Nodes.empty());
  return true;
}
1393 
1394 //===----------------------------------------------------------------------===//
1395 // Main DAG Combiner implementation
1396 //===----------------------------------------------------------------------===//
1397 
void DAGCombiner::Run(CombineLevel AtLevel) {
  // set the instance variables, so that the various visit routines may use it.
  Level = AtLevel;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  // Add all the dag nodes to the worklist.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While the worklist isn't empty, find a node and try to combine it.
  while (!WorklistMap.empty()) {
    SDNode *N;
    // The Worklist holds the SDNodes in order, but it may contain null entries.
    do {
      N = Worklist.pop_back_val();
    } while (!N);

    bool GoodWorklistEntry = WorklistMap.erase(N);
    (void)GoodWorklistEntry;
    assert(GoodWorklistEntry &&
           "Found a worklist entry without a corresponding map entry!");

    // If N has no uses, it is dead. Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (Level == AfterLegalizeDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      // Re-queue anything legalization touched.
      for (SDNode *LN : UpdatedNodes) {
        AddToWorklist(LN);
        AddUsersToWorklist(LN);
      }
      if (!NIsValid)
        continue;
    }

    LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    CombinedNodes.insert(N);
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    SDValue RV = combine(N);

    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used. Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    // NOTE(review): the first operand of this assert is missing from this
    // extraction (a dropped source line, presumably a check on N's opcode).
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));

    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      // A single-value replacement for a multi-value node is only legal when
      // the node itself produces exactly one value of the same type.
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      DAG.ReplaceAllUsesWith(N, &RV);
    }

    // Push the new node and any users onto the worklist
    AddToWorklist(RV.getNode());
    AddUsersToWorklist(RV.getNode());

    // Finally, if the node is now dead, remove it from the graph. The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node. This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load, update the root).
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}
1501 
/// Dispatch N to the opcode-specific combine routine. Returns the replacement
/// value, or a null SDValue if no combine applied to this opcode.
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor: return visitTokenFactor(N);
  case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
  case ISD::ADD: return visitADD(N);
  case ISD::SUB: return visitSUB(N);
  case ISD::ADDC: return visitADDC(N);
  case ISD::UADDO: return visitUADDO(N);
  case ISD::SUBC: return visitSUBC(N);
  case ISD::USUBO: return visitUSUBO(N);
  case ISD::ADDE: return visitADDE(N);
  case ISD::ADDCARRY: return visitADDCARRY(N);
  case ISD::SUBE: return visitSUBE(N);
  case ISD::SUBCARRY: return visitSUBCARRY(N);
  case ISD::MUL: return visitMUL(N);
  case ISD::SDIV: return visitSDIV(N);
  case ISD::UDIV: return visitUDIV(N);
  // Signed and unsigned remainder share one visitor.
  case ISD::SREM:
  case ISD::UREM: return visitREM(N);
  case ISD::MULHU: return visitMULHU(N);
  case ISD::MULHS: return visitMULHS(N);
  case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
  case ISD::SMULO: return visitSMULO(N);
  case ISD::UMULO: return visitUMULO(N);
  // All four min/max variants share one visitor.
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX: return visitIMINMAX(N);
  case ISD::AND: return visitAND(N);
  case ISD::OR: return visitOR(N);
  case ISD::XOR: return visitXOR(N);
  case ISD::SHL: return visitSHL(N);
  case ISD::SRA: return visitSRA(N);
  case ISD::SRL: return visitSRL(N);
  // Both rotate directions share one visitor.
  case ISD::ROTR:
  case ISD::ROTL: return visitRotate(N);
  case ISD::ABS: return visitABS(N);
  case ISD::BSWAP: return visitBSWAP(N);
  case ISD::BITREVERSE: return visitBITREVERSE(N);
  case ISD::CTLZ: return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ: return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP: return visitCTPOP(N);
  case ISD::SELECT: return visitSELECT(N);
  case ISD::VSELECT: return visitVSELECT(N);
  case ISD::SELECT_CC: return visitSELECT_CC(N);
  case ISD::SETCC: return visitSETCC(N);
  case ISD::SETCCCARRY: return visitSETCCCARRY(N);
  case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
  // Both assert-extension flavors share one visitor.
  case ISD::AssertSext:
  case ISD::AssertZext: return visitAssertExt(N);
  case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE: return visitTRUNCATE(N);
  case ISD::BITCAST: return visitBITCAST(N);
  case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
  case ISD::FADD: return visitFADD(N);
  case ISD::FSUB: return visitFSUB(N);
  case ISD::FMUL: return visitFMUL(N);
  case ISD::FMA: return visitFMA(N);
  case ISD::FDIV: return visitFDIV(N);
  case ISD::FREM: return visitFREM(N);
  case ISD::FSQRT: return visitFSQRT(N);
  case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
  case ISD::FPOW: return visitFPOW(N);
  case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
  case ISD::FP_ROUND: return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND: return visitFP_EXTEND(N);
  case ISD::FNEG: return visitFNEG(N);
  case ISD::FABS: return visitFABS(N);
  case ISD::FFLOOR: return visitFFLOOR(N);
  case ISD::FMINNUM: return visitFMINNUM(N);
  case ISD::FMAXNUM: return visitFMAXNUM(N);
  case ISD::FCEIL: return visitFCEIL(N);
  case ISD::FTRUNC: return visitFTRUNC(N);
  case ISD::BRCOND: return visitBRCOND(N);
  case ISD::BR_CC: return visitBR_CC(N);
  case ISD::LOAD: return visitLOAD(N);
  case ISD::STORE: return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
  case ISD::MGATHER: return visitMGATHER(N);
  case ISD::MLOAD: return visitMLOAD(N);
  case ISD::MSCATTER: return visitMSCATTER(N);
  case ISD::MSTORE: return visitMSTORE(N);
  case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
  case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
  }
  return SDValue();
}
1608 
SDValue DAGCombiner::combine(SDNode *N) {
  // Try, in order: the generic visit routines, target-specific combines,
  // operation promotion, and finally CSE against the commuted form.
  SDValue RV = visit(N);

  // If nothing happened, try a target-specific DAG combine.
  if (!RV.getNode()) {
    // NOTE(review): the first line of this assert is missing from this
    // extraction (presumably a check that N was not deleted).
           "Node was deleted but visit returned NULL!");

    if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
    // NOTE(review): the second half of this condition is missing from this
    // extraction (presumably a query whether the target combines this
    // opcode).

      // Expose the DAG combiner to the target combiner impls.
      // NOTE(review): the type of DagCombineInfo is missing here (a dropped
      // source line).
          DagCombineInfo(DAG, Level, false, this);

      RV = TLI.PerformDAGCombine(N, DagCombineInfo);
    }
  }

  // If nothing happened still, try promoting the operation.
  if (!RV.getNode()) {
    switch (N->getOpcode()) {
    default: break;
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
      RV = PromoteIntBinOp(SDValue(N, 0));
      break;
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:
      RV = PromoteIntShiftOp(SDValue(N, 0));
      break;
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::ANY_EXTEND:
      RV = PromoteExtend(SDValue(N, 0));
      break;
    case ISD::LOAD:
      // PromoteLoad replaces N in place, so report N itself as the result.
      if (PromoteLoad(SDValue(N, 0)))
        RV = SDValue(N, 0);
      break;
    }
  }

  // If N is a commutative binary node, try eliminate it if the commuted
  // version is already present in the DAG.
  if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
      N->getNumValues() == 1) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);

    // Constant operands are canonicalized to RHS.
    if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
      SDValue Ops[] = {N1, N0};
      SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
                                            N->getFlags());
      if (CSENode)
        return SDValue(CSENode, 0);
    }
  }

  return RV;
}
1676 
/// Given a node, return its input chain if it has one, otherwise return a null
/// sd operand.
// NOTE(review): the function signature line is missing from this extraction;
// the body below is otherwise unchanged.
  if (unsigned NumOps = N->getNumOperands()) {
    // By convention the chain is usually the first or last operand; scan the
    // middle operands only as a fallback.
    if (N->getOperand(0).getValueType() == MVT::Other)
      return N->getOperand(0);
    if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
      return N->getOperand(NumOps-1);
    for (unsigned i = 1; i < NumOps-1; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other)
        return N->getOperand(i);
  }
  return SDValue();
}
1691 
// Simplify a TokenFactor: drop redundant chains, flatten nested single-use
// TokenFactors, and prune operands already reachable through another operand.
1692 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1693  // If N has two operands, where one has an input chain equal to the other,
1694  // the 'other' chain is redundant.
1695  if (N->getNumOperands() == 2) {
1696  if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1697  return N->getOperand(0);
1698  if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1699  return N->getOperand(1);
1700  }
1701 
1702  // Don't simplify token factors if optnone.
1703  if (OptLevel == CodeGenOpt::None)
1704  return SDValue();
1705 
1706  SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1707  SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1708  SmallPtrSet<SDNode*, 16> SeenOps;
1709  bool Changed = false; // If we should replace this token factor.
1710 
1711  // Start out with this token factor.
1712  TFs.push_back(N);
1713 
1714  // Iterate through token factors. The TFs grows when new token factors are
1715  // encountered.
1716  for (unsigned i = 0; i < TFs.size(); ++i) {
1717  SDNode *TF = TFs[i];
1718 
1719  // Check each of the operands.
1720  for (const SDValue &Op : TF->op_values()) {
1721  switch (Op.getOpcode()) {
1722  case ISD::EntryToken:
1723  // Entry tokens don't need to be added to the list. They are
1724  // redundant.
1725  Changed = true;
1726  break;
1727 
1728  case ISD::TokenFactor:
1729  if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1730  // Queue up for processing.
1731  TFs.push_back(Op.getNode());
1732  // Clean up in case the token factor is removed.
1733  AddToWorklist(Op.getNode());
1734  Changed = true;
1735  break;
1736  }
// NOTE(review): extraction elided line 1737 here — presumably
// LLVM_FALLTHROUGH; so a multi-use nested TokenFactor falls through to the
// default case below and is kept as a plain operand. Confirm upstream.
1738 
1739  default:
1740  // Only add if it isn't already in the list.
1741  if (SeenOps.insert(Op.getNode()).second)
1742  Ops.push_back(Op);
1743  else
1744  Changed = true;
1745  break;
1746  }
1747  }
1748  }
1749 
1750  // Remove Nodes that are chained to another node in the list. Do so
1751  // by walking up chains breadth-first stopping when we've seen
1752  // another operand. In general we must climb to the EntryNode, but we can exit
1753  // early if we find all remaining work is associated with just one operand as
1754  // no further pruning is possible.
1755 
1756  // List of nodes to search through and original Ops from which they originate.
// NOTE(review): extraction elided line 1757 — presumably the declaration of
// `Worklist` (pairs of SDNode* plus the originating-Op index) used below.
1758  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1759  SmallPtrSet<SDNode *, 16> SeenChains;
1760  bool DidPruneOps = false;
1761 
1762  unsigned NumLeftToConsider = 0;
1763  for (const SDValue &Op : Ops) {
1764  Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1765  OpWorkCount.push_back(1);
1766  }
1767 
1768  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1769  // If this is an Op, we can remove the op from the list. Remark any
1770  // search associated with it as from the current OpNumber.
1771  if (SeenOps.count(Op) != 0) {
1772  Changed = true;
1773  DidPruneOps = true;
1774  unsigned OrigOpNumber = 0;
1775  while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1776  OrigOpNumber++;
1777  assert((OrigOpNumber != Ops.size()) &&
1778  "expected to find TokenFactor Operand");
1779  // Re-mark worklist from OrigOpNumber to OpNumber
1780  for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1781  if (Worklist[i].second == OrigOpNumber) {
1782  Worklist[i].second = OpNumber;
1783  }
1784  }
1785  OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1786  OpWorkCount[OrigOpNumber] = 0;
1787  NumLeftToConsider--;
1788  }
1789  // Add if it's a new chain
1790  if (SeenChains.insert(Op).second) {
1791  OpWorkCount[OpNumber]++;
1792  Worklist.push_back(std::make_pair(Op, OpNumber));
1793  }
1794  };
1795 
// Bounded breadth-first walk up the chains (hard cap of 1024 worklist
// entries keeps this combine from blowing up on huge chain graphs).
1796  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1797  // We need to consider at least 2 Ops for pruning to be possible.
1798  if (NumLeftToConsider <= 1)
1799  break;
1800  auto CurNode = Worklist[i].first;
1801  auto CurOpNumber = Worklist[i].second;
1802  assert((OpWorkCount[CurOpNumber] > 0) &&
1803  "Node should not appear in worklist");
1804  switch (CurNode->getOpcode()) {
1805  case ISD::EntryToken:
1806  // Hitting EntryToken is the only way for the search to terminate without
1807  // hitting
1808  // another operand's search. Prevent us from marking this operand
1809  // considered.
1810  NumLeftToConsider++;
1811  break;
1812  case ISD::TokenFactor:
1813  for (const SDValue &Op : CurNode->op_values())
1814  AddToWorklist(i, Op.getNode(), CurOpNumber);
1815  break;
1816  case ISD::CopyFromReg:
1817  case ISD::CopyToReg:
1818  AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1819  break;
1820  default:
1821  if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1822  AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1823  break;
1824  }
1825  OpWorkCount[CurOpNumber]--;
1826  if (OpWorkCount[CurOpNumber] == 0)
1827  NumLeftToConsider--;
1828  }
1829 
1830  // If we've changed things around then replace token factor.
1831  if (Changed) {
1832  SDValue Result;
1833  if (Ops.empty()) {
1834  // The entry token is the only possible outcome.
1835  Result = DAG.getEntryNode();
1836  } else {
1837  if (DidPruneOps) {
1838  SmallVector<SDValue, 8> PrunedOps;
1839  // Keep only the ops that were never reached from another op's chain.
1840  for (const SDValue &Op : Ops) {
1841  if (SeenChains.count(Op.getNode()) == 0)
1842  PrunedOps.push_back(Op);
1843  }
1844  Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
1845  } else {
1846  Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
1847  }
1848  }
1849  return Result;
1850  }
1851  return SDValue();
1852 }
1853 
1854 /// MERGE_VALUES can always be eliminated.
1855 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1856  WorklistRemover DeadNodes(*this);
1857  // Replacing results may cause a different MERGE_VALUES to suddenly
1858  // be CSE'd with N, and carry its uses with it. Iterate until no
1859  // uses remain, to ensure that the node can be safely deleted.
1860  // First add the users of this node to the work list so that they
1861  // can be tried again once they have new operands.
1862  AddUsersToWorklist(N);
1863  do {
1864  // Do as a single replacement to avoid rewalking use lists.
// NOTE(review): extraction elided line 1865 — presumably
// `SmallVector<SDValue, 8> Ops;`, the buffer filled below. Confirm upstream.
1866  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1867  Ops.push_back(N->getOperand(i));
// Replace every result of N with the corresponding operand in one pass.
1868  DAG.ReplaceAllUsesWith(N, Ops.data());
1869  } while (!N->use_empty());
1870  deleteAndRecombine(N);
1871  return SDValue(N, 0); // Return N so it doesn't get rechecked!
1872 }
1873 
1874 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
1875 /// ConstantSDNode pointer else nullptr.
// NOTE(review): extraction elided lines 1876-1877 — presumably the signature
// `static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {` and
// `ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);`. Confirm upstream.
1878  return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1879 }
1880 
// Fold (binop (select Cond, CT, CF), CBO) into
// (select Cond, (binop CT, CBO), (binop CF, CBO)) when the select dies.
// NOTE(review): several continuation lines of multi-line conditions were
// elided by the extraction (1899, 1904, 1913-1917, 1922, 1944, 1951); the
// `return SDValue();` lines below sit under those partially-missing guards.
1881 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
1882  assert(ISD::isBinaryOp(BO) && "Unexpected binary operator");
1883 
1884  // Don't do this unless the old select is going away. We want to eliminate the
1885  // binary operator, not replace a binop with a select.
1886  // TODO: Handle ISD::SELECT_CC.
1887  unsigned SelOpNo = 0;
1888  SDValue Sel = BO->getOperand(0);
1889  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
1890  SelOpNo = 1;
1891  Sel = BO->getOperand(1);
1892  }
1893 
1894  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1895  return SDValue();
1896 
1897  SDValue CT = Sel.getOperand(1);
1898  if (!isConstantOrConstantVector(CT, true) &&
1900  return SDValue();
1901 
1902  SDValue CF = Sel.getOperand(2);
1903  if (!isConstantOrConstantVector(CF, true) &&
1905  return SDValue();
1906 
1907  // Bail out if any constants are opaque because we can't constant fold those.
1908  // The exception is "and" and "or" with either 0 or -1 in which case we can
1909  // propagate non constant operands into select. I.e.:
1910  // and (select Cond, 0, -1), X --> select Cond, 0, X
1911  // or X, (select Cond, -1, 0) --> select Cond, -1, X
1912  auto BinOpcode = BO->getOpcode();
1913  bool CanFoldNonConst = (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
1918 
1919  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
1920  if (!CanFoldNonConst &&
1921  !isConstantOrConstantVector(CBO, true) &&
1923  return SDValue();
1924 
1925  EVT VT = Sel.getValueType();
1926 
1927  // In case of shift value and shift amount may have different VT. For instance
1928  // on x86 shift amount is i8 regardless of LHS type. Bail out if we have
1929  // swapped operands and value types do not match. NB: x86 is fine if operands
1930  // are not swapped with shift amount VT being not bigger than shifted value.
1931  // TODO: that is possible to check for a shift operation, correct VTs and
1932  // still perform optimization on x86 if needed.
1933  if (SelOpNo && VT != CBO.getValueType())
1934  return SDValue();
1935 
1936  // We have a select-of-constants followed by a binary operator with a
1937  // constant. Eliminate the binop by pulling the constant math into the select.
1938  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
1939  SDLoc DL(Sel);
1940  SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
1941  : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
1942  if (!CanFoldNonConst && !NewCT.isUndef() &&
1943  !isConstantOrConstantVector(NewCT, true) &&
1945  return SDValue();
1946 
1947  SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
1948  : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
1949  if (!CanFoldNonConst && !NewCF.isUndef() &&
1950  !isConstantOrConstantVector(NewCF, true) &&
1952  return SDValue();
1953 
1954  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
1955 }
1956 
// NOTE(review): extraction elided line 1957 — presumably the signature
// `static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {`
// (the name is visible at the call site in visitADD). Confirm upstream.
1958  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
1959  "Expecting add or sub");
1960 
1961  // Match a constant operand and a zext operand for the math instruction:
1962  // add Z, C
1963  // sub C, Z
1964  bool IsAdd = N->getOpcode() == ISD::ADD;
1965  SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
1966  SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
1967  auto *CN = dyn_cast<ConstantSDNode>(C);
1968  if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
1969  return SDValue();
1970 
1971  // Match the zext operand as a setcc of a boolean.
1972  if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
1973  Z.getOperand(0).getValueType() != MVT::i1)
1974  return SDValue();
1975 
1976  // Match the compare as: setcc (X & 1), 0, eq.
1977  SDValue SetCC = Z.getOperand(0);
1978  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
1979  if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
1980  SetCC.getOperand(0).getOpcode() != ISD::AND ||
1981  !isOneConstant(SetCC.getOperand(0).getOperand(1)))
1982  return SDValue();
1983 
1984  // We are adding/subtracting a constant and an inverted low bit. Turn that
1985  // into a subtract/add of the low bit with incremented/decremented constant:
1986  // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
1987  // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
1988  EVT VT = C.getValueType();
1989  SDLoc DL(N);
1990  SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
1991  SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
1992  DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
1993  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
1994 }
1995 
1996 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
1997 /// a shift and add with a different constant.
// NOTE(review): extraction elided line 1998 — presumably the signature
// `static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {`
// (the name is visible at the call site in visitADD). Confirm upstream.
1999  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2000  "Expecting add or sub");
2001 
2002  // We need a constant operand for the add/sub, and the other operand is a
2003  // logical shift right: add (srl), C or sub C, (srl).
2004  bool IsAdd = N->getOpcode() == ISD::ADD;
2005  SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2006  SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2007  ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
2008  if (!C || ShiftOp.getOpcode() != ISD::SRL)
2009  return SDValue();
2010 
2011  // The shift must be of a 'not' value.
2012  SDValue Not = ShiftOp.getOperand(0);
2013  if (!Not.hasOneUse() || !isBitwiseNot(Not))
2014  return SDValue();
2015 
2016  // The shift must be moving the sign bit to the least-significant-bit.
2017  EVT VT = ShiftOp.getValueType();
2018  SDValue ShAmt = ShiftOp.getOperand(1);
2019  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2020  if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1)
2021  return SDValue();
2022 
2023  // Eliminate the 'not' by adjusting the shift and add/sub constant:
2024  // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2025  // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2026  SDLoc DL(N);
2027  auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2028  SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2029  APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
2030  return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
2031 }
2032 
// Main combine entry point for ISD::ADD.
// NOTE(review): several single-line guard conditions were elided by the
// extraction (2045/2047, 2058/2060, 2086, 2116, 2121, 2158, 2179); the
// indented statements below those gaps sit under the missing `if`s.
2033 SDValue DAGCombiner::visitADD(SDNode *N) {
2034  SDValue N0 = N->getOperand(0);
2035  SDValue N1 = N->getOperand(1);
2036  EVT VT = N0.getValueType();
2037  SDLoc DL(N);
2038 
2039  // fold vector ops
2040  if (VT.isVector()) {
2041  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2042  return FoldedVOp;
2043 
2044  // fold (add x, 0) -> x, vector edition
2046  return N0;
2048  return N1;
2049  }
2050 
2051  // fold (add x, undef) -> undef
2052  if (N0.isUndef())
2053  return N0;
2054 
2055  if (N1.isUndef())
2056  return N1;
2057 
2059  // canonicalize constant to RHS
2061  return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2062  // fold (add c1, c2) -> c1+c2
2063  return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
2064  N1.getNode());
2065  }
2066 
2067  // fold (add x, 0) -> x
2068  if (isNullConstant(N1))
2069  return N0;
2070 
2071  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2072  // fold ((c1-A)+c2) -> (c1+c2)-A
2073  if (N0.getOpcode() == ISD::SUB &&
2074  isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2075  // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
2076  return DAG.getNode(ISD::SUB, DL, VT,
2077  DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2078  N0.getOperand(1));
2079  }
2080 
2081  // add (sext i1 X), 1 -> zext (not i1 X)
2082  // We don't transform this pattern:
2083  // add (zext i1 X), -1 -> sext (not i1 X)
2084  // because most (?) targets generate better code for the zext form.
2085  if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2087  SDValue X = N0.getOperand(0);
2088  if ((!LegalOperations ||
2089  (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2090  TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2091  X.getScalarValueSizeInBits() == 1) {
2092  SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2093  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2094  }
2095  }
2096 
2097  // Undo the add -> or combine to merge constant offsets from a frame index.
2098  if (N0.getOpcode() == ISD::OR &&
2099  isa<FrameIndexSDNode>(N0.getOperand(0)) &&
2100  isa<ConstantSDNode>(N0.getOperand(1)) &&
2101  DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2102  SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
2103  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2104  }
2105  }
2106 
2107  if (SDValue NewSel = foldBinOpIntoSelect(N))
2108  return NewSel;
2109 
2110  // reassociate add
2111  if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2112  return RADD;
2113 
2114  // fold ((0-A) + B) -> B-A
2115  if (N0.getOpcode() == ISD::SUB &&
2117  return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2118 
2119  // fold (A + (0-B)) -> A-B
2120  if (N1.getOpcode() == ISD::SUB &&
2122  return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2123 
2124  // fold (A+(B-A)) -> B
2125  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2126  return N1.getOperand(0);
2127 
2128  // fold ((B-A)+A) -> B
2129  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2130  return N0.getOperand(0);
2131 
2132  // fold (A+(B-(A+C))) to (B-C)
2133  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2134  N0 == N1.getOperand(1).getOperand(0))
2135  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2136  N1.getOperand(1).getOperand(1));
2137 
2138  // fold (A+(B-(C+A))) to (B-C)
2139  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2140  N0 == N1.getOperand(1).getOperand(1))
2141  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2142  N1.getOperand(1).getOperand(0));
2143 
2144  // fold (A+((B-A)+or-C)) to (B+or-C)
2145  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2146  N1.getOperand(0).getOpcode() == ISD::SUB &&
2147  N0 == N1.getOperand(0).getOperand(1))
2148  return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2149  N1.getOperand(1));
2150 
2151  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2152  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2153  SDValue N00 = N0.getOperand(0);
2154  SDValue N01 = N0.getOperand(1);
2155  SDValue N10 = N1.getOperand(0);
2156  SDValue N11 = N1.getOperand(1);
2157 
2159  return DAG.getNode(ISD::SUB, DL, VT,
2160  DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2161  DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2162  }
2163 
2164  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2165  return V;
2166 
2167  if (SDValue V = foldAddSubOfSignBit(N, DAG))
2168  return V;
2169 
2170  if (SimplifyDemandedBits(SDValue(N, 0)))
2171  return SDValue(N, 0);
2172 
2173  // fold (a+b) -> (a|b) iff a and b share no bits.
2174  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2175  DAG.haveNoCommonBitsSet(N0, N1))
2176  return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2177 
2178  // fold (add (xor a, -1), 1) -> (sub 0, a)
2180  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2181  N0.getOperand(0));
2182 
// Try the asymmetric ADD-like combines in both operand orders.
2183  if (SDValue Combined = visitADDLike(N0, N1, N))
2184  return Combined;
2185 
2186  if (SDValue Combined = visitADDLike(N1, N0, N))
2187  return Combined;
2188 
2189  return SDValue();
2190 }
2191 
/// If \p V (possibly wrapped in TRUNCATE/ZERO_EXTEND/AND-with-1 introduced by
/// legalization) is the carry result of an ADDCARRY/SUBCARRY/UADDO/USUBO
/// node, return that carry value; otherwise return a null SDValue.
2192 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2193  bool Masked = false;
2194 
2195  // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2196  while (true) {
2197  if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2198  V = V.getOperand(0);
2199  continue;
2200  }
2201 
2202  if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2203  Masked = true;
2204  V = V.getOperand(0);
2205  continue;
2206  }
2207 
2208  break;
2209  }
2210 
2211  // If this is not a carry, return.
2212  if (V.getResNo() != 1)
2213  return SDValue();
2214 
2215  if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2216  V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2217  return SDValue();
2218 
2219  // If the result is masked, then no matter what kind of bool it is we can
2220  // return. If it isn't, then we need to make sure the bool type is either 0 or
2221  // 1 and not other values.
2222  if (Masked ||
2223  TLI.getBooleanContents(V.getValueType()) ==
// NOTE(review): extraction elided line 2224 — presumably
// `TargetLowering::ZeroOrOneBooleanContent)`. Confirm upstream.
2225  return V;
2226 
2227  return SDValue();
2228 }
2229 
// Asymmetric ADD combines; called twice from visitADD with operands swapped.
// NOTE(review): guard-condition continuation lines 2236, 2250, 2257 and 2279
// were elided by the extraction.
2230 SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
2231  EVT VT = N0.getValueType();
2232  SDLoc DL(LocReference);
2233 
2234  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2235  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2237  return DAG.getNode(ISD::SUB, DL, VT, N0,
2238  DAG.getNode(ISD::SHL, DL, VT,
2239  N1.getOperand(0).getOperand(1),
2240  N1.getOperand(1)));
2241 
2242  if (N1.getOpcode() == ISD::AND) {
2243  SDValue AndOp0 = N1.getOperand(0);
2244  unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
2245  unsigned DestBits = VT.getScalarSizeInBits();
2246 
2247  // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
2248  // and similar xforms where the inner op is either ~0 or 0.
2249  if (NumSignBits == DestBits &&
2251  return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
2252  }
2253 
2254  // add (sext i1), X -> sub X, (zext i1)
2255  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2256  N0.getOperand(0).getValueType() == MVT::i1 &&
2258  SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2259  return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2260  }
2261 
2262  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2263  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2264  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2265  if (TN->getVT() == MVT::i1) {
2266  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2267  DAG.getConstant(1, DL, VT));
2268  return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2269  }
2270  }
2271 
2272  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2273  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2274  N1.getResNo() == 0)
2275  return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2276  N0, N1.getOperand(0), N1.getOperand(2));
2277 
2278  // (add X, Carry) -> (addcarry X, 0, Carry)
2280  if (SDValue Carry = getAsCarry(TLI, N1))
2281  return DAG.getNode(ISD::ADDCARRY, DL,
2282  DAG.getVTList(VT, Carry.getValueType()), N0,
2283  DAG.getConstant(0, DL, VT), Carry);
2284 
2285  return SDValue();
2286 }
2287 
// Combine the legacy glue-based ISD::ADDC node.
2288 SDValue DAGCombiner::visitADDC(SDNode *N) {
2289  SDValue N0 = N->getOperand(0);
2290  SDValue N1 = N->getOperand(1);
2291  EVT VT = N0.getValueType();
2292  SDLoc DL(N);
2293 
2294  // If the flag result is dead, turn this into an ADD.
2295  if (!N->hasAnyUseOfValue(1))
2296  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2297  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2298 
2299  // canonicalize constant to RHS.
// NOTE(review): extraction elided lines 2300-2301 — presumably the
// `ConstantSDNode *N0C/N1C = dyn_cast<ConstantSDNode>(...)` declarations.
2302  if (N0C && !N1C)
2303  return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2304 
2305  // fold (addc x, 0) -> x + no carry out
2306  if (isNullConstant(N1))
2307  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2308  DL, MVT::Glue));
2309 
2310  // If it cannot overflow, transform into an add.
2311  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2312  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2313  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2314 
2315  return SDValue();
2316 }
2317 
/// Logically invert boolean value \p V by XOR'ing it with the constant that
/// represents "true" under the target's boolean-contents convention for \p VT.
2318 static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT,
2319  SelectionDAG &DAG, const TargetLowering &TLI) {
2320  SDValue Cst;
2321  switch (TLI.getBooleanContents(VT)) {
// NOTE(review): extraction elided the case labels (lines 2322-2323, 2326) —
// presumably ZeroOrOne/Undefined content selects 1 and ZeroOrNegativeOne
// content selects -1. Confirm upstream.
2324  Cst = DAG.getConstant(1, DL, VT);
2325  break;
2327  Cst = DAG.getConstant(-1, DL, VT);
2328  break;
2329  }
2330 
2331  return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
2332 }
2333 
/// Return true iff \p V is an XOR whose RHS constant inverts a boolean of
/// type \p VT under the target's boolean-contents convention.
2334 static bool isBooleanFlip(SDValue V, EVT VT, const TargetLowering &TLI) {
2335  if (V.getOpcode() != ISD::XOR) return false;
// NOTE(review): extraction elided line 2336 — presumably
// `ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1));` or similar.
2337  if (!Const) return false;
2338 
2339  switch(TLI.getBooleanContents(VT)) {
// NOTE(review): the three case labels (lines 2340, 2342, 2344) were elided;
// by the return values they are ZeroOrOne, ZeroOrNegativeOne and Undefined
// boolean content respectively. Confirm upstream.
2341  return Const->isOne();
2343  return Const->isAllOnesValue();
2345  return (Const->getAPIntValue() & 0x01) == 1;
2346  }
2347  llvm_unreachable("Unsupported boolean content");
2348 }
2349 
// Combine ISD::UADDO (unsigned add with overflow flag result).
2350 SDValue DAGCombiner::visitUADDO(SDNode *N) {
2351  SDValue N0 = N->getOperand(0);
2352  SDValue N1 = N->getOperand(1);
2353  EVT VT = N0.getValueType();
// Vector UADDO is not handled by any of the folds below.
2354  if (VT.isVector())
2355  return SDValue();
2356 
2357  EVT CarryVT = N->getValueType(1);
2358  SDLoc DL(N);
2359 
2360  // If the flag result is dead, turn this into an ADD.
2361  if (!N->hasAnyUseOfValue(1))
2362  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2363  DAG.getUNDEF(CarryVT));
2364 
2365  // canonicalize constant to RHS.
// NOTE(review): extraction elided lines 2366-2367 — presumably the
// `ConstantSDNode *N0C/N1C` declarations used just below.
2368  if (N0C && !N1C)
2369  return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
2370 
2371  // fold (uaddo x, 0) -> x + no carry out
2372  if (isNullConstant(N1))
2373  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2374 
2375  // If it cannot overflow, transform into an add.
2376  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2377  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2378  DAG.getConstant(0, DL, CarryVT));
2379 
2380  // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
// NOTE(review): extraction elided the guard on line 2381 — presumably
// `if (isBitwiseNot(N0) && isOneConstant(N1)) {`. Confirm upstream.
2382  SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2383  DAG.getConstant(0, DL, VT),
2384  N0.getOperand(0));
2385  return CombineTo(N, Sub,
2386  flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
2387  }
2388 
// Try the asymmetric UADDO-like combines in both operand orders.
2389  if (SDValue Combined = visitUADDOLike(N0, N1, N))
2390  return Combined;
2391 
2392  if (SDValue Combined = visitUADDOLike(N1, N0, N))
2393  return Combined;
2394 
2395  return SDValue();
2396 }
2397 
// Asymmetric UADDO combines; called twice from visitUADDO, operands swapped.
2398 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2399  auto VT = N0.getValueType();
2400 
2401  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2402  // If Y + 1 cannot overflow.
2403  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2404  SDValue Y = N1.getOperand(0);
2405  SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2406  if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2407  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2408  N1.getOperand(2));
2409  }
2410 
2411  // (uaddo X, Carry) -> (addcarry X, 0, Carry)
// NOTE(review): extraction elided the legality guard on line 2412 —
// presumably a TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT) check.
2413  if (SDValue Carry = getAsCarry(TLI, N1))
2414  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2415  DAG.getConstant(0, SDLoc(N), VT), Carry);
2416 
2417  return SDValue();
2418 }
2419 
// Combine the legacy glue-based ISD::ADDE (add with carry-in) node.
2420 SDValue DAGCombiner::visitADDE(SDNode *N) {
2421  SDValue N0 = N->getOperand(0);
2422  SDValue N1 = N->getOperand(1);
2423  SDValue CarryIn = N->getOperand(2);
2424 
2425  // canonicalize constant to RHS
// NOTE(review): extraction elided lines 2426-2427 — presumably the
// `ConstantSDNode *N0C/N1C` declarations used just below.
2428  if (N0C && !N1C)
2429  return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2430  N1, N0, CarryIn);
2431 
2432  // fold (adde x, y, false) -> (addc x, y)
2433  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2434  return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2435 
2436  return SDValue();
2437 }
2438 
// Combine ISD::ADDCARRY (add with boolean carry-in and carry-out).
2439 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2440  SDValue N0 = N->getOperand(0);
2441  SDValue N1 = N->getOperand(1);
2442  SDValue CarryIn = N->getOperand(2);
2443  SDLoc DL(N);
2444 
2445  // canonicalize constant to RHS
// NOTE(review): extraction elided lines 2446-2447 — presumably the
// `ConstantSDNode *N0C/N1C` declarations used just below.
2448  if (N0C && !N1C)
2449  return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2450 
2451  // fold (addcarry x, y, false) -> (uaddo x, y)
2452  if (isNullConstant(CarryIn)) {
2453  if (!LegalOperations ||
// NOTE(review): extraction elided line 2454 — presumably
// `TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))`.
2455  return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2456  }
2457 
2458  EVT CarryVT = CarryIn.getValueType();
2459 
2460  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2461  if (isNullConstant(N0) && isNullConstant(N1)) {
2462  EVT VT = N0.getValueType();
2463  SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2464  AddToWorklist(CarryExt.getNode());
2465  return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2466  DAG.getConstant(1, DL, VT)),
2467  DAG.getConstant(0, DL, CarryVT));
2468  }
2469 
2470  // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
2471  if (isBitwiseNot(N0) && isNullConstant(N1) &&
2472  isBooleanFlip(CarryIn, CarryVT, TLI)) {
2473  SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
2474  DAG.getConstant(0, DL, N0.getValueType()),
2475  N0.getOperand(0), CarryIn.getOperand(0));
2476  return CombineTo(N, Sub,
2477  flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
2478  }
2479 
// Try the asymmetric ADDCARRY-like combines in both operand orders.
2480  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2481  return Combined;
2482 
2483  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2484  return Combined;
2485 
2486  return SDValue();
2487 }
2488 
2489 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2490  SDNode *N) {
2491  // Iff the flag result is dead:
2492  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
2493  if ((N0.getOpcode() == ISD::ADD ||
2494  (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
2495  isNullConstant(N1) && !N->hasAnyUseOfValue(1))
2496  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2497  N0.getOperand(0), N0.getOperand(1), CarryIn);
2498 
2499  /**
2500  * When one of the addcarry argument is itself a carry, we may be facing
2501  * a diamond carry propagation. In which case we try to transform the DAG
2502  * to ensure linear carry propagation if that is possible.
2503  *
2504  * We are trying to get:
2505  * (addcarry X, 0, (addcarry A, B, Z):Carry)
2506  */
2507  if (auto Y = getAsCarry(TLI, N1)) {
2508  /**
2509  * (uaddo A, B)
2510  * / \
2511  * Carry Sum
2512  * | \
2513  * | (addcarry *, 0, Z)
2514  * | /
2515  * \ Carry
2516  * | /
2517  * (addcarry X, *, *)
2518  */
2519  if (Y.getOpcode() == ISD::UADDO &&
2520  CarryIn.getResNo() == 1 &&
2521  CarryIn.getOpcode() == ISD::ADDCARRY &&
2522  isNullConstant(CarryIn.getOperand(1)) &&
2523  CarryIn.getOperand(0) == Y.getValue(0)) {
2524  auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
2525  Y.getOperand(0), Y.getOperand(1),
2526  CarryIn.getOperand(2));
2527  AddToWorklist(NewY.getNode());
2528  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2529  DAG.getConstant(0, SDLoc(N), N0.getValueType()),
2530  NewY.getValue(1));
2531  }
2532  }
2533 
2534  return SDValue();
2535 }
2536 
2537 // Since it may not be valid to emit a fold to zero for vector initializers
2538 // check if we can before folding.
2539 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2540  SelectionDAG &DAG, bool LegalOperations,
2541  bool LegalTypes) {
2542  if (!VT.isVector())
2543  return DAG.getConstant(0, DL, VT);
2544  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2545  return DAG.getConstant(0, DL, VT);
2546  return SDValue();
2547 }
2548 
2549 SDValue DAGCombiner::visitSUB(SDNode *N) {
2550  SDValue N0 = N->getOperand(0);
2551  SDValue N1 = N->getOperand(1);
2552  EVT VT = N0.getValueType();
2553  SDLoc DL(N);
2554 
2555  // fold vector ops
2556  if (VT.isVector()) {
2557  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2558  return FoldedVOp;
2559 
2560  // fold (sub x, 0) -> x, vector edition
2562  return N0;
2563  }
2564 
2565  // fold (sub x, x) -> 0
2566  // FIXME: Refactor this and xor and other similar operations together.
2567  if (N0 == N1)
2568  return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes);
2571  // fold (sub c1, c2) -> c1-c2
2572  return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
2573  N1.getNode());
2574  }
2575 
2576  if (SDValue NewSel = foldBinOpIntoSelect(N))
2577  return NewSel;
2578 
2580 
2581  // fold (sub x, c) -> (add x, -c)
2582  if (N1C) {
2583  return DAG.getNode(ISD::ADD, DL, VT, N0,
2584  DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
2585  }
2586 
2588  unsigned BitWidth = VT.getScalarSizeInBits();
2589  // Right-shifting everything out but the sign bit followed by negation is
2590  // the same as flipping arithmetic/logical shift type without the negation:
2591  // -(X >>u 31) -> (X >>s 31)
2592  // -(X >>s 31) -> (X >>u 31)
2593  if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
2594  ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
2595  if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
2596  auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
2597  if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
2598  return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
2599  }
2600  }
2601 
2602  // 0 - X --> 0 if the sub is NUW.
2603  if (N->getFlags().hasNoUnsignedWrap())
2604  return N0;
2605 
2606  if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
2607  // N1 is either 0 or the minimum signed value. If the sub is NSW, then
2608  // N1 must be 0 because negating the minimum signed value is undefined.
2609  if (N->getFlags().hasNoSignedWrap())
2610  return N0;
2611 
2612  // 0 - X --> X if X is 0 or the minimum signed value.
2613  return N1;
2614  }
2615  }
2616 
2617  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
2619  return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
2620 
2621  // fold (A - (0-B)) -> A+B
2622  if (N1.getOpcode() == ISD::SUB &&
2624  return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
2625 
2626  // fold A-(A-B) -> B
2627  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
2628  return N1.getOperand(1);
2629 
2630  // fold (A+B)-A -> B
2631  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
2632  return N0.getOperand(1);
2633 
2634  // fold (A+B)-B -> A
2635  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
2636  return N0.getOperand(0);
2637 
2638  // fold C2-(A+C1) -> (C2-C1)-A
2639  if (N1.getOpcode() == ISD::ADD) {
2640  SDValue N11 = N1.getOperand(1);
2641  if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
2642  isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
2643  SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
2644  return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
2645  }
2646  }
2647 
2648  // fold ((A+(B+or-C))-B) -> A+or-C
2649  if (N0.getOpcode() == ISD::ADD &&
2650  (N0.getOperand(1).getOpcode() == ISD::SUB ||
2651  N0.getOperand(1).getOpcode() == ISD::ADD) &&
2652  N0.getOperand(1).getOperand(0) == N1)
2653  return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
2654  N0.getOperand(1).getOperand(1));
2655 
2656  // fold ((A+(C+B))-B) -> A+C
2657  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
2658  N0.getOperand(1).getOperand(1) == N1)
2659  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
2660  N0.getOperand(1).getOperand(0));
2661 
2662  // fold ((A-(B-C))-C) -> A-B
2663  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
2664  N0.getOperand(1).getOperand(1) == N1)
2665  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2666  N0.getOperand(1).getOperand(0));
2667 
2668  // fold (A-(B-C)) -> A+(C-B)
2669  if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
2670  return DAG.getNode(ISD::ADD, DL, VT, N0,
2671  DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
2672  N1.getOperand(0)));
2673 
2674  // fold (X - (-Y * Z)) -> (X + (Y * Z))
2675  if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
2676  if (N1.getOperand(0).getOpcode() == ISD::SUB &&
2678  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
2679  N1.getOperand(0).getOperand(1),
2680  N1.getOperand(1));
2681  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
2682  }
2683  if (N1.getOperand(1).getOpcode() == ISD::SUB &&
2685  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
2686  N1.getOperand(0),
2687  N1.getOperand(1).getOperand(1));
2688  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
2689  }
2690  }
2691 
2692  // If either operand of a sub is undef, the result is undef
2693  if (N0.isUndef())
2694  return N0;
2695  if (N1.isUndef())
2696  return N1;
2697 
2698  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2699  return V;
2700 
2701  if (SDValue V = foldAddSubOfSignBit(N, DAG))
2702  return V;
2703 
2704  // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
2705  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
2706  if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
2707  SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
2708  SDValue S0 = N1.getOperand(0);
2709  if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
2710  unsigned OpSizeInBits = VT.getScalarSizeInBits();
2712  if (C->getAPIntValue() == (OpSizeInBits - 1))
2713  return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
2714  }
2715  }
2716  }
2717 
2718  // If the relocation model supports it, consider symbol offsets.
2719  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
2720  if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2721  // fold (sub Sym, c) -> Sym-c
2722  if (N1C && GA->getOpcode() == ISD::GlobalAddress)
2723  return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
2724  GA->getOffset() -
2725  (uint64_t)N1C->getSExtValue());
2726  // fold (sub Sym+c1, Sym+c2) -> c1-c2
2727  if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
2728  if (GA->getGlobal() == GB->getGlobal())
2729  return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
2730  DL, VT);
2731  }
2732 
2733  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
2734  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2735  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2736  if (TN->getVT() == MVT::i1) {
2737  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2738  DAG.getConstant(1, DL, VT));
2739  return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
2740  }
2741  }
2742 
2743  // Prefer an add for more folding potential and possibly better codegen:
2744  // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
2745  if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
2746  SDValue ShAmt = N1.getOperand(1);
2747  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2748  if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) {
2749  SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
2750  return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
2751  }
2752  }
2753 
2754  return SDValue();
2755 }
2756 
2757 SDValue DAGCombiner::visitSUBC(SDNode *N) {
2758  SDValue N0 = N->getOperand(0);
2759  SDValue N1 = N->getOperand(1);
2760  EVT VT = N0.getValueType();
2761  SDLoc DL(N);
2762 
2763  // If the flag result is dead, turn this into an SUB.
2764  if (!N->hasAnyUseOfValue(1))
2765  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2766  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2767 
2768  // fold (subc x, x) -> 0 + no borrow
2769  if (N0 == N1)
2770  return CombineTo(N, DAG.getConstant(0, DL, VT),
2771  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2772 
2773  // fold (subc x, 0) -> x + no borrow
2774  if (isNullConstant(N1))
2775  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2776 
2777  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2778  if (isAllOnesConstant(N0))
2779  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2780  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2781 
2782  return SDValue();
2783 }
2784 
2785 SDValue DAGCombiner::visitUSUBO(SDNode *N) {
2786  SDValue N0 = N->getOperand(0);
2787  SDValue N1 = N->getOperand(1);
2788  EVT VT = N0.getValueType();
2789  if (VT.isVector())
2790  return SDValue();
2791 
2792  EVT CarryVT = N->getValueType(1);
2793  SDLoc DL(N);
2794 
2795  // If the flag result is dead, turn this into an SUB.
2796  if (!N->hasAnyUseOfValue(1))
2797  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2798  DAG.getUNDEF(CarryVT));
2799 
2800  // fold (usubo x, x) -> 0 + no borrow
2801  if (N0 == N1)
2802  return CombineTo(N, DAG.getConstant(0, DL, VT),
2803  DAG.getConstant(0, DL, CarryVT));
2804 
2805  // fold (usubo x, 0) -> x + no borrow
2806  if (isNullConstant(N1))
2807  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2808 
2809  // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2810  if (isAllOnesConstant(N0))
2811  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2812  DAG.getConstant(0, DL, CarryVT));
2813 
2814  return SDValue();
2815 }
2816 
2817 SDValue DAGCombiner::visitSUBE(SDNode *N) {
2818  SDValue N0 = N->getOperand(0);
2819  SDValue N1 = N->getOperand(1);
2820  SDValue CarryIn = N->getOperand(2);
2821 
2822  // fold (sube x, y, false) -> (subc x, y)
2823  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2824  return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
2825 
2826  return SDValue();
2827 }
2828 
2829 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
2830  SDValue N0 = N->getOperand(0);
2831  SDValue N1 = N->getOperand(1);
2832  SDValue CarryIn = N->getOperand(2);
2833 
2834  // fold (subcarry x, y, false) -> (usubo x, y)
2835  if (isNullConstant(CarryIn)) {
2836  if (!LegalOperations ||
2838  return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
2839  }
2840 
2841  return SDValue();
2842 }
2843 
2844 SDValue DAGCombiner::visitMUL(SDNode *N) {
2845  SDValue N0 = N->getOperand(0);
2846  SDValue N1 = N->getOperand(1);
2847  EVT VT = N0.getValueType();
2848 
2849  // fold (mul x, undef) -> 0
2850  if (N0.isUndef() || N1.isUndef())
2851  return DAG.getConstant(0, SDLoc(N), VT);
2852 
2853  bool N0IsConst = false;
2854  bool N1IsConst = false;
2855  bool N1IsOpaqueConst = false;
2856  bool N0IsOpaqueConst = false;
2857  APInt ConstValue0, ConstValue1;
2858  // fold vector ops
2859  if (VT.isVector()) {
2860  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2861  return FoldedVOp;
2862 
2863  N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
2864  N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
2865  assert((!N0IsConst ||
2866  ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
2867  "Splat APInt should be element width");
2868  assert((!N1IsConst ||
2869  ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
2870  "Splat APInt should be element width");
2871  } else {
2872  N0IsConst = isa<ConstantSDNode>(N0);
2873  if (N0IsConst) {
2874  ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
2875  N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
2876  }
2877  N1IsConst = isa<ConstantSDNode>(N1);
2878  if (N1IsConst) {
2879  ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
2880  N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
2881  }
2882  }
2883 
2884  // fold (mul c1, c2) -> c1*c2
2885  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
2886  return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
2887  N0.getNode(), N1.getNode());
2888 
2889  // canonicalize constant to RHS (vector doesn't have to splat)
2892  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
2893  // fold (mul x, 0) -> 0
2894  if (N1IsConst && ConstValue1.isNullValue())
2895  return N1;
2896  // fold (mul x, 1) -> x
2897  if (N1IsConst && ConstValue1.isOneValue())
2898  return N0;
2899 
2900  if (SDValue NewSel = foldBinOpIntoSelect(N))
2901  return NewSel;
2902 
2903  // fold (mul x, -1) -> 0-x
2904  if (N1IsConst && ConstValue1.isAllOnesValue()) {
2905  SDLoc DL(N);
2906  return DAG.getNode(ISD::SUB, DL, VT,
2907  DAG.getConstant(0, DL, VT), N0);
2908  }
2909  // fold (mul x, (1 << c)) -> x << c
2910  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
2911  DAG.isKnownToBeAPowerOfTwo(N1) &&
2912  (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
2913  SDLoc DL(N);
2914  SDValue LogBase2 = BuildLogBase2(N1, DL);
2915  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
2916  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
2917  return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
2918  }
2919  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
2920  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
2921  unsigned Log2Val = (-ConstValue1).logBase2();
2922  SDLoc DL(N);
2923  // FIXME: If the input is something that is easily negated (e.g. a
2924  // single-use add), we should put the negate there.
2925  return DAG.getNode(ISD::SUB, DL, VT,
2926  DAG.getConstant(0, DL, VT),
2927  DAG.getNode(ISD::SHL, DL, VT, N0,
2928  DAG.getConstant(Log2Val, DL,
2929  getShiftAmountTy(N0.getValueType()))));
2930  }
2931 
2932  // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
2933  // mul x, (2^N + 1) --> add (shl x, N), x
2934  // mul x, (2^N - 1) --> sub (shl x, N), x
2935  // Examples: x * 33 --> (x << 5) + x
2936  // x * 15 --> (x << 4) - x
2937  // x * -33 --> -((x << 5) + x)
2938  // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
2939  if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
2940  // TODO: We could handle more general decomposition of any constant by
2941  // having the target set a limit on number of ops and making a
2942  // callback to determine that sequence (similar to sqrt expansion).
2943  unsigned MathOp = ISD::DELETED_NODE;
2944  APInt MulC = ConstValue1.abs();
2945  if ((MulC - 1).isPowerOf2())
2946  MathOp = ISD::ADD;
2947  else if ((MulC + 1).isPowerOf2())
2948  MathOp = ISD::SUB;
2949 
2950  if (MathOp != ISD::DELETED_NODE) {
2951  unsigned ShAmt = MathOp == ISD::ADD ? (MulC - 1).logBase2()
2952  : (MulC + 1).logBase2();
2953  assert(ShAmt > 0 && ShAmt < VT.getScalarSizeInBits() &&
2954  "Not expecting multiply-by-constant that could have simplified");
2955  SDLoc DL(N);
2956  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N0,
2957  DAG.getConstant(ShAmt, DL, VT));
2958  SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
2959  if (ConstValue1.isNegative())
2960  R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
2961  return R;
2962  }
2963  }
2964 
2965  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
2966  if (N0.getOpcode() == ISD::SHL &&
2967  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
2968  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
2969  SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
2971  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
2972  }
2973 
2974  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
2975  // use.
2976  {
2977  SDValue Sh(nullptr, 0), Y(nullptr, 0);
2978 
2979  // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
2980  if (N0.getOpcode() == ISD::SHL &&
2982  N0.getNode()->hasOneUse()) {
2983  Sh = N0; Y = N1;
2984  } else if (N1.getOpcode() == ISD::SHL &&
2986  N1.getNode()->hasOneUse()) {
2987  Sh = N1; Y = N0;
2988  }
2989 
2990  if (Sh.getNode()) {
2991  SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
2992  return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
2993  }
2994  }
2995 
2996  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
2998  N0.getOpcode() == ISD::ADD &&
3000  isMulAddWithConstProfitable(N, N0, N1))
3001  return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3002  DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3003  N0.getOperand(0), N1),
3004  DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3005  N0.getOperand(1), N1));
3006 
3007  // reassociate mul
3008  if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3009  return RMUL;
3010 
3011  return SDValue();
3012 }
3013 
3014 /// Return true if divmod libcall is available.
3015 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3016  const TargetLowering &TLI) {
3017  RTLIB::Libcall LC;
3018  EVT NodeType = Node->getValueType(0);
3019  if (!NodeType.isSimple())
3020  return false;
3021  switch (NodeType.getSimpleVT().SimpleTy) {
3022  default: return false; // No libcall for vector types.
3023  case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
3024  case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
3025  case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
3026  case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3027  case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3028  }
3029 
3030  return TLI.getLibcallName(LC) != nullptr;
3031 }
3032 
3033 /// Issue divrem if both quotient and remainder are needed.
3034 SDValue DAGCombiner::useDivRem(SDNode *Node) {
3035  if (Node->use_empty())
3036  return SDValue(); // This is a dead node, leave it alone.
3037 
3038  unsigned Opcode = Node->getOpcode();
3039  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
3040  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
3041 
3042  // DivMod lib calls can still work on non-legal types if using lib-calls.
3043  EVT VT = Node->getValueType(0);
3044  if (VT.isVector() || !VT.isInteger())
3045  return SDValue();
3046 
3047  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
3048  return SDValue();
3049 
3050  // If DIVREM is going to get expanded into a libcall,
3051  // but there is no libcall available, then don't combine.
3052  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
3053  !isDivRemLibcallAvailable(Node, isSigned, TLI))
3054  return SDValue();
3055 
3056  // If div is legal, it's better to do the normal expansion
3057  unsigned OtherOpcode = 0;
3058  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
3059  OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
3060  if (TLI.isOperationLegalOrCustom(Opcode, VT))
3061  return SDValue();
3062  } else {
3063  OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3064  if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
3065  return SDValue();
3066  }
3067 
3068  SDValue Op0 = Node->getOperand(0);
3069  SDValue Op1 = Node->getOperand(1);
3070  SDValue combined;
3071  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
3072  UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
3073  SDNode *User = *UI;
3074  if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
3075  User->use_empty())
3076  continue;
3077  // Convert the other matching node(s), too;
3078  // otherwise, the DIVREM may get target-legalized into something
3079  // target-specific that we won't be able to recognize.
3080  unsigned UserOpc = User->getOpcode();
3081  if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
3082  User->getOperand(0) == Op0 &&
3083  User->getOperand(1) == Op1) {
3084  if (!combined) {
3085  if (UserOpc == OtherOpcode) {
3086  SDVTList VTs = DAG.getVTList(VT, VT);
3087  combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
3088  } else if (UserOpc == DivRemOpc) {
3089  combined = SDValue(User, 0);
3090  } else {
3091  assert(UserOpc == Opcode);
3092  continue;
3093  }
3094  }
3095  if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
3096  CombineTo(User, combined);
3097  else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
3098  CombineTo(User, combined.getValue(1));
3099  }
3100  }
3101  return combined;
3102 }
3103 
3105  SDValue N0 = N->getOperand(0);
3106  SDValue N1 = N->getOperand(1);
3107  EVT VT = N->getValueType(0);
3108  SDLoc DL(N);
3109 
3110  unsigned Opc = N->getOpcode();
3111  bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
3113 
3114  // X / undef -> undef
3115  // X % undef -> undef
3116  // X / 0 -> undef
3117  // X % 0 -> undef
3118  // NOTE: This includes vectors where any divisor element is zero/undef.
3119  if (DAG.isUndef(Opc, {N0, N1}))
3120  return DAG.getUNDEF(VT);
3121 
3122  // undef / X -> 0
3123  // undef % X -> 0
3124  if (N0.isUndef())
3125  return DAG.getConstant(0, DL, VT);
3126 
3127  // TODO: 0 / X -> 0
3128  // TODO: 0 % X -> 0
3129 
3130  // X / X -> 1
3131  // X % X -> 0
3132  if (N0 == N1)
3133  return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
3134 
3135  // X / 1 -> X
3136  // X % 1 -> 0
3137  if (N1C && N1C->isOne())
3138  return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
3139  // If this is a boolean op (single-bit element type), we can't have
3140  // division-by-zero or remainder-by-zero, so assume the divisor is 1.
3141  // Similarly, if we're zero-extending a boolean divisor, then assume it's a 1.
3142 
3143  return SDValue();
3144 }
3145 
3146 SDValue DAGCombiner::visitSDIV(SDNode *N) {
3147  SDValue N0 = N->getOperand(0);
3148  SDValue N1 = N->getOperand(1);
3149  EVT VT = N->getValueType(0);
3150  EVT CCVT = getSetCCResultType(VT);
3151 
3152  // fold vector ops
3153  if (VT.isVector())
3154  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3155  return FoldedVOp;
3156 
3157  SDLoc DL(N);
3158 
3159  // fold (sdiv c1, c2) -> c1/c2
3162  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
3163  return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
3164  // fold (sdiv X, -1) -> 0-X
3165  if (N1C && N1C->isAllOnesValue())
3166  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
3167  // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
3168  if (N1C && N1C->getAPIntValue().isMinSignedValue())
3169  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3170  DAG.getConstant(1, DL, VT),
3171  DAG.getConstant(0, DL, VT));
3172 
3173  if (SDValue V = simplifyDivRem(N, DAG))
3174  return V;
3175 
3176  if (SDValue NewSel = foldBinOpIntoSelect(N))
3177  return NewSel;
3178 
3179  // If we know the sign bits of both operands are zero, strength reduce to a
3180  // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
3181  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3182  return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
3183 
3184  if (SDValue V = visitSDIVLike(N0, N1, N))
3185  return V;
3186 
3187  // sdiv, srem -> sdivrem
3188  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3189  // true. Otherwise, we break the simplification logic in visitREM().
3191  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3192  if (SDValue DivRem = useDivRem(N))
3193  return DivRem;
3194 
3195  return SDValue();
3196 }
3197 
3198 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3199  SDLoc DL(N);
3200  EVT VT = N->getValueType(0);
3201  EVT CCVT = getSetCCResultType(VT);
3202  unsigned BitWidth = VT.getScalarSizeInBits();
3203 
3204  // Helper for determining whether a value is a power-2 constant scalar or a
3205  // vector of such elements.
3206  auto IsPowerOfTwo = [](ConstantSDNode *C) {
3207  if (C->isNullValue() || C->isOpaque())
3208  return false;
3209  if (C->getAPIntValue().isPowerOf2())
3210  return true;
3211  if ((-C->getAPIntValue()).isPowerOf2())
3212  return true;
3213  return false;
3214  };
3215 
3216  // fold (sdiv X, pow2) -> simple ops after legalize
3217  // FIXME: We check for the exact bit here because the generic lowering gives
3218  // better results in that case. The target-specific lowering should learn how
3219  // to handle exact sdivs efficiently.
3220  if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
3221  // Target-specific implementation of sdiv x, pow2.
3222  if (SDValue Res = BuildSDIVPow2(N))
3223  return Res;
3224 
3225  // Create constants that are functions of the shift amount value.
3226  EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
3227  SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
3228  SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
3229  C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
3230  SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
3231  if (!isConstantOrConstantVector(Inexact))
3232  return SDValue();
3233 
3234  // Splat the sign bit into the register
3235  SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
3236  DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
3237  AddToWorklist(Sign.getNode());
3238 
3239  // Add (N0 < 0) ? abs2 - 1 : 0;
3240  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
3241  AddToWorklist(Srl.getNode());
3242  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
3243  AddToWorklist(Add.getNode());
3244  SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
3245  AddToWorklist(Sra.getNode());
3246 
3247  // Special case: (sdiv X, 1) -> X
3248  // Special Case: (sdiv X, -1) -> 0-X
3249  SDValue One = DAG.getConstant(1, DL, VT);
3250  SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
3251  SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
3252  SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
3253  SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
3254  Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
3255 
3256  // If dividing by a positive value, we're done. Otherwise, the result must
3257  // be negated.
3258  SDValue Zero = DAG.getConstant(0, DL, VT);
3259  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
3260 
3261  // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
3262  SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
3263  SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
3264  return Res;
3265  }
3266 
3267  // If integer divide is expensive and we satisfy the requirements, emit an
3268  // alternate sequence. Targets may check function attributes for size/speed
3269  // trade-offs.
3271  if (isConstantOrConstantVector(N1) &&
3272  !TLI.isIntDivCheap(N->getValueType(0), Attr))
3273  if (SDValue Op = BuildSDIV(N))
3274  return Op;
3275 
3276  return SDValue();
3277 }
3278 
3279 SDValue DAGCombiner::visitUDIV(SDNode *N) {
3280  SDValue N0 = N->getOperand(0);
3281  SDValue N1 = N->getOperand(1);
3282  EVT VT = N->getValueType(0);
3283  EVT CCVT = getSetCCResultType(VT);
3284 
3285  // fold vector ops
3286  if (VT.isVector())
3287  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3288  return FoldedVOp;
3289 
3290  SDLoc DL(N);
3291 
3292  // fold (udiv c1, c2) -> c1/c2
3295  if (N0C && N1C)
3296  if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
3297  N0C, N1C))
3298  return Folded;
3299  // fold (udiv X, -1) -> select(X == -1, 1, 0)
3300  if (N1C && N1C->getAPIntValue().isAllOnesValue())
3301  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3302  DAG.getConstant(1, DL, VT),
3303  DAG.getConstant(0, DL, VT));
3304 
3305  if (SDValue V = simplifyDivRem(N, DAG))
3306  return V;
3307 
3308  if (SDValue NewSel = foldBinOpIntoSelect(N))
3309  return NewSel;
3310 
3311  if (SDValue V = visitUDIVLike(N0, N1, N))
3312  return V;
3313 
3314  // sdiv, srem -> sdivrem
3315  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3316  // true. Otherwise, we break the simplification logic in visitREM().
3318  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3319  if (SDValue DivRem = useDivRem(N))
3320  return DivRem;
3321 
3322  return SDValue();
3323 }
3324 
3325 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3326  SDLoc DL(N);
3327  EVT VT = N->getValueType(0);
3328 
3329  // fold (udiv x, (1 << c)) -> x >>u c
3330  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3331  DAG.isKnownToBeAPowerOfTwo(N1)) {
3332  SDValue LogBase2 = BuildLogBase2(N1, DL);
3333  AddToWorklist(LogBase2.getNode());
3334 
3335  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3336  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3337  AddToWorklist(Trunc.getNode());
3338  return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3339  }
3340 
3341  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
3342  if (N1.getOpcode() == ISD::SHL) {
3343  SDValue N10 = N1.getOperand(0);
3344  if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
3345  DAG.isKnownToBeAPowerOfTwo(N10)) {
3346  SDValue LogBase2 = BuildLogBase2(N10, DL);
3347  AddToWorklist(LogBase2.getNode());
3348 
3349  EVT ADDVT = N1.getOperand(1).getValueType();
3350  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
3351  AddToWorklist(Trunc.getNode());
3352  SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
3353  AddToWorklist(Add.getNode());
3354  return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
3355  }
3356  }
3357 
3358  // fold (udiv x, c) -> alternate
3360  if (isConstantOrConstantVector(N1) &&
3361  !TLI.isIntDivCheap(N->getValueType(0), Attr))
3362  if (SDValue Op = BuildUDIV(N))
3363  return Op;
3364 
3365  return SDValue();
3366 }
3367 
3368 // handles ISD::SREM and ISD::UREM
3369 SDValue DAGCombiner::visitREM(SDNode *N) {
3370  unsigned Opcode = N->getOpcode();
3371  SDValue N0 = N->getOperand(0);
3372  SDValue N1 = N->getOperand(1);
3373  EVT VT = N->getValueType(0);
3374  EVT CCVT = getSetCCResultType(VT);
3375 
3376  bool isSigned = (Opcode == ISD::SREM);
3377  SDLoc DL(N);
3378 
3379  // fold (rem c1, c2) -> c1%c2
3382  if (N0C && N1C)
3383  if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
3384  return Folded;
3385  // fold (urem X, -1) -> select(X == -1, 0, x)
3386  if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
3387  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3388  DAG.getConstant(0, DL, VT), N0);
3389 
3390  if (SDValue V = simplifyDivRem(N, DAG))
3391  return V;
3392 
3393  if (SDValue NewSel = foldBinOpIntoSelect(N))
3394  return NewSel;
3395 
3396  if (isSigned) {
3397  // If we know the sign bits of both operands are zero, strength reduce to a
3398  // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
3399  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3400  return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
3401  } else {
3402  SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
3403  if (DAG.isKnownToBeAPowerOfTwo(N1)) {
3404  // fold (urem x, pow2) -> (and x, pow2-1)
3405  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3406  AddToWorklist(Add.getNode());
3407  return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3408  }
3409  if (N1.getOpcode() == ISD::SHL &&
3410  DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
3411  // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
3412  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3413  AddToWorklist(Add.getNode());
3414  return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3415  }
3416  }
3417 
3419 
3420  // If X/C can be simplified by the division-by-constant logic, lower
3421  // X%C to the equivalent of X-X/C*C.
3422  // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
3423  // speculative DIV must not cause a DIVREM conversion. We guard against this
3424  // by skipping the simplification if isIntDivCheap(). When div is not cheap,
3425  // combine will not return a DIVREM. Regardless, checking cheapness here
3426  // makes sense since the simplification results in fatter code.
3427  if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
3428  SDValue OptimizedDiv =
3429  isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
3430  if (OptimizedDiv.getNode()) {
3431  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
3432  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3433  AddToWorklist(OptimizedDiv.getNode());
3434  AddToWorklist(Mul.getNode());
3435  return Sub;
3436  }
3437  }
3438 
3439  // sdiv, srem -> sdivrem
3440  if (SDValue DivRem = useDivRem(N))
3441  return DivRem.getValue(1);
3442 
3443  return SDValue();
3444 }
3445 
3446 SDValue DAGCombiner::visitMULHS(SDNode *N) {
3447  SDValue N0 = N->getOperand(0);
3448  SDValue N1 = N->getOperand(1);
3449  EVT VT = N->getValueType(0);
3450  SDLoc DL(N);
3451 
3452  if (VT.isVector()) {
3453  // fold (mulhs x, 0) -> 0
3455  return N1;
3457  return N0;
3458  }
3459 
3460  // fold (mulhs x, 0) -> 0
3461  if (isNullConstant(N1))
3462  return N1;
3463  // fold (mulhs x, 1) -> (sra x, size(x)-1)
3464  if (isOneConstant(N1))
3465  return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
3466  DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
3467  getShiftAmountTy(N0.getValueType())));
3468 
3469  // fold (mulhs x, undef) -> 0
3470  if (N0.isUndef() || N1.isUndef())
3471  return DAG.getConstant(0, DL, VT);
3472 
3473  // If the type twice as wide is legal, transform the mulhs to a wider multiply
3474  // plus a shift.
3475  if (VT.isSimple() && !VT.isVector()) {
3476  MVT Simple = VT.getSimpleVT();
3477  unsigned SimpleSize = Simple.getSizeInBits();
3478  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3479  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3480  N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
3481  N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
3482  N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3483  N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3484  DAG.getConstant(SimpleSize, DL,
3485  getShiftAmountTy(N1.getValueType())));
3486  return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3487  }
3488  }
3489 
3490  return SDValue();
3491 }
3492 
3493 SDValue DAGCombiner::visitMULHU(SDNode *N) {
3494  SDValue N0 = N->getOperand(0);
3495  SDValue N1 = N->getOperand(1);
3496  EVT VT = N->getValueType(0);
3497  SDLoc DL(N);
3498 
3499  if (VT.isVector()) {
3500  // fold (mulhu x, 0) -> 0
3502  return N1;
3504  return N0;
3505  }
3506 
3507  // fold (mulhu x, 0) -> 0
3508  if (isNullConstant(N1))
3509  return N1;
3510  // fold (mulhu x, 1) -> 0
3511  if (isOneConstant(N1))
3512  return DAG.getConstant(0, DL, N0.getValueType());
3513  // fold (mulhu x, undef) -> 0
3514  if (N0.isUndef() || N1.isUndef())
3515  return DAG.getConstant(0, DL, VT);
3516 
3517  // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
3518  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3519  DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
3520  SDLoc DL(N);
3521  unsigned NumEltBits = VT.getScalarSizeInBits();
3522  SDValue LogBase2 = BuildLogBase2(N1, DL);
3523  SDValue SRLAmt = DAG.getNode(
3524  ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
3525  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3526  SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
3527  return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3528  }
3529 
3530  // If the type twice as wide is legal, transform the mulhu to a wider multiply
3531  // plus a shift.
3532  if (VT.isSimple() && !VT.isVector()) {
3533  MVT Simple = VT.getSimpleVT();
3534  unsigned SimpleSize = Simple.getSizeInBits();
3535  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3536  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3537  N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
3538  N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
3539  N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3540  N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3541  DAG.getConstant(SimpleSize, DL,
3542  getShiftAmountTy(N1.getValueType())));
3543  return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3544  }
3545  }
3546 
3547  return SDValue();
3548 }
3549 
3550 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
3551 /// give the opcodes for the two computations that are being performed. Return
3552 /// true if a simplification was made.
3553 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
3554  unsigned HiOp) {
3555  // If the high half is not needed, just compute the low half.
3556  bool HiExists = N->hasAnyUseOfValue(1);
3557  if (!HiExists &&
3558  (!LegalOperations ||
3559  TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
3560  SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3561  return CombineTo(N, Res, Res);
3562  }
3563 
3564  // If the low half is not needed, just compute the high half.
3565  bool LoExists = N->hasAnyUseOfValue(0);
3566  if (!LoExists &&
3567  (!LegalOperations ||
3568  TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
3569  SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3570  return CombineTo(N, Res, Res);
3571  }
3572 
3573  // If both halves are used, return as it is.
3574  if (LoExists && HiExists)
3575  return SDValue();
3576 
3577  // If the two computed results can be simplified separately, separate them.
3578  if (LoExists) {
3579  SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3580  AddToWorklist(Lo.getNode());
3581  SDValue LoOpt = combine(Lo.getNode());
3582  if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
3583  (!LegalOperations ||
3584  TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
3585  return CombineTo(N, LoOpt, LoOpt);
3586  }
3587 
3588  if (HiExists) {
3589  SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3590  AddToWorklist(Hi.getNode());
3591  SDValue HiOpt = combine(Hi.getNode());
3592  if (HiOpt.getNode() && HiOpt != Hi &&
3593  (!LegalOperations ||
3594  TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
3595  return CombineTo(N, HiOpt, HiOpt);
3596  }
3597 
3598  return SDValue();
3599 }
3600 
3601 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
3602  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
3603  return Res;
3604 
3605  EVT VT = N->getValueType(0);
3606  SDLoc DL(N);
3607 
3608  // If the type is twice as wide is legal, transform the mulhu to a wider
3609  // multiply plus a shift.
3610  if (VT.isSimple() && !VT.isVector()) {
3611  MVT Simple = VT.getSimpleVT();
3612  unsigned SimpleSize = Simple.getSizeInBits();
3613  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3614  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3615  SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
3616  SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
3617  Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3618  // Compute the high part as N1.
3619  Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3620  DAG.getConstant(SimpleSize, DL,
3621  getShiftAmountTy(Lo.getValueType())));
3622  Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3623  // Compute the low part as N0.
3624  Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3625  return CombineTo(N, Lo, Hi);
3626  }
3627  }
3628 
3629  return SDValue();
3630 }
3631 
3632 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
3633  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
3634  return Res;
3635 
3636  EVT VT = N->getValueType(0);
3637  SDLoc DL(N);
3638 
3639  // If the type is twice as wide is legal, transform the mulhu to a wider
3640  // multiply plus a shift.
3641  if (VT.isSimple() && !VT.isVector()) {
3642  MVT Simple = VT.getSimpleVT();
3643  unsigned SimpleSize = Simple.getSizeInBits();
3644  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3645  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3646  SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
3647  SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
3648  Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3649  // Compute the high part as N1.
3650  Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3651  DAG.getConstant(SimpleSize, DL,
3652  getShiftAmountTy(Lo.getValueType())));
3653  Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3654  // Compute the low part as N0.
3655  Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3656  return CombineTo(N, Lo, Hi);
3657  }
3658  }
3659 
3660  return SDValue();
3661 }
3662 
3663 SDValue DAGCombiner::visitSMULO(SDNode *N) {
3664  // (smulo x, 2) -> (saddo x, x)
3665  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3666  if (C2->getAPIntValue() == 2)
3667  return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
3668  N->getOperand(0), N->getOperand(0));
3669 
3670  return SDValue();
3671 }
3672 
3673 SDValue DAGCombiner::visitUMULO(SDNode *N) {
3674  // (umulo x, 2) -> (uaddo x, x)
3675  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3676  if (C2->getAPIntValue() == 2)
3677  return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
3678  N->getOperand(0), N->getOperand(0));
3679 
3680  return SDValue();
3681 }
3682 
3683 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
3684  SDValue N0 = N->getOperand(0);
3685  SDValue N1 = N->getOperand(1);
3686  EVT VT = N0.getValueType();
3687 
3688  // fold vector ops
3689  if (VT.isVector())
3690  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3691  return FoldedVOp;
3692 
3693  // fold operation with constant operands.
3696  if (N0C && N1C)
3697  return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
3698 
3699  // canonicalize constant to RHS
3702  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
3703 
3704  // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
3705  // Only do this if the current op isn't legal and the flipped is.
3706  unsigned Opcode = N->getOpcode();
3707  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3708  if (!TLI.isOperationLegal(Opcode, VT) &&
3709  (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
3710  (N1.isUndef() || DAG.SignBitIsZero(N1))) {
3711  unsigned AltOpcode;
3712  switch (Opcode) {
3713  case ISD::SMIN: AltOpcode = ISD::UMIN; break;
3714  case ISD::SMAX: AltOpcode = ISD::UMAX; break;
3715  case ISD::UMIN: AltOpcode = ISD::SMIN; break;
3716  case ISD::UMAX: AltOpcode = ISD::SMAX; break;
3717  default: llvm_unreachable("Unknown MINMAX opcode");
3718  }
3719  if (TLI.isOperationLegal(AltOpcode, VT))
3720  return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
3721  }
3722 
3723  return SDValue();
3724 }
3725 
3726 /// If this is a binary operator with two operands of the same opcode, try to
3727 /// simplify it.
3728 SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
3729  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3730  EVT VT = N0.getValueType();
3731  assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
3732 
3733  // Bail early if none of these transforms apply.
3734  if (N0.getNumOperands() == 0) return SDValue();
3735 
3736  // For each of OP in AND/OR/XOR:
3737  // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
3738  // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
3739  // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
3740  // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
3741  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
3742  //
3743  // do not sink logical op inside of a vector extend, since it may combine
3744  // into a vsetcc.
3745  EVT Op0VT = N0.getOperand(0).getValueType();
3746  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
3747  N0.getOpcode() == ISD::SIGN_EXTEND ||
3748  N0.getOpcode() == ISD::BSWAP ||
3749  // Avoid infinite looping with PromoteIntBinOp.
3750  (N0.getOpcode() == ISD::ANY_EXTEND &&
3751  (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
3752  (N0.getOpcode() == ISD::TRUNCATE &&
3753  (!TLI.isZExtFree(VT, Op0VT) ||
3754  !TLI.isTruncateFree(Op0VT, VT)) &&
3755  TLI.isTypeLegal(Op0VT))) &&
3756  !VT.isVector() &&
3757  Op0VT == N1.getOperand(0).getValueType() &&
3758  (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
3759  SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
3760  N0.getOperand(0).getValueType(),
3761  N0.getOperand(0), N1.getOperand(0));
3762  AddToWorklist(ORNode.getNode());
3763  return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
3764  }
3765 
3766  // For each of OP in SHL/SRL/SRA/AND...
3767  // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
3768  // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z)
3769  // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
3770  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
3771  N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
3772  N0.getOperand(1) == N1.getOperand(1)) {
3773  SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
3774  N0.getOperand(0).getValueType(),
3775  N0.getOperand(0), N1.getOperand(0));
3776  AddToWorklist(ORNode.getNode());
3777  return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
3778  ORNode, N0.getOperand(1));
3779  }
3780 
3781  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
3782  // Only perform this optimization up until type legalization, before
3783  // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
3784  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
3785  // we don't want to undo this promotion.
3786  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
3787  // on scalars.
3788  if ((N0.getOpcode() == ISD::BITCAST ||
3789  N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
3790  Level <= AfterLegalizeTypes) {
3791  SDValue In0 = N0.getOperand(0);
3792  SDValue In1 = N1.getOperand(0);
3793  EVT In0Ty = In0.getValueType();
3794  EVT In1Ty = In1.getValueType();
3795  SDLoc DL(N);
3796  // If both incoming values are integers, and the original types are the
3797  // same.
3798  if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
3799  SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
3800  SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
3801  AddToWorklist(Op.getNode());
3802  return BC;
3803  }
3804  }
3805 
3806  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
3807  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
3808  // If both shuffles use the same mask, and both shuffle within a single
3809  // vector, then it is worthwhile to move the swizzle after the operation.
3810  // The type-legalizer generates this pattern when loading illegal
3811  // vector types from memory. In many cases this allows additional shuffle
3812  // optimizations.
3813  // There are other cases where moving the shuffle after the xor/and/or
3814  // is profitable even if shuffles don't perform a swizzle.
3815  // If both shuffles use the same mask, and both shuffles have the same first
3816  // or second operand, then it might still be profitable to move the shuffle
3817  // after the xor/and/or operation.
3818  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
3819  ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
3820  ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
3821 
3822  assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
3823  "Inputs to shuffles are not the same type");
3824 
3825  // Check that both shuffles use the same mask. The masks are known to be of
3826  // the same length because the result vector type is the same.
3827  // Check also that shuffles have only one use to avoid introducing extra
3828  // instructions.
3829  if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
3830  SVN0->getMask().equals(SVN1->getMask())) {
3831  SDValue ShOp = N0->getOperand(1);
3832 
3833  // Don't try to fold this node if it requires introducing a
3834  // build vector of all zeros that might be illegal at this stage.
3835  if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
3836  if (!LegalTypes)
3837  ShOp = DAG.getConstant(0, SDLoc(N), VT);
3838  else
3839  ShOp = SDValue();
3840  }
3841 
3842  // (AND (shuf (A, C), shuf (B, C))) -> shuf (AND (A, B), C)
3843  // (OR (shuf (A, C), shuf (B, C))) -> shuf (OR (A, B), C)
3844  // (XOR (shuf (A, C), shuf (B, C))) -> shuf (XOR (A, B), V_0)
3845  if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
3846  SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
3847  N0->getOperand(0), N1->getOperand(0));
3848  AddToWorklist(NewNode.getNode());
3849  return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
3850  SVN0->getMask());
3851  }
3852 
3853  // Don't try to fold this node if it requires introducing a
3854  // build vector of all zeros that might be illegal at this stage.
3855  ShOp = N0->getOperand(0);
3856  if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
3857  if (!LegalTypes)
3858  ShOp = DAG.getConstant(0, SDLoc(N), VT);
3859  else
3860  ShOp = SDValue();
3861  }
3862 
3863  // (AND (shuf (C, A), shuf (C, B))) -> shuf (C, AND (A, B))
3864  // (OR (shuf (C, A), shuf (C, B))) -> shuf (C, OR (A, B))
3865  // (XOR (shuf (C, A), shuf (C, B))) -> shuf (V_0, XOR (A, B))
3866  if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
3867  SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
3868  N0->getOperand(1), N1->getOperand(1));
3869  AddToWorklist(NewNode.getNode());
3870  return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
3871  SVN0->getMask());
3872  }
3873  }
3874  }
3875 
3876  return SDValue();
3877 }
3878 
3879 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
3880 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
3881  const SDLoc &DL) {
3882  SDValue LL, LR, RL, RR, N0CC, N1CC;
3883  if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
3884  !isSetCCEquivalent(N1, RL, RR, N1CC))
3885  return SDValue();
3886 
3887  assert(N0.getValueType() == N1.getValueType() &&
3888  "Unexpected operand types for bitwise logic op");
3889  assert(LL.getValueType() == LR.getValueType() &&
3890  RL.getValueType() == RR.getValueType() &&
3891  "Unexpected operand types for setcc");
3892 
3893  // If we're here post-legalization or the logic op type is not i1, the logic
3894  // op type must match a setcc result type. Also, all folds require new
3895  // operations on the left and right operands, so those types must match.
3896  EVT VT = N0.getValueType();
3897  EVT OpVT = LL.getValueType();
3898  if (LegalOperations || VT.getScalarType() != MVT::i1)
3899  if (VT != getSetCCResultType(OpVT))
3900  return SDValue();
3901  if (OpVT != RL.getValueType())
3902  return SDValue();
3903 
3904  ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
3905  ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
3906  bool IsInteger = OpVT.isInteger();
3907  if (LR == RR && CC0 == CC1 && IsInteger) {
3908  bool IsZero = isNullConstantOrNullSplatConstant(LR);
3909  bool IsNeg1 = isAllOnesConstantOrAllOnesSplatConstant(LR);
3910 
3911  // All bits clear?
3912  bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
3913  // All sign bits clear?
3914  bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
3915  // Any bits set?
3916  bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
3917  // Any sign bits set?
3918  bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
3919 
3920  // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
3921  // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
3922  // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
3923  // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
3924  if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
3925  SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
3926  AddToWorklist(Or.getNode());
3927  return DAG.getSetCC(DL, VT, Or, LR, CC1);
3928  }
3929 
3930  // All bits set?
3931  bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
3932  // All sign bits set?
3933  bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
3934  // Any bits clear?
3935  bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
3936  // Any sign bits clear?
3937  bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
3938 
3939  // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
3940  // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
3941  // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
3942  // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
3943  if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
3944  SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
3945  AddToWorklist(And.getNode());
3946  return DAG.getSetCC(DL, VT, And, LR, CC1);
3947  }
3948  }
3949 
3950  // TODO: What is the 'or' equivalent of this fold?
3951  // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
3952  if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
3953  IsInteger && CC0 == ISD::SETNE &&
3954  ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
3955  (isAllOnesConstant(LR) && isNullConstant(RR)))) {
3956  SDValue One = DAG.getConstant(1, DL, OpVT);
3957  SDValue Two = DAG.getConstant(2, DL, OpVT);
3958  SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
3959  AddToWorklist(Add.getNode());
3960  return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
3961  }
3962 
3963  // Try more general transforms if the predicates match and the only user of
3964  // the compares is the 'and' or 'or'.
3965  if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
3966  N0.hasOneUse() && N1.hasOneUse()) {
3967  // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
3968  // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
3969  if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
3970  SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
3971  SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
3972  SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
3973  SDValue Zero = DAG.getConstant(0, DL, OpVT);
3974  return DAG.getSetCC(DL, VT, Or, Zero, CC1);
3975  }
3976  }
3977 
3978  // Canonicalize equivalent operands to LL == RL.
3979  if (LL == RR && LR == RL) {
3980  CC1 = ISD::getSetCCSwappedOperands(CC1);
3981  std::swap(RL, RR);
3982  }
3983 
3984  // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
3985  // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
3986  if (LL == RL && LR == RR) {
3987  ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
3988  : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
3989  if (NewCC != ISD::SETCC_INVALID &&
3990  (!LegalOperations ||
3991  (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
3992  TLI.isOperationLegal(ISD::SETCC, OpVT))))
3993  return DAG.getSetCC(DL, VT, LL, LR, NewCC);
3994  }
3995 
3996  return SDValue();
3997 }
3998 
3999 /// This contains all DAGCombine rules which reduce two values combined by
4000 /// an And operation to a single value. This makes them reusable in the context
4001 /// of visitSELECT(). Rules involving constants are not included as
4002 /// visitSELECT() already handles those cases.
4003 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
4004  EVT VT = N1.getValueType();
4005  SDLoc DL(N);
4006 
4007  // fold (and x, undef) -> 0
4008  if (N0.isUndef() || N1.isUndef())
4009  return DAG.getConstant(0, DL, VT);
4010 
4011  if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
4012  return V;
4013 
4014  if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
4015  VT.getSizeInBits() <= 64) {
4016  if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4017  if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
4018  // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
4019  // immediate for an add, but it is legal if its top c2 bits are set,
4020  // transform the ADD so the immediate doesn't need to be materialized
4021  // in a register.
4022  APInt ADDC = ADDI->getAPIntValue();
4023  APInt SRLC = SRLI->getAPIntValue();
4024  if (ADDC.getMinSignedBits() <= 64 &&
4025  SRLC.ult(VT.getSizeInBits()) &&
4026  !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4028  SRLC.getZExtValue());
4029  if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
4030  ADDC |= Mask;
4031  if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4032  SDLoc DL0(N0);
4033  SDValue NewAdd =
4034  DAG.getNode(ISD::ADD, DL0, VT,
4035  N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
4036  CombineTo(N0.getNode(), NewAdd);
4037  // Return N so it doesn't get rechecked!
4038  return SDValue(N, 0);
4039  }
4040  }
4041  }
4042  }
4043  }
4044  }
4045 
4046  // Reduce bit extract of low half of an integer to the narrower type.
4047  // (and (srl i64:x, K), KMask) ->
4048  // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
4049  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4050  if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
4051  if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4052  unsigned Size = VT.getSizeInBits();
4053  const APInt &AndMask = CAnd->getAPIntValue();
4054  unsigned ShiftBits = CShift->getZExtValue();
4055 
4056  // Bail out, this node will probably disappear anyway.
4057  if (ShiftBits == 0)
4058  return SDValue();
4059 
4060  unsigned MaskBits = AndMask.countTrailingOnes();
4061  EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
4062 
4063  if (AndMask.isMask() &&
4064  // Required bits must not span the two halves of the integer and
4065  // must fit in the half size type.
4066  (ShiftBits + MaskBits <= Size / 2) &&
4067  TLI.isNarrowingProfitable(VT, HalfVT) &&
4068  TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
4069  TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
4070  TLI.isTruncateFree(VT, HalfVT) &&
4071  TLI.isZExtFree(HalfVT, VT)) {
4072  // The isNarrowingProfitable is to avoid regressions on PPC and
4073  // AArch64 which match a few 64-bit bit insert / bit extract patterns
4074  // on downstream users of this. Those patterns could probably be
4075  // extended to handle extensions mixed in.
4076 
4077  SDValue SL(N0);
4078  assert(MaskBits <= Size);
4079 
4080  // Extracting the highest bit of the low half.
4081  EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
4082  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
4083  N0.getOperand(0));
4084 
4085  SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
4086  SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
4087  SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
4088  SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
4089  return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
4090  }
4091  }
4092  }
4093  }
4094 
4095  return SDValue();
4096 }
4097 
4098 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
4099  EVT LoadResultTy, EVT &ExtVT) {
4100  if (!AndC->getAPIntValue().isMask())
4101  return false;
4102 
4103  unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
4104 
4105  ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4106  EVT LoadedVT = LoadN->getMemoryVT();
4107 
4108  if (ExtVT == LoadedVT &&
4109  (!LegalOperations ||
4110  TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
4111  // ZEXTLOAD will match without needing to change the size of the value being
4112  // loaded.
4113  return true;
4114  }
4115 
4116  // Do not change the width of a volatile load.
4117  if (LoadN->isVolatile())
4118  return false;
4119 
4120  // Do not generate loads of non-round integer types since these can
4121  // be expensive (and would be wrong if the type is not byte sized).
4122  if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
4123  return false;
4124 
4125  if (LegalOperations &&
4126  !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
4127  return false;
4128 
4129  if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
4130  return false;
4131 
4132  return true;
4133 }
4134 
4135 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
4136  ISD::LoadExtType ExtType, EVT &MemVT,
4137  unsigned ShAmt) {
4138  if (!LDST)
4139  return false;
4140  // Only allow byte offsets.
4141  if (ShAmt % 8)
4142  return false;
4143 
4144  // Do not generate loads of non-round integer types since these can
4145  // be expensive (and would be wrong if the type is not byte sized).
4146  if (!MemVT.isRound())
4147  return false;
4148 
4149  // Don't change the width of a volatile load.
4150  if (LDST->isVolatile())
4151  return false;
4152 
4153  // Verify that we are actually reducing a load width here.
4154  if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
4155  return false;
4156 
4157  // Ensure that this isn't going to produce an unsupported unaligned access.
4158  if (ShAmt &&
4159  !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
4160  LDST->getAddressSpace(), ShAmt / 8))
4161  return false;
4162 
4163  // It's not possible to generate a constant of extended or untyped type.
4164  EVT PtrType = LDST->getBasePtr().getValueType();
4165  if (PtrType == MVT::Untyped || PtrType.isExtended())
4166  return false;
4167 
4168  if (isa<LoadSDNode>(LDST)) {
4169  LoadSDNode *Load = cast<LoadSDNode>(LDST);
4170  // Don't transform one with multiple uses, this would require adding a new
4171  // load.
4172  if (!SDValue(Load, 0).hasOneUse())
4173  return false;
4174 
4175  if (LegalOperations &&
4176  !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
4177  return false;
4178 
4179  // For the transform to be legal, the load must produce only two values
4180  // (the value loaded and the chain). Don't transform a pre-increment
4181  // load, for example, which produces an extra value. Otherwise the
4182  // transformation is not equivalent, and the downstream logic to replace
4183  // uses gets things wrong.
4184  if (Load->getNumValues() > 2)
4185  return false;
4186 
4187  // If the load that we're shrinking is an extload and we're not just
4188  // discarding the extension we can't simply shrink the load. Bail.
4189  // TODO: It would be possible to merge the extensions in some cases.
4190  if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
4191  Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4192  return false;
4193 
4194  if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
4195  return false;
4196  } else {
4197  assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
4198  StoreSDNode *Store = cast<StoreSDNode>(LDST);
4199  // Can't write outside the original store
4200  if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4201  return false;
4202 
4203  if (LegalOperations &&
4204  !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
4205  return false;
4206  }
4207  return true;
4208 }
4209 
4210 bool DAGCombiner::SearchForAndLoads(SDNode *N,
4212  SmallPtrSetImpl<SDNode*> &NodesWithConsts,
4214  SDNode *&NodeToMask) {
4215  // Recursively search for the operands, looking for loads which can be
4216  // narrowed.
4217  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
4218  SDValue Op = N->getOperand(i);
4219 
4220  if (Op.getValueType().isVector())
4221  return false;
4222 
4223  // Some constants may need fixing up later if they are too large.
4224  if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4225  if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
4226  (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
4227  NodesWithConsts.insert(N);
4228  continue;
4229  }
4230 
4231  if (!Op.hasOneUse())
4232  return false;
4233 
4234  switch(Op.getOpcode()) {
4235  case ISD::LOAD: {
4236  auto *Load = cast<LoadSDNode>(Op);
4237  EVT ExtVT;
4238  if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
4239  isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
4240 
4241  // ZEXTLOAD is already small enough.
4242  if (Load->getExtensionType() == ISD::ZEXTLOAD &&
4243  ExtVT.bitsGE(Load->getMemoryVT()))
4244  continue;
4245 
4246  // Use LE to convert equal sized loads to zext.
4247  if (ExtVT.bitsLE(Load->getMemoryVT()))
4248  Loads.insert(Load);
4249 
4250  continue;
4251  }
4252  return false;
4253  }
4254  case ISD::ZERO_EXTEND:
4255  case ISD::AssertZext: {
4256  unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
4257  EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4258  EVT VT = Op.getOpcode() == ISD::AssertZext ?
4259  cast<VTSDNode>(Op.getOperand(1))->getVT() :
4260  Op.getOperand(0).getValueType();
4261 
4262  // We can accept extending nodes if the mask is wider or an equal
4263  // width to the original type.
4264  if (ExtVT.bitsGE(VT))
4265  continue;
4266  break;
4267  }
4268  case ISD::OR:
4269  case ISD::XOR:
4270  case ISD::AND:
4271  if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
4272  NodeToMask))
4273  return false;
4274  continue;
4275  }
4276 
4277  // Allow one node which will masked along with any loads found.
4278  if (NodeToMask)
4279  return false;
4280 
4281  // Also ensure that the node to be masked only produces one data result.
4282  NodeToMask = Op.getNode();
4283  if (NodeToMask->getNumValues() > 1) {
4284  bool HasValue = false;
4285  for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
4286  MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
4287  if (VT != MVT::Glue && VT != MVT::Other) {
4288  if (HasValue) {
4289  NodeToMask = nullptr;
4290  return false;
4291  }
4292  HasValue = true;
4293  }
4294  }
4295  assert(HasValue && "Node to be masked has no data result?");
4296  }
4297  }
4298  return true;
4299 }
4300 
// Push the constant low-bit mask of the AND node N backwards through the
// tree of logic/extend nodes feeding it (discovered by SearchForAndLoads),
// so that each leaf load can be narrowed by ReduceLoadWidth and the AND
// itself becomes redundant and is removed.
// \param N    the ISD::AND node whose operand 1 is the candidate mask.
// \param DAG  the SelectionDAG being combined.
// \returns true iff the DAG was changed.
// NOTE(review): the declaration of `Loads` (doxygen line 4313 of the
// original file, presumably a small vector/set of LoadSDNode*) is not
// visible in this chunk — confirm against the full source.
4301 bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
 4302  auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
 4303  if (!Mask)
 4304  return false;
 4305 
 // Only handle masks of the contiguous-low-bits form (2^n - 1).
 4306  if (!Mask->getAPIntValue().isMask())
 4307  return false;
 4308 
 4309  // No need to do anything if the and directly uses a load.
 4310  if (isa<LoadSDNode>(N->getOperand(0)))
 4311  return false;
 4312 
 4314  SmallPtrSet<SDNode*, 2> NodesWithConsts;
 4315  SDNode *FixupNode = nullptr;
 4316  if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
 // Without at least one load to narrow the transform buys nothing.
 4317  if (Loads.size() == 0)
 4318  return false;
 4319 
 4320  LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
 4321  SDValue MaskOp = N->getOperand(1);
 4322 
 4323  // If it exists, fixup the single node we allow in the tree that needs
 4324  // masking.
 4325  if (FixupNode) {
 4326  LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
 4327  SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
 4328  FixupNode->getValueType(0),
 4329  SDValue(FixupNode, 0), MaskOp);
 4330  DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
 // The RAUW above also rewrote the new AND's own operand to point at
 // itself; restore its operand back to the original fixup node (only
 // if getNode didn't CSE/fold the AND into something else).
 4331  if (And.getOpcode() == ISD ::AND)
 4332  DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
 4333  }
 4334 
 4335  // Narrow any constants that need it.
 4336  for (auto *LogicN : NodesWithConsts) {
 4337  SDValue Op0 = LogicN->getOperand(0);
 4338  SDValue Op1 = LogicN->getOperand(1);
 4339 
 // Canonicalize so the constant operand to be masked sits in Op1.
 4340  if (isa<ConstantSDNode>(Op0))
 4341  std::swap(Op0, Op1);
 4342 
 4343  SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
 4344  Op1, MaskOp);
 4345 
 4346  DAG.UpdateNodeOperands(LogicN, Op0, And);
 4347  }
 4348 
 4349  // Create narrow loads.
 4350  for (auto *Load : Loads) {
 4351  LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
 4352  SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
 4353  SDValue(Load, 0), MaskOp);
 4354  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
 // Same self-reference repair as for FixupNode above.
 4355  if (And.getOpcode() == ISD ::AND)
 4356  And = SDValue(
 4357  DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
 4358  SDValue NewLoad = ReduceLoadWidth(And.getNode());
 4359  assert(NewLoad &&
 4360  "Shouldn't be masking the load if it can't be narrowed");
 4361  CombineTo(Load, NewLoad, NewLoad.getValue(1));
 4362  }
 // The AND is now redundant: every path into it is already masked.
 4363  DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
 4364  return true;
 4365  }
 4366  return false;
 4367 }
4368 
4369 // Unfold
4370 // x & (-1 'logical shift' y)
4371 // To
4372 // (x 'opposite logical shift' y) 'logical shift' y
4373 // if it is better for performance.
4374 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
4375  assert(N->getOpcode() == ISD::AND);
4376 
4377  SDValue N0 = N->getOperand(0);
4378  SDValue N1 = N->getOperand(1);
4379 
4380  // Do we actually prefer shifts over mask?
4381  if (!TLI.preferShiftsToClearExtremeBits(N0))
4382  return SDValue();
4383 
4384  // Try to match (-1 '[outer] logical shift' y)
4385  unsigned OuterShift;
4386  unsigned InnerShift; // The opposite direction to the OuterShift.
4387  SDValue Y; // Shift amount.
4388  auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
4389  if (!M.hasOneUse())
4390  return false;
4391  OuterShift = M->getOpcode();
4392  if (OuterShift == ISD::SHL)
4393  InnerShift = ISD::SRL;
4394  else if (OuterShift == ISD::SRL)
4395  InnerShift = ISD::SHL;
4396  else
4397  return false;
4398  if (!isAllOnesConstant(M->getOperand(0)))
4399  return false;
4400  Y = M->getOperand(1);
4401  return true;
4402  };
4403 
4404  SDValue X;
4405  if (matchMask(N1))
4406  X = N0;
4407  else if (matchMask(N0))
4408  X = N1;
4409  else
4410  return SDValue();
4411 
4412  SDLoc DL(N);
4413  EVT VT = N->getValueType(0);
4414 
4415  // tmp = x 'opposite logical shift' y
4416  SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
4417  // ret = tmp 'logical shift' y
4418  SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
4419 
4420  return T1;
4421 }
4422 
// Combine an ISD::AND node.  Applies, in rough order: trivial identities
// (x&x, x&-1), vector-specific folds, constant folding and RHS
// canonicalization, known-zero elimination, reassociation, shuffle-clear
// masks, any_extend->zero_extend conversion, (ext)load narrowing /
// zextload conversion, backwards mask propagation to loads, demanded-bits
// simplification, zext_inreg-of-load folds, halfword-bswap matching, and
// the shift-pair unfold.  Returns the replacement value, or an empty
// SDValue if no combine applied.
// NOTE(review): this chunk comes from an extraction that dropped several
// original lines (doxygen numbers 4438-4450, 4455-4456, 4460-4461, 4517,
// 4527, 4658); conditions near those points appear truncated below and
// must be checked against the full source before editing.
4423 SDValue DAGCombiner::visitAND(SDNode *N) {
 4424  SDValue N0 = N->getOperand(0);
 4425  SDValue N1 = N->getOperand(1);
 4426  EVT VT = N1.getValueType();
 4427 
 4428  // x & x --> x
 4429  if (N0 == N1)
 4430  return N0;
 4431 
 4432  // fold vector ops
 4433  if (VT.isVector()) {
 4434  if (SDValue FoldedVOp = SimplifyVBinOp(N))
 4435  return FoldedVOp;
 4436 
 // NOTE(review): the all-zeros/all-ones build-vector guards for the two
 // folds below (original lines 4438, 4440, 4444, 4448, 4450) are not
 // visible in this chunk.
 4437  // fold (and x, 0) -> 0, vector edition
 4439  // do not return N0, because undef node may exist in N0
 4441  SDLoc(N), N0.getValueType());
 4443  // do not return N1, because undef node may exist in N1
 4445  SDLoc(N), N1.getValueType());
 4446 
 4447  // fold (and x, -1) -> x, vector edition
 4449  return N1;
 4451  return N0;
 4452  }
 4453 
 // NOTE(review): the declarations of N0C/N1C (original lines 4455-4456,
 // presumably ConstantSDNode* extracted from N0/N1) are not visible here.
 4454  // fold (and c1, c2) -> c1&c2
 4457  if (N0C && N1C && !N1C->isOpaque())
 4458  return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
 4459  // canonicalize constant to RHS
 4462  return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
 4463  // fold (and x, -1) -> x
 4464  if (isAllOnesConstant(N1))
 4465  return N0;
 4466  // if (and x, c) is known to be zero, return 0
 4467  unsigned BitWidth = VT.getScalarSizeInBits();
 4468  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
 4469  APInt::getAllOnesValue(BitWidth)))
 4470  return DAG.getConstant(0, SDLoc(N), VT);
 4471 
 4472  if (SDValue NewSel = foldBinOpIntoSelect(N))
 4473  return NewSel;
 4474 
 4475  // reassociate and
 4476  if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
 4477  return RAND;
 4478 
 4479  // Try to convert a constant mask AND into a shuffle clear mask.
 4480  if (VT.isVector())
 4481  if (SDValue Shuffle = XformToShuffleWithZero(N))
 4482  return Shuffle;
 4483 
 4484  // fold (and (or x, C), D) -> D if (C & D) == D
 4485  auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
 4486  return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
 4487  };
 4488  if (N0.getOpcode() == ISD::OR &&
 4489  ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
 4490  return N1;
 4491  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
 4492  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
 4493  SDValue N0Op0 = N0.getOperand(0);
 // ~c are the bits the AND clears; if V already has them all zero in its
 // narrow type, the any_extend can be strengthened to a zero_extend.
 4494  APInt Mask = ~N1C->getAPIntValue();
 4495  Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
 4496  if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
 4497  SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
 4498  N0.getValueType(), N0Op0);
 4499 
 4500  // Replace uses of the AND with uses of the Zero extend node.
 4501  CombineTo(N, Zext);
 4502 
 4503  // We actually want to replace all uses of the any_extend with the
 4504  // zero_extend, to avoid duplicating things. This will later cause this
 4505  // AND to be folded.
 4506  CombineTo(N0.getNode(), Zext);
 4507  return SDValue(N, 0); // Return N so it doesn't get rechecked!
 4508  }
 4509  }
 4510  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
 4511  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
 4512  // already be zero by virtue of the width of the base type of the load.
 4513  //
 4514  // the 'X' node here can either be nothing or an extract_vector_elt to catch
 4515  // more cases.
 // NOTE(review): one guard line of this condition (original line 4517) is
 // not visible in this chunk.
 4516  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
 4518  N0.getOperand(0).getOpcode() == ISD::LOAD &&
 4519  N0.getOperand(0).getResNo() == 0) ||
 4520  (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
 4521  LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
 4522  N0 : N0.getOperand(0) );
 4523 
 4524  // Get the constant (if applicable) the zero'th operand is being ANDed with.
 4525  // This can be a pure constant or a vector splat, in which case we treat the
 4526  // vector as a scalar and use the splat value.
 // NOTE(review): the declaration of `Constant` (an APInt, original line
 // 4527) is not visible in this chunk.
 4528  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
 4529  Constant = C->getAPIntValue();
 4530  } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
 4531  APInt SplatValue, SplatUndef;
 4532  unsigned SplatBitSize;
 4533  bool HasAnyUndefs;
 4534  bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
 4535  SplatBitSize, HasAnyUndefs);
 4536  if (IsSplat) {
 4537  // Undef bits can contribute to a possible optimisation if set, so
 4538  // set them.
 4539  SplatValue |= SplatUndef;
 4540 
 4541  // The splat value may be something like "0x00FFFFFF", which means 0 for
 4542  // the first vector value and FF for the rest, repeating. We need a mask
 4543  // that will apply equally to all members of the vector, so AND all the
 4544  // lanes of the constant together.
 4545  EVT VT = Vector->getValueType(0);
 4546  unsigned BitWidth = VT.getScalarSizeInBits();
 4547 
 4548  // If the splat value has been compressed to a bitlength lower
 4549  // than the size of the vector lane, we need to re-expand it to
 4550  // the lane size.
 4551  if (BitWidth > SplatBitSize)
 4552  for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
 4553  SplatBitSize < BitWidth;
 4554  SplatBitSize = SplatBitSize * 2)
 4555  SplatValue |= SplatValue.shl(SplatBitSize);
 4556 
 4557  // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
 4558  // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
 4559  if (SplatBitSize % BitWidth == 0) {
 4560  Constant = APInt::getAllOnesValue(BitWidth);
 4561  for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
 4562  Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
 4563  }
 4564  }
 4565  }
 4566 
 4567  // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
 4568  // actually legal and isn't going to get expanded, else this is a false
 4569  // optimisation.
 4570  bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
 4571  Load->getValueType(0),
 4572  Load->getMemoryVT());
 4573 
 4574  // Resize the constant to the same size as the original memory access before
 4575  // extension. If it is still the AllOnesValue then this AND is completely
 4576  // unneeded.
 4577  Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
 4578 
 // B: is it safe/profitable to drop the AND for this extension kind?
 4579  bool B;
 4580  switch (Load->getExtensionType()) {
 4581  default: B = false; break;
 4582  case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
 4583  case ISD::ZEXTLOAD:
 4584  case ISD::NON_EXTLOAD: B = true; break;
 4585  }
 4586 
 4587  if (B && Constant.isAllOnesValue()) {
 4588  // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
 4589  // preserve semantics once we get rid of the AND.
 4590  SDValue NewLoad(Load, 0);
 4591 
 4592  // Fold the AND away. NewLoad may get replaced immediately.
 4593  CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
 4594 
 4595  if (Load->getExtensionType() == ISD::EXTLOAD) {
 4596  NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
 4597  Load->getValueType(0), SDLoc(Load),
 4598  Load->getChain(), Load->getBasePtr(),
 4599  Load->getOffset(), Load->getMemoryVT(),
 4600  Load->getMemOperand());
 4601  // Replace uses of the EXTLOAD with the new ZEXTLOAD.
 4602  if (Load->getNumValues() == 3) {
 4603  // PRE/POST_INC loads have 3 values.
 4604  SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
 4605  NewLoad.getValue(2) };
 4606  CombineTo(Load, To, 3, true);
 4607  } else {
 4608  CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
 4609  }
 4610  }
 4611 
 4612  return SDValue(N, 0); // Return N so it doesn't get rechecked!
 4613  }
 4614  }
 4615 
 4616  // fold (and (load x), 255) -> (zextload x, i8)
 4617  // fold (and (extload x, i16), 255) -> (zextload x, i8)
 4618  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
 4619  if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
 4620  (N0.getOpcode() == ISD::ANY_EXTEND &&
 4621  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
 4622  if (SDValue Res = ReduceLoadWidth(N)) {
 4623  LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
 4624  ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
 4625 
 4626  AddToWorklist(N);
 4627  CombineTo(LN0, Res, Res.getValue(1));
 4628  return SDValue(N, 0);
 4629  }
 4630  }
 4631 
 4632  if (Level >= AfterLegalizeTypes) {
 4633  // Attempt to propagate the AND back up to the leaves which, if they're
 4634  // loads, can be combined to narrow loads and the AND node can be removed.
 4635  // Perform after legalization so that extend nodes will already be
 4636  // combined into the loads.
 4637  if (BackwardsPropagateMask(N, DAG)) {
 4638  return SDValue(N, 0);
 4639  }
 4640  }
 4641 
 4642  if (SDValue Combined = visitANDLike(N0, N1, N))
 4643  return Combined;
 4644 
 4645  // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
 4646  if (N0.getOpcode() == N1.getOpcode())
 4647  if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
 4648  return Tmp;
 4649 
 4650  // Masking the negated extension of a boolean is just the zero-extended
 4651  // boolean:
 4652  // and (sub 0, zext(bool X)), 1 --> zext(bool X)
 4653  // and (sub 0, sext(bool X)), 1 --> zext(bool X)
 4654  //
 4655  // Note: the SimplifyDemandedBits fold below can make an information-losing
 4656  // transform, and then we have no way to find this better fold.
 4657  if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
 // NOTE(review): the zero-LHS guard of the SUB (original line 4658,
 // opening the inner block closed at line 4667 below) is not visible in
 // this chunk.
 4659  SDValue SubRHS = N0.getOperand(1);
 4660  if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
 4661  SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
 4662  return SubRHS;
 4663  if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
 4664  SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
 4665  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
 4666  }
 4667  }
 4668 
 4669  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
 4670  // fold (and (sra)) -> (and (srl)) when possible.
 4671  if (SimplifyDemandedBits(SDValue(N, 0)))
 4672  return SDValue(N, 0);
 4673 
 4674  // fold (zext_inreg (extload x)) -> (zextload x)
 4675  if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
 4676  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
 4677  EVT MemVT = LN0->getMemoryVT();
 4678  // If we zero all the possible extended bits, then we can turn this into
 4679  // a zextload if we are running before legalize or the operation is legal.
 4680  unsigned BitWidth = N1.getScalarValueSizeInBits();
 4681  if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
 4682  BitWidth - MemVT.getScalarSizeInBits())) &&
 4683  ((!LegalOperations && !LN0->isVolatile()) ||
 4684  TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
 4685  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
 4686  LN0->getChain(), LN0->getBasePtr(),
 4687  MemVT, LN0->getMemOperand());
 4688  AddToWorklist(N);
 4689  CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
 4690  return SDValue(N, 0); // Return N so it doesn't get rechecked!
 4691  }
 4692  }
 4693  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
 4694  if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
 4695  N0.hasOneUse()) {
 4696  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
 4697  EVT MemVT = LN0->getMemoryVT();
 4698  // If we zero all the possible extended bits, then we can turn this into
 4699  // a zextload if we are running before legalize or the operation is legal.
 4700  unsigned BitWidth = N1.getScalarValueSizeInBits();
 4701  if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
 4702  BitWidth - MemVT.getScalarSizeInBits())) &&
 4703  ((!LegalOperations && !LN0->isVolatile()) ||
 4704  TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
 4705  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
 4706  LN0->getChain(), LN0->getBasePtr(),
 4707  MemVT, LN0->getMemOperand());
 4708  AddToWorklist(N);
 4709  CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
 4710  return SDValue(N, 0); // Return N so it doesn't get rechecked!
 4711  }
 4712  }
 4713  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
 4714  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
 4715  if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
 4716  N0.getOperand(1), false))
 4717  return BSwap;
 4718  }
 4719 
 // Last resort: x & (-1 << y) / x & (-1 >> y) as a shift pair, if the
 // target prefers that form.
 4720  if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
 4721  return Shifts;
 4722 
 4723  return SDValue();
 4724 }
4725 
 4726 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
/// \param N              the OR (or AND-of-OR) node being combined; its value
///                       type selects the output type.
/// \param N0, N1         the two operands forming the halfword-swap pattern.
/// \param DemandHighBits when true, bits above the low 16 must be provably
///                       zero for the match to be valid.
/// \returns the replacement value, or an empty SDValue on no match.
/// NOTE(review): this chunk's extraction dropped the declarations of
/// N01C/N11C/N001C/N101C (original lines 4748, 4761, 4775-4776, 4787,
/// 4798, presumably dyn_cast<ConstantSDNode> of the relevant operands) —
/// confirm against the full source.
 4727 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
 4728  bool DemandHighBits) {
 4729  if (!LegalOperations)
 4730  return SDValue();
 4731 
 4732  EVT VT = N->getValueType(0);
 4733  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
 4734  return SDValue();
 4735  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
 4736  return SDValue();
 4737 
 4738  // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
 // Canonicalize so any AND-of-SHL ends up in N0 and AND-of-SRL in N1.
 4739  bool LookPassAnd0 = false;
 4740  bool LookPassAnd1 = false;
 4741  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
 4742  std::swap(N0, N1);
 4743  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
 4744  std::swap(N0, N1);
 4745  if (N0.getOpcode() == ISD::AND) {
 4746  if (!N0.getNode()->hasOneUse())
 4747  return SDValue();
 4749  // Also handle 0xffff since the LHS is guaranteed to have zeros there.
 4750  // This is needed for X86.
 4751  if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
 4752  N01C->getZExtValue() != 0xFFFF))
 4753  return SDValue();
 // Look through the AND; remember we did so for the high-bits check below.
 4754  N0 = N0.getOperand(0);
 4755  LookPassAnd0 = true;
 4756  }
 4757 
 4758  if (N1.getOpcode() == ISD::AND) {
 4759  if (!N1.getNode()->hasOneUse())
 4760  return SDValue();
 4762  if (!N11C || N11C->getZExtValue() != 0xFF)
 4763  return SDValue();
 4764  N1 = N1.getOperand(0);
 4765  LookPassAnd1 = true;
 4766  }
 4767 
 // After looking through the ANDs we require exactly (shl ., 8) in N0 and
 // (srl ., 8) in N1.
 4768  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
 4769  std::swap(N0, N1);
 4770  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
 4771  return SDValue();
 4772  if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
 4773  return SDValue();
 4774 
 4777  if (!N01C || !N11C)
 4778  return SDValue();
 4779  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
 4780  return SDValue();
 4781 
 4782  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
 4783  SDValue N00 = N0->getOperand(0);
 4784  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
 4785  if (!N00.getNode()->hasOneUse())
 4786  return SDValue();
 4788  if (!N001C || N001C->getZExtValue() != 0xFF)
 4789  return SDValue();
 4790  N00 = N00.getOperand(0);
 4791  LookPassAnd0 = true;
 4792  }
 4793 
 4794  SDValue N10 = N1->getOperand(0);
 4795  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
 4796  if (!N10.getNode()->hasOneUse())
 4797  return SDValue();
 4799  // Also allow 0xFFFF since the bits will be shifted out. This is needed
 4800  // for X86.
 4801  if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
 4802  N101C->getZExtValue() != 0xFFFF))
 4803  return SDValue();
 4804  N10 = N10.getOperand(0);
 4805  LookPassAnd1 = true;
 4806  }
 4807 
 // Both halves of the pattern must shift the same source value 'a'.
 4808  if (N00 != N10)
 4809  return SDValue();
 4810 
 4811  // Make sure everything beyond the low halfword gets set to zero since the SRL
 4812  // 16 will clear the top bits.
 4813  unsigned OpSizeInBits = VT.getSizeInBits();
 4814  if (DemandHighBits && OpSizeInBits > 16) {
 4815  // If the left-shift isn't masked out then the only way this is a bswap is
 4816  // if all bits beyond the low 8 are 0. In that case the entire pattern
 4817  // reduces to a left shift anyway: leave it for other parts of the combiner.
 4818  if (!LookPassAnd0)
 4819  return SDValue();
 4820 
 4821  // However, if the right shift isn't masked out then it might be because
 4822  // it's not needed. See if we can spot that too.
 4823  if (!LookPassAnd1 &&
 4824  !DAG.MaskedValueIsZero(
 4825  N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
 4826  return SDValue();
 4827  }
 4828 
 // Build (bswap a); for types wider than i16, shift the swapped halfword
 // down from the top.
 4829  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
 4830  if (OpSizeInBits > 16) {
 4831  SDLoc DL(N);
 4832  Res = DAG.getNode(ISD::SRL, DL, VT, Res,
 4833  DAG.getConstant(OpSizeInBits - 16, DL,
 4834  getShiftAmountTy(VT)));
 4835  }
 4836  return Res;
 4837 }
4838 
4839 /// Return true if the specified node is an element that makes up a 32-bit
4840 /// packed halfword byteswap.
4841 /// ((x & 0x000000ff) << 8) |
4842 /// ((x & 0x0000ff00) >> 8) |
4843 /// ((x & 0x00ff0000) << 8) |
4844 /// ((x & 0xff000000) >> 8)
4846  if (!N.getNode()->hasOneUse())
4847  return false;
4848 
4849  unsigned Opc = N.getOpcode();
4850  if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
4851  return false;
4852 
4853  SDValue N0 = N.getOperand(0);
4854  unsigned Opc0 = N0.getOpcode();
4855  if (Opc0 !=