LLVM  8.0.0svn
DAGCombiner.cpp
Go to the documentation of this file.
1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
11 // both before and after the DAG is legalized.
12 //
13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
14 // primarily intended to handle simplification opportunities that are implicit
15 // in the LLVM IR and exposed by the various codegen lowering phases.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/IntervalMap.h"
24 #include "llvm/ADT/None.h"
25 #include "llvm/ADT/Optional.h"
26 #include "llvm/ADT/STLExtras.h"
27 #include "llvm/ADT/SetVector.h"
29 #include "llvm/ADT/SmallPtrSet.h"
30 #include "llvm/ADT/SmallSet.h"
31 #include "llvm/ADT/SmallVector.h"
32 #include "llvm/ADT/Statistic.h"
49 #include "llvm/IR/Attributes.h"
50 #include "llvm/IR/Constant.h"
51 #include "llvm/IR/DataLayout.h"
52 #include "llvm/IR/DerivedTypes.h"
53 #include "llvm/IR/Function.h"
54 #include "llvm/IR/LLVMContext.h"
55 #include "llvm/IR/Metadata.h"
56 #include "llvm/Support/Casting.h"
57 #include "llvm/Support/CodeGen.h"
59 #include "llvm/Support/Compiler.h"
60 #include "llvm/Support/Debug.h"
62 #include "llvm/Support/KnownBits.h"
68 #include <algorithm>
69 #include <cassert>
70 #include <cstdint>
71 #include <functional>
72 #include <iterator>
73 #include <string>
74 #include <tuple>
75 #include <utility>
76 
77 using namespace llvm;
78 
79 #define DEBUG_TYPE "dagcombine"
80 
81 STATISTIC(NodesCombined , "Number of dag nodes combined");
82 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
83 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
84 STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
85 STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
86 STATISTIC(SlicedLoads, "Number of load sliced");
87 STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
88 
89 static cl::opt<bool>
90 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
91  cl::desc("Enable DAG combiner's use of IR alias analysis"));
92 
93 static cl::opt<bool>
94 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
95  cl::desc("Enable DAG combiner's use of TBAA"));
96 
97 #ifndef NDEBUG
99 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
100  cl::desc("Only use DAG-combiner alias analysis in this"
101  " function"));
102 #endif
103 
104 /// Hidden option to stress test load slicing, i.e., when this option
105 /// is enabled, load slicing bypasses most of its profitability guards.
106 static cl::opt<bool>
107 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
108  cl::desc("Bypass the profitability model of load slicing"),
109  cl::init(false));
110 
111 static cl::opt<bool>
112  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
113  cl::desc("DAG combiner may split indexing from loads"));
114 
115 namespace {
116 
117  class DAGCombiner {
118  SelectionDAG &DAG;
119  const TargetLowering &TLI;
121  CodeGenOpt::Level OptLevel;
122  bool LegalOperations = false;
123  bool LegalTypes = false;
124  bool ForCodeSize;
125 
126  /// Worklist of all of the nodes that need to be simplified.
127  ///
128  /// This must behave as a stack -- new nodes to process are pushed onto the
129  /// back and when processing we pop off of the back.
130  ///
131  /// The worklist will not contain duplicates but may contain null entries
132  /// due to nodes being deleted from the underlying DAG.
133  SmallVector<SDNode *, 64> Worklist;
134 
135  /// Mapping from an SDNode to its position on the worklist.
136  ///
137  /// This is used to find and remove nodes from the worklist (by nulling
138  /// them) when they are deleted from the underlying DAG. It relies on
139  /// stable indices of nodes within the worklist.
140  DenseMap<SDNode *, unsigned> WorklistMap;
141 
142  /// Set of nodes which have been combined (at least once).
143  ///
144  /// This is used to allow us to reliably add any operands of a DAG node
145  /// which have not yet been combined to the worklist.
146  SmallPtrSet<SDNode *, 32> CombinedNodes;
147 
148  // AA - Used for DAG load/store alias analysis.
149  AliasAnalysis *AA;
150 
151  /// When an instruction is simplified, add all users of the instruction to
152  /// the work lists because they might get more simplified now.
153  void AddUsersToWorklist(SDNode *N) {
154  for (SDNode *Node : N->uses())
155  AddToWorklist(Node);
156  }
157 
158  /// Call the node-specific routine that folds each particular type of node.
159  SDValue visit(SDNode *N);
160 
161  public:
162  DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
163  : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
164  OptLevel(OL), AA(AA) {
165  ForCodeSize = DAG.getMachineFunction().getFunction().optForSize();
166 
167  MaximumLegalStoreInBits = 0;
168  for (MVT VT : MVT::all_valuetypes())
169  if (EVT(VT).isSimple() && VT != MVT::Other &&
170  TLI.isTypeLegal(EVT(VT)) &&
171  VT.getSizeInBits() >= MaximumLegalStoreInBits)
172  MaximumLegalStoreInBits = VT.getSizeInBits();
173  }
174 
175  /// Add to the worklist making sure its instance is at the back (next to be
176  /// processed.)
177  void AddToWorklist(SDNode *N) {
179  "Deleted Node added to Worklist");
180 
181  // Skip handle nodes as they can't usefully be combined and confuse the
182  // zero-use deletion strategy.
183  if (N->getOpcode() == ISD::HANDLENODE)
184  return;
185 
186  if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
187  Worklist.push_back(N);
188  }
189 
190  /// Remove all instances of N from the worklist.
191  void removeFromWorklist(SDNode *N) {
192  CombinedNodes.erase(N);
193 
194  auto It = WorklistMap.find(N);
195  if (It == WorklistMap.end())
196  return; // Not in the worklist.
197 
198  // Null out the entry rather than erasing it to avoid a linear operation.
199  Worklist[It->second] = nullptr;
200  WorklistMap.erase(It);
201  }
202 
203  void deleteAndRecombine(SDNode *N);
204  bool recursivelyDeleteUnusedNodes(SDNode *N);
205 
206  /// Replaces all uses of the results of one DAG node with new values.
207  SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
208  bool AddTo = true);
209 
210  /// Replaces all uses of the results of one DAG node with new values.
211  SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
212  return CombineTo(N, &Res, 1, AddTo);
213  }
214 
215  /// Replaces all uses of the results of one DAG node with new values.
216  SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
217  bool AddTo = true) {
218  SDValue To[] = { Res0, Res1 };
219  return CombineTo(N, To, 2, AddTo);
220  }
221 
222  void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
223 
224  private:
225  unsigned MaximumLegalStoreInBits;
226 
227  /// Check the specified integer node value to see if it can be simplified or
228  /// if things it uses can be simplified by bit propagation.
229  /// If so, return true.
230  bool SimplifyDemandedBits(SDValue Op) {
231  unsigned BitWidth = Op.getScalarValueSizeInBits();
232  APInt Demanded = APInt::getAllOnesValue(BitWidth);
233  return SimplifyDemandedBits(Op, Demanded);
234  }
235 
236  /// Check the specified vector node value to see if it can be simplified or
237  /// if things it uses can be simplified as it only uses some of the
238  /// elements. If so, return true.
239  bool SimplifyDemandedVectorElts(SDValue Op) {
240  unsigned NumElts = Op.getValueType().getVectorNumElements();
241  APInt Demanded = APInt::getAllOnesValue(NumElts);
242  return SimplifyDemandedVectorElts(Op, Demanded);
243  }
244 
245  bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
246  bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
247  bool AssumeSingleUse = false);
248 
249  bool CombineToPreIndexedLoadStore(SDNode *N);
250  bool CombineToPostIndexedLoadStore(SDNode *N);
251  SDValue SplitIndexingFromLoad(LoadSDNode *LD);
252  bool SliceUpLoad(SDNode *N);
253 
254  // Scalars have size 0 to distinguish from singleton vectors.
255  SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
256  bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
257  bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
258 
259  /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
260  /// load.
261  ///
262  /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
263  /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
264  /// \param EltNo index of the vector element to load.
265  /// \param OriginalLoad load that EVE came from to be replaced.
266  /// \returns EVE on success SDValue() on failure.
267  SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
268  SDValue EltNo,
269  LoadSDNode *OriginalLoad);
270  void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
271  SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
272  SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
273  SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
274  SDValue PromoteIntBinOp(SDValue Op);
275  SDValue PromoteIntShiftOp(SDValue Op);
276  SDValue PromoteExtend(SDValue Op);
277  bool PromoteLoad(SDValue Op);
278 
279  /// Call the node-specific routine that knows how to fold each
280  /// particular type of node. If that doesn't do anything, try the
281  /// target-specific DAG combines.
282  SDValue combine(SDNode *N);
283 
284  // Visitation implementation - Implement dag node combining for different
285  // node types. The semantics are as follows:
286  // Return Value:
287  // SDValue.getNode() == 0 - No change was made
288  // SDValue.getNode() == N - N was replaced, is dead and has been handled.
289  // otherwise - N should be replaced by the returned Operand.
290  //
291  SDValue visitTokenFactor(SDNode *N);
292  SDValue visitMERGE_VALUES(SDNode *N);
293  SDValue visitADD(SDNode *N);
294  SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
295  SDValue visitSUB(SDNode *N);
296  SDValue visitADDC(SDNode *N);
297  SDValue visitUADDO(SDNode *N);
298  SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
299  SDValue visitSUBC(SDNode *N);
300  SDValue visitUSUBO(SDNode *N);
301  SDValue visitADDE(SDNode *N);
302  SDValue visitADDCARRY(SDNode *N);
303  SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
304  SDValue visitSUBE(SDNode *N);
305  SDValue visitSUBCARRY(SDNode *N);
306  SDValue visitMUL(SDNode *N);
307  SDValue useDivRem(SDNode *N);
308  SDValue visitSDIV(SDNode *N);
309  SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
310  SDValue visitUDIV(SDNode *N);
311  SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
312  SDValue visitREM(SDNode *N);
313  SDValue visitMULHU(SDNode *N);
314  SDValue visitMULHS(SDNode *N);
315  SDValue visitSMUL_LOHI(SDNode *N);
316  SDValue visitUMUL_LOHI(SDNode *N);
317  SDValue visitSMULO(SDNode *N);
318  SDValue visitUMULO(SDNode *N);
319  SDValue visitIMINMAX(SDNode *N);
320  SDValue visitAND(SDNode *N);
321  SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
322  SDValue visitOR(SDNode *N);
323  SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
324  SDValue visitXOR(SDNode *N);
325  SDValue SimplifyVBinOp(SDNode *N);
326  SDValue visitSHL(SDNode *N);
327  SDValue visitSRA(SDNode *N);
328  SDValue visitSRL(SDNode *N);
329  SDValue visitFunnelShift(SDNode *N);
330  SDValue visitRotate(SDNode *N);
331  SDValue visitABS(SDNode *N);
332  SDValue visitBSWAP(SDNode *N);
333  SDValue visitBITREVERSE(SDNode *N);
334  SDValue visitCTLZ(SDNode *N);
335  SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
336  SDValue visitCTTZ(SDNode *N);
337  SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
338  SDValue visitCTPOP(SDNode *N);
339  SDValue visitSELECT(SDNode *N);
340  SDValue visitVSELECT(SDNode *N);
341  SDValue visitSELECT_CC(SDNode *N);
342  SDValue visitSETCC(SDNode *N);
343  SDValue visitSETCCCARRY(SDNode *N);
344  SDValue visitSIGN_EXTEND(SDNode *N);
345  SDValue visitZERO_EXTEND(SDNode *N);
346  SDValue visitANY_EXTEND(SDNode *N);
347  SDValue visitAssertExt(SDNode *N);
348  SDValue visitSIGN_EXTEND_INREG(SDNode *N);
349  SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
350  SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
351  SDValue visitTRUNCATE(SDNode *N);
352  SDValue visitBITCAST(SDNode *N);
353  SDValue visitBUILD_PAIR(SDNode *N);
354  SDValue visitFADD(SDNode *N);
355  SDValue visitFSUB(SDNode *N);
356  SDValue visitFMUL(SDNode *N);
357  SDValue visitFMA(SDNode *N);
358  SDValue visitFDIV(SDNode *N);
359  SDValue visitFREM(SDNode *N);
360  SDValue visitFSQRT(SDNode *N);
361  SDValue visitFCOPYSIGN(SDNode *N);
362  SDValue visitFPOW(SDNode *N);
363  SDValue visitSINT_TO_FP(SDNode *N);
364  SDValue visitUINT_TO_FP(SDNode *N);
365  SDValue visitFP_TO_SINT(SDNode *N);
366  SDValue visitFP_TO_UINT(SDNode *N);
367  SDValue visitFP_ROUND(SDNode *N);
368  SDValue visitFP_ROUND_INREG(SDNode *N);
369  SDValue visitFP_EXTEND(SDNode *N);
370  SDValue visitFNEG(SDNode *N);
371  SDValue visitFABS(SDNode *N);
372  SDValue visitFCEIL(SDNode *N);
373  SDValue visitFTRUNC(SDNode *N);
374  SDValue visitFFLOOR(SDNode *N);
375  SDValue visitFMINNUM(SDNode *N);
376  SDValue visitFMAXNUM(SDNode *N);
377  SDValue visitFMINIMUM(SDNode *N);
378  SDValue visitFMAXIMUM(SDNode *N);
379  SDValue visitBRCOND(SDNode *N);
380  SDValue visitBR_CC(SDNode *N);
381  SDValue visitLOAD(SDNode *N);
382 
383  SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
384  SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
385 
386  SDValue visitSTORE(SDNode *N);
387  SDValue visitINSERT_VECTOR_ELT(SDNode *N);
388  SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
389  SDValue visitBUILD_VECTOR(SDNode *N);
390  SDValue visitCONCAT_VECTORS(SDNode *N);
391  SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
392  SDValue visitVECTOR_SHUFFLE(SDNode *N);
393  SDValue visitSCALAR_TO_VECTOR(SDNode *N);
394  SDValue visitINSERT_SUBVECTOR(SDNode *N);
395  SDValue visitMLOAD(SDNode *N);
396  SDValue visitMSTORE(SDNode *N);
397  SDValue visitMGATHER(SDNode *N);
398  SDValue visitMSCATTER(SDNode *N);
399  SDValue visitFP_TO_FP16(SDNode *N);
400  SDValue visitFP16_TO_FP(SDNode *N);
401 
402  SDValue visitFADDForFMACombine(SDNode *N);
403  SDValue visitFSUBForFMACombine(SDNode *N);
404  SDValue visitFMULForFMADistributiveCombine(SDNode *N);
405 
406  SDValue XformToShuffleWithZero(SDNode *N);
407  SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
408  SDValue N1, SDNodeFlags Flags);
409 
410  SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
411 
412  SDValue foldSelectOfConstants(SDNode *N);
413  SDValue foldVSelectOfConstants(SDNode *N);
414  SDValue foldBinOpIntoSelect(SDNode *BO);
415  bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
416  SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
417  SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
418  SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
419  SDValue N2, SDValue N3, ISD::CondCode CC,
420  bool NotExtCompare = false);
421  SDValue convertSelectOfFPConstantsToLoadOffset(
422  const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
423  ISD::CondCode CC);
424  SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
425  SDValue N2, SDValue N3, ISD::CondCode CC);
426  SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
427  const SDLoc &DL);
428  SDValue unfoldMaskedMerge(SDNode *N);
429  SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
430  SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
431  const SDLoc &DL, bool foldBooleans);
432  SDValue rebuildSetCC(SDValue N);
433 
434  bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
435  SDValue &CC) const;
436  bool isOneUseSetCC(SDValue N) const;
437 
438  SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
439  unsigned HiOp);
440  SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
441  SDValue CombineExtLoad(SDNode *N);
442  SDValue CombineZExtLogicopShiftLoad(SDNode *N);
443  SDValue combineRepeatedFPDivisors(SDNode *N);
444  SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
445  SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
446  SDValue BuildSDIV(SDNode *N);
447  SDValue BuildSDIVPow2(SDNode *N);
448  SDValue BuildUDIV(SDNode *N);
449  SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
450  SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
451  SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
452  SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
453  SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
454  SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
455  SDNodeFlags Flags, bool Reciprocal);
456  SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
457  SDNodeFlags Flags, bool Reciprocal);
458  SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
459  bool DemandHighBits = true);
460  SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
461  SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
462  SDValue InnerPos, SDValue InnerNeg,
463  unsigned PosOpcode, unsigned NegOpcode,
464  const SDLoc &DL);
465  SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
466  SDValue MatchLoadCombine(SDNode *N);
467  SDValue ReduceLoadWidth(SDNode *N);
468  SDValue ReduceLoadOpStoreWidth(SDNode *N);
470  SDValue TransformFPLoadStorePair(SDNode *N);
471  SDValue convertBuildVecZextToZext(SDNode *N);
472  SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
473  SDValue reduceBuildVecToShuffle(SDNode *N);
474  SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
475  ArrayRef<int> VectorMask, SDValue VecIn1,
476  SDValue VecIn2, unsigned LeftIdx);
477  SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
478 
479  /// Walk up chain skipping non-aliasing memory nodes,
480  /// looking for aliasing nodes and adding them to the Aliases vector.
481  void GatherAllAliases(SDNode *N, SDValue OriginalChain,
482  SmallVectorImpl<SDValue> &Aliases);
483 
484  /// Return true if there is any possibility that the two addresses overlap.
485  bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
486 
487  /// Walk up chain skipping non-aliasing memory nodes, looking for a better
488  /// chain (aliasing node.)
489  SDValue FindBetterChain(SDNode *N, SDValue Chain);
490 
491  /// Try to replace a store and any possibly adjacent stores on
492  /// consecutive chains with better chains. Return true only if St is
493  /// replaced.
494  ///
495  /// Notice that other chains may still be replaced even if the function
496  /// returns false.
497  bool findBetterNeighborChains(StoreSDNode *St);
498 
499  // Helper for findBetterNeighborChains. Walk up store chain add additional
500  // chained stores that do not overlap and can be parallelized.
501  bool parallelizeChainedStores(StoreSDNode *St);
502 
503  /// Holds a pointer to an LSBaseSDNode as well as information on where it
504  /// is located in a sequence of memory operations connected by a chain.
505  struct MemOpLink {
506  // Ptr to the mem node.
507  LSBaseSDNode *MemNode;
508 
509  // Offset from the base ptr.
510  int64_t OffsetFromBase;
511 
512  MemOpLink(LSBaseSDNode *N, int64_t Offset)
513  : MemNode(N), OffsetFromBase(Offset) {}
514  };
515 
516  /// This is a helper function for visitMUL to check the profitability
517  /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
518  /// MulNode is the original multiply, AddNode is (add x, c1),
519  /// and ConstNode is c2.
520  bool isMulAddWithConstProfitable(SDNode *MulNode,
521  SDValue &AddNode,
522  SDValue &ConstNode);
523 
524  /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
525  /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
526  /// the type of the loaded value to be extended.
527  bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
528  EVT LoadResultTy, EVT &ExtVT);
529 
530  /// Helper function to calculate whether the given Load/Store can have its
531  /// width reduced to ExtVT.
532  bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
533  EVT &MemVT, unsigned ShAmt = 0);
534 
535  /// Used by BackwardsPropagateMask to find suitable loads.
536  bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
537  SmallPtrSetImpl<SDNode*> &NodesWithConsts,
538  ConstantSDNode *Mask, SDNode *&NodeToMask);
539  /// Attempt to propagate a given AND node back to load leaves so that they
540  /// can be combined into narrow loads.
541  bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
542 
543  /// Helper function for MergeConsecutiveStores which merges the
544  /// component store chains.
545  SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
546  unsigned NumStores);
547 
548  /// This is a helper function for MergeConsecutiveStores. When the
549  /// source elements of the consecutive stores are all constants or
550  /// all extracted vector elements, try to merge them into one
551  /// larger store introducing bitcasts if necessary. \return True
552  /// if a merged store was created.
553  bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
554  EVT MemVT, unsigned NumStores,
555  bool IsConstantSrc, bool UseVector,
556  bool UseTrunc);
557 
558  /// This is a helper function for MergeConsecutiveStores. Stores
559  /// that potentially may be merged with St are placed in
560  /// StoreNodes. RootNode is a chain predecessor to all store
561  /// candidates.
562  void getStoreMergeCandidates(StoreSDNode *St,
563  SmallVectorImpl<MemOpLink> &StoreNodes,
564  SDNode *&Root);
565 
566  /// Helper function for MergeConsecutiveStores. Checks if
567  /// candidate stores have indirect dependency through their
568  /// operands. RootNode is the predecessor to all stores calculated
569  /// by getStoreMergeCandidates and is used to prune the dependency check.
570  /// \return True if safe to merge.
571  bool checkMergeStoreCandidatesForDependencies(
572  SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
573  SDNode *RootNode);
574 
575  /// Merge consecutive store operations into a wide store.
576  /// This optimization uses wide integers or vectors when possible.
577  /// \return number of stores that were merged into a merged store (the
578  /// affected nodes are stored as a prefix in \p StoreNodes).
579  bool MergeConsecutiveStores(StoreSDNode *St);
580 
581  /// Try to transform a truncation where C is a constant:
582  /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
583  ///
584  /// \p N needs to be a truncation and its first operand an AND. Other
585  /// requirements are checked by the function (e.g. that trunc is
586  /// single-use) and if missed an empty SDValue is returned.
587  SDValue distributeTruncateThroughAnd(SDNode *N);
588 
589  /// Helper function to determine whether the target supports operation
590  /// given by \p Opcode for type \p VT, that is, whether the operation
591  /// is legal or custom before legalizing operations, and whether is
592  /// legal (but not custom) after legalization.
593  bool hasOperation(unsigned Opcode, EVT VT) {
594  if (LegalOperations)
595  return TLI.isOperationLegal(Opcode, VT);
596  return TLI.isOperationLegalOrCustom(Opcode, VT);
597  }
598 
599  public:
600  /// Runs the dag combiner on all nodes in the work list
601  void Run(CombineLevel AtLevel);
602 
603  SelectionDAG &getDAG() const { return DAG; }
604 
605  /// Returns a type large enough to hold any valid shift amount - before type
606  /// legalization these can be huge.
607  EVT getShiftAmountTy(EVT LHSTy) {
608  assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
609  return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
610  }
611 
612  /// This method returns true if we are running before type legalization or
613  /// if the specified VT is legal.
614  bool isTypeLegal(const EVT &VT) {
615  if (!LegalTypes) return true;
616  return TLI.isTypeLegal(VT);
617  }
618 
619  /// Convenience wrapper around TargetLowering::getSetCCResultType
620  EVT getSetCCResultType(EVT VT) const {
621  return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
622  }
623 
624  void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
625  SDValue OrigLoad, SDValue ExtLoad,
627  };
628 
629 /// This class is a DAGUpdateListener that removes any deleted
630 /// nodes from the worklist.
631 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
632  DAGCombiner &DC;
633 
634 public:
635  explicit WorklistRemover(DAGCombiner &dc)
636  : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
637 
638  void NodeDeleted(SDNode *N, SDNode *E) override {
639  DC.removeFromWorklist(N);
640  }
641 };
642 
643 } // end anonymous namespace
644 
645 //===----------------------------------------------------------------------===//
646 // TargetLowering::DAGCombinerInfo implementation
647 //===----------------------------------------------------------------------===//
648 
650  ((DAGCombiner*)DC)->AddToWorklist(N);
651 }
652 
654 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
655  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
656 }
657 
659 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
660  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
661 }
662 
664 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
665  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
666 }
667 
670  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
671 }
672 
673 //===----------------------------------------------------------------------===//
674 // Helper Functions
675 //===----------------------------------------------------------------------===//
676 
677 void DAGCombiner::deleteAndRecombine(SDNode *N) {
678  removeFromWorklist(N);
679 
680  // If the operands of this node are only used by the node, they will now be
681  // dead. Make sure to re-visit them and recursively delete dead nodes.
682  for (const SDValue &Op : N->ops())
683  // For an operand generating multiple values, one of the values may
684  // become dead allowing further simplification (e.g. split index
685  // arithmetic from an indexed load).
686  if (Op->hasOneUse() || Op->getNumValues() > 1)
687  AddToWorklist(Op.getNode());
688 
689  DAG.DeleteNode(N);
690 }
691 
692 /// Return 1 if we can compute the negated form of the specified expression for
693 /// the same cost as the expression itself, or 2 if we can compute the negated
694 /// form more cheaply than the expression itself.
695 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
696  const TargetLowering &TLI,
697  const TargetOptions *Options,
698  unsigned Depth = 0) {
699  // fneg is removable even if it has multiple uses.
700  if (Op.getOpcode() == ISD::FNEG) return 2;
701 
702  // Don't allow anything with multiple uses unless we know it is free.
703  EVT VT = Op.getValueType();
704  const SDNodeFlags Flags = Op->getFlags();
705  if (!Op.hasOneUse())
706  if (!(Op.getOpcode() == ISD::FP_EXTEND &&
707  TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
708  return 0;
709 
710  // Don't recurse exponentially.
711  if (Depth > 6) return 0;
712 
713  switch (Op.getOpcode()) {
714  default: return false;
715  case ISD::ConstantFP: {
716  if (!LegalOperations)
717  return 1;
718 
719  // Don't invert constant FP values after legalization unless the target says
720  // the negated constant is legal.
721  return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
722  TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
723  }
724  case ISD::FADD:
725  if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
726  return 0;
727 
728  // After operation legalization, it might not be legal to create new FSUBs.
729  if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
730  return 0;
731 
732  // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
733  if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
734  Options, Depth + 1))
735  return V;
736  // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
737  return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
738  Depth + 1);
739  case ISD::FSUB:
740  // We can't turn -(A-B) into B-A when we honor signed zeros.
741  if (!Options->NoSignedZerosFPMath &&
742  !Flags.hasNoSignedZeros())
743  return 0;
744 
745  // fold (fneg (fsub A, B)) -> (fsub B, A)
746  return 1;
747 
748  case ISD::FMUL:
749  case ISD::FDIV:
750  // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
751  if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
752  Options, Depth + 1))
753  return V;
754 
755  return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
756  Depth + 1);
757 
758  case ISD::FP_EXTEND:
759  case ISD::FP_ROUND:
760  case ISD::FSIN:
761  return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
762  Depth + 1);
763  }
764 }
765 
766 /// If isNegatibleForFree returns true, return the newly negated expression.
768  bool LegalOperations, unsigned Depth = 0) {
769  const TargetOptions &Options = DAG.getTarget().Options;
770  // fneg is removable even if it has multiple uses.
771  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
772 
773  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
774 
775  const SDNodeFlags Flags = Op.getNode()->getFlags();
776 
777  switch (Op.getOpcode()) {
778  default: llvm_unreachable("Unknown code");
779  case ISD::ConstantFP: {
780  APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
781  V.changeSign();
782  return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
783  }
784  case ISD::FADD:
785  assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());
786 
787  // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
788  if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
789  DAG.getTargetLoweringInfo(), &Options, Depth+1))
790  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
791  GetNegatedExpression(Op.getOperand(0), DAG,
792  LegalOperations, Depth+1),
793  Op.getOperand(1), Flags);
794  // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
795  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
796  GetNegatedExpression(Op.getOperand(1), DAG,
797  LegalOperations, Depth+1),
798  Op.getOperand(0), Flags);
799  case ISD::FSUB:
800  // fold (fneg (fsub 0, B)) -> B
801  if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
802  if (N0CFP->isZero())
803  return Op.getOperand(1);
804 
805  // fold (fneg (fsub A, B)) -> (fsub B, A)
806  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
807  Op.getOperand(1), Op.getOperand(0), Flags);
808 
809  case ISD::FMUL:
810  case ISD::FDIV:
811  // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
812  if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
813  DAG.getTargetLoweringInfo(), &Options, Depth+1))
814  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
815  GetNegatedExpression(Op.getOperand(0), DAG,
816  LegalOperations, Depth+1),
817  Op.getOperand(1), Flags);
818 
819  // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
820  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
821  Op.getOperand(0),
822  GetNegatedExpression(Op.getOperand(1), DAG,
823  LegalOperations, Depth+1), Flags);
824 
825  case ISD::FP_EXTEND:
826  case ISD::FSIN:
827  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
828  GetNegatedExpression(Op.getOperand(0), DAG,
829  LegalOperations, Depth+1));
830  case ISD::FP_ROUND:
831  return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
832  GetNegatedExpression(Op.getOperand(0), DAG,
833  LegalOperations, Depth+1),
834  Op.getOperand(1));
835  }
836 }
837 
838 // APInts must be the same size for most operations, this helper
839 // function zero extends the shorter of the pair so that they match.
840 // We provide an Offset so that we can create bitwidths that won't overflow.
841 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
842  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
843  LHS = LHS.zextOrSelf(Bits);
844  RHS = RHS.zextOrSelf(Bits);
845 }
846 
847 // Return true if this node is a setcc, or is a select_cc
848 // that selects between the target values used for true and false, making it
849 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
850 // the appropriate nodes based on the type of node we are checking. This
851 // simplifies life a bit for the callers.
852 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
853  SDValue &CC) const {
854  if (N.getOpcode() == ISD::SETCC) {
855  LHS = N.getOperand(0);
856  RHS = N.getOperand(1);
857  CC = N.getOperand(2);
858  return true;
859  }
860 
861  if (N.getOpcode() != ISD::SELECT_CC ||
862  !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
863  !TLI.isConstFalseVal(N.getOperand(3).getNode()))
864  return false;
865 
866  if (TLI.getBooleanContents(N.getValueType()) ==
868  return false;
869 
870  LHS = N.getOperand(0);
871  RHS = N.getOperand(1);
872  CC = N.getOperand(4);
873  return true;
874 }
875 
876 /// Return true if this is a SetCC-equivalent operation with only one use.
877 /// If this is true, it allows the users to invert the operation for free when
878 /// it is profitable to do so.
879 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
880  SDValue N0, N1, N2;
881  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
882  return true;
883  return false;
884 }
885 
886 // Returns the SDNode if it is a constant float BuildVector
887 // or constant float.
889  if (isa<ConstantFPSDNode>(N))
890  return N.getNode();
892  return N.getNode();
893  return nullptr;
894 }
895 
896 // Determines if it is a constant integer or a build vector of constant
897 // integers (and undefs).
898 // Do not permit build vector implicit truncation.
899 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
900  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
901  return !(Const->isOpaque() && NoOpaques);
902  if (N.getOpcode() != ISD::BUILD_VECTOR)
903  return false;
904  unsigned BitWidth = N.getScalarValueSizeInBits();
905  for (const SDValue &Op : N->op_values()) {
906  if (Op.isUndef())
907  continue;
909  if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
910  (Const->isOpaque() && NoOpaques))
911  return false;
912  }
913  return true;
914 }
915 
916 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
917 // undef's.
918 static bool isAnyConstantBuildVector(const SDNode *N) {
921 }
922 
923 SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
924  SDValue N1, SDNodeFlags Flags) {
925  // Don't reassociate reductions.
926  if (Flags.hasVectorReduction())
927  return SDValue();
928 
929  EVT VT = N0.getValueType();
930  if (N0.getOpcode() == Opc && !N0->getFlags().hasVectorReduction()) {
933  // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
934  if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
935  return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
936  return SDValue();
937  }
938  if (N0.hasOneUse()) {
939  // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
940  // use
941  SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
942  if (!OpNode.getNode())
943  return SDValue();
944  AddToWorklist(OpNode.getNode());
945  return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
946  }
947  }
948  }
949 
950  if (N1.getOpcode() == Opc && !N1->getFlags().hasVectorReduction()) {
953  // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
954  if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
955  return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
956  return SDValue();
957  }
958  if (N1.hasOneUse()) {
959  // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
960  // use
961  SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
962  if (!OpNode.getNode())
963  return SDValue();
964  AddToWorklist(OpNode.getNode());
965  return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
966  }
967  }
968  }
969 
970  return SDValue();
971 }
972 
973 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
974  bool AddTo) {
975  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
976  ++NodesCombined;
977  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
978  To[0].getNode()->dump(&DAG);
979  dbgs() << " and " << NumTo - 1 << " other values\n");
980  for (unsigned i = 0, e = NumTo; i != e; ++i)
981  assert((!To[i].getNode() ||
982  N->getValueType(i) == To[i].getValueType()) &&
983  "Cannot combine value to value of different type!");
984 
985  WorklistRemover DeadNodes(*this);
986  DAG.ReplaceAllUsesWith(N, To);
987  if (AddTo) {
988  // Push the new nodes and any users onto the worklist
989  for (unsigned i = 0, e = NumTo; i != e; ++i) {
990  if (To[i].getNode()) {
991  AddToWorklist(To[i].getNode());
992  AddUsersToWorklist(To[i].getNode());
993  }
994  }
995  }
996 
997  // Finally, if the node is now dead, remove it from the graph. The node
998  // may not be dead if the replacement process recursively simplified to
999  // something else needing this node.
1000  if (N->use_empty())
1001  deleteAndRecombine(N);
1002  return SDValue(N, 0);
1003 }
1004 
1005 void DAGCombiner::
1006 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1007  // Replace all uses. If any nodes become isomorphic to other nodes and
1008  // are deleted, make sure to remove them from our worklist.
1009  WorklistRemover DeadNodes(*this);
1010  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1011 
1012  // Push the new node and any (possibly new) users onto the worklist.
1013  AddToWorklist(TLO.New.getNode());
1014  AddUsersToWorklist(TLO.New.getNode());
1015 
1016  // Finally, if the node is now dead, remove it from the graph. The node
1017  // may not be dead if the replacement process recursively simplified to
1018  // something else needing this node.
1019  if (TLO.Old.getNode()->use_empty())
1020  deleteAndRecombine(TLO.Old.getNode());
1021 }
1022 
1023 /// Check the specified integer node value to see if it can be simplified or if
1024 /// things it uses can be simplified by bit propagation. If so, return true.
1025 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
1026  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1027  KnownBits Known;
1028  if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
1029  return false;
1030 
1031  // Revisit the node.
1032  AddToWorklist(Op.getNode());
1033 
1034  // Replace the old value with the new one.
1035  ++NodesCombined;
1036  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1037  dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1038  dbgs() << '\n');
1039 
1040  CommitTargetLoweringOpt(TLO);
1041  return true;
1042 }
1043 
1044 /// Check the specified vector node value to see if it can be simplified or
1045 /// if things it uses can be simplified as it only uses some of the elements.
1046 /// If so, return true.
1047 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
1048  bool AssumeSingleUse) {
1049  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1050  APInt KnownUndef, KnownZero;
1051  if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO,
1052  0, AssumeSingleUse))
1053  return false;
1054 
1055  // Revisit the node.
1056  AddToWorklist(Op.getNode());
1057 
1058  // Replace the old value with the new one.
1059  ++NodesCombined;
1060  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1061  dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1062  dbgs() << '\n');
1063 
1064  CommitTargetLoweringOpt(TLO);
1065  return true;
1066 }
1067 
1068 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1069  SDLoc DL(Load);
1070  EVT VT = Load->getValueType(0);
1071  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1072 
1073  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1074  Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1075  WorklistRemover DeadNodes(*this);
1076  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1077  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1078  deleteAndRecombine(Load);
1079  AddToWorklist(Trunc.getNode());
1080 }
1081 
1082 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1083  Replace = false;
1084  SDLoc DL(Op);
1085  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1086  LoadSDNode *LD = cast<LoadSDNode>(Op);
1087  EVT MemVT = LD->getMemoryVT();
1089  : LD->getExtensionType();
1090  Replace = true;
1091  return DAG.getExtLoad(ExtType, DL, PVT,
1092  LD->getChain(), LD->getBasePtr(),
1093  MemVT, LD->getMemOperand());
1094  }
1095 
1096  unsigned Opc = Op.getOpcode();
1097  switch (Opc) {
1098  default: break;
1099  case ISD::AssertSext:
1100  if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1101  return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1102  break;
1103  case ISD::AssertZext:
1104  if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1105  return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1106  break;
1107  case ISD::Constant: {
1108  unsigned ExtOpc =
1110  return DAG.getNode(ExtOpc, DL, PVT, Op);
1111  }
1112  }
1113 
1114  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1115  return SDValue();
1116  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1117 }
1118 
1119 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1121  return SDValue();
1122  EVT OldVT = Op.getValueType();
1123  SDLoc DL(Op);
1124  bool Replace = false;
1125  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1126  if (!NewOp.getNode())
1127  return SDValue();
1128  AddToWorklist(NewOp.getNode());
1129 
1130  if (Replace)
1131  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1132  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1133  DAG.getValueType(OldVT));
1134 }
1135 
1136 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1137  EVT OldVT = Op.getValueType();
1138  SDLoc DL(Op);
1139  bool Replace = false;
1140  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1141  if (!NewOp.getNode())
1142  return SDValue();
1143  AddToWorklist(NewOp.getNode());
1144 
1145  if (Replace)
1146  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1147  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1148 }
1149 
1150 /// Promote the specified integer binary operation if the target indicates it is
1151 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1152 /// i32 since i16 instructions are longer.
1153 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1154  if (!LegalOperations)
1155  return SDValue();
1156 
1157  EVT VT = Op.getValueType();
1158  if (VT.isVector() || !VT.isInteger())
1159  return SDValue();
1160 
1161  // If operation type is 'undesirable', e.g. i16 on x86, consider
1162  // promoting it.
1163  unsigned Opc = Op.getOpcode();
1164  if (TLI.isTypeDesirableForOp(Opc, VT))
1165  return SDValue();
1166 
1167  EVT PVT = VT;
1168  // Consult target whether it is a good idea to promote this operation and
1169  // what's the right type to promote it to.
1170  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1171  assert(PVT != VT && "Don't know what type to promote to!");
1172 
1173  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1174 
1175  bool Replace0 = false;
1176  SDValue N0 = Op.getOperand(0);
1177  SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1178 
1179  bool Replace1 = false;
1180  SDValue N1 = Op.getOperand(1);
1181  SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1182  SDLoc DL(Op);
1183 
1184  SDValue RV =
1185  DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1186 
1187  // We are always replacing N0/N1's use in N and only need
1188  // additional replacements if there are additional uses.
1189  Replace0 &= !N0->hasOneUse();
1190  Replace1 &= (N0 != N1) && !N1->hasOneUse();
1191 
1192  // Combine Op here so it is preserved past replacements.
1193  CombineTo(Op.getNode(), RV);
1194 
1195  // If operands have a use ordering, make sure we deal with
1196  // predecessor first.
1197  if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1198  std::swap(N0, N1);
1199  std::swap(NN0, NN1);
1200  }
1201 
1202  if (Replace0) {
1203  AddToWorklist(NN0.getNode());
1204  ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1205  }
1206  if (Replace1) {
1207  AddToWorklist(NN1.getNode());
1208  ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1209  }
1210  return Op;
1211  }
1212  return SDValue();
1213 }
1214 
1215 /// Promote the specified integer shift operation if the target indicates it is
1216 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1217 /// i32 since i16 instructions are longer.
1218 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1219  if (!LegalOperations)
1220  return SDValue();
1221 
1222  EVT VT = Op.getValueType();
1223  if (VT.isVector() || !VT.isInteger())
1224  return SDValue();
1225 
1226  // If operation type is 'undesirable', e.g. i16 on x86, consider
1227  // promoting it.
1228  unsigned Opc = Op.getOpcode();
1229  if (TLI.isTypeDesirableForOp(Opc, VT))
1230  return SDValue();
1231 
1232  EVT PVT = VT;
1233  // Consult target whether it is a good idea to promote this operation and
1234  // what's the right type to promote it to.
1235  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1236  assert(PVT != VT && "Don't know what type to promote to!");
1237 
1238  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1239 
1240  bool Replace = false;
1241  SDValue N0 = Op.getOperand(0);
1242  SDValue N1 = Op.getOperand(1);
1243  if (Opc == ISD::SRA)
1244  N0 = SExtPromoteOperand(N0, PVT);
1245  else if (Opc == ISD::SRL)
1246  N0 = ZExtPromoteOperand(N0, PVT);
1247  else
1248  N0 = PromoteOperand(N0, PVT, Replace);
1249 
1250  if (!N0.getNode())
1251  return SDValue();
1252 
1253  SDLoc DL(Op);
1254  SDValue RV =
1255  DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1256 
1257  AddToWorklist(N0.getNode());
1258  if (Replace)
1259  ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1260 
1261  // Deal with Op being deleted.
1262  if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1263  return RV;
1264  }
1265  return SDValue();
1266 }
1267 
1268 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1269  if (!LegalOperations)
1270  return SDValue();
1271 
1272  EVT VT = Op.getValueType();
1273  if (VT.isVector() || !VT.isInteger())
1274  return SDValue();
1275 
1276  // If operation type is 'undesirable', e.g. i16 on x86, consider
1277  // promoting it.
1278  unsigned Opc = Op.getOpcode();
1279  if (TLI.isTypeDesirableForOp(Opc, VT))
1280  return SDValue();
1281 
1282  EVT PVT = VT;
1283  // Consult target whether it is a good idea to promote this operation and
1284  // what's the right type to promote it to.
1285  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1286  assert(PVT != VT && "Don't know what type to promote to!");
1287  // fold (aext (aext x)) -> (aext x)
1288  // fold (aext (zext x)) -> (zext x)
1289  // fold (aext (sext x)) -> (sext x)
1290  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1291  return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1292  }
1293  return SDValue();
1294 }
1295 
1296 bool DAGCombiner::PromoteLoad(SDValue Op) {
1297  if (!LegalOperations)
1298  return false;
1299 
1300  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1301  return false;
1302 
1303  EVT VT = Op.getValueType();
1304  if (VT.isVector() || !VT.isInteger())
1305  return false;
1306 
1307  // If operation type is 'undesirable', e.g. i16 on x86, consider
1308  // promoting it.
1309  unsigned Opc = Op.getOpcode();
1310  if (TLI.isTypeDesirableForOp(Opc, VT))
1311  return false;
1312 
1313  EVT PVT = VT;
1314  // Consult target whether it is a good idea to promote this operation and
1315  // what's the right type to promote it to.
1316  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1317  assert(PVT != VT && "Don't know what type to promote to!");
1318 
1319  SDLoc DL(Op);
1320  SDNode *N = Op.getNode();
1321  LoadSDNode *LD = cast<LoadSDNode>(N);
1322  EVT MemVT = LD->getMemoryVT();
1324  : LD->getExtensionType();
1325  SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1326  LD->getChain(), LD->getBasePtr(),
1327  MemVT, LD->getMemOperand());
1328  SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1329 
1330  LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1331  Result.getNode()->dump(&DAG); dbgs() << '\n');
1332  WorklistRemover DeadNodes(*this);
1333  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1334  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1335  deleteAndRecombine(N);
1336  AddToWorklist(Result.getNode());
1337  return true;
1338  }
1339  return false;
1340 }
1341 
1342 /// Recursively delete a node which has no uses and any operands for
1343 /// which it is the only use.
1344 ///
1345 /// Note that this both deletes the nodes and removes them from the worklist.
1346 /// It also adds any nodes who have had a user deleted to the worklist as they
1347 /// may now have only one use and subject to other combines.
1348 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1349  if (!N->use_empty())
1350  return false;
1351 
1353  Nodes.insert(N);
1354  do {
1355  N = Nodes.pop_back_val();
1356  if (!N)
1357  continue;
1358 
1359  if (N->use_empty()) {
1360  for (const SDValue &ChildN : N->op_values())
1361  Nodes.insert(ChildN.getNode());
1362 
1363  removeFromWorklist(N);
1364  DAG.DeleteNode(N);
1365  } else {
1366  AddToWorklist(N);
1367  }
1368  } while (!Nodes.empty());
1369  return true;
1370 }
1371 
1372 //===----------------------------------------------------------------------===//
1373 // Main DAG Combiner implementation
1374 //===----------------------------------------------------------------------===//
1375 
1376 void DAGCombiner::Run(CombineLevel AtLevel) {
1377  // set the instance variables, so that the various visit routines may use it.
1378  Level = AtLevel;
1379  LegalOperations = Level >= AfterLegalizeVectorOps;
1380  LegalTypes = Level >= AfterLegalizeTypes;
1381 
1382  // Add all the dag nodes to the worklist.
1383  for (SDNode &Node : DAG.allnodes())
1384  AddToWorklist(&Node);
1385 
1386  // Create a dummy node (which is not added to allnodes), that adds a reference
1387  // to the root node, preventing it from being deleted, and tracking any
1388  // changes of the root.
1389  HandleSDNode Dummy(DAG.getRoot());
1390 
1391  // While the worklist isn't empty, find a node and try to combine it.
1392  while (!WorklistMap.empty()) {
1393  SDNode *N;
1394  // The Worklist holds the SDNodes in order, but it may contain null entries.
1395  do {
1396  N = Worklist.pop_back_val();
1397  } while (!N);
1398 
1399  bool GoodWorklistEntry = WorklistMap.erase(N);
1400  (void)GoodWorklistEntry;
1401  assert(GoodWorklistEntry &&
1402  "Found a worklist entry without a corresponding map entry!");
1403 
1404  // If N has no uses, it is dead. Make sure to revisit all N's operands once
1405  // N is deleted from the DAG, since they too may now be dead or may have a
1406  // reduced number of uses, allowing other xforms.
1407  if (recursivelyDeleteUnusedNodes(N))
1408  continue;
1409 
1410  WorklistRemover DeadNodes(*this);
1411 
1412  // If this combine is running after legalizing the DAG, re-legalize any
1413  // nodes pulled off the worklist.
1414  if (Level == AfterLegalizeDAG) {
1415  SmallSetVector<SDNode *, 16> UpdatedNodes;
1416  bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1417 
1418  for (SDNode *LN : UpdatedNodes) {
1419  AddToWorklist(LN);
1420  AddUsersToWorklist(LN);
1421  }
1422  if (!NIsValid)
1423  continue;
1424  }
1425 
1426  LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1427 
1428  // Add any operands of the new node which have not yet been combined to the
1429  // worklist as well. Because the worklist uniques things already, this
1430  // won't repeatedly process the same operand.
1431  CombinedNodes.insert(N);
1432  for (const SDValue &ChildN : N->op_values())
1433  if (!CombinedNodes.count(ChildN.getNode()))
1434  AddToWorklist(ChildN.getNode());
1435 
1436  SDValue RV = combine(N);
1437 
1438  if (!RV.getNode())
1439  continue;
1440 
1441  ++NodesCombined;
1442 
1443  // If we get back the same node we passed in, rather than a new node or
1444  // zero, we know that the node must have defined multiple values and
1445  // CombineTo was used. Since CombineTo takes care of the worklist
1446  // mechanics for us, we have no work to do in this case.
1447  if (RV.getNode() == N)
1448  continue;
1449 
1451  RV.getOpcode() != ISD::DELETED_NODE &&
1452  "Node was deleted but visit returned new node!");
1453 
1454  LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1455 
1456  if (N->getNumValues() == RV.getNode()->getNumValues())
1457  DAG.ReplaceAllUsesWith(N, RV.getNode());
1458  else {
1459  assert(N->getValueType(0) == RV.getValueType() &&
1460  N->getNumValues() == 1 && "Type mismatch");
1461  DAG.ReplaceAllUsesWith(N, &RV);
1462  }
1463 
1464  // Push the new node and any users onto the worklist
1465  AddToWorklist(RV.getNode());
1466  AddUsersToWorklist(RV.getNode());
1467 
1468  // Finally, if the node is now dead, remove it from the graph. The node
1469  // may not be dead if the replacement process recursively simplified to
1470  // something else needing this node. This will also take care of adding any
1471  // operands which have lost a user to the worklist.
1472  recursivelyDeleteUnusedNodes(N);
1473  }
1474 
1475  // If the root changed (e.g. it was a dead load, update the root).
1476  DAG.setRoot(Dummy.getValue());
1477  DAG.RemoveDeadNodes();
1478 }
1479 
1480 SDValue DAGCombiner::visit(SDNode *N) {
1481  switch (N->getOpcode()) {
1482  default: break;
1483  case ISD::TokenFactor: return visitTokenFactor(N);
1484  case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1485  case ISD::ADD: return visitADD(N);
1486  case ISD::SUB: return visitSUB(N);
1487  case ISD::ADDC: return visitADDC(N);
1488  case ISD::UADDO: return visitUADDO(N);
1489  case ISD::SUBC: return visitSUBC(N);
1490  case ISD::USUBO: return visitUSUBO(N);
1491  case ISD::ADDE: return visitADDE(N);
1492  case ISD::ADDCARRY: return visitADDCARRY(N);
1493  case ISD::SUBE: return visitSUBE(N);
1494  case ISD::SUBCARRY: return visitSUBCARRY(N);
1495  case ISD::MUL: return visitMUL(N);
1496  case ISD::SDIV: return visitSDIV(N);
1497  case ISD::UDIV: return visitUDIV(N);
1498  case ISD::SREM:
1499  case ISD::UREM: return visitREM(N);
1500  case ISD::MULHU: return visitMULHU(N);
1501  case ISD::MULHS: return visitMULHS(N);
1502  case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1503  case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1504  case ISD::SMULO: return visitSMULO(N);
1505  case ISD::UMULO: return visitUMULO(N);
1506  case ISD::SMIN:
1507  case ISD::SMAX:
1508  case ISD::UMIN:
1509  case ISD::UMAX: return visitIMINMAX(N);
1510  case ISD::AND: return visitAND(N);
1511  case ISD::OR: return visitOR(N);
1512  case ISD::XOR: return visitXOR(N);
1513  case ISD::SHL: return visitSHL(N);
1514  case ISD::SRA: return visitSRA(N);
1515  case ISD::SRL: return visitSRL(N);
1516  case ISD::ROTR:
1517  case ISD::ROTL: return visitRotate(N);
1518  case ISD::FSHL:
1519  case ISD::FSHR: return visitFunnelShift(N);
1520  case ISD::ABS: return visitABS(N);
1521  case ISD::BSWAP: return visitBSWAP(N);
1522  case ISD::BITREVERSE: return visitBITREVERSE(N);
1523  case ISD::CTLZ: return visitCTLZ(N);
1524  case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1525  case ISD::CTTZ: return visitCTTZ(N);
1526  case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1527  case ISD::CTPOP: return visitCTPOP(N);
1528  case ISD::SELECT: return visitSELECT(N);
1529  case ISD::VSELECT: return visitVSELECT(N);
1530  case ISD::SELECT_CC: return visitSELECT_CC(N);
1531  case ISD::SETCC: return visitSETCC(N);
1532  case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1533  case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1534  case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1535  case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1536  case ISD::AssertSext:
1537  case ISD::AssertZext: return visitAssertExt(N);
1538  case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1539  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1540  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1541  case ISD::TRUNCATE: return visitTRUNCATE(N);
1542  case ISD::BITCAST: return visitBITCAST(N);
1543  case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1544  case ISD::FADD: return visitFADD(N);
1545  case ISD::FSUB: return visitFSUB(N);
1546  case ISD::FMUL: return visitFMUL(N);
1547  case ISD::FMA: return visitFMA(N);
1548  case ISD::FDIV: return visitFDIV(N);
1549  case ISD::FREM: return visitFREM(N);
1550  case ISD::FSQRT: return visitFSQRT(N);
1551  case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1552  case ISD::FPOW: return visitFPOW(N);
1553  case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1554  case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1555  case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1556  case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1557  case ISD::FP_ROUND: return visitFP_ROUND(N);
1558  case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
1559  case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1560  case ISD::FNEG: return visitFNEG(N);
1561  case ISD::FABS: return visitFABS(N);
1562  case ISD::FFLOOR: return visitFFLOOR(N);
1563  case ISD::FMINNUM: return visitFMINNUM(N);
1564  case ISD::FMAXNUM: return visitFMAXNUM(N);
1565  case ISD::FMINIMUM: return visitFMINIMUM(N);
1566  case ISD::FMAXIMUM: return visitFMAXIMUM(N);
1567  case ISD::FCEIL: return visitFCEIL(N);
1568  case ISD::FTRUNC: return visitFTRUNC(N);
1569  case ISD::BRCOND: return visitBRCOND(N);
1570  case ISD::BR_CC: return visitBR_CC(N);
1571  case ISD::LOAD: return visitLOAD(N);
1572  case ISD::STORE: return visitSTORE(N);
1573  case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1574  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1575  case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1576  case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1577  case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1578  case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1579  case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1580  case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1581  case ISD::MGATHER: return visitMGATHER(N);
1582  case ISD::MLOAD: return visitMLOAD(N);
1583  case ISD::MSCATTER: return visitMSCATTER(N);
1584  case ISD::MSTORE: return visitMSTORE(N);
1585  case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1586  case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1587  }
1588  return SDValue();
1589 }
1590 
1591 SDValue DAGCombiner::combine(SDNode *N) {
1592  SDValue RV = visit(N);
1593 
1594  // If nothing happened, try a target-specific DAG combine.
1595  if (!RV.getNode()) {
1597  "Node was deleted but visit returned NULL!");
1598 
1599  if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1601 
1602  // Expose the DAG combiner to the target combiner impls.
1604  DagCombineInfo(DAG, Level, false, this);
1605 
1606  RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1607  }
1608  }
1609 
1610  // If nothing happened still, try promoting the operation.
1611  if (!RV.getNode()) {
1612  switch (N->getOpcode()) {
1613  default: break;
1614  case ISD::ADD:
1615  case ISD::SUB:
1616  case ISD::MUL:
1617  case ISD::AND:
1618  case ISD::OR:
1619  case ISD::XOR:
1620  RV = PromoteIntBinOp(SDValue(N, 0));
1621  break;
1622  case ISD::SHL:
1623  case ISD::SRA:
1624  case ISD::SRL:
1625  RV = PromoteIntShiftOp(SDValue(N, 0));
1626  break;
1627  case ISD::SIGN_EXTEND:
1628  case ISD::ZERO_EXTEND:
1629  case ISD::ANY_EXTEND:
1630  RV = PromoteExtend(SDValue(N, 0));
1631  break;
1632  case ISD::LOAD:
1633  if (PromoteLoad(SDValue(N, 0)))
1634  RV = SDValue(N, 0);
1635  break;
1636  }
1637  }
1638 
1639  // If N is a commutative binary node, try eliminate it if the commuted
1640  // version is already present in the DAG.
1641  if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1642  N->getNumValues() == 1) {
1643  SDValue N0 = N->getOperand(0);
1644  SDValue N1 = N->getOperand(1);
1645 
1646  // Constant operands are canonicalized to RHS.
1647  if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1648  SDValue Ops[] = {N1, N0};
1649  SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1650  N->getFlags());
1651  if (CSENode)
1652  return SDValue(CSENode, 0);
1653  }
1654  }
1655 
1656  return RV;
1657 }
1658 
1659 /// Given a node, return its input chain if it has one, otherwise return a null
1660 /// sd operand.
1662  if (unsigned NumOps = N->getNumOperands()) {
1663  if (N->getOperand(0).getValueType() == MVT::Other)
1664  return N->getOperand(0);
1665  if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1666  return N->getOperand(NumOps-1);
1667  for (unsigned i = 1; i < NumOps-1; ++i)
1668  if (N->getOperand(i).getValueType() == MVT::Other)
1669  return N->getOperand(i);
1670  }
1671  return SDValue();
1672 }
1673 
1674 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1675  // If N has two operands, where one has an input chain equal to the other,
1676  // the 'other' chain is redundant.
1677  if (N->getNumOperands() == 2) {
1678  if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1679  return N->getOperand(0);
1680  if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1681  return N->getOperand(1);
1682  }
1683 
1684  // Don't simplify token factors if optnone.
1685  if (OptLevel == CodeGenOpt::None)
1686  return SDValue();
1687 
1688  SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1689  SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1690  SmallPtrSet<SDNode*, 16> SeenOps;
1691  bool Changed = false; // If we should replace this token factor.
1692 
1693  // Start out with this token factor.
1694  TFs.push_back(N);
1695 
1696  // Iterate through token factors. The TFs grows when new token factors are
1697  // encountered.
1698  for (unsigned i = 0; i < TFs.size(); ++i) {
1699  SDNode *TF = TFs[i];
1700 
1701  // Check each of the operands.
1702  for (const SDValue &Op : TF->op_values()) {
1703  switch (Op.getOpcode()) {
1704  case ISD::EntryToken:
1705  // Entry tokens don't need to be added to the list. They are
1706  // redundant.
1707  Changed = true;
1708  break;
1709 
1710  case ISD::TokenFactor:
1711  if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1712  // Queue up for processing.
1713  TFs.push_back(Op.getNode());
1714  // Clean up in case the token factor is removed.
1715  AddToWorklist(Op.getNode());
1716  Changed = true;
1717  break;
1718  }
1720 
1721  default:
1722  // Only add if it isn't already in the list.
1723  if (SeenOps.insert(Op.getNode()).second)
1724  Ops.push_back(Op);
1725  else
1726  Changed = true;
1727  break;
1728  }
1729  }
1730  }
1731 
1732  // Remove Nodes that are chained to another node in the list. Do so
1733  // by walking up chains breath-first stopping when we've seen
1734  // another operand. In general we must climb to the EntryNode, but we can exit
1735  // early if we find all remaining work is associated with just one operand as
1736  // no further pruning is possible.
1737 
1738  // List of nodes to search through and original Ops from which they originate.
1740  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1741  SmallPtrSet<SDNode *, 16> SeenChains;
1742  bool DidPruneOps = false;
1743 
1744  unsigned NumLeftToConsider = 0;
1745  for (const SDValue &Op : Ops) {
1746  Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1747  OpWorkCount.push_back(1);
1748  }
1749 
1750  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1751  // If this is an Op, we can remove the op from the list. Remark any
1752  // search associated with it as from the current OpNumber.
1753  if (SeenOps.count(Op) != 0) {
1754  Changed = true;
1755  DidPruneOps = true;
1756  unsigned OrigOpNumber = 0;
1757  while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1758  OrigOpNumber++;
1759  assert((OrigOpNumber != Ops.size()) &&
1760  "expected to find TokenFactor Operand");
1761  // Re-mark worklist from OrigOpNumber to OpNumber
1762  for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1763  if (Worklist[i].second == OrigOpNumber) {
1764  Worklist[i].second = OpNumber;
1765  }
1766  }
1767  OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1768  OpWorkCount[OrigOpNumber] = 0;
1769  NumLeftToConsider--;
1770  }
1771  // Add if it's a new chain
1772  if (SeenChains.insert(Op).second) {
1773  OpWorkCount[OpNumber]++;
1774  Worklist.push_back(std::make_pair(Op, OpNumber));
1775  }
1776  };
1777 
1778  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1779  // We need at least be consider at least 2 Ops to prune.
1780  if (NumLeftToConsider <= 1)
1781  break;
1782  auto CurNode = Worklist[i].first;
1783  auto CurOpNumber = Worklist[i].second;
1784  assert((OpWorkCount[CurOpNumber] > 0) &&
1785  "Node should not appear in worklist");
1786  switch (CurNode->getOpcode()) {
1787  case ISD::EntryToken:
1788  // Hitting EntryToken is the only way for the search to terminate without
1789  // hitting
1790  // another operand's search. Prevent us from marking this operand
1791  // considered.
1792  NumLeftToConsider++;
1793  break;
1794  case ISD::TokenFactor:
1795  for (const SDValue &Op : CurNode->op_values())
1796  AddToWorklist(i, Op.getNode(), CurOpNumber);
1797  break;
1798  case ISD::CopyFromReg:
1799  case ISD::CopyToReg:
1800  AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1801  break;
1802  default:
1803  if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1804  AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1805  break;
1806  }
1807  OpWorkCount[CurOpNumber]--;
1808  if (OpWorkCount[CurOpNumber] == 0)
1809  NumLeftToConsider--;
1810  }
1811 
1812  // If we've changed things around then replace token factor.
1813  if (Changed) {
1814  SDValue Result;
1815  if (Ops.empty()) {
1816  // The entry token is the only possible outcome.
1817  Result = DAG.getEntryNode();
1818  } else {
1819  if (DidPruneOps) {
1820  SmallVector<SDValue, 8> PrunedOps;
1821  //
1822  for (const SDValue &Op : Ops) {
1823  if (SeenChains.count(Op.getNode()) == 0)
1824  PrunedOps.push_back(Op);
1825  }
1826  Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
1827  } else {
1828  Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
1829  }
1830  }
1831  return Result;
1832  }
1833  return SDValue();
1834 }
1835 
1836 /// MERGE_VALUES can always be eliminated.
1837 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1838  WorklistRemover DeadNodes(*this);
1839  // Replacing results may cause a different MERGE_VALUES to suddenly
1840  // be CSE'd with N, and carry its uses with it. Iterate until no
1841  // uses remain, to ensure that the node can be safely deleted.
1842  // First add the users of this node to the work list so that they
1843  // can be tried again once they have new operands.
1844  AddUsersToWorklist(N);
1845  do {
1846  // Do as a single replacement to avoid rewalking use lists.
1848  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1849  Ops.push_back(N->getOperand(i));
1850  DAG.ReplaceAllUsesWith(N, Ops.data());
1851  } while (!N->use_empty());
1852  deleteAndRecombine(N);
1853  return SDValue(N, 0); // Return N so it doesn't get rechecked!
1854 }
1855 
1856 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
1857 /// ConstantSDNode pointer else nullptr.
1860  return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1861 }
1862 
1863 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
1864  assert(ISD::isBinaryOp(BO) && "Unexpected binary operator");
1865 
1866  // Don't do this unless the old select is going away. We want to eliminate the
1867  // binary operator, not replace a binop with a select.
1868  // TODO: Handle ISD::SELECT_CC.
1869  unsigned SelOpNo = 0;
1870  SDValue Sel = BO->getOperand(0);
1871  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
1872  SelOpNo = 1;
1873  Sel = BO->getOperand(1);
1874  }
1875 
1876  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1877  return SDValue();
1878 
1879  SDValue CT = Sel.getOperand(1);
1880  if (!isConstantOrConstantVector(CT, true) &&
1882  return SDValue();
1883 
1884  SDValue CF = Sel.getOperand(2);
1885  if (!isConstantOrConstantVector(CF, true) &&
1887  return SDValue();
1888 
1889  // Bail out if any constants are opaque because we can't constant fold those.
1890  // The exception is "and" and "or" with either 0 or -1 in which case we can
1891  // propagate non constant operands into select. I.e.:
1892  // and (select Cond, 0, -1), X --> select Cond, 0, X
1893  // or X, (select Cond, -1, 0) --> select Cond, -1, X
1894  auto BinOpcode = BO->getOpcode();
1895  bool CanFoldNonConst =
1896  (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
1899 
1900  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
1901  if (!CanFoldNonConst &&
1902  !isConstantOrConstantVector(CBO, true) &&
1904  return SDValue();
1905 
1906  EVT VT = Sel.getValueType();
1907 
1908  // In case of shift value and shift amount may have different VT. For instance
1909  // on x86 shift amount is i8 regardles of LHS type. Bail out if we have
1910  // swapped operands and value types do not match. NB: x86 is fine if operands
1911  // are not swapped with shift amount VT being not bigger than shifted value.
1912  // TODO: that is possible to check for a shift operation, correct VTs and
1913  // still perform optimization on x86 if needed.
1914  if (SelOpNo && VT != CBO.getValueType())
1915  return SDValue();
1916 
1917  // We have a select-of-constants followed by a binary operator with a
1918  // constant. Eliminate the binop by pulling the constant math into the select.
1919  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
1920  SDLoc DL(Sel);
1921  SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
1922  : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
1923  if (!CanFoldNonConst && !NewCT.isUndef() &&
1924  !isConstantOrConstantVector(NewCT, true) &&
1926  return SDValue();
1927 
1928  SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
1929  : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
1930  if (!CanFoldNonConst && !NewCF.isUndef() &&
1931  !isConstantOrConstantVector(NewCF, true) &&
1933  return SDValue();
1934 
1935  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
1936 }
1937 
1939  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
1940  "Expecting add or sub");
1941 
1942  // Match a constant operand and a zext operand for the math instruction:
1943  // add Z, C
1944  // sub C, Z
1945  bool IsAdd = N->getOpcode() == ISD::ADD;
1946  SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
1947  SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
1948  auto *CN = dyn_cast<ConstantSDNode>(C);
1949  if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
1950  return SDValue();
1951 
1952  // Match the zext operand as a setcc of a boolean.
1953  if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
1954  Z.getOperand(0).getValueType() != MVT::i1)
1955  return SDValue();
1956 
1957  // Match the compare as: setcc (X & 1), 0, eq.
1958  SDValue SetCC = Z.getOperand(0);
1959  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
1960  if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
1961  SetCC.getOperand(0).getOpcode() != ISD::AND ||
1962  !isOneConstant(SetCC.getOperand(0).getOperand(1)))
1963  return SDValue();
1964 
1965  // We are adding/subtracting a constant and an inverted low bit. Turn that
1966  // into a subtract/add of the low bit with incremented/decremented constant:
1967  // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
1968  // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
1969  EVT VT = C.getValueType();
1970  SDLoc DL(N);
1971  SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
1972  SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
1973  DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
1974  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
1975 }
1976 
1977 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
1978 /// a shift and add with a different constant.
1980  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
1981  "Expecting add or sub");
1982 
1983  // We need a constant operand for the add/sub, and the other operand is a
1984  // logical shift right: add (srl), C or sub C, (srl).
1985  bool IsAdd = N->getOpcode() == ISD::ADD;
1986  SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
1987  SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
1988  ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
1989  if (!C || ShiftOp.getOpcode() != ISD::SRL)
1990  return SDValue();
1991 
1992  // The shift must be of a 'not' value.
1993  SDValue Not = ShiftOp.getOperand(0);
1994  if (!Not.hasOneUse() || !isBitwiseNot(Not))
1995  return SDValue();
1996 
1997  // The shift must be moving the sign bit to the least-significant-bit.
1998  EVT VT = ShiftOp.getValueType();
1999  SDValue ShAmt = ShiftOp.getOperand(1);
2000  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2001  if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1)
2002  return SDValue();
2003 
2004  // Eliminate the 'not' by adjusting the shift and add/sub constant:
2005  // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2006  // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2007  SDLoc DL(N);
2008  auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2009  SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2010  APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
2011  return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
2012 }
2013 
2014 SDValue DAGCombiner::visitADD(SDNode *N) {
2015  SDValue N0 = N->getOperand(0);
2016  SDValue N1 = N->getOperand(1);
2017  EVT VT = N0.getValueType();
2018  SDLoc DL(N);
2019 
2020  // fold vector ops
2021  if (VT.isVector()) {
2022  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2023  return FoldedVOp;
2024 
2025  // fold (add x, 0) -> x, vector edition
2027  return N0;
2029  return N1;
2030  }
2031 
2032  // fold (add x, undef) -> undef
2033  if (N0.isUndef())
2034  return N0;
2035 
2036  if (N1.isUndef())
2037  return N1;
2038 
2040  // canonicalize constant to RHS
2042  return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2043  // fold (add c1, c2) -> c1+c2
2044  return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
2045  N1.getNode());
2046  }
2047 
2048  // fold (add x, 0) -> x
2049  if (isNullConstant(N1))
2050  return N0;
2051 
2052  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2053  // fold ((c1-A)+c2) -> (c1+c2)-A
2054  if (N0.getOpcode() == ISD::SUB &&
2055  isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2056  // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
2057  return DAG.getNode(ISD::SUB, DL, VT,
2058  DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2059  N0.getOperand(1));
2060  }
2061 
2062  // add (sext i1 X), 1 -> zext (not i1 X)
2063  // We don't transform this pattern:
2064  // add (zext i1 X), -1 -> sext (not i1 X)
2065  // because most (?) targets generate better code for the zext form.
2066  if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2067  isOneOrOneSplat(N1)) {
2068  SDValue X = N0.getOperand(0);
2069  if ((!LegalOperations ||
2070  (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2071  TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2072  X.getScalarValueSizeInBits() == 1) {
2073  SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2074  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2075  }
2076  }
2077 
2078  // Undo the add -> or combine to merge constant offsets from a frame index.
2079  if (N0.getOpcode() == ISD::OR &&
2080  isa<FrameIndexSDNode>(N0.getOperand(0)) &&
2081  isa<ConstantSDNode>(N0.getOperand(1)) &&
2082  DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2083  SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
2084  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2085  }
2086  }
2087 
2088  if (SDValue NewSel = foldBinOpIntoSelect(N))
2089  return NewSel;
2090 
2091  // reassociate add
2092  if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2093  return RADD;
2094 
2095  // fold ((0-A) + B) -> B-A
2096  if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2097  return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2098 
2099  // fold (A + (0-B)) -> A-B
2100  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2101  return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2102 
2103  // fold (A+(B-A)) -> B
2104  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2105  return N1.getOperand(0);
2106 
2107  // fold ((B-A)+A) -> B
2108  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2109  return N0.getOperand(0);
2110 
2111  // fold (A+(B-(A+C))) to (B-C)
2112  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2113  N0 == N1.getOperand(1).getOperand(0))
2114  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2115  N1.getOperand(1).getOperand(1));
2116 
2117  // fold (A+(B-(C+A))) to (B-C)
2118  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2119  N0 == N1.getOperand(1).getOperand(1))
2120  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2121  N1.getOperand(1).getOperand(0));
2122 
2123  // fold (A+((B-A)+or-C)) to (B+or-C)
2124  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2125  N1.getOperand(0).getOpcode() == ISD::SUB &&
2126  N0 == N1.getOperand(0).getOperand(1))
2127  return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2128  N1.getOperand(1));
2129 
2130  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2131  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2132  SDValue N00 = N0.getOperand(0);
2133  SDValue N01 = N0.getOperand(1);
2134  SDValue N10 = N1.getOperand(0);
2135  SDValue N11 = N1.getOperand(1);
2136 
2138  return DAG.getNode(ISD::SUB, DL, VT,
2139  DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2140  DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2141  }
2142 
2143  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2144  return V;
2145 
2146  if (SDValue V = foldAddSubOfSignBit(N, DAG))
2147  return V;
2148 
2149  if (SimplifyDemandedBits(SDValue(N, 0)))
2150  return SDValue(N, 0);
2151 
2152  // fold (a+b) -> (a|b) iff a and b share no bits.
2153  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2154  DAG.haveNoCommonBitsSet(N0, N1))
2155  return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2156 
2157  // fold (add (xor a, -1), 1) -> (sub 0, a)
2158  if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
2159  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2160  N0.getOperand(0));
2161 
2162  if (SDValue Combined = visitADDLike(N0, N1, N))
2163  return Combined;
2164 
2165  if (SDValue Combined = visitADDLike(N1, N0, N))
2166  return Combined;
2167 
2168  return SDValue();
2169 }
2170 
2171 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2172  bool Masked = false;
2173 
2174  // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2175  while (true) {
2176  if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2177  V = V.getOperand(0);
2178  continue;
2179  }
2180 
2181  if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2182  Masked = true;
2183  V = V.getOperand(0);
2184  continue;
2185  }
2186 
2187  break;
2188  }
2189 
2190  // If this is not a carry, return.
2191  if (V.getResNo() != 1)
2192  return SDValue();
2193 
2194  if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2195  V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2196  return SDValue();
2197 
2198  // If the result is masked, then no matter what kind of bool it is we can
2199  // return. If it isn't, then we need to make sure the bool type is either 0 or
2200  // 1 and not other values.
2201  if (Masked ||
2202  TLI.getBooleanContents(V.getValueType()) ==
2204  return V;
2205 
2206  return SDValue();
2207 }
2208 
2209 SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
2210  EVT VT = N0.getValueType();
2211  SDLoc DL(LocReference);
2212 
2213  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2214  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2216  return DAG.getNode(ISD::SUB, DL, VT, N0,
2217  DAG.getNode(ISD::SHL, DL, VT,
2218  N1.getOperand(0).getOperand(1),
2219  N1.getOperand(1)));
2220 
2221  if (N1.getOpcode() == ISD::AND) {
2222  SDValue AndOp0 = N1.getOperand(0);
2223  unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
2224  unsigned DestBits = VT.getScalarSizeInBits();
2225 
2226  // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
2227  // and similar xforms where the inner op is either ~0 or 0.
2228  if (NumSignBits == DestBits && isOneOrOneSplat(N1->getOperand(1)))
2229  return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
2230  }
2231 
2232  // add (sext i1), X -> sub X, (zext i1)
2233  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2234  N0.getOperand(0).getValueType() == MVT::i1 &&
2236  SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2237  return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2238  }
2239 
2240  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2241  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2242  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2243  if (TN->getVT() == MVT::i1) {
2244  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2245  DAG.getConstant(1, DL, VT));
2246  return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2247  }
2248  }
2249 
2250  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2251  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2252  N1.getResNo() == 0)
2253  return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2254  N0, N1.getOperand(0), N1.getOperand(2));
2255 
2256  // (add X, Carry) -> (addcarry X, 0, Carry)
2258  if (SDValue Carry = getAsCarry(TLI, N1))
2259  return DAG.getNode(ISD::ADDCARRY, DL,
2260  DAG.getVTList(VT, Carry.getValueType()), N0,
2261  DAG.getConstant(0, DL, VT), Carry);
2262 
2263  return SDValue();
2264 }
2265 
2266 SDValue DAGCombiner::visitADDC(SDNode *N) {
2267  SDValue N0 = N->getOperand(0);
2268  SDValue N1 = N->getOperand(1);
2269  EVT VT = N0.getValueType();
2270  SDLoc DL(N);
2271 
2272  // If the flag result is dead, turn this into an ADD.
2273  if (!N->hasAnyUseOfValue(1))
2274  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2275  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2276 
2277  // canonicalize constant to RHS.
2280  if (N0C && !N1C)
2281  return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2282 
2283  // fold (addc x, 0) -> x + no carry out
2284  if (isNullConstant(N1))
2285  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2286  DL, MVT::Glue));
2287 
2288  // If it cannot overflow, transform into an add.
2289  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2290  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2291  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2292 
2293  return SDValue();
2294 }
2295 
2296 static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT,
2297  SelectionDAG &DAG, const TargetLowering &TLI) {
2298  SDValue Cst;
2299  switch (TLI.getBooleanContents(VT)) {
2302  Cst = DAG.getConstant(1, DL, VT);
2303  break;
2305  Cst = DAG.getConstant(-1, DL, VT);
2306  break;
2307  }
2308 
2309  return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
2310 }
2311 
2312 static bool isBooleanFlip(SDValue V, EVT VT, const TargetLowering &TLI) {
2313  if (V.getOpcode() != ISD::XOR) return false;
2315  if (!Const) return false;
2316 
2317  switch(TLI.getBooleanContents(VT)) {
2319  return Const->isOne();
2321  return Const->isAllOnesValue();
2323  return (Const->getAPIntValue() & 0x01) == 1;
2324  }
2325  llvm_unreachable("Unsupported boolean content");
2326 }
2327 
2328 SDValue DAGCombiner::visitUADDO(SDNode *N) {
2329  SDValue N0 = N->getOperand(0);
2330  SDValue N1 = N->getOperand(1);
2331  EVT VT = N0.getValueType();
2332  if (VT.isVector())
2333  return SDValue();
2334 
2335  EVT CarryVT = N->getValueType(1);
2336  SDLoc DL(N);
2337 
2338  // If the flag result is dead, turn this into an ADD.
2339  if (!N->hasAnyUseOfValue(1))
2340  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2341  DAG.getUNDEF(CarryVT));
2342 
2343  // canonicalize constant to RHS.
2346  if (N0C && !N1C)
2347  return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
2348 
2349  // fold (uaddo x, 0) -> x + no carry out
2350  if (isNullConstant(N1))
2351  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2352 
2353  // If it cannot overflow, transform into an add.
2354  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2355  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2356  DAG.getConstant(0, DL, CarryVT));
2357 
2358  // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2359  if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2360  SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2361  DAG.getConstant(0, DL, VT),
2362  N0.getOperand(0));
2363  return CombineTo(N, Sub,
2364  flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
2365  }
2366 
2367  if (SDValue Combined = visitUADDOLike(N0, N1, N))
2368  return Combined;
2369 
2370  if (SDValue Combined = visitUADDOLike(N1, N0, N))
2371  return Combined;
2372 
2373  return SDValue();
2374 }
2375 
2376 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2377  auto VT = N0.getValueType();
2378 
2379  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2380  // If Y + 1 cannot overflow.
2381  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2382  SDValue Y = N1.getOperand(0);
2383  SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2384  if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2385  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2386  N1.getOperand(2));
2387  }
2388 
2389  // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2391  if (SDValue Carry = getAsCarry(TLI, N1))
2392  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2393  DAG.getConstant(0, SDLoc(N), VT), Carry);
2394 
2395  return SDValue();
2396 }
2397 
2398 SDValue DAGCombiner::visitADDE(SDNode *N) {
2399  SDValue N0 = N->getOperand(0);
2400  SDValue N1 = N->getOperand(1);
2401  SDValue CarryIn = N->getOperand(2);
2402 
2403  // canonicalize constant to RHS
2406  if (N0C && !N1C)
2407  return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2408  N1, N0, CarryIn);
2409 
2410  // fold (adde x, y, false) -> (addc x, y)
2411  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2412  return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2413 
2414  return SDValue();
2415 }
2416 
2417 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2418  SDValue N0 = N->getOperand(0);
2419  SDValue N1 = N->getOperand(1);
2420  SDValue CarryIn = N->getOperand(2);
2421  SDLoc DL(N);
2422 
2423  // canonicalize constant to RHS
2426  if (N0C && !N1C)
2427  return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2428 
2429  // fold (addcarry x, y, false) -> (uaddo x, y)
2430  if (isNullConstant(CarryIn)) {
2431  if (!LegalOperations ||
2433  return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2434  }
2435 
2436  EVT CarryVT = CarryIn.getValueType();
2437 
2438  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2439  if (isNullConstant(N0) && isNullConstant(N1)) {
2440  EVT VT = N0.getValueType();
2441  SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2442  AddToWorklist(CarryExt.getNode());
2443  return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2444  DAG.getConstant(1, DL, VT)),
2445  DAG.getConstant(0, DL, CarryVT));
2446  }
2447 
2448  // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
2449  if (isBitwiseNot(N0) && isNullConstant(N1) &&
2450  isBooleanFlip(CarryIn, CarryVT, TLI)) {
2451  SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
2452  DAG.getConstant(0, DL, N0.getValueType()),
2453  N0.getOperand(0), CarryIn.getOperand(0));
2454  return CombineTo(N, Sub,
2455  flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
2456  }
2457 
2458  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2459  return Combined;
2460 
2461  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2462  return Combined;
2463 
2464  return SDValue();
2465 }
2466 
2467 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2468  SDNode *N) {
2469  // Iff the flag result is dead:
2470  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
2471  if ((N0.getOpcode() == ISD::ADD ||
2472  (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
2473  isNullConstant(N1) && !N->hasAnyUseOfValue(1))
2474  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2475  N0.getOperand(0), N0.getOperand(1), CarryIn);
2476 
2477  /**
2478  * When one of the addcarry argument is itself a carry, we may be facing
2479  * a diamond carry propagation. In which case we try to transform the DAG
2480  * to ensure linear carry propagation if that is possible.
2481  *
2482  * We are trying to get:
2483  * (addcarry X, 0, (addcarry A, B, Z):Carry)
2484  */
2485  if (auto Y = getAsCarry(TLI, N1)) {
2486  /**
2487  * (uaddo A, B)
2488  * / \
2489  * Carry Sum
2490  * | \
2491  * | (addcarry *, 0, Z)
2492  * | /
2493  * \ Carry
2494  * | /
2495  * (addcarry X, *, *)
2496  */
2497  if (Y.getOpcode() == ISD::UADDO &&
2498  CarryIn.getResNo() == 1 &&
2499  CarryIn.getOpcode() == ISD::ADDCARRY &&
2500  isNullConstant(CarryIn.getOperand(1)) &&
2501  CarryIn.getOperand(0) == Y.getValue(0)) {
2502  auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
2503  Y.getOperand(0), Y.getOperand(1),
2504  CarryIn.getOperand(2));
2505  AddToWorklist(NewY.getNode());
2506  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2507  DAG.getConstant(0, SDLoc(N), N0.getValueType()),
2508  NewY.getValue(1));
2509  }
2510  }
2511 
2512  return SDValue();
2513 }
2514 
2515 // Since it may not be valid to emit a fold to zero for vector initializers
2516 // check if we can before folding.
2517 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2518  SelectionDAG &DAG, bool LegalOperations) {
2519  if (!VT.isVector())
2520  return DAG.getConstant(0, DL, VT);
2521  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2522  return DAG.getConstant(0, DL, VT);
2523  return SDValue();
2524 }
2525 
2526 SDValue DAGCombiner::visitSUB(SDNode *N) {
2527  SDValue N0 = N->getOperand(0);
2528  SDValue N1 = N->getOperand(1);
2529  EVT VT = N0.getValueType();
2530  SDLoc DL(N);
2531 
2532  // fold vector ops
2533  if (VT.isVector()) {
2534  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2535  return FoldedVOp;
2536 
2537  // fold (sub x, 0) -> x, vector edition
2539  return N0;
2540  }
2541 
2542  // fold (sub x, x) -> 0
2543  // FIXME: Refactor this and xor and other similar operations together.
2544  if (N0 == N1)
2545  return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
2548  // fold (sub c1, c2) -> c1-c2
2549  return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
2550  N1.getNode());
2551  }
2552 
2553  if (SDValue NewSel = foldBinOpIntoSelect(N))
2554  return NewSel;
2555 
2557 
2558  // fold (sub x, c) -> (add x, -c)
2559  if (N1C) {
2560  return DAG.getNode(ISD::ADD, DL, VT, N0,
2561  DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
2562  }
2563 
2564  if (isNullOrNullSplat(N0)) {
2565  unsigned BitWidth = VT.getScalarSizeInBits();
2566  // Right-shifting everything out but the sign bit followed by negation is
2567  // the same as flipping arithmetic/logical shift type without the negation:
2568  // -(X >>u 31) -> (X >>s 31)
2569  // -(X >>s 31) -> (X >>u 31)
2570  if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
2571  ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
2572  if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
2573  auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
2574  if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
2575  return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
2576  }
2577  }
2578 
2579  // 0 - X --> 0 if the sub is NUW.
2580  if (N->getFlags().hasNoUnsignedWrap())
2581  return N0;
2582 
2583  if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
2584  // N1 is either 0 or the minimum signed value. If the sub is NSW, then
2585  // N1 must be 0 because negating the minimum signed value is undefined.
2586  if (N->getFlags().hasNoSignedWrap())
2587  return N0;
2588 
2589  // 0 - X --> X if X is 0 or the minimum signed value.
2590  return N1;
2591  }
2592  }
2593 
2594  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
2595  if (isAllOnesOrAllOnesSplat(N0))
2596  return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
2597 
2598  // fold (A - (0-B)) -> A+B
2599  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2600  return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
2601 
2602  // fold A-(A-B) -> B
2603  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
2604  return N1.getOperand(1);
2605 
2606  // fold (A+B)-A -> B
2607  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
2608  return N0.getOperand(1);
2609 
2610  // fold (A+B)-B -> A
2611  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
2612  return N0.getOperand(0);
2613 
2614  // fold C2-(A+C1) -> (C2-C1)-A
2615  if (N1.getOpcode() == ISD::ADD) {
2616  SDValue N11 = N1.getOperand(1);
2617  if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
2618  isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
2619  SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
2620  return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
2621  }
2622  }
2623 
2624  // fold ((A+(B+or-C))-B) -> A+or-C
2625  if (N0.getOpcode() == ISD::ADD &&
2626  (N0.getOperand(1).getOpcode() == ISD::SUB ||
2627  N0.getOperand(1).getOpcode() == ISD::ADD) &&
2628  N0.getOperand(1).getOperand(0) == N1)
2629  return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
2630  N0.getOperand(1).getOperand(1));
2631 
2632  // fold ((A+(C+B))-B) -> A+C
2633  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
2634  N0.getOperand(1).getOperand(1) == N1)
2635  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
2636  N0.getOperand(1).getOperand(0));
2637 
2638  // fold ((A-(B-C))-C) -> A-B
2639  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
2640  N0.getOperand(1).getOperand(1) == N1)
2641  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2642  N0.getOperand(1).getOperand(0));
2643 
2644  // fold (A-(B-C)) -> A+(C-B)
2645  if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
2646  return DAG.getNode(ISD::ADD, DL, VT, N0,
2647  DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
2648  N1.getOperand(0)));
2649 
2650  // fold (X - (-Y * Z)) -> (X + (Y * Z))
2651  if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
2652  if (N1.getOperand(0).getOpcode() == ISD::SUB &&
2654  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
2655  N1.getOperand(0).getOperand(1),
2656  N1.getOperand(1));
2657  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
2658  }
2659  if (N1.getOperand(1).getOpcode() == ISD::SUB &&
2661  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
2662  N1.getOperand(0),
2663  N1.getOperand(1).getOperand(1));
2664  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
2665  }
2666  }
2667 
2668  // If either operand of a sub is undef, the result is undef
2669  if (N0.isUndef())
2670  return N0;
2671  if (N1.isUndef())
2672  return N1;
2673 
2674  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2675  return V;
2676 
2677  if (SDValue V = foldAddSubOfSignBit(N, DAG))
2678  return V;
2679 
2680  // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
2681  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
2682  if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
2683  SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
2684  SDValue S0 = N1.getOperand(0);
2685  if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
2686  unsigned OpSizeInBits = VT.getScalarSizeInBits();
2688  if (C->getAPIntValue() == (OpSizeInBits - 1))
2689  return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
2690  }
2691  }
2692  }
2693 
2694  // If the relocation model supports it, consider symbol offsets.
2695  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
2696  if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2697  // fold (sub Sym, c) -> Sym-c
2698  if (N1C && GA->getOpcode() == ISD::GlobalAddress)
2699  return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
2700  GA->getOffset() -
2701  (uint64_t)N1C->getSExtValue());
2702  // fold (sub Sym+c1, Sym+c2) -> c1-c2
2703  if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
2704  if (GA->getGlobal() == GB->getGlobal())
2705  return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
2706  DL, VT);
2707  }
2708 
2709  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
2710  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2711  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2712  if (TN->getVT() == MVT::i1) {
2713  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2714  DAG.getConstant(1, DL, VT));
2715  return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
2716  }
2717  }
2718 
2719  // Prefer an add for more folding potential and possibly better codegen:
2720  // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
2721  if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
2722  SDValue ShAmt = N1.getOperand(1);
2723  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2724  if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) {
2725  SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
2726  return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
2727  }
2728  }
2729 
2730  return SDValue();
2731 }
2732 
2733 SDValue DAGCombiner::visitSUBC(SDNode *N) {
2734  SDValue N0 = N->getOperand(0);
2735  SDValue N1 = N->getOperand(1);
2736  EVT VT = N0.getValueType();
2737  SDLoc DL(N);
2738 
2739  // If the flag result is dead, turn this into an SUB.
2740  if (!N->hasAnyUseOfValue(1))
2741  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2742  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2743 
2744  // fold (subc x, x) -> 0 + no borrow
2745  if (N0 == N1)
2746  return CombineTo(N, DAG.getConstant(0, DL, VT),
2747  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2748 
2749  // fold (subc x, 0) -> x + no borrow
2750  if (isNullConstant(N1))
2751  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2752 
2753  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2754  if (isAllOnesConstant(N0))
2755  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2756  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2757 
2758  return SDValue();
2759 }
2760 
2761 SDValue DAGCombiner::visitUSUBO(SDNode *N) {
2762  SDValue N0 = N->getOperand(0);
2763  SDValue N1 = N->getOperand(1);
2764  EVT VT = N0.getValueType();
2765  if (VT.isVector())
2766  return SDValue();
2767 
2768  EVT CarryVT = N->getValueType(1);
2769  SDLoc DL(N);
2770 
2771  // If the flag result is dead, turn this into an SUB.
2772  if (!N->hasAnyUseOfValue(1))
2773  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2774  DAG.getUNDEF(CarryVT));
2775 
2776  // fold (usubo x, x) -> 0 + no borrow
2777  if (N0 == N1)
2778  return CombineTo(N, DAG.getConstant(0, DL, VT),
2779  DAG.getConstant(0, DL, CarryVT));
2780 
2781  // fold (usubo x, 0) -> x + no borrow
2782  if (isNullConstant(N1))
2783  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2784 
2785  // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2786  if (isAllOnesConstant(N0))
2787  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2788  DAG.getConstant(0, DL, CarryVT));
2789 
2790  return SDValue();
2791 }
2792 
2793 SDValue DAGCombiner::visitSUBE(SDNode *N) {
2794  SDValue N0 = N->getOperand(0);
2795  SDValue N1 = N->getOperand(1);
2796  SDValue CarryIn = N->getOperand(2);
2797 
2798  // fold (sube x, y, false) -> (subc x, y)
2799  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2800  return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
2801 
2802  return SDValue();
2803 }
2804 
2805 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
2806  SDValue N0 = N->getOperand(0);
2807  SDValue N1 = N->getOperand(1);
2808  SDValue CarryIn = N->getOperand(2);
2809 
2810  // fold (subcarry x, y, false) -> (usubo x, y)
2811  if (isNullConstant(CarryIn)) {
2812  if (!LegalOperations ||
2814  return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
2815  }
2816 
2817  return SDValue();
2818 }
2819 
2820 SDValue DAGCombiner::visitMUL(SDNode *N) {
2821  SDValue N0 = N->getOperand(0);
2822  SDValue N1 = N->getOperand(1);
2823  EVT VT = N0.getValueType();
2824 
2825  // fold (mul x, undef) -> 0
2826  if (N0.isUndef() || N1.isUndef())
2827  return DAG.getConstant(0, SDLoc(N), VT);
2828 
2829  bool N0IsConst = false;
2830  bool N1IsConst = false;
2831  bool N1IsOpaqueConst = false;
2832  bool N0IsOpaqueConst = false;
2833  APInt ConstValue0, ConstValue1;
2834  // fold vector ops
2835  if (VT.isVector()) {
2836  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2837  return FoldedVOp;
2838 
2839  N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
2840  N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
2841  assert((!N0IsConst ||
2842  ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
2843  "Splat APInt should be element width");
2844  assert((!N1IsConst ||
2845  ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
2846  "Splat APInt should be element width");
2847  } else {
2848  N0IsConst = isa<ConstantSDNode>(N0);
2849  if (N0IsConst) {
2850  ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
2851  N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
2852  }
2853  N1IsConst = isa<ConstantSDNode>(N1);
2854  if (N1IsConst) {
2855  ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
2856  N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
2857  }
2858  }
2859 
2860  // fold (mul c1, c2) -> c1*c2
2861  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
2862  return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
2863  N0.getNode(), N1.getNode());
2864 
2865  // canonicalize constant to RHS (vector doesn't have to splat)
2868  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
2869  // fold (mul x, 0) -> 0
2870  if (N1IsConst && ConstValue1.isNullValue())
2871  return N1;
2872  // fold (mul x, 1) -> x
2873  if (N1IsConst && ConstValue1.isOneValue())
2874  return N0;
2875 
2876  if (SDValue NewSel = foldBinOpIntoSelect(N))
2877  return NewSel;
2878 
2879  // fold (mul x, -1) -> 0-x
2880  if (N1IsConst && ConstValue1.isAllOnesValue()) {
2881  SDLoc DL(N);
2882  return DAG.getNode(ISD::SUB, DL, VT,
2883  DAG.getConstant(0, DL, VT), N0);
2884  }
2885  // fold (mul x, (1 << c)) -> x << c
2886  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
2887  DAG.isKnownToBeAPowerOfTwo(N1) &&
2888  (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
2889  SDLoc DL(N);
2890  SDValue LogBase2 = BuildLogBase2(N1, DL);
2891  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
2892  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
2893  return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
2894  }
2895  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
2896  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
2897  unsigned Log2Val = (-ConstValue1).logBase2();
2898  SDLoc DL(N);
2899  // FIXME: If the input is something that is easily negated (e.g. a
2900  // single-use add), we should put the negate there.
2901  return DAG.getNode(ISD::SUB, DL, VT,
2902  DAG.getConstant(0, DL, VT),
2903  DAG.getNode(ISD::SHL, DL, VT, N0,
2904  DAG.getConstant(Log2Val, DL,
2905  getShiftAmountTy(N0.getValueType()))));
2906  }
2907 
2908  // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
2909  // mul x, (2^N + 1) --> add (shl x, N), x
2910  // mul x, (2^N - 1) --> sub (shl x, N), x
2911  // Examples: x * 33 --> (x << 5) + x
2912  // x * 15 --> (x << 4) - x
2913  // x * -33 --> -((x << 5) + x)
2914  // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
2915  if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
2916  // TODO: We could handle more general decomposition of any constant by
2917  // having the target set a limit on number of ops and making a
2918  // callback to determine that sequence (similar to sqrt expansion).
2919  unsigned MathOp = ISD::DELETED_NODE;
2920  APInt MulC = ConstValue1.abs();
2921  if ((MulC - 1).isPowerOf2())
2922  MathOp = ISD::ADD;
2923  else if ((MulC + 1).isPowerOf2())
2924  MathOp = ISD::SUB;
2925 
2926  if (MathOp != ISD::DELETED_NODE) {
2927  unsigned ShAmt = MathOp == ISD::ADD ? (MulC - 1).logBase2()
2928  : (MulC + 1).logBase2();
2929  assert(ShAmt > 0 && ShAmt < VT.getScalarSizeInBits() &&
2930  "Not expecting multiply-by-constant that could have simplified");
2931  SDLoc DL(N);
2932  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N0,
2933  DAG.getConstant(ShAmt, DL, VT));
2934  SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
2935  if (ConstValue1.isNegative())
2936  R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
2937  return R;
2938  }
2939  }
2940 
2941  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
2942  if (N0.getOpcode() == ISD::SHL &&
2943  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
2944  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
2945  SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
2947  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
2948  }
2949 
2950  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
2951  // use.
2952  {
2953  SDValue Sh(nullptr, 0), Y(nullptr, 0);
2954 
2955  // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
2956  if (N0.getOpcode() == ISD::SHL &&
2958  N0.getNode()->hasOneUse()) {
2959  Sh = N0; Y = N1;
2960  } else if (N1.getOpcode() == ISD::SHL &&
2962  N1.getNode()->hasOneUse()) {
2963  Sh = N1; Y = N0;
2964  }
2965 
2966  if (Sh.getNode()) {
2967  SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
2968  return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
2969  }
2970  }
2971 
2972  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
2974  N0.getOpcode() == ISD::ADD &&
2976  isMulAddWithConstProfitable(N, N0, N1))
2977  return DAG.getNode(ISD::ADD, SDLoc(N), VT,
2978  DAG.getNode(ISD::MUL, SDLoc(N0), VT,
2979  N0.getOperand(0), N1),
2980  DAG.getNode(ISD::MUL, SDLoc(N1), VT,
2981  N0.getOperand(1), N1));
2982 
2983  // reassociate mul
2984  if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
2985  return RMUL;
2986 
2987  return SDValue();
2988 }
2989 
2990 /// Return true if divmod libcall is available.
2991 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
2992  const TargetLowering &TLI) {
2993  RTLIB::Libcall LC;
2994  EVT NodeType = Node->getValueType(0);
2995  if (!NodeType.isSimple())
2996  return false;
2997  switch (NodeType.getSimpleVT().SimpleTy) {
2998  default: return false; // No libcall for vector types.
2999  case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
3000  case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
3001  case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
3002  case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3003  case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3004  }
3005 
3006  return TLI.getLibcallName(LC) != nullptr;
3007 }
3008 
3009 /// Issue divrem if both quotient and remainder are needed.
3010 SDValue DAGCombiner::useDivRem(SDNode *Node) {
3011  if (Node->use_empty())
3012  return SDValue(); // This is a dead node, leave it alone.
3013 
3014  unsigned Opcode = Node->getOpcode();
3015  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
3016  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
3017 
3018  // DivMod lib calls can still work on non-legal types if using lib-calls.
3019  EVT VT = Node->getValueType(0);
3020  if (VT.isVector() || !VT.isInteger())
3021  return SDValue();
3022 
3023  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
3024  return SDValue();
3025 
3026  // If DIVREM is going to get expanded into a libcall,
3027  // but there is no libcall available, then don't combine.
3028  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
3029  !isDivRemLibcallAvailable(Node, isSigned, TLI))
3030  return SDValue();
3031 
3032  // If div is legal, it's better to do the normal expansion
3033  unsigned OtherOpcode = 0;
3034  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
3035  OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
3036  if (TLI.isOperationLegalOrCustom(Opcode, VT))
3037  return SDValue();
3038  } else {
3039  OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3040  if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
3041  return SDValue();
3042  }
3043 
3044  SDValue Op0 = Node->getOperand(0);
3045  SDValue Op1 = Node->getOperand(1);
3046  SDValue combined;
3047  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
3048  UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
3049  SDNode *User = *UI;
3050  if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
3051  User->use_empty())
3052  continue;
3053  // Convert the other matching node(s), too;
3054  // otherwise, the DIVREM may get target-legalized into something
3055  // target-specific that we won't be able to recognize.
3056  unsigned UserOpc = User->getOpcode();
3057  if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
3058  User->getOperand(0) == Op0 &&
3059  User->getOperand(1) == Op1) {
3060  if (!combined) {
3061  if (UserOpc == OtherOpcode) {
3062  SDVTList VTs = DAG.getVTList(VT, VT);
3063  combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
3064  } else if (UserOpc == DivRemOpc) {
3065  combined = SDValue(User, 0);
3066  } else {
3067  assert(UserOpc == Opcode);
3068  continue;
3069  }
3070  }
3071  if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
3072  CombineTo(User, combined);
3073  else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
3074  CombineTo(User, combined.getValue(1));
3075  }
3076  }
3077  return combined;
3078 }
3079 
3081  SDValue N0 = N->getOperand(0);
3082  SDValue N1 = N->getOperand(1);
3083  EVT VT = N->getValueType(0);
3084  SDLoc DL(N);
3085 
3086  unsigned Opc = N->getOpcode();
3087  bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
3089 
3090  // X / undef -> undef
3091  // X % undef -> undef
3092  // X / 0 -> undef
3093  // X % 0 -> undef
3094  // NOTE: This includes vectors where any divisor element is zero/undef.
3095  if (DAG.isUndef(Opc, {N0, N1}))
3096  return DAG.getUNDEF(VT);
3097 
3098  // undef / X -> 0
3099  // undef % X -> 0
3100  if (N0.isUndef())
3101  return DAG.getConstant(0, DL, VT);
3102 
3103  // 0 / X -> 0
3104  // 0 % X -> 0
3106  if (N0C && N0C->isNullValue())
3107  return N0;
3108 
3109  // X / X -> 1
3110  // X % X -> 0
3111  if (N0 == N1)
3112  return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
3113 
3114  // X / 1 -> X
3115  // X % 1 -> 0
3116  // If this is a boolean op (single-bit element type), we can't have
3117  // division-by-zero or remainder-by-zero, so assume the divisor is 1.
3118  // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
3119  // it's a 1.
3120  if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
3121  return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
3122 
3123  return SDValue();
3124 }
3125 
3126 SDValue DAGCombiner::visitSDIV(SDNode *N) {
3127  SDValue N0 = N->getOperand(0);
3128  SDValue N1 = N->getOperand(1);
3129  EVT VT = N->getValueType(0);
3130  EVT CCVT = getSetCCResultType(VT);
3131 
3132  // fold vector ops
3133  if (VT.isVector())
3134  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3135  return FoldedVOp;
3136 
3137  SDLoc DL(N);
3138 
3139  // fold (sdiv c1, c2) -> c1/c2
3142  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
3143  return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
3144  // fold (sdiv X, -1) -> 0-X
3145  if (N1C && N1C->isAllOnesValue())
3146  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
3147  // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
3148  if (N1C && N1C->getAPIntValue().isMinSignedValue())
3149  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3150  DAG.getConstant(1, DL, VT),
3151  DAG.getConstant(0, DL, VT));
3152 
3153  if (SDValue V = simplifyDivRem(N, DAG))
3154  return V;
3155 
3156  if (SDValue NewSel = foldBinOpIntoSelect(N))
3157  return NewSel;
3158 
3159  // If we know the sign bits of both operands are zero, strength reduce to a
3160  // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
3161  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3162  return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
3163 
3164  if (SDValue V = visitSDIVLike(N0, N1, N))
3165  return V;
3166 
3167  // sdiv, srem -> sdivrem
3168  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3169  // true. Otherwise, we break the simplification logic in visitREM().
3171  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3172  if (SDValue DivRem = useDivRem(N))
3173  return DivRem;
3174 
3175  return SDValue();
3176 }
3177 
3178 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3179  SDLoc DL(N);
3180  EVT VT = N->getValueType(0);
3181  EVT CCVT = getSetCCResultType(VT);
3182  unsigned BitWidth = VT.getScalarSizeInBits();
3183 
3184  // Helper for determining whether a value is a power-2 constant scalar or a
3185  // vector of such elements.
3186  auto IsPowerOfTwo = [](ConstantSDNode *C) {
3187  if (C->isNullValue() || C->isOpaque())
3188  return false;
3189  if (C->getAPIntValue().isPowerOf2())
3190  return true;
3191  if ((-C->getAPIntValue()).isPowerOf2())
3192  return true;
3193  return false;
3194  };
3195 
3196  // fold (sdiv X, pow2) -> simple ops after legalize
3197  // FIXME: We check for the exact bit here because the generic lowering gives
3198  // better results in that case. The target-specific lowering should learn how
3199  // to handle exact sdivs efficiently.
3200  if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
3201  // Target-specific implementation of sdiv x, pow2.
3202  if (SDValue Res = BuildSDIVPow2(N))
3203  return Res;
3204 
3205  // Create constants that are functions of the shift amount value.
3206  EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
3207  SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
3208  SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
3209  C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
3210  SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
3211  if (!isConstantOrConstantVector(Inexact))
3212  return SDValue();
3213 
3214  // Splat the sign bit into the register
3215  SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
3216  DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
3217  AddToWorklist(Sign.getNode());
3218 
3219  // Add (N0 < 0) ? abs2 - 1 : 0;
3220  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
3221  AddToWorklist(Srl.getNode());
3222  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
3223  AddToWorklist(Add.getNode());
3224  SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
3225  AddToWorklist(Sra.getNode());
3226 
3227  // Special case: (sdiv X, 1) -> X
3228  // Special Case: (sdiv X, -1) -> 0-X
3229  SDValue One = DAG.getConstant(1, DL, VT);
3230  SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
3231  SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
3232  SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
3233  SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
3234  Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
3235 
3236  // If dividing by a positive value, we're done. Otherwise, the result must
3237  // be negated.
3238  SDValue Zero = DAG.getConstant(0, DL, VT);
3239  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
3240 
3241  // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
3242  SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
3243  SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
3244  return Res;
3245  }
3246 
3247  // If integer divide is expensive and we satisfy the requirements, emit an
3248  // alternate sequence. Targets may check function attributes for size/speed
3249  // trade-offs.
3251  if (isConstantOrConstantVector(N1) &&
3252  !TLI.isIntDivCheap(N->getValueType(0), Attr))
3253  if (SDValue Op = BuildSDIV(N))
3254  return Op;
3255 
3256  return SDValue();
3257 }
3258 
3259 SDValue DAGCombiner::visitUDIV(SDNode *N) {
3260  SDValue N0 = N->getOperand(0);
3261  SDValue N1 = N->getOperand(1);
3262  EVT VT = N->getValueType(0);
3263  EVT CCVT = getSetCCResultType(VT);
3264 
3265  // fold vector ops
3266  if (VT.isVector())
3267  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3268  return FoldedVOp;
3269 
3270  SDLoc DL(N);
3271 
3272  // fold (udiv c1, c2) -> c1/c2
3275  if (N0C && N1C)
3276  if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
3277  N0C, N1C))
3278  return Folded;
3279  // fold (udiv X, -1) -> select(X == -1, 1, 0)
3280  if (N1C && N1C->getAPIntValue().isAllOnesValue())
3281  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3282  DAG.getConstant(1, DL, VT),
3283  DAG.getConstant(0, DL, VT));
3284 
3285  if (SDValue V = simplifyDivRem(N, DAG))
3286  return V;
3287 
3288  if (SDValue NewSel = foldBinOpIntoSelect(N))
3289  return NewSel;
3290 
3291  if (SDValue V = visitUDIVLike(N0, N1, N))
3292  return V;
3293 
3294  // sdiv, srem -> sdivrem
3295  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3296  // true. Otherwise, we break the simplification logic in visitREM().
3298  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3299  if (SDValue DivRem = useDivRem(N))
3300  return DivRem;
3301 
3302  return SDValue();
3303 }
3304 
3305 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3306  SDLoc DL(N);
3307  EVT VT = N->getValueType(0);
3308 
3309  // fold (udiv x, (1 << c)) -> x >>u c
3310  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3311  DAG.isKnownToBeAPowerOfTwo(N1)) {
3312  SDValue LogBase2 = BuildLogBase2(N1, DL);
3313  AddToWorklist(LogBase2.getNode());
3314 
3315  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3316  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3317  AddToWorklist(Trunc.getNode());
3318  return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3319  }
3320 
3321  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
3322  if (N1.getOpcode() == ISD::SHL) {
3323  SDValue N10 = N1.getOperand(0);
3324  if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
3325  DAG.isKnownToBeAPowerOfTwo(N10)) {
3326  SDValue LogBase2 = BuildLogBase2(N10, DL);
3327  AddToWorklist(LogBase2.getNode());
3328 
3329  EVT ADDVT = N1.getOperand(1).getValueType();
3330  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
3331  AddToWorklist(Trunc.getNode());
3332  SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
3333  AddToWorklist(Add.getNode());
3334  return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
3335  }
3336  }
3337 
3338  // fold (udiv x, c) -> alternate
3340  if (isConstantOrConstantVector(N1) &&
3341  !TLI.isIntDivCheap(N->getValueType(0), Attr))
3342  if (SDValue Op = BuildUDIV(N))
3343  return Op;
3344 
3345  return SDValue();
3346 }
3347 
3348 // handles ISD::SREM and ISD::UREM
3349 SDValue DAGCombiner::visitREM(SDNode *N) {
3350  unsigned Opcode = N->getOpcode();
3351  SDValue N0 = N->getOperand(0);
3352  SDValue N1 = N->getOperand(1);
3353  EVT VT = N->getValueType(0);
3354  EVT CCVT = getSetCCResultType(VT);
3355 
3356  bool isSigned = (Opcode == ISD::SREM);
3357  SDLoc DL(N);
3358 
3359  // fold (rem c1, c2) -> c1%c2
3362  if (N0C && N1C)
3363  if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
3364  return Folded;
3365  // fold (urem X, -1) -> select(X == -1, 0, x)
3366  if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
3367  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3368  DAG.getConstant(0, DL, VT), N0);
3369 
3370  if (SDValue V = simplifyDivRem(N, DAG))
3371  return V;
3372 
3373  if (SDValue NewSel = foldBinOpIntoSelect(N))
3374  return NewSel;
3375 
3376  if (isSigned) {
3377  // If we know the sign bits of both operands are zero, strength reduce to a
3378  // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
3379  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3380  return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
3381  } else {
3382  SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
3383  if (DAG.isKnownToBeAPowerOfTwo(N1)) {
3384  // fold (urem x, pow2) -> (and x, pow2-1)
3385  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3386  AddToWorklist(Add.getNode());
3387  return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3388  }
3389  if (N1.getOpcode() == ISD::SHL &&
3390  DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
3391  // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
3392  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3393  AddToWorklist(Add.getNode());
3394  return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3395  }
3396  }
3397 
3399 
3400  // If X/C can be simplified by the division-by-constant logic, lower
3401  // X%C to the equivalent of X-X/C*C.
3402  // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
3403  // speculative DIV must not cause a DIVREM conversion. We guard against this
3404  // by skipping the simplification if isIntDivCheap(). When div is not cheap,
3405  // combine will not return a DIVREM. Regardless, checking cheapness here
3406  // makes sense since the simplification results in fatter code.
3407  if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
3408  SDValue OptimizedDiv =
3409  isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
3410  if (OptimizedDiv.getNode()) {
3411  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
3412  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3413  AddToWorklist(OptimizedDiv.getNode());
3414  AddToWorklist(Mul.getNode());
3415  return Sub;
3416  }
3417  }
3418 
3419  // sdiv, srem -> sdivrem
3420  if (SDValue DivRem = useDivRem(N))
3421  return DivRem.getValue(1);
3422 
3423  return SDValue();
3424 }
3425 
3426 SDValue DAGCombiner::visitMULHS(SDNode *N) {
3427  SDValue N0 = N->getOperand(0);
3428  SDValue N1 = N->getOperand(1);
3429  EVT VT = N->getValueType(0);
3430  SDLoc DL(N);
3431 
3432  if (VT.isVector()) {
3433  // fold (mulhs x, 0) -> 0
3435  return N1;
3437  return N0;
3438  }
3439 
3440  // fold (mulhs x, 0) -> 0
3441  if (isNullConstant(N1))
3442  return N1;
3443  // fold (mulhs x, 1) -> (sra x, size(x)-1)
3444  if (isOneConstant(N1))
3445  return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
3446  DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
3447  getShiftAmountTy(N0.getValueType())));
3448 
3449  // fold (mulhs x, undef) -> 0
3450  if (N0.isUndef() || N1.isUndef())
3451  return DAG.getConstant(0, DL, VT);
3452 
3453  // If the type twice as wide is legal, transform the mulhs to a wider multiply
3454  // plus a shift.
3455  if (VT.isSimple() && !VT.isVector()) {
3456  MVT Simple = VT.getSimpleVT();
3457  unsigned SimpleSize = Simple.getSizeInBits();
3458  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3459  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3460  N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
3461  N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
3462  N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3463  N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3464  DAG.getConstant(SimpleSize, DL,
3465  getShiftAmountTy(N1.getValueType())));
3466  return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3467  }
3468  }
3469 
3470  return SDValue();
3471 }
3472 
3473 SDValue DAGCombiner::visitMULHU(SDNode *N) {
3474  SDValue N0 = N->getOperand(0);
3475  SDValue N1 = N->getOperand(1);
3476  EVT VT = N->getValueType(0);
3477  SDLoc DL(N);
3478 
3479  if (VT.isVector()) {
3480  // fold (mulhu x, 0) -> 0
3482  return N1;
3484  return N0;
3485  }
3486 
3487  // fold (mulhu x, 0) -> 0
3488  if (isNullConstant(N1))
3489  return N1;
3490  // fold (mulhu x, 1) -> 0
3491  if (isOneConstant(N1))
3492  return DAG.getConstant(0, DL, N0.getValueType());
3493  // fold (mulhu x, undef) -> 0
3494  if (N0.isUndef() || N1.isUndef())
3495  return DAG.getConstant(0, DL, VT);
3496 
3497  // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
3498  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3499  DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
3500  SDLoc DL(N);
3501  unsigned NumEltBits = VT.getScalarSizeInBits();
3502  SDValue LogBase2 = BuildLogBase2(N1, DL);
3503  SDValue SRLAmt = DAG.getNode(
3504  ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
3505  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3506  SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
3507  return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3508  }
3509 
3510  // If the type twice as wide is legal, transform the mulhu to a wider multiply
3511  // plus a shift.
3512  if (VT.isSimple() && !VT.isVector()) {
3513  MVT Simple = VT.getSimpleVT();
3514  unsigned SimpleSize = Simple.getSizeInBits();
3515  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3516  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3517  N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
3518  N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
3519  N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3520  N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3521  DAG.getConstant(SimpleSize, DL,
3522  getShiftAmountTy(N1.getValueType())));
3523  return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3524  }
3525  }
3526 
3527  return SDValue();
3528 }
3529 
3530 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
3531 /// give the opcodes for the two computations that are being performed. Return
3532 /// true if a simplification was made.
3533 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
3534  unsigned HiOp) {
3535  // If the high half is not needed, just compute the low half.
3536  bool HiExists = N->hasAnyUseOfValue(1);
3537  if (!HiExists && (!LegalOperations ||
3538  TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
3539  SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3540  return CombineTo(N, Res, Res);
3541  }
3542 
3543  // If the low half is not needed, just compute the high half.
3544  bool LoExists = N->hasAnyUseOfValue(0);
3545  if (!LoExists && (!LegalOperations ||
3546  TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
3547  SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3548  return CombineTo(N, Res, Res);
3549  }
3550 
3551  // If both halves are used, return as it is.
3552  if (LoExists && HiExists)
3553  return SDValue();
3554 
3555  // If the two computed results can be simplified separately, separate them.
3556  if (LoExists) {
3557  SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3558  AddToWorklist(Lo.getNode());
3559  SDValue LoOpt = combine(Lo.getNode());
3560  if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
3561  (!LegalOperations ||
3562  TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
3563  return CombineTo(N, LoOpt, LoOpt);
3564  }
3565 
3566  if (HiExists) {
3567  SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3568  AddToWorklist(Hi.getNode());
3569  SDValue HiOpt = combine(Hi.getNode());
3570  if (HiOpt.getNode() && HiOpt != Hi &&
3571  (!LegalOperations ||
3572  TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
3573  return CombineTo(N, HiOpt, HiOpt);
3574  }
3575 
3576  return SDValue();
3577 }
3578 
3579 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
3580  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
3581  return Res;
3582 
3583  EVT VT = N->getValueType(0);
3584  SDLoc DL(N);
3585 
3586  // If the type is twice as wide is legal, transform the mulhu to a wider
3587  // multiply plus a shift.
3588  if (VT.isSimple() && !VT.isVector()) {
3589  MVT Simple = VT.getSimpleVT();
3590  unsigned SimpleSize = Simple.getSizeInBits();
3591  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3592  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3593  SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
3594  SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
3595  Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3596  // Compute the high part as N1.
3597  Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3598  DAG.getConstant(SimpleSize, DL,
3599  getShiftAmountTy(Lo.getValueType())));
3600  Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3601  // Compute the low part as N0.
3602  Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3603  return CombineTo(N, Lo, Hi);
3604  }
3605  }
3606 
3607  return SDValue();
3608 }
3609 
3610 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
3611  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
3612  return Res;
3613 
3614  EVT VT = N->getValueType(0);
3615  SDLoc DL(N);
3616 
3617  // If the type is twice as wide is legal, transform the mulhu to a wider
3618  // multiply plus a shift.
3619  if (VT.isSimple() && !VT.isVector()) {
3620  MVT Simple = VT.getSimpleVT();
3621  unsigned SimpleSize = Simple.getSizeInBits();
3622  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3623  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3624  SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
3625  SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
3626  Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3627  // Compute the high part as N1.
3628  Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3629  DAG.getConstant(SimpleSize, DL,
3630  getShiftAmountTy(Lo.getValueType())));
3631  Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3632  // Compute the low part as N0.
3633  Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3634  return CombineTo(N, Lo, Hi);
3635  }
3636  }
3637 
3638  return SDValue();
3639 }
3640 
3641 SDValue DAGCombiner::visitSMULO(SDNode *N) {
3642  // (smulo x, 2) -> (saddo x, x)
3643  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3644  if (C2->getAPIntValue() == 2)
3645  return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
3646  N->getOperand(0), N->getOperand(0));
3647 
3648  return SDValue();
3649 }
3650 
3651 SDValue DAGCombiner::visitUMULO(SDNode *N) {
3652  // (umulo x, 2) -> (uaddo x, x)
3653  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3654  if (C2->getAPIntValue() == 2)
3655  return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
3656  N->getOperand(0), N->getOperand(0));
3657 
3658  return SDValue();
3659 }
3660 
3661 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
3662  SDValue N0 = N->getOperand(0);
3663  SDValue N1 = N->getOperand(1);
3664  EVT VT = N0.getValueType();
3665 
3666  // fold vector ops
3667  if (VT.isVector())
3668  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3669  return FoldedVOp;
3670 
3671  // fold operation with constant operands.
3674  if (N0C && N1C)
3675  return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
3676 
3677  // canonicalize constant to RHS
3680  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
3681 
3682  // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
3683  // Only do this if the current op isn't legal and the flipped is.
3684  unsigned Opcode = N->getOpcode();
3685  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3686  if (!TLI.isOperationLegal(Opcode, VT) &&
3687  (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
3688  (N1.isUndef() || DAG.SignBitIsZero(N1))) {
3689  unsigned AltOpcode;
3690  switch (Opcode) {
3691  case ISD::SMIN: AltOpcode = ISD::UMIN; break;
3692  case ISD::SMAX: AltOpcode = ISD::UMAX; break;
3693  case ISD::UMIN: AltOpcode = ISD::SMIN; break;
3694  case ISD::UMAX: AltOpcode = ISD::SMAX; break;
3695  default: llvm_unreachable("Unknown MINMAX opcode");
3696  }
3697  if (TLI.isOperationLegal(AltOpcode, VT))
3698  return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
3699  }
3700 
3701  return SDValue();
3702 }
3703 
3704 /// If this is a bitwise logic instruction and both operands have the same
3705 /// opcode, try to sink the other opcode after the logic instruction.
3706 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
3707  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3708  EVT VT = N0.getValueType();
3709  unsigned LogicOpcode = N->getOpcode();
3710  unsigned HandOpcode = N0.getOpcode();
3711  assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
3712  LogicOpcode == ISD::XOR) && "Expected logic opcode");
3713  assert(HandOpcode == N1.getOpcode() && "Bad input!");
3714 
3715  // Bail early if none of these transforms apply.
3716  if (N0.getNumOperands() == 0)
3717  return SDValue();
3718 
3719  // FIXME: We should check number of uses of the operands to not increase
3720  // the instruction count for all transforms.
3721 
3722  // Handle size-changing casts.
3723  SDValue X = N0.getOperand(0);
3724  SDValue Y = N1.getOperand(0);
3725  EVT XVT = X.getValueType();
3726  SDLoc DL(N);
3727  if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
3728  HandOpcode == ISD::SIGN_EXTEND) {
3729  // If both operands have other uses, this transform would create extra
3730  // instructions without eliminating anything.
3731  if (!N0.hasOneUse() && !N1.hasOneUse())
3732  return SDValue();
3733  // We need matching integer source types.
3734  // Do not hoist logic op inside of a vector extend, since it may combine
3735  // into a vsetcc.
3736  // TODO: Should the vector check apply to truncate though?
3737  if (VT.isVector() || XVT != Y.getValueType())
3738  return SDValue();
3739  // Don't create an illegal op during or after legalization.
3740  if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
3741  return SDValue();
3742  // Avoid infinite looping with PromoteIntBinOp.
3743  // TODO: Should we apply desirable/legal constraints to all opcodes?
3744  if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
3745  !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
3746  return SDValue();
3747  // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
3748  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
3749  return DAG.getNode(HandOpcode, DL, VT, Logic);
3750  }
3751 
3752  // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
3753  if (HandOpcode == ISD::TRUNCATE) {
3754  // If both operands have other uses, this transform would create extra
3755  // instructions without eliminating anything.
3756  if (!N0.hasOneUse() && !N1.hasOneUse())
3757  return SDValue();
3758  // We need matching source types.
3759  if (XVT != Y.getValueType())
3760  return SDValue();
3761  // Don't create an illegal op during or after legalization.
3762  if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
3763  return SDValue();
3764  // Be extra careful sinking truncate. If it's free, there's no benefit in
3765  // widening a binop. Also, don't create a logic op on an illegal type.
3766  if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
3767  return SDValue();
3768  if (!TLI.isTypeLegal(XVT))
3769  return SDValue();
3770  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
3771  return DAG.getNode(HandOpcode, DL, VT, Logic);
3772  }
3773 
3774  // For binops SHL/SRL/SRA/AND:
3775  // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
3776  if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
3777  HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
3778  N0.getOperand(1) == N1.getOperand(1)) {
3779  // If either operand has other uses, this transform is not an improvement.
3780  if (!N0.hasOneUse() || !N1.hasOneUse())
3781  return SDValue();
3782  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
3783  return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
3784  }
3785 
3786  // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
3787  if (HandOpcode == ISD::BSWAP) {
3788  // If either operand has other uses, this transform is not an improvement.
3789  if (!N0.hasOneUse() || !N1.hasOneUse())
3790  return SDValue();
3791  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
3792  return DAG.getNode(HandOpcode, DL, VT, Logic);
3793  }
3794 
3795  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
3796  // Only perform this optimization up until type legalization, before
3797  // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
3798  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
3799  // we don't want to undo this promotion.
3800  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
3801  // on scalars.
3802  if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
3803  Level <= AfterLegalizeTypes) {
3804  // Input types must be integer and the same.
3805  if (XVT.isInteger() && XVT == Y.getValueType()) {
3806  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
3807  return DAG.getNode(HandOpcode, DL, VT, Logic);
3808  }
3809  }
3810 
3811  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
3812  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
3813  // If both shuffles use the same mask, and both shuffle within a single
3814  // vector, then it is worthwhile to move the swizzle after the operation.
3815  // The type-legalizer generates this pattern when loading illegal
3816  // vector types from memory. In many cases this allows additional shuffle
3817  // optimizations.
3818  // There are other cases where moving the shuffle after the xor/and/or
3819  // is profitable even if shuffles don't perform a swizzle.
3820  // If both shuffles use the same mask, and both shuffles have the same first
3821  // or second operand, then it might still be profitable to move the shuffle
3822  // after the xor/and/or operation.
3823  if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
3824  auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
3825  auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
3826  assert(X.getValueType() == Y.getValueType() &&
3827  "Inputs to shuffles are not the same type");
3828 
3829  // Check that both shuffles use the same mask. The masks are known to be of
3830  // the same length because the result vector type is the same.
3831  // Check also that shuffles have only one use to avoid introducing extra
3832  // instructions.
3833  if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
3834  !SVN0->getMask().equals(SVN1->getMask()))
3835  return SDValue();
3836 
3837  // Don't try to fold this node if it requires introducing a
3838  // build vector of all zeros that might be illegal at this stage.
3839  SDValue ShOp = N0.getOperand(1);
3840  if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
3841  ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3842 
3843  // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
3844  if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
3845  SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
3846  N0.getOperand(0), N1.getOperand(0));
3847  return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
3848  }
3849 
3850  // Don't try to fold this node if it requires introducing a
3851  // build vector of all zeros that might be illegal at this stage.
3852  ShOp = N0.getOperand(0);
3853  if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
3854  ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3855 
3856  // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
3857  if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
3858  SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
3859  N1.getOperand(1));
3860  return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
3861  }
3862  }
3863 
3864  return SDValue();
3865 }
3866 
3867 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
3868 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
3869  const SDLoc &DL) {
3870  SDValue LL, LR, RL, RR, N0CC, N1CC;
3871  if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
3872  !isSetCCEquivalent(N1, RL, RR, N1CC))
3873  return SDValue();
3874 
3875  assert(N0.getValueType() == N1.getValueType() &&
3876  "Unexpected operand types for bitwise logic op");
3877  assert(LL.getValueType() == LR.getValueType() &&
3878  RL.getValueType() == RR.getValueType() &&
3879  "Unexpected operand types for setcc");
3880 
3881  // If we're here post-legalization or the logic op type is not i1, the logic
3882  // op type must match a setcc result type. Also, all folds require new
3883  // operations on the left and right operands, so those types must match.
3884  EVT VT = N0.getValueType();
3885  EVT OpVT = LL.getValueType();
3886  if (LegalOperations || VT.getScalarType() != MVT::i1)
3887  if (VT != getSetCCResultType(OpVT))
3888  return SDValue();
3889  if (OpVT != RL.getValueType())
3890  return SDValue();
3891 
3892  ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
3893  ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
3894  bool IsInteger = OpVT.isInteger();
3895  if (LR == RR && CC0 == CC1 && IsInteger) {
3896  bool IsZero = isNullOrNullSplat(LR);
3897  bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
3898 
3899  // All bits clear?
3900  bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
3901  // All sign bits clear?
3902  bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
3903  // Any bits set?
3904  bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
3905  // Any sign bits set?
3906  bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
3907 
3908  // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
3909  // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
3910  // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
3911  // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
3912  if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
3913  SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
3914  AddToWorklist(Or.getNode());
3915  return DAG.getSetCC(DL, VT, Or, LR, CC1);
3916  }
3917 
3918  // All bits set?
3919  bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
3920  // All sign bits set?
3921  bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
3922  // Any bits clear?
3923  bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
3924  // Any sign bits clear?
3925  bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
3926 
3927  // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
3928  // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
3929  // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
3930  // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
3931  if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
3932  SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
3933  AddToWorklist(And.getNode());
3934  return DAG.getSetCC(DL, VT, And, LR, CC1);
3935  }
3936  }
3937 
3938  // TODO: What is the 'or' equivalent of this fold?
3939  // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
3940  if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
3941  IsInteger && CC0 == ISD::SETNE &&
3942  ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
3943  (isAllOnesConstant(LR) && isNullConstant(RR)))) {
3944  SDValue One = DAG.getConstant(1, DL, OpVT);
3945  SDValue Two = DAG.getConstant(2, DL, OpVT);
3946  SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
3947  AddToWorklist(Add.getNode());
3948  return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
3949  }
3950 
3951  // Try more general transforms if the predicates match and the only user of
3952  // the compares is the 'and' or 'or'.
3953  if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
3954  N0.hasOneUse() && N1.hasOneUse()) {
3955  // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
3956  // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
3957  if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
3958  SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
3959  SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
3960  SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
3961  SDValue Zero = DAG.getConstant(0, DL, OpVT);
3962  return DAG.getSetCC(DL, VT, Or, Zero, CC1);
3963  }
3964  }
3965 
3966  // Canonicalize equivalent operands to LL == RL.
3967  if (LL == RR && LR == RL) {
3968  CC1 = ISD::getSetCCSwappedOperands(CC1);
3969  std::swap(RL, RR);
3970  }
3971 
3972  // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
3973  // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
3974  if (LL == RL && LR == RR) {
3975  ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
3976  : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
3977  if (NewCC != ISD::SETCC_INVALID &&
3978  (!LegalOperations ||
3979  (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
3980  TLI.isOperationLegal(ISD::SETCC, OpVT))))
3981  return DAG.getSetCC(DL, VT, LL, LR, NewCC);
3982  }
3983 
3984  return SDValue();
3985 }
3986 
3987 /// This contains all DAGCombine rules which reduce two values combined by
3988 /// an And operation to a single value. This makes them reusable in the context
3989 /// of visitSELECT(). Rules involving constants are not included as
3990 /// visitSELECT() already handles those cases.
3991 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
3992  EVT VT = N1.getValueType();
3993  SDLoc DL(N);
3994 
3995  // fold (and x, undef) -> 0
3996  if (N0.isUndef() || N1.isUndef())
3997  return DAG.getConstant(0, DL, VT);
3998 
3999  if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
4000  return V;
4001 
4002  if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
4003  VT.getSizeInBits() <= 64) {
4004  if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4005  if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
4006  // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
4007  // immediate for an add, but it is legal if its top c2 bits are set,
4008  // transform the ADD so the immediate doesn't need to be materialized
4009  // in a register.
4010  APInt ADDC = ADDI->getAPIntValue();
4011  APInt SRLC = SRLI->getAPIntValue();
4012  if (ADDC.getMinSignedBits() <= 64 &&
4013  SRLC.ult(VT.getSizeInBits()) &&
4014  !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4016  SRLC.getZExtValue());
4017  if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
4018  ADDC |= Mask;
4019  if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4020  SDLoc DL0(N0);
4021  SDValue NewAdd =
4022  DAG.getNode(ISD::ADD, DL0, VT,
4023  N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
4024  CombineTo(N0.getNode(), NewAdd);
4025  // Return N so it doesn't get rechecked!
4026  return SDValue(N, 0);
4027  }
4028  }
4029  }
4030  }
4031  }
4032  }
4033 
4034  // Reduce bit extract of low half of an integer to the narrower type.
4035  // (and (srl i64:x, K), KMask) ->
4036  // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
4037  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4038  if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
4039  if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4040  unsigned Size = VT.getSizeInBits();
4041  const APInt &AndMask = CAnd->getAPIntValue();
4042  unsigned ShiftBits = CShift->getZExtValue();
4043 
4044  // Bail out, this node will probably disappear anyway.
4045  if (ShiftBits == 0)
4046  return SDValue();
4047 
4048  unsigned MaskBits = AndMask.countTrailingOnes();
4049  EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
4050 
4051  if (AndMask.isMask() &&
4052  // Required bits must not span the two halves of the integer and
4053  // must fit in the half size type.
4054  (ShiftBits + MaskBits <= Size / 2) &&
4055  TLI.isNarrowingProfitable(VT, HalfVT) &&
4056  TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
4057  TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
4058  TLI.isTruncateFree(VT, HalfVT) &&
4059  TLI.isZExtFree(HalfVT, VT)) {
4060  // The isNarrowingProfitable is to avoid regressions on PPC and
4061  // AArch64 which match a few 64-bit bit insert / bit extract patterns
4062  // on downstream users of this. Those patterns could probably be
4063  // extended to handle extensions mixed in.
4064 
4065  SDValue SL(N0);
4066  assert(MaskBits <= Size);
4067 
4068  // Extracting the highest bit of the low half.
4069  EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
4070  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
4071  N0.getOperand(0));
4072 
4073  SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
4074  SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
4075  SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
4076  SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
4077  return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
4078  }
4079  }
4080  }
4081  }
4082 
4083  return SDValue();
4084 }
4085 
4086 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
4087  EVT LoadResultTy, EVT &ExtVT) {
4088  if (!AndC->getAPIntValue().isMask())
4089  return false;
4090 
4091  unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
4092 
4093  ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4094  EVT LoadedVT = LoadN->getMemoryVT();
4095 
4096  if (ExtVT == LoadedVT &&
4097  (!LegalOperations ||
4098  TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
4099  // ZEXTLOAD will match without needing to change the size of the value being
4100  // loaded.
4101  return true;
4102  }
4103 
4104  // Do not change the width of a volatile load.
4105  if (LoadN->isVolatile())
4106  return false;
4107 
4108  // Do not generate loads of non-round integer types since these can
4109  // be expensive (and would be wrong if the type is not byte sized).
4110  if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
4111  return false;
4112 
4113  if (LegalOperations &&
4114  !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
4115  return false;
4116 
4117  if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
4118  return false;
4119 
4120  return true;
4121 }
4122 
4123 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
4124  ISD::LoadExtType ExtType, EVT &MemVT,
4125  unsigned ShAmt) {
4126  if (!LDST)
4127  return false;
4128  // Only allow byte offsets.
4129  if (ShAmt % 8)
4130  return false;
4131 
4132  // Do not generate loads of non-round integer types since these can
4133  // be expensive (and would be wrong if the type is not byte sized).
4134  if (!MemVT.isRound())
4135  return false;
4136 
4137  // Don't change the width of a volatile load.
4138  if (LDST->isVolatile())
4139  return false;
4140 
4141  // Verify that we are actually reducing a load width here.
4142  if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
4143  return false;
4144 
4145  // Ensure that this isn't going to produce an unsupported unaligned access.
4146  if (ShAmt &&
4147  !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
4148  LDST->getAddressSpace(), ShAmt / 8))
4149  return false;
4150 
4151  // It's not possible to generate a constant of extended or untyped type.
4152  EVT PtrType = LDST->getBasePtr().getValueType();
4153  if (PtrType == MVT::Untyped || PtrType.isExtended())
4154  return false;
4155 
4156  if (isa<LoadSDNode>(LDST)) {
4157  LoadSDNode *Load = cast<LoadSDNode>(LDST);
4158  // Don't transform one with multiple uses, this would require adding a new
4159  // load.
4160  if (!SDValue(Load, 0).hasOneUse())
4161  return false;
4162 
4163  if (LegalOperations &&
4164  !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
4165  return false;
4166 
4167  // For the transform to be legal, the load must produce only two values
4168  // (the value loaded and the chain). Don't transform a pre-increment
4169  // load, for example, which produces an extra value. Otherwise the
4170  // transformation is not equivalent, and the downstream logic to replace
4171  // uses gets things wrong.
4172  if (Load->getNumValues() > 2)
4173  return false;
4174 
4175  // If the load that we're shrinking is an extload and we're not just
4176  // discarding the extension we can't simply shrink the load. Bail.
4177  // TODO: It would be possible to merge the extensions in some cases.
4178  if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
4179  Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4180  return false;
4181 
4182  if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
4183  return false;
4184  } else {
4185  assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
4186  StoreSDNode *Store = cast<StoreSDNode>(LDST);
4187  // Can't write outside the original store
4188  if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4189  return false;
4190 
4191  if (LegalOperations &&
4192  !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
4193  return false;
4194  }
4195  return true;
4196 }
4197 
4198 bool DAGCombiner::SearchForAndLoads(SDNode *N,
4200  SmallPtrSetImpl<SDNode*> &NodesWithConsts,
4202  SDNode *&NodeToMask) {
4203  // Recursively search for the operands, looking for loads which can be
4204  // narrowed.
4205  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
4206  SDValue Op = N->getOperand(i);
4207 
4208  if (Op.getValueType().isVector())
4209  return false;
4210 
4211  // Some constants may need fixing up later if they are too large.
4212  if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4213  if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
4214  (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
4215  NodesWithConsts.insert(N);
4216  continue;
4217  }
4218 
4219  if (!Op.hasOneUse())
4220  return false;
4221 
4222  switch(Op.getOpcode()) {
4223  case ISD::LOAD: {
4224  auto *Load = cast<LoadSDNode>(Op);
4225  EVT ExtVT;
4226  if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
4227  isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
4228 
4229  // ZEXTLOAD is already small enough.
4230  if (Load->getExtensionType() == ISD::ZEXTLOAD &&
4231  ExtVT.bitsGE(Load->getMemoryVT()))
4232  continue;
4233 
4234  // Use LE to convert equal sized loads to zext.
4235  if (ExtVT.bitsLE(Load->getMemoryVT()))
4236  Loads.push_back(Load);
4237 
4238  continue;
4239  }
4240  return false;
4241  }
4242  case ISD::ZERO_EXTEND:
4243  case ISD::AssertZext: {
4244  unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
4245  EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4246  EVT VT = Op.getOpcode() == ISD::AssertZext ?
4247  cast<VTSDNode>(Op.getOperand(1))->getVT() :
4248  Op.getOperand(0).getValueType();
4249 
4250  // We can accept extending nodes if the mask is wider or an equal
4251  // width to the original type.
4252  if (ExtVT.bitsGE(VT))
4253  continue;
4254  break;
4255  }
4256  case ISD::OR:
4257  case ISD::XOR:
4258  case ISD::AND:
4259  if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
4260  NodeToMask))
4261  return false;
4262  continue;
4263  }
4264 
4265  // Allow one node which will masked along with any loads found.
4266  if (NodeToMask)
4267  return false;
4268 
4269  // Also ensure that the node to be masked only produces one data result.
4270  NodeToMask = Op.getNode();
4271  if (NodeToMask->getNumValues() > 1) {
4272  bool HasValue = false;
4273  for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
4274  MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
4275  if (VT != MVT::Glue && VT != MVT::Other) {
4276  if (HasValue) {
4277  NodeToMask = nullptr;
4278  return false;
4279  }
4280  HasValue = true;
4281  }
4282  }
4283  assert(HasValue && "Node to be masked has no data result?");
4284  }
4285  }
4286  return true;
4287 }
4288 
4289 bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
4290  auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
4291  if (!Mask)
4292  return false;
4293 
4294  if (!Mask->getAPIntValue().isMask())
4295  return false;
4296 
4297  // No need to do anything if the and directly uses a load.
4298  if (isa<LoadSDNode>(N->getOperand(0)))
4299  return false;
4300 
4302  SmallPtrSet<SDNode*, 2> NodesWithConsts;
4303  SDNode *FixupNode = nullptr;
4304  if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
4305  if (Loads.size() == 0)
4306  return false;
4307 
4308  LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
4309  SDValue MaskOp = N->getOperand(1);
4310 
4311  // If it exists, fixup the single node we allow in the tree that needs
4312  // masking.
4313  if (FixupNode) {
4314  LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
4315  SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
4316  FixupNode->getValueType(0),
4317  SDValue(FixupNode, 0), MaskOp);
4318  DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
4319  if (And.getOpcode() == ISD ::AND)
4320  DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
4321  }
4322 
4323  // Narrow any constants that need it.
4324  for (auto *LogicN : NodesWithConsts) {
4325  SDValue Op0 = LogicN->getOperand(0);
4326  SDValue Op1 = LogicN->getOperand(1);
4327 
4328  if (isa<ConstantSDNode>(Op0))
4329  std::swap(Op0, Op1);
4330 
4331  SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
4332  Op1, MaskOp);
4333 
4334  DAG.UpdateNodeOperands(LogicN, Op0, And);
4335  }
4336 
4337  // Create narrow loads.
4338  for (auto *Load : Loads) {
4339  LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
4340  SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
4341  SDValue(Load, 0), MaskOp);
4342  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
4343  if (And.getOpcode() == ISD ::AND)
4344  And = SDValue(
4345  DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
4346  SDValue NewLoad = ReduceLoadWidth(And.getNode());
4347  assert(NewLoad &&
4348  "Shouldn't be masking the load if it can't be narrowed");
4349  CombineTo(Load, NewLoad, NewLoad.getValue(1));
4350  }
4351  DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
4352  return true;
4353  }
4354  return false;
4355 }
4356 
4357 // Unfold
4358 // x & (-1 'logical shift' y)
4359 // To
4360 // (x 'opposite logical shift' y) 'logical shift' y
4361 // if it is better for performance.
4362 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
4363  assert(N->getOpcode() == ISD::AND);
4364 
4365  SDValue N0 = N->getOperand(0);
4366  SDValue N1 = N->getOperand(1);
4367 
4368  // Do we actually prefer shifts over mask?
4369  if (!TLI.preferShiftsToClearExtremeBits(N0))
4370  return SDValue();
4371 
4372  // Try to match (-1 '[outer] logical shift' y)
4373  unsigned OuterShift;
4374  unsigned InnerShift; // The opposite direction to the OuterShift.
4375  SDValue Y; // Shift amount.
4376  auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
4377  if (!M.hasOneUse())
4378  return false;
4379  OuterShift = M->getOpcode();
4380  if (OuterShift == ISD::SHL)
4381  InnerShift = ISD::SRL;
4382  else if (OuterShift == ISD::SRL)
4383  InnerShift = ISD::SHL;
4384  else
4385  return false;
4386  if (!isAllOnesConstant(M->getOperand(0)))
4387  return false;
4388  Y = M->getOperand(1);
4389  return true;
4390  };
4391 
4392  SDValue X;
4393  if (matchMask(N1))
4394  X = N0;
4395  else if (matchMask(N0))
4396  X = N1;
4397  else
4398  return SDValue();
4399 
4400  SDLoc DL(N);
4401  EVT VT = N->getValueType(0);
4402 
4403  // tmp = x 'opposite logical shift' y
4404  SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
4405  // ret = tmp 'logical shift' y
4406  SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
4407 
4408  return T1;
4409 }
4410 
4411 SDValue DAGCombiner::visitAND(SDNode *N) {
4412  SDValue N0 = N->getOperand(0);
4413  SDValue N1 = N->getOperand(1);
4414  EVT VT = N1.getValueType();
4415 
4416  // x & x --> x
4417  if (N0 == N1)
4418  return N0;
4419 
4420  // fold vector ops
4421  if (VT.isVector()) {
4422  if (SDValue FoldedVOp = SimplifyVBinOp(N))
4423  return FoldedVOp;
4424 
4425  // fold (and x, 0) -> 0, vector edition
4427  // do not return N0, because undef node may exist in N0
4429  SDLoc(N), N0.getValueType());
4431  // do not return N1, because undef node may exist in N1
4433  SDLoc(N), N1.getValueType());
4434 
4435  // fold (and x, -1) -> x, vector edition
4437  return N1;
4439  return N0;
4440  }
4441 
4442  // fold (and c1, c2) -> c1&c2
4445  if (N0C && N1C && !N1C->isOpaque())
4446  return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
4447  // canonicalize constant to RHS
4450  return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
4451  // fold (and x, -1) -> x
4452  if (isAllOnesConstant(N1))
4453  return N0;
4454  // if (and x, c) is known to be zero, return 0
4455  unsigned BitWidth = VT.getScalarSizeInBits();
4456  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
4457  APInt::getAllOnesValue(BitWidth)))
4458  return DAG.getConstant(0, SDLoc(N), VT);
4459 
4460  if (SDValue NewSel = foldBinOpIntoSelect(N))
4461  return NewSel;
4462 
4463  // reassociate and
4464  if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
4465  return RAND;
4466 
4467  // Try to convert a constant mask AND into a shuffle clear mask.
4468  if (VT.isVector())
4469  if (SDValue Shuffle = XformToShuffleWithZero(N))
4470  return Shuffle;
4471 
4472  // fold (and (or x, C), D) -> D if (C & D) == D
4473  auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
4474  return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
4475  };
4476  if (N0.getOpcode() == ISD::OR &&
4477  ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
4478  return N1;
4479  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
4480  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
4481  SDValue N0Op0 = N0.getOperand(0);
4482  APInt Mask = ~N1C->getAPIntValue();
4483  Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
4484  if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
4485  SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
4486  N0.getValueType(), N0Op0);
4487 
4488  // Replace uses of the AND with uses of the Zero extend node.
4489  CombineTo(N, Zext);
4490 
4491  // We actually want to replace all uses of the any_extend with the
4492  // zero_extend, to avoid duplicating things. This will later cause this
4493  // AND to be folded.
4494  CombineTo(N0.getNode(), Zext);
4495  return SDValue(N, 0); // Return N so it doesn't get rechecked!
4496  }
4497  }
4498  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
4499  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
4500  // already be zero by virtue of the width of the base type of the load.
4501  //
4502  // the 'X' node here can either be nothing or an extract_vector_elt to catch
4503  // more cases.
4504  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4506  N0.getOperand(0).getOpcode() == ISD::LOAD &&
4507  N0.getOperand(0).getResNo() == 0) ||
4508  (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
4509  LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
4510  N0 : N0.getOperand(0) );
4511 
4512  // Get the constant (if applicable) the zero'th operand is being ANDed with.
4513  // This can be a pure constant or a vector splat, in which case we treat the
4514  // vector as a scalar and use the splat value.
4516  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
4517  Constant = C->getAPIntValue();
4518  } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
4519  APInt SplatValue, SplatUndef;
4520  unsigned SplatBitSize;
4521  bool HasAnyUndefs;
4522  bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
4523  SplatBitSize, HasAnyUndefs);
4524  if (IsSplat) {
4525  // Undef bits can contribute to a possible optimisation if set, so
4526  // set them.
4527  SplatValue |= SplatUndef;
4528 
4529  // The splat value may be something like "0x00FFFFFF", which means 0 for
4530  // the first vector value and FF for the rest, repeating. We need a mask
4531  // that will apply equally to all members of the vector, so AND all the
4532  // lanes of the constant together.
4533  EVT VT = Vector->getValueType(0);
4534  unsigned BitWidth = VT.getScalarSizeInBits();
4535 
4536  // If the splat value has been compressed to a bitlength lower
4537  // than the size of the vector lane, we need to re-expand it to
4538  // the lane size.
4539  if (BitWidth > SplatBitSize)
4540  for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
4541  SplatBitSize < BitWidth;
4542  SplatBitSize = SplatBitSize * 2)
4543  SplatValue |= SplatValue.shl(SplatBitSize);
4544 
4545  // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
4546  // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
4547  if (SplatBitSize % BitWidth == 0) {
4548  Constant = APInt::getAllOnesValue(BitWidth);
4549  for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
4550  Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
4551  }
4552  }
4553  }
4554 
4555  // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
4556  // actually legal and isn't going to get expanded, else this is a false
4557  // optimisation.
4558  bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
4559  Load->getValueType(0),
4560  Load->getMemoryVT());
4561 
4562  // Resize the constant to the same size as the original memory access before
4563  // extension. If it is still the AllOnesValue then this AND is completely
4564  // unneeded.
4565  Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
4566 
4567  bool B;
4568  switch (Load->getExtensionType()) {
4569  default: B = false; break;
4570  case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
4571  case ISD::ZEXTLOAD:
4572  case ISD::NON_EXTLOAD: B = true; break;
4573  }
4574 
4575  if (B && Constant.isAllOnesValue()) {
4576  // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
4577  // preserve semantics once we get rid of the AND.
4578  SDValue NewLoad(Load, 0);
4579 
4580  // Fold the AND away. NewLoad may get replaced immediately.
4581  CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
4582 
4583  if (Load->getExtensionType() == ISD::EXTLOAD) {
4584  NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
4585  Load->getValueType(0), SDLoc(Load),
4586  Load->getChain(), Load->getBasePtr(),
4587  Load->getOffset(), Load->getMemoryVT(),
4588  Load->getMemOperand());
4589  // Replace uses of the EXTLOAD with the new ZEXTLOAD.
4590  if (Load->getNumValues() == 3) {
4591  // PRE/POST_INC loads have 3 values.
4592  SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
4593  NewLoad.getValue(2) };
4594  CombineTo(Load, To, 3, true);
4595  } else {
4596  CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
4597  }
4598  }
4599 
4600  return SDValue(N, 0); // Return N so it doesn't get rechecked!
4601  }
4602  }
4603 
4604  // fold (and (load x), 255) -> (zextload x, i8)
4605  // fold (and (extload x, i16), 255) -> (zextload x, i8)
4606  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
4607  if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
4608  (N0.getOpcode() == ISD::ANY_EXTEND &&
4609  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
4610  if (SDValue Res = ReduceLoadWidth(N)) {
4611  LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
4612  ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
4613 
4614  AddToWorklist(N);
4615  CombineTo(LN0, Res, Res.getValue(1));
4616  return SDValue(N, 0);
4617  }
4618  }
4619 
4620  if (Level >= AfterLegalizeTypes) {
4621  // Attempt to propagate the AND back up to the leaves which, if they're
4622  // loads, can be combined to narrow loads and the AND node can be removed.
4623  // Perform after legalization so that extend nodes will already be
4624  // combined into the loads.
4625  if (BackwardsPropagateMask(N, DAG)) {
4626  return SDValue(N, 0);
4627  }
4628  }
4629 
4630  if (SDValue Combined = visitANDLike(N0, N1, N))
4631  return Combined;
4632 
4633  // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
4634  if (N0.getOpcode() == N1.getOpcode())
4635  if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
4636  return V;
4637 
4638  // Masking the negated extension of a boolean is just the zero-extended
4639  // boolean:
4640  // and (sub 0, zext(bool X)), 1 --> zext(bool X)
4641  // and (sub 0, sext(bool X)), 1 --> zext(bool X)
4642  //
4643  // Note: the SimplifyDemandedBits fold below can make an information-losing
4644  // transform, and then we have no way to find this better fold.
4645  if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
4646  if (isNullOrNullSplat(N0.getOperand(0))) {
4647  SDValue SubRHS = N0.getOperand(1);
4648  if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
4649  SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
4650  return SubRHS;
4651  if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
4652  SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
4653  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
4654  }
4655  }
4656 
4657  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
4658  // fold (and (sra)) -> (and (srl)) when possible.
4659  if (SimplifyDemandedBits(SDValue(N, 0)))
4660  return SDValue(N, 0);
4661 
4662  // fold (zext_inreg (extload x)) -> (zextload x)
4663  if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
4664  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4665  EVT MemVT = LN0->getMemoryVT();
4666  // If we zero all the possible extended bits, then we can turn this into
4667  // a zextload if we are running before legalize or the operation is legal.
4668  unsigned BitWidth = N1.getScalarValueSizeInBits();
4669  if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
4670  BitWidth - MemVT.getScalarSizeInBits())) &&
4671  ((!LegalOperations && !LN0->isVolatile()) ||
4672  TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
4673  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
4674  LN0->getChain(), LN0->getBasePtr(),
4675  MemVT, LN0->getMemOperand());
4676  AddToWorklist(N);
4677  CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
4678  return SDValue(N, 0); // Return N so it doesn't get rechecked!
4679  }
4680  }
4681  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
4682  if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
4683  N0.hasOneUse()) {
4684  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4685  EVT MemVT = LN0->getMemoryVT();
4686  // If we zero all the possible extended bits, then we can turn this into
4687  // a zextload if we are running before legalize or the operation is legal.
4688  unsigned BitWidth = N1.getScalarValueSizeInBits();
4689  if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
4690  BitWidth - MemVT.getScalarSizeInBits())) &&
4691  ((!LegalOperations && !LN0->isVolatile()) ||
4692  TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
4693  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
4694  LN0->getChain(), LN0->getBasePtr(),
4695  MemVT, LN0->getMemOperand());
4696  AddToWorklist(N);
4697  CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
4698  return SDValue(N, 0); // Return N so it doesn't get rechecked!
4699  }
4700  }
4701  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
4702  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
4703  if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
4704  N0.getOperand(1), false))
4705  return BSwap;
4706  }
4707 
4708  if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
4709  return Shifts;
4710 
4711  return SDValue();
4712 }
4713 
4714 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
4715 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
4716  bool DemandHighBits) {
4717  if (!LegalOperations)
4718  return SDValue();
4719 
4720  EVT VT = N->getValueType(0);
4721  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
4722  return SDValue();
4723  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
4724  return SDValue();
4725 
4726  // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
4727  bool LookPassAnd0 = false;
4728  bool LookPassAnd1 = false;
4729  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
4730  std::swap(N0, N1);
4731  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
4732  std::swap(N0, N1);
4733  if (N0.getOpcode() == ISD::AND) {
4734  if (!N0.getNode()->hasOneUse())
4735  return SDValue();
4737  // Also handle 0xffff since the LHS is guaranteed to have zeros there.
4738  // This is needed for X86.
4739  if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
4740  N01C->getZExtValue() != 0xFFFF))
4741  return SDValue();
4742  N0 = N0.getOperand(0);
4743  LookPassAnd0 = true;
4744  }
4745 
4746  if (N1.getOpcode() == ISD::AND) {
4747  if (!N1.getNode()->hasOneUse())
4748  return SDValue();
4750  if (!N11C || N11C->getZExtValue() != 0xFF)
4751  return SDValue();
4752  N1 = N1.getOperand(0);
4753  LookPassAnd1 = true;
4754  }
4755 
4756  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
4757  std::swap(N0, N1);
4758  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
4759  return SDValue();
4760  if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
4761  return SDValue();
4762 
4765  if (!N01C || !N11C)
4766  return SDValue();
4767  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
4768  return SDValue();
4769 
4770  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
4771  SDValue N00 = N0->getOperand(0);
4772  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
4773  if (!N00.getNode()->hasOneUse())
4774  return SDValue();
4776  if (!N001C || N001C->getZExtValue() != 0xFF)
4777  return SDValue();
4778  N00 = N00.getOperand(0);
4779  LookPassAnd0 = true;
4780  }
4781 
4782  SDValue N10 = N1->getOperand(0);
4783  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
4784  if (!N10.getNode()->hasOneUse())
4785  return SDValue();
4787  // Also allow 0xFFFF since the bits will be shifted out. This is needed
4788  // for X86.
4789  if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
4790  N101C->getZExtValue() != 0xFFFF))
4791  return SDValue();
4792  N10 = N10.getOperand(0);
4793  LookPassAnd1 = true;
4794  }
4795 
4796  if (N00 != N10)
4797  return SDValue();
4798 
4799  // Make sure everything beyond the low halfword gets set to zero since the SRL
4800  // 16 will clear the top bits.
4801  unsigned OpSizeInBits = VT.getSizeInBits();
4802  if (DemandHighBits && OpSizeInBits > 16) {
4803  // If the left-shift isn't masked out then the only way this is a bswap is
4804  // if all bits beyond the low 8 are 0. In that case the entire pattern
4805  // reduces to a left shift anyway: leave it for other parts of the combiner.
4806  if (!LookPassAnd0)
4807  return SDValue();
4808 
4809  // However, if the right shift isn't masked out then it might be because
4810  // it's not needed. See if we can spot that too.
4811  if (!LookPassAnd1 &&
4812  !DAG.MaskedValueIsZero(
4813  N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
4814  return SDValue();
4815  }
4816 
4817  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
4818  if (OpSizeInBits > 16) {
4819  SDLoc DL(N);
4820  Res = DAG.getNode(ISD::SRL, DL, VT, Res,
4821  DAG.getConstant(OpSizeInBits - 16, DL,
4822  getShiftAmountTy(VT)));
4823  }
4824  return Res;
4825 }
4826 
4827 /// Return true if the specified node is an element that makes up a 32-bit
4828 /// packed halfword byteswap.
4829 /// ((x & 0x000000ff) << 8) |
4830 /// ((x & 0x0000ff00) >> 8) |
4831 /// ((x & 0x00ff0000) << 8) |
4832 /// ((x & 0xff000000) >> 8)
4834  if (!N.getNode()->hasOneUse())
4835  return false;
4836 
4837  unsigned Opc = N.getOpcode();
4838  if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
4839  return false;
4840 
4841  SDValue N0 = N.getOperand(0);
4842  unsigned Opc0 = N0.getOpcode();
4843  if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
4844  return false;
4845 
4846  ConstantSDNode *N1C = nullptr;
4847  // SHL or SRL: look upstream for AND mask operand
4848  if (Opc == ISD::AND)
4849  N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4850  else if (Opc0 == ISD::AND)
4851  N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4852  if (!N1C)
4853  return false;
4854 
4855  unsigned MaskByteOffset;
4856  switch (N1C->getZExtValue()) {
4857  default:
4858  return