1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10 // both before and after the DAG is legalized.
11 //
12 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13 // primarily intended to handle simplification opportunities that are implicit
14 // in the LLVM IR and exposed by the various codegen lowering phases.
15 //
16 //===----------------------------------------------------------------------===//
17 
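As a concrete illustration of the kind of rewrite this pass performs, here is a minimal, hypothetical sketch (not part of this file) written against the same SelectionDAG API pulled in by the includes below: a visit-style helper that folds (add x, 0) back to x. The helper name foldAddOfZeroSketch is an assumption for illustration; the real folds live in the visit routines later in this file.

// Illustrative sketch only: fold (add X, 0) -> X.
// Returning SDValue() means "no change"; returning X asks the combiner to
// replace all uses of N with X (see the visit() return-value contract below).
static SDValue foldAddOfZeroSketch(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (auto *C = dyn_cast<ConstantSDNode>(N1))
    if (C->isNullValue())
      return N0; // (add x, 0) -> x
  return SDValue();
}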
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/IntervalMap.h"
23 #include "llvm/ADT/None.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SetVector.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/ADT/SmallSet.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/ADT/Statistic.h"
48 #include "llvm/IR/Attributes.h"
49 #include "llvm/IR/Constant.h"
50 #include "llvm/IR/DataLayout.h"
51 #include "llvm/IR/DerivedTypes.h"
52 #include "llvm/IR/Function.h"
53 #include "llvm/IR/LLVMContext.h"
54 #include "llvm/IR/Metadata.h"
55 #include "llvm/Support/Casting.h"
56 #include "llvm/Support/CodeGen.h"
58 #include "llvm/Support/Compiler.h"
59 #include "llvm/Support/Debug.h"
61 #include "llvm/Support/KnownBits.h"
67 #include <algorithm>
68 #include <cassert>
69 #include <cstdint>
70 #include <functional>
71 #include <iterator>
72 #include <string>
73 #include <tuple>
74 #include <utility>
75 
76 using namespace llvm;
77 
78 #define DEBUG_TYPE "dagcombine"
79 
80 STATISTIC(NodesCombined , "Number of dag nodes combined");
81 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
82 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
83 STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
84 STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
85 STATISTIC(SlicedLoads, "Number of load sliced");
86 STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
87 
88 static cl::opt<bool>
89 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
90  cl::desc("Enable DAG combiner's use of IR alias analysis"));
91 
92 static cl::opt<bool>
93 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
94  cl::desc("Enable DAG combiner's use of TBAA"));
95 
96 #ifndef NDEBUG
 97 static cl::opt<std::string>
 98 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
99  cl::desc("Only use DAG-combiner alias analysis in this"
100  " function"));
101 #endif
102 
103 /// Hidden option to stress test load slicing, i.e., when this option
104 /// is enabled, load slicing bypasses most of its profitability guards.
105 static cl::opt<bool>
106 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
107  cl::desc("Bypass the profitability model of load slicing"),
108  cl::init(false));
109 
110 static cl::opt<bool>
111  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
112  cl::desc("DAG combiner may split indexing from loads"));
113 
114 namespace {
115 
116  class DAGCombiner {
117  SelectionDAG &DAG;
118  const TargetLowering &TLI;
 119  CombineLevel Level;
 120  CodeGenOpt::Level OptLevel;
121  bool LegalOperations = false;
122  bool LegalTypes = false;
123  bool ForCodeSize;
124 
125  /// Worklist of all of the nodes that need to be simplified.
126  ///
127  /// This must behave as a stack -- new nodes to process are pushed onto the
128  /// back and when processing we pop off of the back.
129  ///
130  /// The worklist will not contain duplicates but may contain null entries
131  /// due to nodes being deleted from the underlying DAG.
132  SmallVector<SDNode *, 64> Worklist;
133 
134  /// Mapping from an SDNode to its position on the worklist.
135  ///
136  /// This is used to find and remove nodes from the worklist (by nulling
137  /// them) when they are deleted from the underlying DAG. It relies on
138  /// stable indices of nodes within the worklist.
139  DenseMap<SDNode *, unsigned> WorklistMap;
140 
141  /// Set of nodes which have been combined (at least once).
142  ///
143  /// This is used to allow us to reliably add any operands of a DAG node
144  /// which have not yet been combined to the worklist.
145  SmallPtrSet<SDNode *, 32> CombinedNodes;
146 
147  // AA - Used for DAG load/store alias analysis.
148  AliasAnalysis *AA;
149 
150  /// When an instruction is simplified, add all users of the instruction to
151  /// the work lists because they might get more simplified now.
152  void AddUsersToWorklist(SDNode *N) {
153  for (SDNode *Node : N->uses())
154  AddToWorklist(Node);
155  }
156 
157  /// Call the node-specific routine that folds each particular type of node.
158  SDValue visit(SDNode *N);
159 
160  public:
161  DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
162  : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
163  OptLevel(OL), AA(AA) {
164  ForCodeSize = DAG.getMachineFunction().getFunction().optForSize();
165 
166  MaximumLegalStoreInBits = 0;
167  for (MVT VT : MVT::all_valuetypes())
168  if (EVT(VT).isSimple() && VT != MVT::Other &&
169  TLI.isTypeLegal(EVT(VT)) &&
170  VT.getSizeInBits() >= MaximumLegalStoreInBits)
171  MaximumLegalStoreInBits = VT.getSizeInBits();
172  }
173 
 174  /// Add to the worklist making sure its instance is at the back (next to be
 175  /// processed).
176  void AddToWorklist(SDNode *N) {
178  "Deleted Node added to Worklist");
179 
180  // Skip handle nodes as they can't usefully be combined and confuse the
181  // zero-use deletion strategy.
182  if (N->getOpcode() == ISD::HANDLENODE)
183  return;
184 
185  if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
186  Worklist.push_back(N);
187  }
188 
189  /// Remove all instances of N from the worklist.
190  void removeFromWorklist(SDNode *N) {
191  CombinedNodes.erase(N);
192 
193  auto It = WorklistMap.find(N);
194  if (It == WorklistMap.end())
195  return; // Not in the worklist.
196 
197  // Null out the entry rather than erasing it to avoid a linear operation.
198  Worklist[It->second] = nullptr;
199  WorklistMap.erase(It);
200  }
201 
202  void deleteAndRecombine(SDNode *N);
203  bool recursivelyDeleteUnusedNodes(SDNode *N);
204 
205  /// Replaces all uses of the results of one DAG node with new values.
206  SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
207  bool AddTo = true);
208 
209  /// Replaces all uses of the results of one DAG node with new values.
210  SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
211  return CombineTo(N, &Res, 1, AddTo);
212  }
213 
214  /// Replaces all uses of the results of one DAG node with new values.
215  SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
216  bool AddTo = true) {
217  SDValue To[] = { Res0, Res1 };
218  return CombineTo(N, To, 2, AddTo);
219  }
220 
221  void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
222 
223  private:
224  unsigned MaximumLegalStoreInBits;
225 
226  /// Check the specified integer node value to see if it can be simplified or
227  /// if things it uses can be simplified by bit propagation.
228  /// If so, return true.
229  bool SimplifyDemandedBits(SDValue Op) {
230  unsigned BitWidth = Op.getScalarValueSizeInBits();
231  APInt Demanded = APInt::getAllOnesValue(BitWidth);
232  return SimplifyDemandedBits(Op, Demanded);
233  }
234 
235  /// Check the specified vector node value to see if it can be simplified or
236  /// if things it uses can be simplified as it only uses some of the
237  /// elements. If so, return true.
238  bool SimplifyDemandedVectorElts(SDValue Op) {
239  unsigned NumElts = Op.getValueType().getVectorNumElements();
240  APInt Demanded = APInt::getAllOnesValue(NumElts);
241  return SimplifyDemandedVectorElts(Op, Demanded);
242  }
243 
244  bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
245  bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
246  bool AssumeSingleUse = false);
247 
248  bool CombineToPreIndexedLoadStore(SDNode *N);
249  bool CombineToPostIndexedLoadStore(SDNode *N);
250  SDValue SplitIndexingFromLoad(LoadSDNode *LD);
251  bool SliceUpLoad(SDNode *N);
252 
253  // Scalars have size 0 to distinguish from singleton vectors.
254  SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
255  bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
256  bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
257 
258  /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
259  /// load.
260  ///
261  /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
262  /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
263  /// \param EltNo index of the vector element to load.
264  /// \param OriginalLoad load that EVE came from to be replaced.
 265  /// \returns EVE on success, SDValue() on failure.
266  SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
267  SDValue EltNo,
268  LoadSDNode *OriginalLoad);
269  void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
270  SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
271  SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
272  SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
273  SDValue PromoteIntBinOp(SDValue Op);
274  SDValue PromoteIntShiftOp(SDValue Op);
275  SDValue PromoteExtend(SDValue Op);
276  bool PromoteLoad(SDValue Op);
277 
278  /// Call the node-specific routine that knows how to fold each
279  /// particular type of node. If that doesn't do anything, try the
280  /// target-specific DAG combines.
281  SDValue combine(SDNode *N);
282 
283  // Visitation implementation - Implement dag node combining for different
284  // node types. The semantics are as follows:
285  // Return Value:
286  // SDValue.getNode() == 0 - No change was made
287  // SDValue.getNode() == N - N was replaced, is dead and has been handled.
288  // otherwise - N should be replaced by the returned Operand.
289  //
290  SDValue visitTokenFactor(SDNode *N);
291  SDValue visitMERGE_VALUES(SDNode *N);
292  SDValue visitADD(SDNode *N);
293  SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
294  SDValue visitSUB(SDNode *N);
295  SDValue visitADDSAT(SDNode *N);
296  SDValue visitSUBSAT(SDNode *N);
297  SDValue visitADDC(SDNode *N);
298  SDValue visitUADDO(SDNode *N);
299  SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
300  SDValue visitSUBC(SDNode *N);
301  SDValue visitUSUBO(SDNode *N);
302  SDValue visitADDE(SDNode *N);
303  SDValue visitADDCARRY(SDNode *N);
304  SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
305  SDValue visitSUBE(SDNode *N);
306  SDValue visitSUBCARRY(SDNode *N);
307  SDValue visitMUL(SDNode *N);
308  SDValue useDivRem(SDNode *N);
309  SDValue visitSDIV(SDNode *N);
310  SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
311  SDValue visitUDIV(SDNode *N);
312  SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
313  SDValue visitREM(SDNode *N);
314  SDValue visitMULHU(SDNode *N);
315  SDValue visitMULHS(SDNode *N);
316  SDValue visitSMUL_LOHI(SDNode *N);
317  SDValue visitUMUL_LOHI(SDNode *N);
318  SDValue visitSMULO(SDNode *N);
319  SDValue visitUMULO(SDNode *N);
320  SDValue visitIMINMAX(SDNode *N);
321  SDValue visitAND(SDNode *N);
322  SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
323  SDValue visitOR(SDNode *N);
324  SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
325  SDValue visitXOR(SDNode *N);
326  SDValue SimplifyVBinOp(SDNode *N);
327  SDValue visitSHL(SDNode *N);
328  SDValue visitSRA(SDNode *N);
329  SDValue visitSRL(SDNode *N);
330  SDValue visitFunnelShift(SDNode *N);
331  SDValue visitRotate(SDNode *N);
332  SDValue visitABS(SDNode *N);
333  SDValue visitBSWAP(SDNode *N);
334  SDValue visitBITREVERSE(SDNode *N);
335  SDValue visitCTLZ(SDNode *N);
336  SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
337  SDValue visitCTTZ(SDNode *N);
338  SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
339  SDValue visitCTPOP(SDNode *N);
340  SDValue visitSELECT(SDNode *N);
341  SDValue visitVSELECT(SDNode *N);
342  SDValue visitSELECT_CC(SDNode *N);
343  SDValue visitSETCC(SDNode *N);
344  SDValue visitSETCCCARRY(SDNode *N);
345  SDValue visitSIGN_EXTEND(SDNode *N);
346  SDValue visitZERO_EXTEND(SDNode *N);
347  SDValue visitANY_EXTEND(SDNode *N);
348  SDValue visitAssertExt(SDNode *N);
349  SDValue visitSIGN_EXTEND_INREG(SDNode *N);
350  SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
351  SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
352  SDValue visitTRUNCATE(SDNode *N);
353  SDValue visitBITCAST(SDNode *N);
354  SDValue visitBUILD_PAIR(SDNode *N);
355  SDValue visitFADD(SDNode *N);
356  SDValue visitFSUB(SDNode *N);
357  SDValue visitFMUL(SDNode *N);
358  SDValue visitFMA(SDNode *N);
359  SDValue visitFDIV(SDNode *N);
360  SDValue visitFREM(SDNode *N);
361  SDValue visitFSQRT(SDNode *N);
362  SDValue visitFCOPYSIGN(SDNode *N);
363  SDValue visitFPOW(SDNode *N);
364  SDValue visitSINT_TO_FP(SDNode *N);
365  SDValue visitUINT_TO_FP(SDNode *N);
366  SDValue visitFP_TO_SINT(SDNode *N);
367  SDValue visitFP_TO_UINT(SDNode *N);
368  SDValue visitFP_ROUND(SDNode *N);
369  SDValue visitFP_ROUND_INREG(SDNode *N);
370  SDValue visitFP_EXTEND(SDNode *N);
371  SDValue visitFNEG(SDNode *N);
372  SDValue visitFABS(SDNode *N);
373  SDValue visitFCEIL(SDNode *N);
374  SDValue visitFTRUNC(SDNode *N);
375  SDValue visitFFLOOR(SDNode *N);
376  SDValue visitFMINNUM(SDNode *N);
377  SDValue visitFMAXNUM(SDNode *N);
378  SDValue visitFMINIMUM(SDNode *N);
379  SDValue visitFMAXIMUM(SDNode *N);
380  SDValue visitBRCOND(SDNode *N);
381  SDValue visitBR_CC(SDNode *N);
382  SDValue visitLOAD(SDNode *N);
383 
384  SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
385  SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
386 
387  SDValue visitSTORE(SDNode *N);
388  SDValue visitINSERT_VECTOR_ELT(SDNode *N);
389  SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
390  SDValue visitBUILD_VECTOR(SDNode *N);
391  SDValue visitCONCAT_VECTORS(SDNode *N);
392  SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
393  SDValue visitVECTOR_SHUFFLE(SDNode *N);
394  SDValue visitSCALAR_TO_VECTOR(SDNode *N);
395  SDValue visitINSERT_SUBVECTOR(SDNode *N);
396  SDValue visitMLOAD(SDNode *N);
397  SDValue visitMSTORE(SDNode *N);
398  SDValue visitMGATHER(SDNode *N);
399  SDValue visitMSCATTER(SDNode *N);
400  SDValue visitFP_TO_FP16(SDNode *N);
401  SDValue visitFP16_TO_FP(SDNode *N);
402 
403  SDValue visitFADDForFMACombine(SDNode *N);
404  SDValue visitFSUBForFMACombine(SDNode *N);
405  SDValue visitFMULForFMADistributiveCombine(SDNode *N);
406 
407  SDValue XformToShuffleWithZero(SDNode *N);
408  SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
409  SDValue N1, SDNodeFlags Flags);
410 
411  SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
412 
413  SDValue foldSelectOfConstants(SDNode *N);
414  SDValue foldVSelectOfConstants(SDNode *N);
415  SDValue foldBinOpIntoSelect(SDNode *BO);
416  bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
417  SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
418  SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
419  SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
420  SDValue N2, SDValue N3, ISD::CondCode CC,
421  bool NotExtCompare = false);
422  SDValue convertSelectOfFPConstantsToLoadOffset(
423  const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
424  ISD::CondCode CC);
425  SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
426  SDValue N2, SDValue N3, ISD::CondCode CC);
427  SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
428  const SDLoc &DL);
429  SDValue unfoldMaskedMerge(SDNode *N);
430  SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
431  SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
432  const SDLoc &DL, bool foldBooleans);
433  SDValue rebuildSetCC(SDValue N);
434 
435  bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
436  SDValue &CC) const;
437  bool isOneUseSetCC(SDValue N) const;
438 
439  SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
440  unsigned HiOp);
441  SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
442  SDValue CombineExtLoad(SDNode *N);
443  SDValue CombineZExtLogicopShiftLoad(SDNode *N);
444  SDValue combineRepeatedFPDivisors(SDNode *N);
445  SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
446  SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
447  SDValue BuildSDIV(SDNode *N);
448  SDValue BuildSDIVPow2(SDNode *N);
449  SDValue BuildUDIV(SDNode *N);
450  SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
451  SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
452  SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
453  SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
454  SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
455  SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
456  SDNodeFlags Flags, bool Reciprocal);
457  SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
458  SDNodeFlags Flags, bool Reciprocal);
459  SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
460  bool DemandHighBits = true);
461  SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
462  SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
463  SDValue InnerPos, SDValue InnerNeg,
464  unsigned PosOpcode, unsigned NegOpcode,
465  const SDLoc &DL);
466  SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
467  SDValue MatchLoadCombine(SDNode *N);
468  SDValue ReduceLoadWidth(SDNode *N);
469  SDValue ReduceLoadOpStoreWidth(SDNode *N);
471  SDValue TransformFPLoadStorePair(SDNode *N);
472  SDValue convertBuildVecZextToZext(SDNode *N);
473  SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
474  SDValue reduceBuildVecToShuffle(SDNode *N);
475  SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
476  ArrayRef<int> VectorMask, SDValue VecIn1,
477  SDValue VecIn2, unsigned LeftIdx);
478  SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
479 
480  /// Walk up chain skipping non-aliasing memory nodes,
481  /// looking for aliasing nodes and adding them to the Aliases vector.
482  void GatherAllAliases(LSBaseSDNode *N, SDValue OriginalChain,
483  SmallVectorImpl<SDValue> &Aliases);
484 
485  /// Return true if there is any possibility that the two addresses overlap.
486  bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
487 
488  /// Walk up chain skipping non-aliasing memory nodes, looking for a better
489  /// chain (aliasing node.)
490  SDValue FindBetterChain(LSBaseSDNode *N, SDValue Chain);
491 
492  /// Try to replace a store and any possibly adjacent stores on
493  /// consecutive chains with better chains. Return true only if St is
494  /// replaced.
495  ///
496  /// Notice that other chains may still be replaced even if the function
497  /// returns false.
498  bool findBetterNeighborChains(StoreSDNode *St);
499 
 500  // Helper for findBetterNeighborChains. Walk up the store chain and add
 501  // additional chained stores that do not overlap and can be parallelized.
502  bool parallelizeChainedStores(StoreSDNode *St);
503 
504  /// Holds a pointer to an LSBaseSDNode as well as information on where it
505  /// is located in a sequence of memory operations connected by a chain.
506  struct MemOpLink {
507  // Ptr to the mem node.
508  LSBaseSDNode *MemNode;
509 
510  // Offset from the base ptr.
511  int64_t OffsetFromBase;
512 
513  MemOpLink(LSBaseSDNode *N, int64_t Offset)
514  : MemNode(N), OffsetFromBase(Offset) {}
515  };
516 
517  /// This is a helper function for visitMUL to check the profitability
518  /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
519  /// MulNode is the original multiply, AddNode is (add x, c1),
520  /// and ConstNode is c2.
521  bool isMulAddWithConstProfitable(SDNode *MulNode,
522  SDValue &AddNode,
523  SDValue &ConstNode);
524 
525  /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
526  /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
527  /// the type of the loaded value to be extended.
528  bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
529  EVT LoadResultTy, EVT &ExtVT);
530 
531  /// Helper function to calculate whether the given Load/Store can have its
532  /// width reduced to ExtVT.
533  bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
534  EVT &MemVT, unsigned ShAmt = 0);
535 
536  /// Used by BackwardsPropagateMask to find suitable loads.
537  bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
538  SmallPtrSetImpl<SDNode*> &NodesWithConsts,
539  ConstantSDNode *Mask, SDNode *&NodeToMask);
540  /// Attempt to propagate a given AND node back to load leaves so that they
541  /// can be combined into narrow loads.
542  bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
543 
544  /// Helper function for MergeConsecutiveStores which merges the
545  /// component store chains.
546  SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
547  unsigned NumStores);
548 
549  /// This is a helper function for MergeConsecutiveStores. When the
550  /// source elements of the consecutive stores are all constants or
551  /// all extracted vector elements, try to merge them into one
552  /// larger store introducing bitcasts if necessary. \return True
553  /// if a merged store was created.
554  bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
555  EVT MemVT, unsigned NumStores,
556  bool IsConstantSrc, bool UseVector,
557  bool UseTrunc);
558 
559  /// This is a helper function for MergeConsecutiveStores. Stores
560  /// that potentially may be merged with St are placed in
561  /// StoreNodes. RootNode is a chain predecessor to all store
562  /// candidates.
563  void getStoreMergeCandidates(StoreSDNode *St,
564  SmallVectorImpl<MemOpLink> &StoreNodes,
565  SDNode *&Root);
566 
567  /// Helper function for MergeConsecutiveStores. Checks if
568  /// candidate stores have indirect dependency through their
569  /// operands. RootNode is the predecessor to all stores calculated
570  /// by getStoreMergeCandidates and is used to prune the dependency check.
571  /// \return True if safe to merge.
572  bool checkMergeStoreCandidatesForDependencies(
573  SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
574  SDNode *RootNode);
575 
576  /// Merge consecutive store operations into a wide store.
577  /// This optimization uses wide integers or vectors when possible.
 578  /// \return True if any stores were merged into a merged store (the
 579  /// affected nodes are stored as a prefix in \p StoreNodes).
580  bool MergeConsecutiveStores(StoreSDNode *St);
581 
582  /// Try to transform a truncation where C is a constant:
583  /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
584  ///
585  /// \p N needs to be a truncation and its first operand an AND. Other
586  /// requirements are checked by the function (e.g. that trunc is
587  /// single-use) and if missed an empty SDValue is returned.
588  SDValue distributeTruncateThroughAnd(SDNode *N);
589 
590  /// Helper function to determine whether the target supports operation
591  /// given by \p Opcode for type \p VT, that is, whether the operation
 592  /// is legal or custom before legalizing operations, and whether it is
 593  /// legal (but not custom) after legalization.
594  bool hasOperation(unsigned Opcode, EVT VT) {
595  if (LegalOperations)
596  return TLI.isOperationLegal(Opcode, VT);
597  return TLI.isOperationLegalOrCustom(Opcode, VT);
598  }
599 
600  public:
601  /// Runs the dag combiner on all nodes in the work list
602  void Run(CombineLevel AtLevel);
603 
604  SelectionDAG &getDAG() const { return DAG; }
605 
606  /// Returns a type large enough to hold any valid shift amount - before type
607  /// legalization these can be huge.
608  EVT getShiftAmountTy(EVT LHSTy) {
609  assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
610  return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
611  }
612 
613  /// This method returns true if we are running before type legalization or
614  /// if the specified VT is legal.
615  bool isTypeLegal(const EVT &VT) {
616  if (!LegalTypes) return true;
617  return TLI.isTypeLegal(VT);
618  }
619 
620  /// Convenience wrapper around TargetLowering::getSetCCResultType
621  EVT getSetCCResultType(EVT VT) const {
622  return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
623  }
624 
625  void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
626  SDValue OrigLoad, SDValue ExtLoad,
 627  ISD::NodeType ExtType);
 628  };
629 
630 /// This class is a DAGUpdateListener that removes any deleted
631 /// nodes from the worklist.
632 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
633  DAGCombiner &DC;
634 
635 public:
636  explicit WorklistRemover(DAGCombiner &dc)
637  : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
638 
639  void NodeDeleted(SDNode *N, SDNode *E) override {
640  DC.removeFromWorklist(N);
641  }
642 };
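As a usage sketch (this mirrors how CombineTo and CommitTargetLoweringOpt use the class further down; the helper name and parameters here are hypothetical): keeping a WorklistRemover alive across a replacement guarantees that any node the DAG deletes is nulled out of the combiner's worklist via NodeDeleted.

// Hypothetical helper, for illustration only.
static void replaceAndPruneSketch(DAGCombiner &DC, SDValue Old, SDValue New) {
  WorklistRemover DeadNodes(DC);                    // registers as a listener on DC's DAG
  DC.getDAG().ReplaceAllUsesOfValueWith(Old, New);  // deletions invoke NodeDeleted()
}                                                   // listener unregisters on destruction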
643 
644 } // end anonymous namespace
645 
646 //===----------------------------------------------------------------------===//
647 // TargetLowering::DAGCombinerInfo implementation
648 //===----------------------------------------------------------------------===//
649 
 650 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
 651  ((DAGCombiner*)DC)->AddToWorklist(N);
652 }
653 
 654 SDValue TargetLowering::DAGCombinerInfo::
 655 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
656  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
657 }
658 
 659 SDValue TargetLowering::DAGCombinerInfo::
 660 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
661  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
662 }
663 
 664 SDValue TargetLowering::DAGCombinerInfo::
 665 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
666  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
667 }
668 
 669 void TargetLowering::DAGCombinerInfo::
 670 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
 671  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
672 }
673 
674 //===----------------------------------------------------------------------===//
675 // Helper Functions
676 //===----------------------------------------------------------------------===//
677 
678 void DAGCombiner::deleteAndRecombine(SDNode *N) {
679  removeFromWorklist(N);
680 
681  // If the operands of this node are only used by the node, they will now be
682  // dead. Make sure to re-visit them and recursively delete dead nodes.
683  for (const SDValue &Op : N->ops())
684  // For an operand generating multiple values, one of the values may
685  // become dead allowing further simplification (e.g. split index
686  // arithmetic from an indexed load).
687  if (Op->hasOneUse() || Op->getNumValues() > 1)
688  AddToWorklist(Op.getNode());
689 
690  DAG.DeleteNode(N);
691 }
692 
693 /// Return 1 if we can compute the negated form of the specified expression for
694 /// the same cost as the expression itself, or 2 if we can compute the negated
695 /// form more cheaply than the expression itself.
696 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
697  const TargetLowering &TLI,
698  const TargetOptions *Options,
699  unsigned Depth = 0) {
700  // fneg is removable even if it has multiple uses.
701  if (Op.getOpcode() == ISD::FNEG) return 2;
702 
703  // Don't allow anything with multiple uses unless we know it is free.
704  EVT VT = Op.getValueType();
705  const SDNodeFlags Flags = Op->getFlags();
706  if (!Op.hasOneUse())
707  if (!(Op.getOpcode() == ISD::FP_EXTEND &&
708  TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
709  return 0;
710 
711  // Don't recurse exponentially.
712  if (Depth > 6) return 0;
713 
714  switch (Op.getOpcode()) {
 715  default: return 0;
716  case ISD::ConstantFP: {
717  if (!LegalOperations)
718  return 1;
719 
720  // Don't invert constant FP values after legalization unless the target says
721  // the negated constant is legal.
722  return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
723  TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
724  }
725  case ISD::FADD:
726  if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
727  return 0;
728 
729  // After operation legalization, it might not be legal to create new FSUBs.
730  if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
731  return 0;
732 
733  // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
734  if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
735  Options, Depth + 1))
736  return V;
737  // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
738  return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
739  Depth + 1);
740  case ISD::FSUB:
741  // We can't turn -(A-B) into B-A when we honor signed zeros.
742  if (!Options->NoSignedZerosFPMath &&
743  !Flags.hasNoSignedZeros())
744  return 0;
745 
746  // fold (fneg (fsub A, B)) -> (fsub B, A)
747  return 1;
748 
749  case ISD::FMUL:
750  case ISD::FDIV:
751  // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
752  if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
753  Options, Depth + 1))
754  return V;
755 
756  return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
757  Depth + 1);
758 
759  case ISD::FP_EXTEND:
760  case ISD::FP_ROUND:
761  case ISD::FSIN:
762  return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
763  Depth + 1);
764  }
765 }
766 
 767 /// If isNegatibleForFree returns a nonzero value, return the newly negated expression.
 768 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
 769  bool LegalOperations, unsigned Depth = 0) {
770  const TargetOptions &Options = DAG.getTarget().Options;
771  // fneg is removable even if it has multiple uses.
772  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
773 
774  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
775 
776  const SDNodeFlags Flags = Op.getNode()->getFlags();
777 
778  switch (Op.getOpcode()) {
779  default: llvm_unreachable("Unknown code");
780  case ISD::ConstantFP: {
781  APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
782  V.changeSign();
783  return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
784  }
785  case ISD::FADD:
786  assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());
787 
788  // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
789  if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
790  DAG.getTargetLoweringInfo(), &Options, Depth+1))
791  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
792  GetNegatedExpression(Op.getOperand(0), DAG,
793  LegalOperations, Depth+1),
794  Op.getOperand(1), Flags);
795  // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
796  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
797  GetNegatedExpression(Op.getOperand(1), DAG,
798  LegalOperations, Depth+1),
799  Op.getOperand(0), Flags);
800  case ISD::FSUB:
801  // fold (fneg (fsub 0, B)) -> B
802  if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
803  if (N0CFP->isZero())
804  return Op.getOperand(1);
805 
806  // fold (fneg (fsub A, B)) -> (fsub B, A)
807  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
808  Op.getOperand(1), Op.getOperand(0), Flags);
809 
810  case ISD::FMUL:
811  case ISD::FDIV:
812  // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
813  if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
814  DAG.getTargetLoweringInfo(), &Options, Depth+1))
815  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
816  GetNegatedExpression(Op.getOperand(0), DAG,
817  LegalOperations, Depth+1),
818  Op.getOperand(1), Flags);
819 
820  // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
821  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
822  Op.getOperand(0),
823  GetNegatedExpression(Op.getOperand(1), DAG,
824  LegalOperations, Depth+1), Flags);
825 
826  case ISD::FP_EXTEND:
827  case ISD::FSIN:
828  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
829  GetNegatedExpression(Op.getOperand(0), DAG,
830  LegalOperations, Depth+1));
831  case ISD::FP_ROUND:
832  return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
833  GetNegatedExpression(Op.getOperand(0), DAG,
834  LegalOperations, Depth+1),
835  Op.getOperand(1));
836  }
837 }
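A short sketch of how these two helpers pair up at a call site (this mirrors the floating-point visitors later in the file; N, N0, N1 and VT stand in for the usual visitor locals): ask isNegatibleForFree first, and only materialize the negation when it reports that the fneg is free.

// e.g. fold (fsub A, B) -> (fadd A, (fneg B)) when negating B costs nothing.
const TargetOptions &Options = DAG.getTarget().Options;
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
  return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0,
                     GetNegatedExpression(N1, DAG, LegalOperations),
                     N->getFlags());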
838 
 839 // APInts must be the same size for most operations; this helper
 840 // function zero-extends the shorter of the pair so that they match.
841 // We provide an Offset so that we can create bitwidths that won't overflow.
842 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
843  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
844  LHS = LHS.zextOrSelf(Bits);
845  RHS = RHS.zextOrSelf(Bits);
846 }
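A minimal usage sketch with made-up values: the widths are unified so that a subsequent bitwise combine, which requires equal APInt widths, is legal.

APInt Mask(8, 0xF0), ShiftedMask(16, 0x0F00); // 8-bit and 16-bit values
zeroExtendToMatch(Mask, ShiftedMask);         // both are now 16 bits wide
APInt Combined = Mask | ShiftedMask;          // valid only with matching widths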
847 
848 // Return true if this node is a setcc, or is a select_cc
849 // that selects between the target values used for true and false, making it
850 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
851 // the appropriate nodes based on the type of node we are checking. This
852 // simplifies life a bit for the callers.
853 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
854  SDValue &CC) const {
855  if (N.getOpcode() == ISD::SETCC) {
856  LHS = N.getOperand(0);
857  RHS = N.getOperand(1);
858  CC = N.getOperand(2);
859  return true;
860  }
861 
862  if (N.getOpcode() != ISD::SELECT_CC ||
863  !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
864  !TLI.isConstFalseVal(N.getOperand(3).getNode()))
865  return false;
866 
867  if (TLI.getBooleanContents(N.getValueType()) ==
 868  TargetLowering::UndefinedBooleanContent)
 869  return false;
870 
871  LHS = N.getOperand(0);
872  RHS = N.getOperand(1);
873  CC = N.getOperand(4);
874  return true;
875 }
876 
877 /// Return true if this is a SetCC-equivalent operation with only one use.
878 /// If this is true, it allows the users to invert the operation for free when
879 /// it is profitable to do so.
880 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
881  SDValue N0, N1, N2;
882  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
883  return true;
884  return false;
885 }
886 
887 // Returns the SDNode if it is a constant float BuildVector
888 // or constant float.
 889 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
 890  if (isa<ConstantFPSDNode>(N))
 891  return N.getNode();
 892  if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
 893  return N.getNode();
894  return nullptr;
895 }
896 
897 // Determines if it is a constant integer or a build vector of constant
898 // integers (and undefs).
899 // Do not permit build vector implicit truncation.
900 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
901  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
902  return !(Const->isOpaque() && NoOpaques);
903  if (N.getOpcode() != ISD::BUILD_VECTOR)
904  return false;
905  unsigned BitWidth = N.getScalarValueSizeInBits();
906  for (const SDValue &Op : N->op_values()) {
907  if (Op.isUndef())
908  continue;
 909  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
 910  if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
911  (Const->isOpaque() && NoOpaques))
912  return false;
913  }
914  return true;
915 }
916 
 917 // Determines if a BUILD_VECTOR is composed of all constants, possibly mixed
 918 // with undefs.
919 static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
920  if (V.getOpcode() != ISD::BUILD_VECTOR)
921  return false;
922  return isConstantOrConstantVector(V, NoOpaques) ||
 923  ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
 924 }
925 
926 SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
927  SDValue N1, SDNodeFlags Flags) {
928  // Don't reassociate reductions.
929  if (Flags.hasVectorReduction())
930  return SDValue();
931 
932  EVT VT = N0.getValueType();
933  if (N0.getOpcode() == Opc && !N0->getFlags().hasVectorReduction()) {
 934  if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
 935  if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
 936  // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
937  if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
938  return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
939  return SDValue();
940  }
941  if (N0.hasOneUse()) {
942  // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
943  // use
944  SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
945  if (!OpNode.getNode())
946  return SDValue();
947  AddToWorklist(OpNode.getNode());
948  return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
949  }
950  }
951  }
952 
953  if (N1.getOpcode() == Opc && !N1->getFlags().hasVectorReduction()) {
 954  if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
 955  if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
 956  // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
957  if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
958  return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
959  return SDValue();
960  }
961  if (N1.hasOneUse()) {
962  // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
963  // use
964  SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
965  if (!OpNode.getNode())
966  return SDValue();
967  AddToWorklist(OpNode.getNode());
968  return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
969  }
970  }
971  }
972 
973  return SDValue();
974 }
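As a usage sketch (this is the shape of the calls made by the integer binary-op visitors later in the file; DL, N, N0 and N1 are the usual visitor locals): the caller simply offers its operands and returns whatever reassociated node comes back.

// e.g. in an ADD visitor: (add (add x, c1), c2) -> (add x, c1+c2).
if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
  return RADD;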
975 
976 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
977  bool AddTo) {
978  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
979  ++NodesCombined;
980  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
981  To[0].getNode()->dump(&DAG);
982  dbgs() << " and " << NumTo - 1 << " other values\n");
983  for (unsigned i = 0, e = NumTo; i != e; ++i)
984  assert((!To[i].getNode() ||
985  N->getValueType(i) == To[i].getValueType()) &&
986  "Cannot combine value to value of different type!");
987 
988  WorklistRemover DeadNodes(*this);
989  DAG.ReplaceAllUsesWith(N, To);
990  if (AddTo) {
991  // Push the new nodes and any users onto the worklist
992  for (unsigned i = 0, e = NumTo; i != e; ++i) {
993  if (To[i].getNode()) {
994  AddToWorklist(To[i].getNode());
995  AddUsersToWorklist(To[i].getNode());
996  }
997  }
998  }
999 
1000  // Finally, if the node is now dead, remove it from the graph. The node
1001  // may not be dead if the replacement process recursively simplified to
1002  // something else needing this node.
1003  if (N->use_empty())
1004  deleteAndRecombine(N);
1005  return SDValue(N, 0);
1006 }
1007 
1008 void DAGCombiner::
1009 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1010  // Replace all uses. If any nodes become isomorphic to other nodes and
1011  // are deleted, make sure to remove them from our worklist.
1012  WorklistRemover DeadNodes(*this);
1013  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1014 
1015  // Push the new node and any (possibly new) users onto the worklist.
1016  AddToWorklist(TLO.New.getNode());
1017  AddUsersToWorklist(TLO.New.getNode());
1018 
1019  // Finally, if the node is now dead, remove it from the graph. The node
1020  // may not be dead if the replacement process recursively simplified to
1021  // something else needing this node.
1022  if (TLO.Old.getNode()->use_empty())
1023  deleteAndRecombine(TLO.Old.getNode());
1024 }
1025 
1026 /// Check the specified integer node value to see if it can be simplified or if
1027 /// things it uses can be simplified by bit propagation. If so, return true.
1028 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
1029  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1030  KnownBits Known;
1031  if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
1032  return false;
1033 
1034  // Revisit the node.
1035  AddToWorklist(Op.getNode());
1036 
1037  // Replace the old value with the new one.
1038  ++NodesCombined;
1039  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1040  dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1041  dbgs() << '\n');
1042 
1043  CommitTargetLoweringOpt(TLO);
1044  return true;
1045 }
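A sketch of the typical call pattern inside a visit routine (mirroring many visitors later in the file): when demanded-bits analysis changed anything, the visitor returns the node itself, which tells Run() that CombineTo has already handled the replacement.

// Demand every bit of N's first result; the wrapper in the class above builds
// the all-ones mask and calls the TargetLowering machinery.
if (SimplifyDemandedBits(SDValue(N, 0)))
  return SDValue(N, 0);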
1046 
1047 /// Check the specified vector node value to see if it can be simplified or
1048 /// if things it uses can be simplified as it only uses some of the elements.
1049 /// If so, return true.
1050 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
1051  bool AssumeSingleUse) {
1052  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1053  APInt KnownUndef, KnownZero;
1054  if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO,
1055  0, AssumeSingleUse))
1056  return false;
1057 
1058  // Revisit the node.
1059  AddToWorklist(Op.getNode());
1060 
1061  // Replace the old value with the new one.
1062  ++NodesCombined;
1063  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1064  dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1065  dbgs() << '\n');
1066 
1067  CommitTargetLoweringOpt(TLO);
1068  return true;
1069 }
1070 
1071 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1072  SDLoc DL(Load);
1073  EVT VT = Load->getValueType(0);
1074  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1075 
1076  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1077  Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1078  WorklistRemover DeadNodes(*this);
1079  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1080  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1081  deleteAndRecombine(Load);
1082  AddToWorklist(Trunc.getNode());
1083 }
1084 
1085 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1086  Replace = false;
1087  SDLoc DL(Op);
1088  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1089  LoadSDNode *LD = cast<LoadSDNode>(Op);
1090  EVT MemVT = LD->getMemoryVT();
 1091  ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
 1092  : LD->getExtensionType();
1093  Replace = true;
1094  return DAG.getExtLoad(ExtType, DL, PVT,
1095  LD->getChain(), LD->getBasePtr(),
1096  MemVT, LD->getMemOperand());
1097  }
1098 
1099  unsigned Opc = Op.getOpcode();
1100  switch (Opc) {
1101  default: break;
1102  case ISD::AssertSext:
1103  if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1104  return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1105  break;
1106  case ISD::AssertZext:
1107  if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1108  return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1109  break;
1110  case ISD::Constant: {
1111  unsigned ExtOpc =
 1112  Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
 1113  return DAG.getNode(ExtOpc, DL, PVT, Op);
1114  }
1115  }
1116 
1117  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1118  return SDValue();
1119  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1120 }
1121 
1122 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
 1123  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
 1124  return SDValue();
1125  EVT OldVT = Op.getValueType();
1126  SDLoc DL(Op);
1127  bool Replace = false;
1128  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1129  if (!NewOp.getNode())
1130  return SDValue();
1131  AddToWorklist(NewOp.getNode());
1132 
1133  if (Replace)
1134  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1135  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1136  DAG.getValueType(OldVT));
1137 }
1138 
1139 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1140  EVT OldVT = Op.getValueType();
1141  SDLoc DL(Op);
1142  bool Replace = false;
1143  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1144  if (!NewOp.getNode())
1145  return SDValue();
1146  AddToWorklist(NewOp.getNode());
1147 
1148  if (Replace)
1149  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1150  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1151 }
1152 
1153 /// Promote the specified integer binary operation if the target indicates it is
1154 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1155 /// i32 since i16 instructions are longer.
1156 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1157  if (!LegalOperations)
1158  return SDValue();
1159 
1160  EVT VT = Op.getValueType();
1161  if (VT.isVector() || !VT.isInteger())
1162  return SDValue();
1163 
1164  // If operation type is 'undesirable', e.g. i16 on x86, consider
1165  // promoting it.
1166  unsigned Opc = Op.getOpcode();
1167  if (TLI.isTypeDesirableForOp(Opc, VT))
1168  return SDValue();
1169 
1170  EVT PVT = VT;
1171  // Consult target whether it is a good idea to promote this operation and
1172  // what's the right type to promote it to.
1173  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1174  assert(PVT != VT && "Don't know what type to promote to!");
1175 
1176  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1177 
1178  bool Replace0 = false;
1179  SDValue N0 = Op.getOperand(0);
1180  SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1181 
1182  bool Replace1 = false;
1183  SDValue N1 = Op.getOperand(1);
1184  SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1185  SDLoc DL(Op);
1186 
1187  SDValue RV =
1188  DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1189 
1190  // We are always replacing N0/N1's use in N and only need
1191  // additional replacements if there are additional uses.
1192  Replace0 &= !N0->hasOneUse();
1193  Replace1 &= (N0 != N1) && !N1->hasOneUse();
1194 
1195  // Combine Op here so it is preserved past replacements.
1196  CombineTo(Op.getNode(), RV);
1197 
1198  // If operands have a use ordering, make sure we deal with
1199  // predecessor first.
1200  if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1201  std::swap(N0, N1);
1202  std::swap(NN0, NN1);
1203  }
1204 
1205  if (Replace0) {
1206  AddToWorklist(NN0.getNode());
1207  ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1208  }
1209  if (Replace1) {
1210  AddToWorklist(NN1.getNode());
1211  ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1212  }
1213  return Op;
1214  }
1215  return SDValue();
1216 }
1217 
1218 /// Promote the specified integer shift operation if the target indicates it is
1219 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1220 /// i32 since i16 instructions are longer.
1221 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1222  if (!LegalOperations)
1223  return SDValue();
1224 
1225  EVT VT = Op.getValueType();
1226  if (VT.isVector() || !VT.isInteger())
1227  return SDValue();
1228 
1229  // If operation type is 'undesirable', e.g. i16 on x86, consider
1230  // promoting it.
1231  unsigned Opc = Op.getOpcode();
1232  if (TLI.isTypeDesirableForOp(Opc, VT))
1233  return SDValue();
1234 
1235  EVT PVT = VT;
1236  // Consult target whether it is a good idea to promote this operation and
1237  // what's the right type to promote it to.
1238  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1239  assert(PVT != VT && "Don't know what type to promote to!");
1240 
1241  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1242 
1243  bool Replace = false;
1244  SDValue N0 = Op.getOperand(0);
1245  SDValue N1 = Op.getOperand(1);
1246  if (Opc == ISD::SRA)
1247  N0 = SExtPromoteOperand(N0, PVT);
1248  else if (Opc == ISD::SRL)
1249  N0 = ZExtPromoteOperand(N0, PVT);
1250  else
1251  N0 = PromoteOperand(N0, PVT, Replace);
1252 
1253  if (!N0.getNode())
1254  return SDValue();
1255 
1256  SDLoc DL(Op);
1257  SDValue RV =
1258  DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1259 
1260  AddToWorklist(N0.getNode());
1261  if (Replace)
1262  ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1263 
1264  // Deal with Op being deleted.
1265  if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1266  return RV;
1267  }
1268  return SDValue();
1269 }
1270 
1271 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1272  if (!LegalOperations)
1273  return SDValue();
1274 
1275  EVT VT = Op.getValueType();
1276  if (VT.isVector() || !VT.isInteger())
1277  return SDValue();
1278 
1279  // If operation type is 'undesirable', e.g. i16 on x86, consider
1280  // promoting it.
1281  unsigned Opc = Op.getOpcode();
1282  if (TLI.isTypeDesirableForOp(Opc, VT))
1283  return SDValue();
1284 
1285  EVT PVT = VT;
1286  // Consult target whether it is a good idea to promote this operation and
1287  // what's the right type to promote it to.
1288  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1289  assert(PVT != VT && "Don't know what type to promote to!");
1290  // fold (aext (aext x)) -> (aext x)
1291  // fold (aext (zext x)) -> (zext x)
1292  // fold (aext (sext x)) -> (sext x)
1293  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1294  return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1295  }
1296  return SDValue();
1297 }
1298 
1299 bool DAGCombiner::PromoteLoad(SDValue Op) {
1300  if (!LegalOperations)
1301  return false;
1302 
1303  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1304  return false;
1305 
1306  EVT VT = Op.getValueType();
1307  if (VT.isVector() || !VT.isInteger())
1308  return false;
1309 
1310  // If operation type is 'undesirable', e.g. i16 on x86, consider
1311  // promoting it.
1312  unsigned Opc = Op.getOpcode();
1313  if (TLI.isTypeDesirableForOp(Opc, VT))
1314  return false;
1315 
1316  EVT PVT = VT;
1317  // Consult target whether it is a good idea to promote this operation and
1318  // what's the right type to promote it to.
1319  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1320  assert(PVT != VT && "Don't know what type to promote to!");
1321 
1322  SDLoc DL(Op);
1323  SDNode *N = Op.getNode();
1324  LoadSDNode *LD = cast<LoadSDNode>(N);
1325  EVT MemVT = LD->getMemoryVT();
 1326  ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
 1327  : LD->getExtensionType();
1328  SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1329  LD->getChain(), LD->getBasePtr(),
1330  MemVT, LD->getMemOperand());
1331  SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1332 
1333  LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1334  Result.getNode()->dump(&DAG); dbgs() << '\n');
1335  WorklistRemover DeadNodes(*this);
1336  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1337  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1338  deleteAndRecombine(N);
1339  AddToWorklist(Result.getNode());
1340  return true;
1341  }
1342  return false;
1343 }
1344 
1345 /// Recursively delete a node which has no uses and any operands for
1346 /// which it is the only use.
1347 ///
1348 /// Note that this both deletes the nodes and removes them from the worklist.
 1349 /// It also adds any nodes that have had a user deleted to the worklist, as they
 1350 /// may now have only one use and be subject to other combines.
1351 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1352  if (!N->use_empty())
1353  return false;
1354 
 1355  SmallSetVector<SDNode *, 16> Nodes;
 1356  Nodes.insert(N);
1357  do {
1358  N = Nodes.pop_back_val();
1359  if (!N)
1360  continue;
1361 
1362  if (N->use_empty()) {
1363  for (const SDValue &ChildN : N->op_values())
1364  Nodes.insert(ChildN.getNode());
1365 
1366  removeFromWorklist(N);
1367  DAG.DeleteNode(N);
1368  } else {
1369  AddToWorklist(N);
1370  }
1371  } while (!Nodes.empty());
1372  return true;
1373 }
1374 
1375 //===----------------------------------------------------------------------===//
1376 // Main DAG Combiner implementation
1377 //===----------------------------------------------------------------------===//
1378 
1379 void DAGCombiner::Run(CombineLevel AtLevel) {
 1380  // Set the instance variables so that the various visit routines may use them.
1381  Level = AtLevel;
1382  LegalOperations = Level >= AfterLegalizeVectorOps;
1383  LegalTypes = Level >= AfterLegalizeTypes;
1384 
1385  // Add all the dag nodes to the worklist.
1386  for (SDNode &Node : DAG.allnodes())
1387  AddToWorklist(&Node);
1388 
1389  // Create a dummy node (which is not added to allnodes), that adds a reference
1390  // to the root node, preventing it from being deleted, and tracking any
1391  // changes of the root.
1392  HandleSDNode Dummy(DAG.getRoot());
1393 
1394  // While the worklist isn't empty, find a node and try to combine it.
1395  while (!WorklistMap.empty()) {
1396  SDNode *N;
1397  // The Worklist holds the SDNodes in order, but it may contain null entries.
1398  do {
1399  N = Worklist.pop_back_val();
1400  } while (!N);
1401 
1402  bool GoodWorklistEntry = WorklistMap.erase(N);
1403  (void)GoodWorklistEntry;
1404  assert(GoodWorklistEntry &&
1405  "Found a worklist entry without a corresponding map entry!");
1406 
1407  // If N has no uses, it is dead. Make sure to revisit all N's operands once
1408  // N is deleted from the DAG, since they too may now be dead or may have a
1409  // reduced number of uses, allowing other xforms.
1410  if (recursivelyDeleteUnusedNodes(N))
1411  continue;
1412 
1413  WorklistRemover DeadNodes(*this);
1414 
1415  // If this combine is running after legalizing the DAG, re-legalize any
1416  // nodes pulled off the worklist.
1417  if (Level == AfterLegalizeDAG) {
1418  SmallSetVector<SDNode *, 16> UpdatedNodes;
1419  bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1420 
1421  for (SDNode *LN : UpdatedNodes) {
1422  AddToWorklist(LN);
1423  AddUsersToWorklist(LN);
1424  }
1425  if (!NIsValid)
1426  continue;
1427  }
1428 
1429  LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1430 
1431  // Add any operands of the new node which have not yet been combined to the
1432  // worklist as well. Because the worklist uniques things already, this
1433  // won't repeatedly process the same operand.
1434  CombinedNodes.insert(N);
1435  for (const SDValue &ChildN : N->op_values())
1436  if (!CombinedNodes.count(ChildN.getNode()))
1437  AddToWorklist(ChildN.getNode());
1438 
1439  SDValue RV = combine(N);
1440 
1441  if (!RV.getNode())
1442  continue;
1443 
1444  ++NodesCombined;
1445 
1446  // If we get back the same node we passed in, rather than a new node or
1447  // zero, we know that the node must have defined multiple values and
1448  // CombineTo was used. Since CombineTo takes care of the worklist
1449  // mechanics for us, we have no work to do in this case.
1450  if (RV.getNode() == N)
1451  continue;
1452 
 1453  assert(N->getOpcode() != ISD::DELETED_NODE &&
 1454  RV.getOpcode() != ISD::DELETED_NODE &&
1455  "Node was deleted but visit returned new node!");
1456 
1457  LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1458 
1459  if (N->getNumValues() == RV.getNode()->getNumValues())
1460  DAG.ReplaceAllUsesWith(N, RV.getNode());
1461  else {
1462  assert(N->getValueType(0) == RV.getValueType() &&
1463  N->getNumValues() == 1 && "Type mismatch");
1464  DAG.ReplaceAllUsesWith(N, &RV);
1465  }
1466 
1467  // Push the new node and any users onto the worklist
1468  AddToWorklist(RV.getNode());
1469  AddUsersToWorklist(RV.getNode());
1470 
1471  // Finally, if the node is now dead, remove it from the graph. The node
1472  // may not be dead if the replacement process recursively simplified to
1473  // something else needing this node. This will also take care of adding any
1474  // operands which have lost a user to the worklist.
1475  recursivelyDeleteUnusedNodes(N);
1476  }
1477 
 1478  // If the root changed (e.g. it was a dead load), update the root.
1479  DAG.setRoot(Dummy.getValue());
1480  DAG.RemoveDeadNodes();
1481 }
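For context, the driver that invokes Run sits near the end of the full file (outside this excerpt): SelectionDAG::Combine constructs a combiner and runs it in one step, roughly as follows.

void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
                           CodeGenOpt::Level OptLevel) {
  // Construct a throwaway DAGCombiner over this DAG and process its worklist.
  DAGCombiner(*this, AA, OptLevel).Run(Level);
}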
1482 
1483 SDValue DAGCombiner::visit(SDNode *N) {
1484  switch (N->getOpcode()) {
1485  default: break;
1486  case ISD::TokenFactor: return visitTokenFactor(N);
1487  case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1488  case ISD::ADD: return visitADD(N);
1489  case ISD::SUB: return visitSUB(N);
1490  case ISD::SADDSAT:
1491  case ISD::UADDSAT: return visitADDSAT(N);
1492  case ISD::SSUBSAT:
1493  case ISD::USUBSAT: return visitSUBSAT(N);
1494  case ISD::ADDC: return visitADDC(N);
1495  case ISD::UADDO: return visitUADDO(N);
1496  case ISD::SUBC: return visitSUBC(N);
1497  case ISD::USUBO: return visitUSUBO(N);
1498  case ISD::ADDE: return visitADDE(N);
1499  case ISD::ADDCARRY: return visitADDCARRY(N);
1500  case ISD::SUBE: return visitSUBE(N);
1501  case ISD::SUBCARRY: return visitSUBCARRY(N);
1502  case ISD::MUL: return visitMUL(N);
1503  case ISD::SDIV: return visitSDIV(N);
1504  case ISD::UDIV: return visitUDIV(N);
1505  case ISD::SREM:
1506  case ISD::UREM: return visitREM(N);
1507  case ISD::MULHU: return visitMULHU(N);
1508  case ISD::MULHS: return visitMULHS(N);
1509  case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1510  case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1511  case ISD::SMULO: return visitSMULO(N);
1512  case ISD::UMULO: return visitUMULO(N);
1513  case ISD::SMIN:
1514  case ISD::SMAX:
1515  case ISD::UMIN:
1516  case ISD::UMAX: return visitIMINMAX(N);
1517  case ISD::AND: return visitAND(N);
1518  case ISD::OR: return visitOR(N);
1519  case ISD::XOR: return visitXOR(N);
1520  case ISD::SHL: return visitSHL(N);
1521  case ISD::SRA: return visitSRA(N);
1522  case ISD::SRL: return visitSRL(N);
1523  case ISD::ROTR:
1524  case ISD::ROTL: return visitRotate(N);
1525  case ISD::FSHL:
1526  case ISD::FSHR: return visitFunnelShift(N);
1527  case ISD::ABS: return visitABS(N);
1528  case ISD::BSWAP: return visitBSWAP(N);
1529  case ISD::BITREVERSE: return visitBITREVERSE(N);
1530  case ISD::CTLZ: return visitCTLZ(N);
1531  case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1532  case ISD::CTTZ: return visitCTTZ(N);
1533  case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1534  case ISD::CTPOP: return visitCTPOP(N);
1535  case ISD::SELECT: return visitSELECT(N);
1536  case ISD::VSELECT: return visitVSELECT(N);
1537  case ISD::SELECT_CC: return visitSELECT_CC(N);
1538  case ISD::SETCC: return visitSETCC(N);
1539  case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1540  case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1541  case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1542  case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1543  case ISD::AssertSext:
1544  case ISD::AssertZext: return visitAssertExt(N);
1545  case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1546  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1547  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1548  case ISD::TRUNCATE: return visitTRUNCATE(N);
1549  case ISD::BITCAST: return visitBITCAST(N);
1550  case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1551  case ISD::FADD: return visitFADD(N);
1552  case ISD::FSUB: return visitFSUB(N);
1553  case ISD::FMUL: return visitFMUL(N);
1554  case ISD::FMA: return visitFMA(N);
1555  case ISD::FDIV: return visitFDIV(N);
1556  case ISD::FREM: return visitFREM(N);
1557  case ISD::FSQRT: return visitFSQRT(N);
1558  case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1559  case ISD::FPOW: return visitFPOW(N);
1560  case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1561  case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1562  case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1563  case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1564  case ISD::FP_ROUND: return visitFP_ROUND(N);
1565  case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
1566  case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1567  case ISD::FNEG: return visitFNEG(N);
1568  case ISD::FABS: return visitFABS(N);
1569  case ISD::FFLOOR: return visitFFLOOR(N);
1570  case ISD::FMINNUM: return visitFMINNUM(N);
1571  case ISD::FMAXNUM: return visitFMAXNUM(N);
1572  case ISD::FMINIMUM: return visitFMINIMUM(N);
1573  case ISD::FMAXIMUM: return visitFMAXIMUM(N);
1574  case ISD::FCEIL: return visitFCEIL(N);
1575  case ISD::FTRUNC: return visitFTRUNC(N);
1576  case ISD::BRCOND: return visitBRCOND(N);
1577  case ISD::BR_CC: return visitBR_CC(N);
1578  case ISD::LOAD: return visitLOAD(N);
1579  case ISD::STORE: return visitSTORE(N);
1580  case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1581  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1582  case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1583  case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1584  case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1585  case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1586  case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1587  case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1588  case ISD::MGATHER: return visitMGATHER(N);
1589  case ISD::MLOAD: return visitMLOAD(N);
1590  case ISD::MSCATTER: return visitMSCATTER(N);
1591  case ISD::MSTORE: return visitMSTORE(N);
1592  case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1593  case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1594  }
1595  return SDValue();
1596 }
1597 
1598 SDValue DAGCombiner::combine(SDNode *N) {
1599  SDValue RV = visit(N);
1600 
1601  // If nothing happened, try a target-specific DAG combine.
1602  if (!RV.getNode()) {
1603  assert(N->getOpcode() != ISD::DELETED_NODE &&
1604  "Node was deleted but visit returned NULL!");
1605 
1606  if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1607  TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1608 
1609  // Expose the DAG combiner to the target combiner impls.
1610  TargetLowering::DAGCombinerInfo
1611  DagCombineInfo(DAG, Level, false, this);
1612 
1613  RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1614  }
1615  }
1616 
1617  // If nothing happened still, try promoting the operation.
1618  if (!RV.getNode()) {
1619  switch (N->getOpcode()) {
1620  default: break;
1621  case ISD::ADD:
1622  case ISD::SUB:
1623  case ISD::MUL:
1624  case ISD::AND:
1625  case ISD::OR:
1626  case ISD::XOR:
1627  RV = PromoteIntBinOp(SDValue(N, 0));
1628  break;
1629  case ISD::SHL:
1630  case ISD::SRA:
1631  case ISD::SRL:
1632  RV = PromoteIntShiftOp(SDValue(N, 0));
1633  break;
1634  case ISD::SIGN_EXTEND:
1635  case ISD::ZERO_EXTEND:
1636  case ISD::ANY_EXTEND:
1637  RV = PromoteExtend(SDValue(N, 0));
1638  break;
1639  case ISD::LOAD:
1640  if (PromoteLoad(SDValue(N, 0)))
1641  RV = SDValue(N, 0);
1642  break;
1643  }
1644  }
1645 
1646  // If N is a commutative binary node, try to eliminate it if the commuted
1647  // version is already present in the DAG.
1648  if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1649  N->getNumValues() == 1) {
1650  SDValue N0 = N->getOperand(0);
1651  SDValue N1 = N->getOperand(1);
1652 
1653  // Constant operands are canonicalized to RHS.
1654  if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1655  SDValue Ops[] = {N1, N0};
1656  SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1657  N->getFlags());
1658  if (CSENode)
1659  return SDValue(CSENode, 0);
1660  }
1661  }
1662 
1663  return RV;
1664 }
1665 
1666 /// Given a node, return its input chain if it has one, otherwise return a null
1667 /// SDValue.
1668 static SDValue getInputChainForNode(SDNode *N) {
1669  if (unsigned NumOps = N->getNumOperands()) {
1670  if (N->getOperand(0).getValueType() == MVT::Other)
1671  return N->getOperand(0);
1672  if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1673  return N->getOperand(NumOps-1);
1674  for (unsigned i = 1; i < NumOps-1; ++i)
1675  if (N->getOperand(i).getValueType() == MVT::Other)
1676  return N->getOperand(i);
1677  }
1678  return SDValue();
1679 }
1680 
1681 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1682  // If N has two operands, where one has an input chain equal to the other,
1683  // the 'other' chain is redundant.
1684  if (N->getNumOperands() == 2) {
1685  if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1686  return N->getOperand(0);
1687  if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1688  return N->getOperand(1);
1689  }
1690 
1691  // Don't simplify token factors if optnone.
1692  if (OptLevel == CodeGenOpt::None)
1693  return SDValue();
1694 
1695  SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1696  SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1697  SmallPtrSet<SDNode*, 16> SeenOps;
1698  bool Changed = false; // If we should replace this token factor.
1699 
1700  // Start out with this token factor.
1701  TFs.push_back(N);
1702 
1703  // Iterate through token factors. The TFs list grows when new token factors are
1704  // encountered.
1705  for (unsigned i = 0; i < TFs.size(); ++i) {
1706  SDNode *TF = TFs[i];
1707 
1708  // Check each of the operands.
1709  for (const SDValue &Op : TF->op_values()) {
1710  switch (Op.getOpcode()) {
1711  case ISD::EntryToken:
1712  // Entry tokens don't need to be added to the list. They are
1713  // redundant.
1714  Changed = true;
1715  break;
1716 
1717  case ISD::TokenFactor:
1718  if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1719  // Queue up for processing.
1720  TFs.push_back(Op.getNode());
1721  // Clean up in case the token factor is removed.
1722  AddToWorklist(Op.getNode());
1723  Changed = true;
1724  break;
1725  }
1726  LLVM_FALLTHROUGH;
1727 
1728  default:
1729  // Only add if it isn't already in the list.
1730  if (SeenOps.insert(Op.getNode()).second)
1731  Ops.push_back(Op);
1732  else
1733  Changed = true;
1734  break;
1735  }
1736  }
1737  }
1738 
1739  // Remove Nodes that are chained to another node in the list. Do so
1740  // by walking up chains breadth-first, stopping when we've seen
1741  // another operand. In general we must climb to the EntryNode, but we can exit
1742  // early if we find all remaining work is associated with just one operand as
1743  // no further pruning is possible.
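  // For example, if operand A can be reached by walking up operand B's chain,
  // then A is already ordered before B and can be dropped from the token factor.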
1744 
1745  // List of nodes to search through and original Ops from which they originate.
1746  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1747  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1748  SmallPtrSet<SDNode *, 16> SeenChains;
1749  bool DidPruneOps = false;
1750 
1751  unsigned NumLeftToConsider = 0;
1752  for (const SDValue &Op : Ops) {
1753  Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1754  OpWorkCount.push_back(1);
1755  }
1756 
1757  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1758  // If this is an Op, we can remove the op from the list. Re-mark any
1759  // search associated with it as coming from the current OpNumber.
1760  if (SeenOps.count(Op) != 0) {
1761  Changed = true;
1762  DidPruneOps = true;
1763  unsigned OrigOpNumber = 0;
1764  while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1765  OrigOpNumber++;
1766  assert((OrigOpNumber != Ops.size()) &&
1767  "expected to find TokenFactor Operand");
1768  // Re-mark worklist from OrigOpNumber to OpNumber
1769  for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1770  if (Worklist[i].second == OrigOpNumber) {
1771  Worklist[i].second = OpNumber;
1772  }
1773  }
1774  OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1775  OpWorkCount[OrigOpNumber] = 0;
1776  NumLeftToConsider--;
1777  }
1778  // Add if it's a new chain
1779  if (SeenChains.insert(Op).second) {
1780  OpWorkCount[OpNumber]++;
1781  Worklist.push_back(std::make_pair(Op, OpNumber));
1782  }
1783  };
1784 
1785  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1786  // We need to consider at least 2 Ops to prune.
1787  if (NumLeftToConsider <= 1)
1788  break;
1789  auto CurNode = Worklist[i].first;
1790  auto CurOpNumber = Worklist[i].second;
1791  assert((OpWorkCount[CurOpNumber] > 0) &&
1792  "Node should not appear in worklist");
1793  switch (CurNode->getOpcode()) {
1794  case ISD::EntryToken:
1795  // Hitting EntryToken is the only way for the search to terminate without
1796  // hitting another operand's search.
1797  // Prevent us from marking this operand as
1798  // considered.
1799  NumLeftToConsider++;
1800  break;
1801  case ISD::TokenFactor:
1802  for (const SDValue &Op : CurNode->op_values())
1803  AddToWorklist(i, Op.getNode(), CurOpNumber);
1804  break;
1805  case ISD::CopyFromReg:
1806  case ISD::CopyToReg:
1807  AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1808  break;
1809  default:
1810  if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1811  AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1812  break;
1813  }
1814  OpWorkCount[CurOpNumber]--;
1815  if (OpWorkCount[CurOpNumber] == 0)
1816  NumLeftToConsider--;
1817  }
1818 
1819  // If we've changed things around then replace token factor.
1820  if (Changed) {
1821  SDValue Result;
1822  if (Ops.empty()) {
1823  // The entry token is the only possible outcome.
1824  Result = DAG.getEntryNode();
1825  } else {
1826  if (DidPruneOps) {
1827  SmallVector<SDValue, 8> PrunedOps;
1828  //
1829  for (const SDValue &Op : Ops) {
1830  if (SeenChains.count(Op.getNode()) == 0)
1831  PrunedOps.push_back(Op);
1832  }
1833  Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
1834  } else {
1835  Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
1836  }
1837  }
1838  return Result;
1839  }
1840  return SDValue();
1841 }
1842 
1843 /// MERGE_VALUES can always be eliminated.
1844 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1845  WorklistRemover DeadNodes(*this);
1846  // Replacing results may cause a different MERGE_VALUES to suddenly
1847  // be CSE'd with N, and carry its uses with it. Iterate until no
1848  // uses remain, to ensure that the node can be safely deleted.
1849  // First add the users of this node to the work list so that they
1850  // can be tried again once they have new operands.
1851  AddUsersToWorklist(N);
1852  do {
1853  // Do as a single replacement to avoid rewalking use lists.
1854  SmallVector<SDValue, 8> Ops;
1855  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1856  Ops.push_back(N->getOperand(i));
1857  DAG.ReplaceAllUsesWith(N, Ops.data());
1858  } while (!N->use_empty());
1859  deleteAndRecombine(N);
1860  return SDValue(N, 0); // Return N so it doesn't get rechecked!
1861 }
1862 
1863 /// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
1864 /// ConstantSDNode pointer; otherwise return nullptr.
1865 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1866  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1867  return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1868 }
1869 
1870 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
1871  assert(ISD::isBinaryOp(BO) && "Unexpected binary operator");
1872 
1873  // Don't do this unless the old select is going away. We want to eliminate the
1874  // binary operator, not replace a binop with a select.
1875  // TODO: Handle ISD::SELECT_CC.
1876  unsigned SelOpNo = 0;
1877  SDValue Sel = BO->getOperand(0);
1878  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
1879  SelOpNo = 1;
1880  Sel = BO->getOperand(1);
1881  }
1882 
1883  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1884  return SDValue();
1885 
1886  SDValue CT = Sel.getOperand(1);
1887  if (!isConstantOrConstantVector(CT, true) &&
1888  !isConstantFPBuildVectorOrConstantFP(CT))
1889  return SDValue();
1890 
1891  SDValue CF = Sel.getOperand(2);
1892  if (!isConstantOrConstantVector(CF, true) &&
1893  !isConstantFPBuildVectorOrConstantFP(CF))
1894  return SDValue();
1895 
1896  // Bail out if any constants are opaque because we can't constant fold those.
1897  // The exception is "and" and "or" with either 0 or -1 in which case we can
1898  // propagate non constant operands into select. I.e.:
1899  // and (select Cond, 0, -1), X --> select Cond, 0, X
1900  // or X, (select Cond, -1, 0) --> select Cond, -1, X
1901  auto BinOpcode = BO->getOpcode();
1902  bool CanFoldNonConst =
1903  (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
1904  (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
1905  (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
1906 
1907  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
1908  if (!CanFoldNonConst &&
1909  !isConstantOrConstantVector(CBO, true) &&
1910  !isConstantFPBuildVectorOrConstantFP(CBO))
1911  return SDValue();
1912 
1913  EVT VT = Sel.getValueType();
1914 
1915  // In the case of a shift, the value and the shift amount may have different
1916  // VTs. For instance, on x86 the shift amount is i8 regardless of the LHS type.
1917  // Bail out if we have swapped operands and the value types do not match.
1918  // NB: x86 is fine if the operands are not swapped and the shift amount VT is
1919  // no bigger than the shifted value's VT. TODO: it is possible to check for a
1920  // shift operation, correct the VTs, and still perform the optimization on x86.
1921  if (SelOpNo && VT != CBO.getValueType())
1922  return SDValue();
1923 
1924  // We have a select-of-constants followed by a binary operator with a
1925  // constant. Eliminate the binop by pulling the constant math into the select.
1926  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
1927  SDLoc DL(Sel);
1928  SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
1929  : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
1930  if (!CanFoldNonConst && !NewCT.isUndef() &&
1931  !isConstantOrConstantVector(NewCT, true) &&
1932  !isConstantFPBuildVectorOrConstantFP(NewCT))
1933  return SDValue();
1934 
1935  SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
1936  : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
1937  if (!CanFoldNonConst && !NewCF.isUndef() &&
1938  !isConstantOrConstantVector(NewCF, true) &&
1939  !isConstantFPBuildVectorOrConstantFP(NewCF))
1940  return SDValue();
1941 
1942  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
1943 }
1944 
1945 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
1946  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
1947  "Expecting add or sub");
1948 
1949  // Match a constant operand and a zext operand for the math instruction:
1950  // add Z, C
1951  // sub C, Z
1952  bool IsAdd = N->getOpcode() == ISD::ADD;
1953  SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
1954  SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
1955  auto *CN = dyn_cast<ConstantSDNode>(C);
1956  if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
1957  return SDValue();
1958 
1959  // Match the zext operand as a setcc of a boolean.
1960  if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
1961  Z.getOperand(0).getValueType() != MVT::i1)
1962  return SDValue();
1963 
1964  // Match the compare as: setcc (X & 1), 0, eq.
1965  SDValue SetCC = Z.getOperand(0);
1966  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
1967  if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
1968  SetCC.getOperand(0).getOpcode() != ISD::AND ||
1969  !isOneConstant(SetCC.getOperand(0).getOperand(1)))
1970  return SDValue();
1971 
1972  // We are adding/subtracting a constant and an inverted low bit. Turn that
1973  // into a subtract/add of the low bit with incremented/decremented constant:
1974  // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
1975  // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
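  // Sanity check of the first rule: if (X & 1) == 0 the zext is 1, so the
  // original value is C + 1 and the replacement is (C + 1) - 0 = C + 1; if
  // (X & 1) == 1 the zext is 0, so the original value is C and the
  // replacement is (C + 1) - 1 = C.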
1976  EVT VT = C.getValueType();
1977  SDLoc DL(N);
1978  SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
1979  SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
1980  DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
1981  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
1982 }
1983 
1984 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
1985 /// a shift and add with a different constant.
1986 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
1987  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
1988  "Expecting add or sub");
1989 
1990  // We need a constant operand for the add/sub, and the other operand is a
1991  // logical shift right: add (srl), C or sub C, (srl).
1992  bool IsAdd = N->getOpcode() == ISD::ADD;
1993  SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
1994  SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
1995  ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
1996  if (!C || ShiftOp.getOpcode() != ISD::SRL)
1997  return SDValue();
1998 
1999  // The shift must be of a 'not' value.
2000  SDValue Not = ShiftOp.getOperand(0);
2001  if (!Not.hasOneUse() || !isBitwiseNot(Not))
2002  return SDValue();
2003 
2004  // The shift must be moving the sign bit to the least-significant-bit.
2005  EVT VT = ShiftOp.getValueType();
2006  SDValue ShAmt = ShiftOp.getOperand(1);
2007  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2008  if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1)
2009  return SDValue();
2010 
2011  // Eliminate the 'not' by adjusting the shift and add/sub constant:
2012  // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2013  // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
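  // Checking the first rule: if X is non-negative, (not X) has the sign bit
  // set, so the srl yields 1 and the original value is C + 1, while
  // (sra X, 31) is 0 and 0 + (C + 1) matches. If X is negative, the srl
  // yields 0 (value C), while (sra X, 31) is -1 and -1 + (C + 1) = C.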
2014  SDLoc DL(N);
2015  auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2016  SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2017  APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
2018  return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
2019 }
2020 
2021 SDValue DAGCombiner::visitADD(SDNode *N) {
2022  SDValue N0 = N->getOperand(0);
2023  SDValue N1 = N->getOperand(1);
2024  EVT VT = N0.getValueType();
2025  SDLoc DL(N);
2026 
2027  // fold vector ops
2028  if (VT.isVector()) {
2029  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2030  return FoldedVOp;
2031 
2032  // fold (add x, 0) -> x, vector edition
2033  if (ISD::isBuildVectorAllZeros(N1.getNode()))
2034  return N0;
2035  if (ISD::isBuildVectorAllZeros(N0.getNode()))
2036  return N1;
2037  }
2038 
2039  // fold (add x, undef) -> undef
2040  if (N0.isUndef())
2041  return N0;
2042 
2043  if (N1.isUndef())
2044  return N1;
2045 
2046  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2047  // canonicalize constant to RHS
2048  if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2049  return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2050  // fold (add c1, c2) -> c1+c2
2051  return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
2052  N1.getNode());
2053  }
2054 
2055  // fold (add x, 0) -> x
2056  if (isNullConstant(N1))
2057  return N0;
2058 
2059  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2060  // fold ((c1-A)+c2) -> (c1+c2)-A
2061  if (N0.getOpcode() == ISD::SUB &&
2062  isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2063  // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
2064  return DAG.getNode(ISD::SUB, DL, VT,
2065  DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2066  N0.getOperand(1));
2067  }
2068 
2069  // add (sext i1 X), 1 -> zext (not i1 X)
2070  // We don't transform this pattern:
2071  // add (zext i1 X), -1 -> sext (not i1 X)
2072  // because most (?) targets generate better code for the zext form.
2073  if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2074  isOneOrOneSplat(N1)) {
2075  SDValue X = N0.getOperand(0);
2076  if ((!LegalOperations ||
2077  (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2078  TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2079  X.getScalarValueSizeInBits() == 1) {
2080  SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2081  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2082  }
2083  }
2084 
2085  // Undo the add -> or combine to merge constant offsets from a frame index.
2086  if (N0.getOpcode() == ISD::OR &&
2087  isa<FrameIndexSDNode>(N0.getOperand(0)) &&
2088  isa<ConstantSDNode>(N0.getOperand(1)) &&
2089  DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2090  SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
2091  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2092  }
2093  }
2094 
2095  if (SDValue NewSel = foldBinOpIntoSelect(N))
2096  return NewSel;
2097 
2098  // reassociate add
2099  if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2100  return RADD;
2101 
2102  // fold ((0-A) + B) -> B-A
2103  if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2104  return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2105 
2106  // fold (A + (0-B)) -> A-B
2107  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2108  return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2109 
2110  // fold (A+(B-A)) -> B
2111  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2112  return N1.getOperand(0);
2113 
2114  // fold ((B-A)+A) -> B
2115  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2116  return N0.getOperand(0);
2117 
2118  // fold ((A-B)+(C-A)) -> (C-B)
2119  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2120  N0.getOperand(0) == N1.getOperand(1))
2121  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2122  N0.getOperand(1));
2123 
2124  // fold ((A-B)+(B-C)) -> (A-C)
2125  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2126  N0.getOperand(1) == N1.getOperand(0))
2127  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2128  N1.getOperand(1));
2129 
2130  // fold (A+(B-(A+C))) to (B-C)
2131  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2132  N0 == N1.getOperand(1).getOperand(0))
2133  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2134  N1.getOperand(1).getOperand(1));
2135 
2136  // fold (A+(B-(C+A))) to (B-C)
2137  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2138  N0 == N1.getOperand(1).getOperand(1))
2139  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2140  N1.getOperand(1).getOperand(0));
2141 
2142  // fold (A+((B-A)+or-C)) to (B+or-C)
2143  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2144  N1.getOperand(0).getOpcode() == ISD::SUB &&
2145  N0 == N1.getOperand(0).getOperand(1))
2146  return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2147  N1.getOperand(1));
2148 
2149  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2150  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2151  SDValue N00 = N0.getOperand(0);
2152  SDValue N01 = N0.getOperand(1);
2153  SDValue N10 = N1.getOperand(0);
2154  SDValue N11 = N1.getOperand(1);
2155 
2156  if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2157  return DAG.getNode(ISD::SUB, DL, VT,
2158  DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2159  DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2160  }
2161 
2162  // fold (add (umax X, C), -C) --> (usubsat X, C)
2163  if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2164  auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2165  return (!Max && !Op) ||
2166  (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2167  };
2168  if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2169  /*AllowUndefs*/ true))
2170  return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2171  N0.getOperand(1));
2172  }
2173 
2174  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2175  return V;
2176 
2177  if (SDValue V = foldAddSubOfSignBit(N, DAG))
2178  return V;
2179 
2180  if (SimplifyDemandedBits(SDValue(N, 0)))
2181  return SDValue(N, 0);
2182 
2183  // fold (a+b) -> (a|b) iff a and b share no bits.
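  // For example (for i32), (add (shl X, 8), (and Y, 255)) has no common set
  // bits, so it can be rewritten as (or (shl X, 8), (and Y, 255)).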
2184  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2185  DAG.haveNoCommonBitsSet(N0, N1))
2186  return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2187 
2188  // fold (add (xor a, -1), 1) -> (sub 0, a)
2189  if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
2190  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2191  N0.getOperand(0));
2192 
2193  if (SDValue Combined = visitADDLike(N0, N1, N))
2194  return Combined;
2195 
2196  if (SDValue Combined = visitADDLike(N1, N0, N))
2197  return Combined;
2198 
2199  return SDValue();
2200 }
2201 
2202 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2203  unsigned Opcode = N->getOpcode();
2204  SDValue N0 = N->getOperand(0);
2205  SDValue N1 = N->getOperand(1);
2206  EVT VT = N0.getValueType();
2207  SDLoc DL(N);
2208 
2209  // fold vector ops
2210  if (VT.isVector()) {
2211  // TODO SimplifyVBinOp
2212 
2213  // fold (add_sat x, 0) -> x, vector edition
2214  if (ISD::isBuildVectorAllZeros(N1.getNode()))
2215  return N0;
2216  if (ISD::isBuildVectorAllZeros(N0.getNode()))
2217  return N1;
2218  }
2219 
2220  // fold (add_sat x, undef) -> -1
2221  if (N0.isUndef() || N1.isUndef())
2222  return DAG.getAllOnesConstant(DL, VT);
2223 
2224  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2225  // canonicalize constant to RHS
2226  if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2227  return DAG.getNode(Opcode, DL, VT, N1, N0);
2228  // fold (add_sat c1, c2) -> c3
2229  return DAG.FoldConstantArithmetic(Opcode, DL, VT, N0.getNode(),
2230  N1.getNode());
2231  }
2232 
2233  // fold (add_sat x, 0) -> x
2234  if (isNullConstant(N1))
2235  return N0;
2236 
2237  // If it cannot overflow, transform into an add.
2238  if (Opcode == ISD::UADDSAT)
2239  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2240  return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2241 
2242  return SDValue();
2243 }
2244 
2245 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2246  bool Masked = false;
2247 
2248  // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
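  // e.g. a carry bit that legalization wrapped as (and (zext C), 1) or
  // (trunc C) is still recognized as the original carry after peeling.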
2249  while (true) {
2250  if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2251  V = V.getOperand(0);
2252  continue;
2253  }
2254 
2255  if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2256  Masked = true;
2257  V = V.getOperand(0);
2258  continue;
2259  }
2260 
2261  break;
2262  }
2263 
2264  // If this is not a carry, return.
2265  if (V.getResNo() != 1)
2266  return SDValue();
2267 
2268  if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2269  V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2270  return SDValue();
2271 
2272  // If the result is masked, then no matter what kind of bool it is we can
2273  // return. If it isn't, then we need to make sure the bool type is either 0 or
2274  // 1 and not other values.
2275  if (Masked ||
2276  TLI.getBooleanContents(V.getValueType()) ==
2277  TargetLowering::ZeroOrOneBooleanContent)
2278  return V;
2279 
2280  return SDValue();
2281 }
2282 
2283 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2284 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2285 /// the opcode and bypass the mask operation.
2286 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2287  SelectionDAG &DAG, const SDLoc &DL) {
2288  if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2289  return SDValue();
2290 
2291  EVT VT = N0.getValueType();
2292  if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2293  return SDValue();
2294 
2295  // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2296  // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2297  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2298 }
2299 
2300 SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
2301  EVT VT = N0.getValueType();
2302  SDLoc DL(LocReference);
2303 
2304  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2305  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2306  isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2307  return DAG.getNode(ISD::SUB, DL, VT, N0,
2308  DAG.getNode(ISD::SHL, DL, VT,
2309  N1.getOperand(0).getOperand(1),
2310  N1.getOperand(1)));
2311 
2312  if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2313  return V;
2314 
2315  // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2316  // rather than 'add 0/-1' (the zext should get folded).
2317  // add (sext i1 Y), X --> sub X, (zext i1 Y)
2318  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2319  N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2320  TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2321  SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2322  return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2323  }
2324 
2325  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2326  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2327  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2328  if (TN->getVT() == MVT::i1) {
2329  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2330  DAG.getConstant(1, DL, VT));
2331  return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2332  }
2333  }
2334 
2335  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2336  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2337  N1.getResNo() == 0)
2338  return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2339  N0, N1.getOperand(0), N1.getOperand(2));
2340 
2341  // (add X, Carry) -> (addcarry X, 0, Carry)
2342  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2343  if (SDValue Carry = getAsCarry(TLI, N1))
2344  return DAG.getNode(ISD::ADDCARRY, DL,
2345  DAG.getVTList(VT, Carry.getValueType()), N0,
2346  DAG.getConstant(0, DL, VT), Carry);
2347 
2348  return SDValue();
2349 }
2350 
2351 SDValue DAGCombiner::visitADDC(SDNode *N) {
2352  SDValue N0 = N->getOperand(0);
2353  SDValue N1 = N->getOperand(1);
2354  EVT VT = N0.getValueType();
2355  SDLoc DL(N);
2356 
2357  // If the flag result is dead, turn this into an ADD.
2358  if (!N->hasAnyUseOfValue(1))
2359  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2360  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2361 
2362  // canonicalize constant to RHS.
2365  if (N0C && !N1C)
2366  return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2367 
2368  // fold (addc x, 0) -> x + no carry out
2369  if (isNullConstant(N1))
2370  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2371  DL, MVT::Glue));
2372 
2373  // If it cannot overflow, transform into an add.
2374  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2375  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2376  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2377 
2378  return SDValue();
2379 }
2380 
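/// Flip a boolean value by XOR'ing it with the target's 'true' constant:
/// 1 for zero/one (or undefined) boolean contents, all-ones for
/// zero/negative-one boolean contents.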
2381 static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT,
2382  SelectionDAG &DAG, const TargetLowering &TLI) {
2383  SDValue Cst;
2384  switch (TLI.getBooleanContents(VT)) {
2385  case TargetLowering::ZeroOrOneBooleanContent:
2386  case TargetLowering::UndefinedBooleanContent:
2387  Cst = DAG.getConstant(1, DL, VT);
2388  break;
2389  case TargetLowering::ZeroOrNegativeOneBooleanContent:
2390  Cst = DAG.getConstant(-1, DL, VT);
2391  break;
2392  }
2393 
2394  return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
2395 }
2396 
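/// Return true if \p V is an XOR whose constant operand flips a boolean of
/// the target's boolean contents, i.e. the constant is the corresponding
/// 'true' value (any odd constant when the contents are undefined).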
2397 static bool isBooleanFlip(SDValue V, EVT VT, const TargetLowering &TLI) {
2398  if (V.getOpcode() != ISD::XOR) return false;
2399  ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2400  if (!Const) return false;
2401 
2402  switch(TLI.getBooleanContents(VT)) {
2403  case TargetLowering::ZeroOrOneBooleanContent:
2404  return Const->isOne();
2405  case TargetLowering::ZeroOrNegativeOneBooleanContent:
2406  return Const->isAllOnesValue();
2407  case TargetLowering::UndefinedBooleanContent:
2408  return (Const->getAPIntValue() & 0x01) == 1;
2409  }
2410  llvm_unreachable("Unsupported boolean content");
2411 }
2412 
2413 SDValue DAGCombiner::visitUADDO(SDNode *N) {
2414  SDValue N0 = N->getOperand(0);
2415  SDValue N1 = N->getOperand(1);
2416  EVT VT = N0.getValueType();
2417  if (VT.isVector())
2418  return SDValue();
2419 
2420  EVT CarryVT = N->getValueType(1);
2421  SDLoc DL(N);
2422 
2423  // If the flag result is dead, turn this into an ADD.
2424  if (!N->hasAnyUseOfValue(1))
2425  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2426  DAG.getUNDEF(CarryVT));
2427 
2428  // canonicalize constant to RHS.
2431  if (N0C && !N1C)
2432  return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
2433 
2434  // fold (uaddo x, 0) -> x + no carry out
2435  if (isNullConstant(N1))
2436  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2437 
2438  // If it cannot overflow, transform into an add.
2439  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2440  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2441  DAG.getConstant(0, DL, CarryVT));
2442 
2443  // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2444  if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2445  SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2446  DAG.getConstant(0, DL, VT),
2447  N0.getOperand(0));
2448  return CombineTo(N, Sub,
2449  flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
2450  }
2451 
2452  if (SDValue Combined = visitUADDOLike(N0, N1, N))
2453  return Combined;
2454 
2455  if (SDValue Combined = visitUADDOLike(N1, N0, N))
2456  return Combined;
2457 
2458  return SDValue();
2459 }
2460 
2461 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2462  auto VT = N0.getValueType();
2463 
2464  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2465  // If Y + 1 cannot overflow.
2466  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2467  SDValue Y = N1.getOperand(0);
2468  SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2469  if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2470  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2471  N1.getOperand(2));
2472  }
2473 
2474  // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2475  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2476  if (SDValue Carry = getAsCarry(TLI, N1))
2477  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2478  DAG.getConstant(0, SDLoc(N), VT), Carry);
2479 
2480  return SDValue();
2481 }
2482 
2483 SDValue DAGCombiner::visitADDE(SDNode *N) {
2484  SDValue N0 = N->getOperand(0);
2485  SDValue N1 = N->getOperand(1);
2486  SDValue CarryIn = N->getOperand(2);
2487 
2488  // canonicalize constant to RHS
2491  if (N0C && !N1C)
2492  return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2493  N1, N0, CarryIn);
2494 
2495  // fold (adde x, y, false) -> (addc x, y)
2496  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2497  return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2498 
2499  return SDValue();
2500 }
2501 
2502 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2503  SDValue N0 = N->getOperand(0);
2504  SDValue N1 = N->getOperand(1);
2505  SDValue CarryIn = N->getOperand(2);
2506  SDLoc DL(N);
2507 
2508  // canonicalize constant to RHS
2511  if (N0C && !N1C)
2512  return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2513 
2514  // fold (addcarry x, y, false) -> (uaddo x, y)
2515  if (isNullConstant(CarryIn)) {
2516  if (!LegalOperations ||
2518  return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2519  }
2520 
2521  EVT CarryVT = CarryIn.getValueType();
2522 
2523  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2524  if (isNullConstant(N0) && isNullConstant(N1)) {
2525  EVT VT = N0.getValueType();
2526  SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2527  AddToWorklist(CarryExt.getNode());
2528  return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2529  DAG.getConstant(1, DL, VT)),
2530  DAG.getConstant(0, DL, CarryVT));
2531  }
2532 
2533  // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
2534  if (isBitwiseNot(N0) && isNullConstant(N1) &&
2535  isBooleanFlip(CarryIn, CarryVT, TLI)) {
2536  SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
2537  DAG.getConstant(0, DL, N0.getValueType()),
2538  N0.getOperand(0), CarryIn.getOperand(0));
2539  return CombineTo(N, Sub,
2540  flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
2541  }
2542 
2543  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2544  return Combined;
2545 
2546  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2547  return Combined;
2548 
2549  return SDValue();
2550 }
2551 
2552 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2553  SDNode *N) {
2554  // Iff the flag result is dead:
2555  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
2556  if ((N0.getOpcode() == ISD::ADD ||
2557  (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
2558  isNullConstant(N1) && !N->hasAnyUseOfValue(1))
2559  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2560  N0.getOperand(0), N0.getOperand(1), CarryIn);
2561 
2562  /**
2563  * When one of the addcarry arguments is itself a carry, we may be facing
2564  * a diamond carry propagation, in which case we try to transform the DAG
2565  * to ensure linear carry propagation if that is possible.
2566  *
2567  * We are trying to get:
2568  * (addcarry X, 0, (addcarry A, B, Z):Carry)
2569  */
2570  if (auto Y = getAsCarry(TLI, N1)) {
2571  /**
2572  * (uaddo A, B)
2573  * / \
2574  * Carry Sum
2575  * | \
2576  * | (addcarry *, 0, Z)
2577  * | /
2578  * \ Carry
2579  * | /
2580  * (addcarry X, *, *)
2581  */
2582  if (Y.getOpcode() == ISD::UADDO &&
2583  CarryIn.getResNo() == 1 &&
2584  CarryIn.getOpcode() == ISD::ADDCARRY &&
2585  isNullConstant(CarryIn.getOperand(1)) &&
2586  CarryIn.getOperand(0) == Y.getValue(0)) {
2587  auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
2588  Y.getOperand(0), Y.getOperand(1),
2589  CarryIn.getOperand(2));
2590  AddToWorklist(NewY.getNode());
2591  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2592  DAG.getConstant(0, SDLoc(N), N0.getValueType()),
2593  NewY.getValue(1));
2594  }
2595  }
2596 
2597  return SDValue();
2598 }
2599 
2600 // Since it may not be valid to emit a fold to zero for vector initializers,
2601 // check if we can before folding.
2602 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2603  SelectionDAG &DAG, bool LegalOperations) {
2604  if (!VT.isVector())
2605  return DAG.getConstant(0, DL, VT);
2606  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2607  return DAG.getConstant(0, DL, VT);
2608  return SDValue();
2609 }
2610 
2611 SDValue DAGCombiner::visitSUB(SDNode *N) {
2612  SDValue N0 = N->getOperand(0);
2613  SDValue N1 = N->getOperand(1);
2614  EVT VT = N0.getValueType();
2615  SDLoc DL(N);
2616 
2617  // fold vector ops
2618  if (VT.isVector()) {
2619  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2620  return FoldedVOp;
2621 
2622  // fold (sub x, 0) -> x, vector edition
2623  if (ISD::isBuildVectorAllZeros(N1.getNode()))
2624  return N0;
2625  }
2626 
2627  // fold (sub x, x) -> 0
2628  // FIXME: Refactor this and xor and other similar operations together.
2629  if (N0 == N1)
2630  return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
2631  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2632  DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
2633  // fold (sub c1, c2) -> c1-c2
2634  return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
2635  N1.getNode());
2636  }
2637 
2638  if (SDValue NewSel = foldBinOpIntoSelect(N))
2639  return NewSel;
2640 
2641  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
2642 
2643  // fold (sub x, c) -> (add x, -c)
2644  if (N1C) {
2645  return DAG.getNode(ISD::ADD, DL, VT, N0,
2646  DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
2647  }
2648 
2649  if (isNullOrNullSplat(N0)) {
2650  unsigned BitWidth = VT.getScalarSizeInBits();
2651  // Right-shifting everything out but the sign bit followed by negation is
2652  // the same as flipping arithmetic/logical shift type without the negation:
2653  // -(X >>u 31) -> (X >>s 31)
2654  // -(X >>s 31) -> (X >>u 31)
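  // Both shifts isolate the sign bit: the logical shift yields 0 or 1 and
  // the arithmetic shift yields 0 or -1, so negating one gives the other.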
2655  if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
2656  ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
2657  if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
2658  auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
2659  if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
2660  return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
2661  }
2662  }
2663 
2664  // 0 - X --> 0 if the sub is NUW.
2665  if (N->getFlags().hasNoUnsignedWrap())
2666  return N0;
2667 
2668  if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
2669  // N1 is either 0 or the minimum signed value. If the sub is NSW, then
2670  // N1 must be 0 because negating the minimum signed value is undefined.
2671  if (N->getFlags().hasNoSignedWrap())
2672  return N0;
2673 
2674  // 0 - X --> X if X is 0 or the minimum signed value.
2675  return N1;
2676  }
2677  }
2678 
2679  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
2680  if (isAllOnesOrAllOnesSplat(N0))
2681  return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
2682 
2683  // fold (A - (0-B)) -> A+B
2684  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2685  return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
2686 
2687  // fold A-(A-B) -> B
2688  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
2689  return N1.getOperand(1);
2690 
2691  // fold (A+B)-A -> B
2692  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
2693  return N0.getOperand(1);
2694 
2695  // fold (A+B)-B -> A
2696  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
2697  return N0.getOperand(0);
2698 
2699  // fold C2-(A+C1) -> (C2-C1)-A
2700  if (N1.getOpcode() == ISD::ADD) {
2701  SDValue N11 = N1.getOperand(1);
2702  if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
2703  isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
2704  SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
2705  return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
2706  }
2707  }
2708 
2709  // fold ((A+(B+or-C))-B) -> A+or-C
2710  if (N0.getOpcode() == ISD::ADD &&
2711  (N0.getOperand(1).getOpcode() == ISD::SUB ||
2712  N0.getOperand(1).getOpcode() == ISD::ADD) &&
2713  N0.getOperand(1).getOperand(0) == N1)
2714  return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
2715  N0.getOperand(1).getOperand(1));
2716 
2717  // fold ((A+(C+B))-B) -> A+C
2718  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
2719  N0.getOperand(1).getOperand(1) == N1)
2720  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
2721  N0.getOperand(1).getOperand(0));
2722 
2723  // fold ((A-(B-C))-C) -> A-B
2724  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
2725  N0.getOperand(1).getOperand(1) == N1)
2726  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2727  N0.getOperand(1).getOperand(0));
2728 
2729  // fold (A-(B-C)) -> A+(C-B)
2730  if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
2731  return DAG.getNode(ISD::ADD, DL, VT, N0,
2732  DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
2733  N1.getOperand(0)));
2734 
2735  // fold (X - (-Y * Z)) -> (X + (Y * Z))
2736  if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
2737  if (N1.getOperand(0).getOpcode() == ISD::SUB &&
2739  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
2740  N1.getOperand(0).getOperand(1),
2741  N1.getOperand(1));
2742  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
2743  }
2744  if (N1.getOperand(1).getOpcode() == ISD::SUB &&
2746  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
2747  N1.getOperand(0),
2748  N1.getOperand(1).getOperand(1));
2749  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
2750  }
2751  }
2752 
2753  // If either operand of a sub is undef, the result is undef
2754  if (N0.isUndef())
2755  return N0;
2756  if (N1.isUndef())
2757  return N1;
2758 
2759  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2760  return V;
2761 
2762  if (SDValue V = foldAddSubOfSignBit(N, DAG))
2763  return V;
2764 
2765  if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
2766  return V;
2767 
2768  // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
2769  // rather than 'sub 0/1' (the sext should get folded).
2770  // sub X, (zext i1 Y) --> add X, (sext i1 Y)
2771  if (N1.getOpcode() == ISD::ZERO_EXTEND &&
2772  N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
2773  TLI.getBooleanContents(VT) ==
2774  TargetLowering::ZeroOrNegativeOneBooleanContent) {
2775  SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
2776  return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
2777  }
2778 
2779  // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
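  // Y is 0 when X >= 0 and -1 when X < 0, so (xor X, Y) - Y is either
  // X - 0 = X or (~X) - (-1) = ~X + 1 = -X, i.e. abs(X) in both cases.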
2780  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
2781  if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
2782  SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
2783  SDValue S0 = N1.getOperand(0);
2784  if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
2785  unsigned OpSizeInBits = VT.getScalarSizeInBits();
2786  if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
2787  if (C->getAPIntValue() == (OpSizeInBits - 1))
2788  return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
2789  }
2790  }
2791  }
2792 
2793  // If the relocation model supports it, consider symbol offsets.
2794  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
2795  if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2796  // fold (sub Sym, c) -> Sym-c
2797  if (N1C && GA->getOpcode() == ISD::GlobalAddress)
2798  return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
2799  GA->getOffset() -
2800  (uint64_t)N1C->getSExtValue());
2801  // fold (sub Sym+c1, Sym+c2) -> c1-c2
2802  if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
2803  if (GA->getGlobal() == GB->getGlobal())
2804  return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
2805  DL, VT);
2806  }
2807 
2808  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
2809  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2810  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2811  if (TN->getVT() == MVT::i1) {
2812  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2813  DAG.getConstant(1, DL, VT));
2814  return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
2815  }
2816  }
2817 
2818  // Prefer an add for more folding potential and possibly better codegen:
2819  // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
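  // (lshr N10, width-1) is 0 or 1 and (ashr N10, width-1) is 0 or -1, so
  // subtracting the former is the same as adding the latter.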
2820  if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
2821  SDValue ShAmt = N1.getOperand(1);
2822  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2823  if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) {
2824  SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
2825  return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
2826  }
2827  }
2828 
2829  return SDValue();
2830 }
2831 
2832 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
2833  SDValue N0 = N->getOperand(0);
2834  SDValue N1 = N->getOperand(1);
2835  EVT VT = N0.getValueType();
2836  SDLoc DL(N);
2837 
2838  // fold vector ops
2839  if (VT.isVector()) {
2840  // TODO SimplifyVBinOp
2841 
2842  // fold (sub_sat x, 0) -> x, vector edition
2843  if (ISD::isBuildVectorAllZeros(N1.getNode()))
2844  return N0;
2845  }
2846 
2847  // fold (sub_sat x, undef) -> 0
2848  if (N0.isUndef() || N1.isUndef())
2849  return DAG.getConstant(0, DL, VT);
2850 
2851  // fold (sub_sat x, x) -> 0
2852  if (N0 == N1)
2853  return DAG.getConstant(0, DL, VT);
2854 
2855  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2856  DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
2857  // fold (sub_sat c1, c2) -> c3
2858  return DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, N0.getNode(),
2859  N1.getNode());
2860  }
2861 
2862  // fold (sub_sat x, 0) -> x
2863  if (isNullConstant(N1))
2864  return N0;
2865 
2866  return SDValue();
2867 }
2868 
2869 SDValue DAGCombiner::visitSUBC(SDNode *N) {
2870  SDValue N0 = N->getOperand(0);
2871  SDValue N1 = N->getOperand(1);
2872  EVT VT = N0.getValueType();
2873  SDLoc DL(N);
2874 
2875  // If the flag result is dead, turn this into an SUB.
2876  if (!N->hasAnyUseOfValue(1))
2877  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2878  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2879 
2880  // fold (subc x, x) -> 0 + no borrow
2881  if (N0 == N1)
2882  return CombineTo(N, DAG.getConstant(0, DL, VT),
2883  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2884 
2885  // fold (subc x, 0) -> x + no borrow
2886  if (isNullConstant(N1))
2887  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2888 
2889  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2890  if (isAllOnesConstant(N0))
2891  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2892  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2893 
2894  return SDValue();
2895 }
2896 
2897 SDValue DAGCombiner::visitUSUBO(SDNode *N) {
2898  SDValue N0 = N->getOperand(0);
2899  SDValue N1 = N->getOperand(1);
2900  EVT VT = N0.getValueType();
2901  if (VT.isVector())
2902  return SDValue();
2903 
2904  EVT CarryVT = N->getValueType(1);
2905  SDLoc DL(N);
2906 
2907  // If the flag result is dead, turn this into an SUB.
2908  if (!N->hasAnyUseOfValue(1))
2909  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2910  DAG.getUNDEF(CarryVT));
2911 
2912  // fold (usubo x, x) -> 0 + no borrow
2913  if (N0 == N1)
2914  return CombineTo(N, DAG.getConstant(0, DL, VT),
2915  DAG.getConstant(0, DL, CarryVT));
2916 
2917  // fold (usubo x, 0) -> x + no borrow
2918  if (isNullConstant(N1))
2919  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2920 
2921  // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2922  if (isAllOnesConstant(N0))
2923  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2924  DAG.getConstant(0, DL, CarryVT));
2925 
2926  return SDValue();
2927 }
2928 
2929 SDValue DAGCombiner::visitSUBE(SDNode *N) {
2930  SDValue N0 = N->getOperand(0);
2931  SDValue N1 = N->getOperand(1);
2932  SDValue CarryIn = N->getOperand(2);
2933 
2934  // fold (sube x, y, false) -> (subc x, y)
2935  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2936  return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
2937 
2938  return SDValue();
2939 }
2940 
2941 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
2942  SDValue N0 = N->getOperand(0);
2943  SDValue N1 = N->getOperand(1);
2944  SDValue CarryIn = N->getOperand(2);
2945 
2946  // fold (subcarry x, y, false) -> (usubo x, y)
2947  if (isNullConstant(CarryIn)) {
2948  if (!LegalOperations ||
2950  return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
2951  }
2952 
2953  return SDValue();
2954 }
2955 
2956 SDValue DAGCombiner::visitMUL(SDNode *N) {
2957  SDValue N0 = N->getOperand(0);
2958  SDValue N1 = N->getOperand(1);
2959  EVT VT = N0.getValueType();
2960 
2961  // fold (mul x, undef) -> 0
2962  if (N0.isUndef() || N1.isUndef())
2963  return DAG.getConstant(0, SDLoc(N), VT);
2964 
2965  bool N0IsConst = false;
2966  bool N1IsConst = false;
2967  bool N1IsOpaqueConst = false;
2968  bool N0IsOpaqueConst = false;
2969  APInt ConstValue0, ConstValue1;
2970  // fold vector ops
2971  if (VT.isVector()) {
2972  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2973  return FoldedVOp;
2974 
2975  N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
2976  N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
2977  assert((!N0IsConst ||
2978  ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
2979  "Splat APInt should be element width");
2980  assert((!N1IsConst ||
2981  ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
2982  "Splat APInt should be element width");
2983  } else {
2984  N0IsConst = isa<ConstantSDNode>(N0);
2985  if (N0IsConst) {
2986  ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
2987  N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
2988  }
2989  N1IsConst = isa<ConstantSDNode>(N1);
2990  if (N1IsConst) {
2991  ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
2992  N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
2993  }
2994  }
2995 
2996  // fold (mul c1, c2) -> c1*c2
2997  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
2998  return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
2999  N0.getNode(), N1.getNode());
3000 
3001  // canonicalize constant to RHS (vector doesn't have to splat)
3002  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3003  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3004  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3005  // fold (mul x, 0) -> 0
3006  if (N1IsConst && ConstValue1.isNullValue())
3007  return N1;
3008  // fold (mul x, 1) -> x
3009  if (N1IsConst && ConstValue1.isOneValue())
3010  return N0;
3011 
3012  if (SDValue NewSel = foldBinOpIntoSelect(N))
3013  return NewSel;
3014 
3015  // fold (mul x, -1) -> 0-x
3016  if (N1IsConst && ConstValue1.isAllOnesValue()) {
3017  SDLoc DL(N);
3018  return DAG.getNode(ISD::SUB, DL, VT,
3019  DAG.getConstant(0, DL, VT), N0);
3020  }
3021  // fold (mul x, (1 << c)) -> x << c
3022  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3023  DAG.isKnownToBeAPowerOfTwo(N1) &&
3024  (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3025  SDLoc DL(N);
3026  SDValue LogBase2 = BuildLogBase2(N1, DL);
3027  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3028  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3029  return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3030  }
3031  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3032  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
3033  unsigned Log2Val = (-ConstValue1).logBase2();
3034  SDLoc DL(N);
3035  // FIXME: If the input is something that is easily negated (e.g. a
3036  // single-use add), we should put the negate there.
3037  return DAG.getNode(ISD::SUB, DL, VT,
3038  DAG.getConstant(0, DL, VT),
3039  DAG.getNode(ISD::SHL, DL, VT, N0,
3040  DAG.getConstant(Log2Val, DL,
3041  getShiftAmountTy(N0.getValueType()))));
3042  }
3043 
3044  // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3045  // mul x, (2^N + 1) --> add (shl x, N), x
3046  // mul x, (2^N - 1) --> sub (shl x, N), x
3047  // Examples: x * 33 --> (x << 5) + x
3048  // x * 15 --> (x << 4) - x
3049  // x * -33 --> -((x << 5) + x)
3050  // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3051  if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
3052  // TODO: We could handle more general decomposition of any constant by
3053  // having the target set a limit on number of ops and making a
3054  // callback to determine that sequence (similar to sqrt expansion).
3055  unsigned MathOp = ISD::DELETED_NODE;
3056  APInt MulC = ConstValue1.abs();
3057  if ((MulC - 1).isPowerOf2())
3058  MathOp = ISD::ADD;
3059  else if ((MulC + 1).isPowerOf2())
3060  MathOp = ISD::SUB;
3061 
3062  if (MathOp != ISD::DELETED_NODE) {
3063  unsigned ShAmt = MathOp == ISD::ADD ? (MulC - 1).logBase2()
3064  : (MulC + 1).logBase2();
3065  assert(ShAmt > 0 && ShAmt < VT.getScalarSizeInBits() &&
3066  "Not expecting multiply-by-constant that could have simplified");
3067  SDLoc DL(N);
3068  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N0,
3069  DAG.getConstant(ShAmt, DL, VT));
3070  SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
3071  if (ConstValue1.isNegative())
3072  R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3073  return R;
3074  }
3075  }
3076 
3077  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3078  if (N0.getOpcode() == ISD::SHL &&
3079  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3080  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3081  SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3083  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3084  }
3085 
3086  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3087  // use.
3088  {
3089  SDValue Sh(nullptr, 0), Y(nullptr, 0);
3090 
3091  // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
3092  if (N0.getOpcode() == ISD::SHL &&
3094  N0.getNode()->hasOneUse()) {
3095  Sh = N0; Y = N1;
3096  } else if (N1.getOpcode() == ISD::SHL &&
3098  N1.getNode()->hasOneUse()) {
3099  Sh = N1; Y = N0;
3100  }
3101 
3102  if (Sh.getNode()) {
3103  SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3104  return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3105  }
3106  }
3107 
3108  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3109  if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
3110  N0.getOpcode() == ISD::ADD &&
3111  DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
3112  isMulAddWithConstProfitable(N, N0, N1))
3113  return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3114  DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3115  N0.getOperand(0), N1),
3116  DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3117  N0.getOperand(1), N1));
3118 
3119  // reassociate mul
3120  if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3121  return RMUL;
3122 
3123  return SDValue();
3124 }
3125 
3126 /// Return true if divmod libcall is available.
3127 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3128  const TargetLowering &TLI) {
3129  RTLIB::Libcall LC;
3130  EVT NodeType = Node->getValueType(0);
3131  if (!NodeType.isSimple())
3132  return false;
3133  switch (NodeType.getSimpleVT().SimpleTy) {
3134  default: return false; // No libcall for vector types.
3135  case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
3136  case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
3137  case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
3138  case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3139  case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3140  }
3141 
3142  return TLI.getLibcallName(LC) != nullptr;
3143 }
3144 
3145 /// Issue divrem if both quotient and remainder are needed.
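/// e.g. if both (sdiv X, Y) and (srem X, Y) are present, they can be replaced
/// by a single divrem node that computes both results at once.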
3146 SDValue DAGCombiner::useDivRem(SDNode *Node) {
3147  if (Node->use_empty())
3148  return SDValue(); // This is a dead node, leave it alone.
3149 
3150  unsigned Opcode = Node->getOpcode();
3151  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
3152  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
3153 
3154  // DivMod lib calls can still work on non-legal types if using lib-calls.
3155  EVT VT = Node->getValueType(0);
3156  if (VT.isVector() || !VT.isInteger())
3157  return SDValue();
3158 
3159  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
3160  return SDValue();
3161 
3162  // If DIVREM is going to get expanded into a libcall,
3163  // but there is no libcall available, then don't combine.
3164  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
3165  !isDivRemLibcallAvailable(Node, isSigned, TLI))
3166  return SDValue();
3167 
3168  // If div is legal, it's better to do the normal expansion
3169  unsigned OtherOpcode = 0;
3170  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
3171  OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
3172  if (TLI.isOperationLegalOrCustom(Opcode, VT))
3173  return SDValue();
3174  } else {
3175  OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3176  if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
3177  return SDValue();
3178  }
3179 
3180  SDValue Op0 = Node->getOperand(0);
3181  SDValue Op1 = Node->getOperand(1);
3182  SDValue combined;
3183  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
3184  UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
3185  SDNode *User = *UI;
3186  if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
3187  User->use_empty())
3188  continue;
3189  // Convert the other matching node(s), too;
3190  // otherwise, the DIVREM may get target-legalized into something
3191  // target-specific that we won't be able to recognize.
3192  unsigned UserOpc = User->getOpcode();
3193  if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
3194  User->getOperand(0) == Op0 &&
3195  User->getOperand(1) == Op1) {
3196  if (!combined) {
3197  if (UserOpc == OtherOpcode) {
3198  SDVTList VTs = DAG.getVTList(VT, VT);
3199  combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
3200  } else if (UserOpc == DivRemOpc) {
3201  combined = SDValue(User, 0);
3202  } else {
3203  assert(UserOpc == Opcode);
3204  continue;
3205  }
3206  }
3207  if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
3208  CombineTo(User, combined);
3209  else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
3210  CombineTo(User, combined.getValue(1));
3211  }
3212  }
3213  return combined;
3214 }
3215 
3216 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
3217  SDValue N0 = N->getOperand(0);
3218  SDValue N1 = N->getOperand(1);
3219  EVT VT = N->getValueType(0);
3220  SDLoc DL(N);
3221 
3222  unsigned Opc = N->getOpcode();
3223  bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
3224  ConstantSDNode *N1C = isConstOrConstSplat(N1);
3225 
3226  // X / undef -> undef
3227  // X % undef -> undef
3228  // X / 0 -> undef
3229  // X % 0 -> undef
3230  // NOTE: This includes vectors where any divisor element is zero/undef.
3231  if (DAG.isUndef(Opc, {N0, N1}))
3232  return DAG.getUNDEF(VT);
3233 
3234  // undef / X -> 0
3235  // undef % X -> 0
3236  if (N0.isUndef())
3237  return DAG.getConstant(0, DL, VT);
3238 
3239  // 0 / X -> 0
3240  // 0 % X -> 0
3241  ConstantSDNode *N0C = isConstOrConstSplat(N0);
3242  if (N0C && N0C->isNullValue())
3243  return N0;
3244 
3245  // X / X -> 1
3246  // X % X -> 0
3247  if (N0 == N1)
3248  return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
3249 
3250  // X / 1 -> X
3251  // X % 1 -> 0
3252  // If this is a boolean op (single-bit element type), we can't have
3253  // division-by-zero or remainder-by-zero, so assume the divisor is 1.
3254  // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
3255  // it's a 1.
3256  if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
3257  return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
3258 
3259  return SDValue();
3260 }
3261 
3262 SDValue DAGCombiner::visitSDIV(SDNode *N) {
3263  SDValue N0 = N->getOperand(0);
3264  SDValue N1 = N->getOperand(1);
3265  EVT VT = N->getValueType(0);
3266  EVT CCVT = getSetCCResultType(VT);
3267 
3268  // fold vector ops
3269  if (VT.isVector())
3270  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3271  return FoldedVOp;
3272 
3273  SDLoc DL(N);
3274 
3275  // fold (sdiv c1, c2) -> c1/c2
3276  ConstantSDNode *N0C = isConstOrConstSplat(N0);
3277  ConstantSDNode *N1C = isConstOrConstSplat(N1);
3278  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
3279  return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
3280  // fold (sdiv X, -1) -> 0-X
3281  if (N1C && N1C->isAllOnesValue())
3282  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
3283  // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
3284  if (N1C && N1C->getAPIntValue().isMinSignedValue())
3285  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3286  DAG.getConstant(1, DL, VT),
3287  DAG.getConstant(0, DL, VT));
3288 
3289  if (SDValue V = simplifyDivRem(N, DAG))
3290  return V;
3291 
3292  if (SDValue NewSel = foldBinOpIntoSelect(N))
3293  return NewSel;
3294 
3295  // If we know the sign bits of both operands are zero, strength reduce to a
3296  // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
3297  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3298  return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
3299 
3300  if (SDValue V = visitSDIVLike(N0, N1, N)) {
3301  // If the corresponding remainder node exists, update its users with
3302  // (Dividend - (Quotient * Divisor)).
3303  if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
3304  { N0, N1 })) {
3305  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3306  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3307  AddToWorklist(Mul.getNode());
3308  AddToWorklist(Sub.getNode());
3309  CombineTo(RemNode, Sub);
3310  }
3311  return V;
3312  }
3313 
3314  // sdiv, srem -> sdivrem
3315  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3316  // true. Otherwise, we break the simplification logic in visitREM().
3317  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3318  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3319  if (SDValue DivRem = useDivRem(N))
3320  return DivRem;
3321 
3322  return SDValue();
3323 }
3324 
3325 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3326  SDLoc DL(N);
3327  EVT VT = N->getValueType(0);
3328  EVT CCVT = getSetCCResultType(VT);
3329  unsigned BitWidth = VT.getScalarSizeInBits();
3330 
3331  // Helper for determining whether a value is a power-2 constant scalar or a
3332  // vector of such elements.
3333  auto IsPowerOfTwo = [](ConstantSDNode *C) {
3334  if (C->isNullValue() || C->isOpaque())
3335  return false;
3336  if (C->getAPIntValue().isPowerOf2())
3337  return true;
3338  if ((-C->getAPIntValue()).isPowerOf2())
3339  return true;
3340  return false;
3341  };
3342 
3343  // fold (sdiv X, pow2) -> simple ops after legalize
3344  // FIXME: We check for the exact bit here because the generic lowering gives
3345  // better results in that case. The target-specific lowering should learn how
3346  // to handle exact sdivs efficiently.
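  // Illustrative: for (sdiv i32 x, 4) the core of the expansion below is
  //   t = add x, (srl (sra x, 31), 30) ; adds 3 only when x is negative
  //   res = sra t, 2
  // plus selects/negation that handle divisors of 1, -1, and negative values.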
3347  if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
3348  // Target-specific implementation of sdiv x, pow2.
3349  if (SDValue Res = BuildSDIVPow2(N))
3350  return Res;
3351 
3352  // Create constants that are functions of the shift amount value.
3353  EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
3354  SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
3355  SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
3356  C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
3357  SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
3358  if (!isConstantOrConstantVector(Inexact))
3359  return SDValue();
3360 
3361  // Splat the sign bit into the register
3362  SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
3363  DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
3364  AddToWorklist(Sign.getNode());
3365 
3366  // Add (N0 < 0) ? abs2 - 1 : 0;
3367  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
3368  AddToWorklist(Srl.getNode());
3369  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
3370  AddToWorklist(Add.getNode());
3371  SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
3372  AddToWorklist(Sra.getNode());
3373 
3374  // Special case: (sdiv X, 1) -> X
3375  // Special Case: (sdiv X, -1) -> 0-X
3376  SDValue One = DAG.getConstant(1, DL, VT);
3377  SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
3378  SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
3379  SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
3380  SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
3381  Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
3382 
3383  // If dividing by a positive value, we're done. Otherwise, the result must
3384  // be negated.
3385  SDValue Zero = DAG.getConstant(0, DL, VT);
3386  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
3387 
3388  // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
3389  SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
3390  SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
3391  return Res;
3392  }
3393 
3394  // If integer divide is expensive and we satisfy the requirements, emit an
3395  // alternate sequence. Targets may check function attributes for size/speed
3396  // trade-offs.
3397  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3398  if (isConstantOrConstantVector(N1) &&
3399  !TLI.isIntDivCheap(N->getValueType(0), Attr))
3400  if (SDValue Op = BuildSDIV(N))
3401  return Op;
3402 
3403  return SDValue();
3404 }
3405 
3406 SDValue DAGCombiner::visitUDIV(SDNode *N) {
3407  SDValue N0 = N->getOperand(0);
3408  SDValue N1 = N->getOperand(1);
3409  EVT VT = N->getValueType(0);
3410  EVT CCVT = getSetCCResultType(VT);
3411 
3412  // fold vector ops
3413  if (VT.isVector())
3414  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3415  return FoldedVOp;
3416 
3417  SDLoc DL(N);
3418 
3419  // fold (udiv c1, c2) -> c1/c2
3420  ConstantSDNode *N0C = isConstOrConstSplat(N0);
3421  ConstantSDNode *N1C = isConstOrConstSplat(N1);
3422  if (N0C && N1C)
3423  if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
3424  N0C, N1C))
3425  return Folded;
3426  // fold (udiv X, -1) -> select(X == -1, 1, 0)
3427  if (N1C && N1C->getAPIntValue().isAllOnesValue())
3428  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3429  DAG.getConstant(1, DL, VT),
3430  DAG.getConstant(0, DL, VT));
3431 
3432  if (SDValue V = simplifyDivRem(N, DAG))
3433  return V;
3434 
3435  if (SDValue NewSel = foldBinOpIntoSelect(N))
3436  return NewSel;
3437 
3438  if (SDValue V = visitUDIVLike(N0, N1, N)) {
3439  // If the corresponding remainder node exists, update its users with
3440  // (Dividend - (Quotient * Divisor)).
3441  if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
3442  { N0, N1 })) {
3443  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3444  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3445  AddToWorklist(Mul.getNode());
3446  AddToWorklist(Sub.getNode());
3447  CombineTo(RemNode, Sub);
3448  }
3449  return V;
3450  }
3451 
3452  // udiv, urem -> udivrem
3453  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3454  // true. Otherwise, we break the simplification logic in visitREM().
3455  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3456  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3457  if (SDValue DivRem = useDivRem(N))
3458  return DivRem;
3459 
3460  return SDValue();
3461 }
3462 
3463 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3464  SDLoc DL(N);
3465  EVT VT = N->getValueType(0);
3466 
3467  // fold (udiv x, (1 << c)) -> x >>u c
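  // e.g. (udiv x, 16) --> (srl x, 4)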
3468  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3469  DAG.isKnownToBeAPowerOfTwo(N1)) {
3470  SDValue LogBase2 = BuildLogBase2(N1, DL);
3471  AddToWorklist(LogBase2.getNode());
3472 
3473  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3474  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3475  AddToWorklist(Trunc.getNode());
3476  return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3477  }
3478 
3479  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
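  // e.g. (udiv x, (shl 4, y)) --> (srl x, (add y, 2))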
3480  if (N1.getOpcode() == ISD::SHL) {
3481  SDValue N10 = N1.getOperand(0);
3482  if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
3483  DAG.isKnownToBeAPowerOfTwo(N10)) {
3484  SDValue LogBase2 = BuildLogBase2(N10, DL);
3485  AddToWorklist(LogBase2.getNode());
3486 
3487  EVT ADDVT = N1.getOperand(1).getValueType();
3488  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
3489  AddToWorklist(Trunc.getNode());
3490  SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
3491  AddToWorklist(Add.getNode());
3492  return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
3493  }
3494  }
3495 
3496  // fold (udiv x, c) -> alternate
3497  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3498  if (isConstantOrConstantVector(N1) &&
3499  !TLI.isIntDivCheap(N->getValueType(0), Attr))
3500  if (SDValue Op = BuildUDIV(N))
3501  return Op;
3502 
3503  return SDValue();
3504 }
3505 
3506 // handles ISD::SREM and ISD::UREM
3507 SDValue DAGCombiner::visitREM(SDNode *N) {
3508  unsigned Opcode = N->getOpcode();
3509  SDValue N0 = N->getOperand(0);
3510  SDValue N1 = N->getOperand(1);
3511  EVT VT = N->getValueType(0);
3512  EVT CCVT = getSetCCResultType(VT);
3513 
3514  bool isSigned = (Opcode == ISD::SREM);
3515  SDLoc DL(N);
3516 
3517  // fold (rem c1, c2) -> c1%c2
3518  ConstantSDNode *N0C = isConstOrConstSplat(N0);
3519  ConstantSDNode *N1C = isConstOrConstSplat(N1);
3520  if (N0C && N1C)
3521  if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
3522  return Folded;
3523  // fold (urem X, -1) -> select(X == -1, 0, x)
3524  if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
3525  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3526  DAG.getConstant(0, DL, VT), N0);
3527 
3528  if (SDValue V = simplifyDivRem(N, DAG))
3529  return V;
3530 
3531  if (SDValue NewSel = foldBinOpIntoSelect(N))
3532  return NewSel;
3533 
3534  if (isSigned) {
3535  // If we know the sign bits of both operands are zero, strength reduce to a
3536  // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
3537  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3538  return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
3539  } else {
3540  SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
3541  if (DAG.isKnownToBeAPowerOfTwo(N1)) {
3542  // fold (urem x, pow2) -> (and x, pow2-1)
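  // e.g. (urem x, 8) --> (and x, 7)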
3543  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3544  AddToWorklist(Add.getNode());
3545  return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3546  }
3547  if (N1.getOpcode() == ISD::SHL &&
3548  DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
3549  // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
3550  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3551  AddToWorklist(Add.getNode());
3552  return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3553  }
3554  }
3555 
3556  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3557 
3558  // If X/C can be simplified by the division-by-constant logic, lower
3559  // X%C to the equivalent of X-X/C*C.
3560  // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
3561  // speculative DIV must not cause a DIVREM conversion. We guard against this
3562  // by skipping the simplification if isIntDivCheap(). When div is not cheap,
3563  // combine will not return a DIVREM. Regardless, checking cheapness here
3564  // makes sense since the simplification results in fatter code.
3565  if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
3566  SDValue OptimizedDiv =
3567  isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
3568  if (OptimizedDiv.getNode()) {
3569  // If the equivalent Div node also exists, update its users.
3570  unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3571  if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
3572  { N0, N1 }))
3573  CombineTo(DivNode, OptimizedDiv);
3574  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
3575  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3576  AddToWorklist(OptimizedDiv.getNode());
3577  AddToWorklist(Mul.getNode());
3578  return Sub;
3579  }
3580  }
3581 
3582  // sdiv, srem -> sdivrem
3583  if (SDValue DivRem = useDivRem(N))
3584  return DivRem.getValue(1);
3585 
3586  return SDValue();
3587 }
3588 
3589 SDValue DAGCombiner::visitMULHS(SDNode *N) {
3590  SDValue N0 = N->getOperand(0);
3591  SDValue N1 = N->getOperand(1);
3592  EVT VT = N->getValueType(0);
3593  SDLoc DL(N);
3594 
3595  if (VT.isVector()) {
3596  // fold (mulhs x, 0) -> 0
3597  if (ISD::isBuildVectorAllZeros(N1.getNode()))
3598  return N1;
3599  if (ISD::isBuildVectorAllZeros(N0.getNode()))
3600  return N0;
3601  }
3602 
3603  // fold (mulhs x, 0) -> 0
3604  if (isNullConstant(N1))
3605  return N1;
3606  // fold (mulhs x, 1) -> (sra x, size(x)-1)
3607  if (isOneConstant(N1))
3608  return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
3609  DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
3610  getShiftAmountTy(N0.getValueType())));
3611 
3612  // fold (mulhs x, undef) -> 0
3613  if (N0.isUndef() || N1.isUndef())
3614  return DAG.getConstant(0, DL, VT);
3615 
3616  // If the type twice as wide is legal, transform the mulhs to a wider multiply
3617  // plus a shift.
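  // e.g. for i32: (mulhs x, y) --> (trunc (srl (mul (sext x), (sext y)), 32))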
3618  if (VT.isSimple() && !VT.isVector()) {
3619  MVT Simple = VT.getSimpleVT();
3620  unsigned SimpleSize = Simple.getSizeInBits();
3621  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3622  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3623  N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
3624  N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
3625  N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3626  N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3627  DAG.getConstant(SimpleSize, DL,
3628  getShiftAmountTy(N1.getValueType())));
3629  return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3630  }
3631  }
3632 
3633  return SDValue();
3634 }
3635 
3636 SDValue DAGCombiner::visitMULHU(SDNode *N) {
3637  SDValue N0 = N->getOperand(0);
3638  SDValue N1 = N->getOperand(1);
3639  EVT VT = N->getValueType(0);
3640  SDLoc DL(N);
3641 
3642  if (VT.isVector()) {
3643  // fold (mulhu x, 0) -> 0
3644  if (ISD::isBuildVectorAllZeros(N1.getNode()))
3645  return N1;
3646  if (ISD::isBuildVectorAllZeros(N0.getNode()))
3647  return N0;
3648  }
3649 
3650  // fold (mulhu x, 0) -> 0
3651  if (isNullConstant(N1))
3652  return N1;
3653  // fold (mulhu x, 1) -> 0
3654  if (isOneConstant(N1))
3655  return DAG.getConstant(0, DL, N0.getValueType());
3656  // fold (mulhu x, undef) -> 0
3657  if (N0.isUndef() || N1.isUndef())
3658  return DAG.getConstant(0, DL, VT);
3659 
3660  // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
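  // e.g. for i32: (mulhu x, 16) --> (srl x, 28)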
3661  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3662  DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
3663  SDLoc DL(N);
3664  unsigned NumEltBits = VT.getScalarSizeInBits();
3665  SDValue LogBase2 = BuildLogBase2(N1, DL);
3666  SDValue SRLAmt = DAG.getNode(
3667  ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
3668  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3669  SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
3670  return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3671  }
3672 
3673  // If the type twice as wide is legal, transform the mulhu to a wider multiply
3674  // plus a shift.
3675  if (VT.isSimple() && !VT.isVector()) {
3676  MVT Simple = VT.getSimpleVT();
3677  unsigned SimpleSize = Simple.getSizeInBits();
3678  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3679  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3680  N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
3681  N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
3682  N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3683  N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3684  DAG.getConstant(SimpleSize, DL,
3685  getShiftAmountTy(N1.getValueType())));
3686  return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3687  }
3688  }
3689 
3690  return SDValue();
3691 }
3692 
3693 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
3694 /// give the opcodes for the two computations that are being performed. Return
3695  /// the new combined value if a simplification was made.
3696 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
3697  unsigned HiOp) {
3698  // If the high half is not needed, just compute the low half.
3699  bool HiExists = N->hasAnyUseOfValue(1);
3700  if (!HiExists && (!LegalOperations ||
3701  TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
3702  SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3703  return CombineTo(N, Res, Res);
3704  }
3705 
3706  // If the low half is not needed, just compute the high half.
3707  bool LoExists = N->hasAnyUseOfValue(0);
3708  if (!LoExists && (!LegalOperations ||
3709  TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
3710  SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3711  return CombineTo(N, Res, Res);
3712  }
3713 
3714  // If both halves are used, return as it is.
3715  if (LoExists && HiExists)
3716  return SDValue();
3717 
3718  // If the two computed results can be simplified separately, separate them.
3719  if (LoExists) {
3720  SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3721  AddToWorklist(Lo.getNode());
3722  SDValue LoOpt = combine(Lo.getNode());
3723  if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
3724  (!LegalOperations ||
3725  TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
3726  return CombineTo(N, LoOpt, LoOpt);
3727  }
3728 
3729  if (HiExists) {
3730  SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3731  AddToWorklist(Hi.getNode());
3732  SDValue HiOpt = combine(Hi.getNode());
3733  if (HiOpt.getNode() && HiOpt != Hi &&
3734  (!LegalOperations ||
3735  TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
3736  return CombineTo(N, HiOpt, HiOpt);
3737  }
3738 
3739  return SDValue();
3740 }
3741 
3742 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
3743  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
3744  return Res;
3745 
3746  EVT VT = N->getValueType(0);
3747  SDLoc DL(N);
3748 
3749  // If the type twice as wide is legal, transform the multiply to a wider
3750  // multiply plus a shift.
3751  if (VT.isSimple() && !VT.isVector()) {
3752  MVT Simple = VT.getSimpleVT();
3753  unsigned SimpleSize = Simple.getSizeInBits();
3754  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3755  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3756  SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
3757  SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
3758  Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3759  // Compute the high part as N1.
3760  Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3761  DAG.getConstant(SimpleSize, DL,
3762  getShiftAmountTy(Lo.getValueType())));
3763  Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3764  // Compute the low part as N0.
3765  Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3766  return CombineTo(N, Lo, Hi);
3767  }
3768  }
3769 
3770  return SDValue();
3771 }
3772 
3773 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
3774  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
3775  return Res;
3776 
3777  EVT VT = N->getValueType(0);
3778  SDLoc DL(N);
3779 
3780  // If the type twice as wide is legal, transform the multiply to a wider
3781  // multiply plus a shift.
3782  if (VT.isSimple() && !VT.isVector()) {
3783  MVT Simple = VT.getSimpleVT();
3784  unsigned SimpleSize = Simple.getSizeInBits();
3785  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3786  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3787  SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
3788  SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
3789  Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3790  // Compute the high part as N1.
3791  Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3792  DAG.getConstant(SimpleSize, DL,
3793  getShiftAmountTy(Lo.getValueType())));
3794  Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3795  // Compute the low part as N0.
3796  Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3797  return CombineTo(N, Lo, Hi);
3798  }
3799  }
3800 
3801  return SDValue();
3802 }
3803 
3804 SDValue DAGCombiner::visitSMULO(SDNode *N) {
3805  // (smulo x, 2) -> (saddo x, x)
3806  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3807  if (C2->getAPIntValue() == 2)
3808  return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
3809  N->getOperand(0), N->getOperand(0));
3810 
3811  return SDValue();
3812 }
3813 
3814 SDValue DAGCombiner::visitUMULO(SDNode *N) {
3815  // (umulo x, 2) -> (uaddo x, x)
3816  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3817  if (C2->getAPIntValue() == 2)
3818  return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
3819  N->getOperand(0), N->getOperand(0));
3820 
3821  return SDValue();
3822 }
3823 
3824 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
3825  SDValue N0 = N->getOperand(0);
3826  SDValue N1 = N->getOperand(1);
3827  EVT VT = N0.getValueType();
3828 
3829  // fold vector ops
3830  if (VT.isVector())
3831  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3832  return FoldedVOp;
3833 
3834  // fold operation with constant operands.
3835  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3836  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3837  if (N0C && N1C)
3838  return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
3839 
3840  // canonicalize constant to RHS
3841  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3842  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3843  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
3844 
3845  // If sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
3846  // Only do this if the current op isn't legal and the flipped is.
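  // e.g. (smax x, y) --> (umax x, y) when the sign bits of both operands are known zero.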
3847  unsigned Opcode = N->getOpcode();
3848  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3849  if (!TLI.isOperationLegal(Opcode, VT) &&
3850  (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
3851  (N1.isUndef() || DAG.SignBitIsZero(N1))) {
3852  unsigned AltOpcode;
3853  switch (Opcode) {
3854  case ISD::SMIN: AltOpcode = ISD::UMIN; break;
3855  case ISD::SMAX: AltOpcode = ISD::UMAX; break;
3856  case ISD::UMIN: AltOpcode = ISD::SMIN; break;
3857  case ISD::UMAX: AltOpcode = ISD::SMAX; break;
3858  default: llvm_unreachable("Unknown MINMAX opcode");
3859  }
3860  if (TLI.isOperationLegal(AltOpcode, VT))
3861  return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
3862  }
3863 
3864  return SDValue();
3865 }
3866 
3867 /// If this is a bitwise logic instruction and both operands have the same
3868 /// opcode, try to sink the other opcode after the logic instruction.
3869 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
3870  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3871  EVT VT = N0.getValueType();
3872  unsigned LogicOpcode = N->getOpcode();
3873  unsigned HandOpcode = N0.getOpcode();
3874  assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
3875  LogicOpcode == ISD::XOR) && "Expected logic opcode");
3876  assert(HandOpcode == N1.getOpcode() && "Bad input!");
3877 
3878  // Bail early if none of these transforms apply.
3879  if (N0.getNumOperands() == 0)
3880  return SDValue();
3881 
3882  // FIXME: We should check number of uses of the operands to not increase
3883  // the instruction count for all transforms.
3884 
3885  // Handle size-changing casts.
3886  SDValue X = N0.getOperand(0);
3887  SDValue Y = N1.getOperand(0);
3888  EVT XVT = X.getValueType();
3889  SDLoc DL(N);
3890  if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
3891  HandOpcode == ISD::SIGN_EXTEND) {
3892  // If both operands have other uses, this transform would create extra
3893  // instructions without eliminating anything.
3894  if (!N0.hasOneUse() && !N1.hasOneUse())
3895  return SDValue();
3896  // We need matching integer source types.
3897  if (XVT != Y.getValueType())
3898  return SDValue();
3899  // Don't create an illegal op during or after legalization. Don't ever
3900  // create an unsupported vector op.
3901  if ((VT.isVector() || LegalOperations) &&
3902  !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
3903  return SDValue();
3904  // Avoid infinite looping with PromoteIntBinOp.
3905  // TODO: Should we apply desirable/legal constraints to all opcodes?
3906  if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
3907  !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
3908  return SDValue();
3909  // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
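  // e.g. (and (zext x), (zext y)) --> (zext (and x, y))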
3910  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
3911  return DAG.getNode(HandOpcode, DL, VT, Logic);
3912  }
3913 
3914  // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
3915  if (HandOpcode == ISD::TRUNCATE) {
3916  // If both operands have other uses, this transform would create extra
3917  // instructions without eliminating anything.
3918  if (!N0.hasOneUse() && !N1.hasOneUse())
3919  return SDValue();
3920  // We need matching source types.
3921  if (XVT != Y.getValueType())
3922  return SDValue();
3923  // Don't create an illegal op during or after legalization.
3924  if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
3925  return SDValue();
3926  // Be extra careful sinking truncate. If it's free, there's no benefit in
3927  // widening a binop. Also, don't create a logic op on an illegal type.
3928  if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
3929  return SDValue();
3930  if (!TLI.isTypeLegal(XVT))
3931  return SDValue();
3932  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
3933  return DAG.getNode(HandOpcode, DL, VT, Logic);
3934  }
3935 
3936  // For binops SHL/SRL/SRA/AND:
3937  // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
3938  if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
3939  HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
3940  N0.getOperand(1) == N1.getOperand(1)) {
3941  // If either operand has other uses, this transform is not an improvement.
3942  if (!N0.hasOneUse() || !N1.hasOneUse())
3943  return SDValue();
3944  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
3945  return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
3946  }
3947 
3948  // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
3949  if (HandOpcode == ISD::BSWAP) {
3950  // If either operand has other uses, this transform is not an improvement.
3951  if (!N0.hasOneUse() || !N1.hasOneUse())
3952  return SDValue();
3953  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
3954  return DAG.getNode(HandOpcode, DL, VT, Logic);
3955  }
3956 
3957  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
3958  // Only perform this optimization up until type legalization, before
3959  // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
3960  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
3961  // we don't want to undo this promotion.
3962  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
3963  // on scalars.
3964  if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
3965  Level <= AfterLegalizeTypes) {
3966  // Input types must be integer and the same.
3967  if (XVT.isInteger() && XVT == Y.getValueType()) {
3968  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
3969  return DAG.getNode(HandOpcode, DL, VT, Logic);
3970  }
3971  }
3972 
3973  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
3974  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
3975  // If both shuffles use the same mask, and both shuffle within a single
3976  // vector, then it is worthwhile to move the swizzle after the operation.
3977  // The type-legalizer generates this pattern when loading illegal
3978  // vector types from memory. In many cases this allows additional shuffle
3979  // optimizations.
3980  // There are other cases where moving the shuffle after the xor/and/or
3981  // is profitable even if shuffles don't perform a swizzle.
3982  // If both shuffles use the same mask, and both shuffles have the same first
3983  // or second operand, then it might still be profitable to move the shuffle
3984  // after the xor/and/or operation.
3985  if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
3986  auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
3987  auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
3988  assert(X.getValueType() == Y.getValueType() &&
3989  "Inputs to shuffles are not the same type");
3990 
3991  // Check that both shuffles use the same mask. The masks are known to be of
3992  // the same length because the result vector type is the same.
3993  // Check also that shuffles have only one use to avoid introducing extra
3994  // instructions.
3995  if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
3996  !SVN0->getMask().equals(SVN1->getMask()))
3997  return SDValue();
3998 
3999  // Don't try to fold this node if it requires introducing a
4000  // build vector of all zeros that might be illegal at this stage.
4001  SDValue ShOp = N0.getOperand(1);
4002  if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4003  ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4004 
4005  // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
4006  if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
4007  SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
4008  N0.getOperand(0), N1.getOperand(0));
4009  return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
4010  }
4011 
4012  // Don't try to fold this node if it requires introducing a
4013  // build vector of all zeros that might be illegal at this stage.
4014  ShOp = N0.getOperand(0);
4015  if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4016  ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4017 
4018  // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
4019  if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
4020  SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
4021  N1.getOperand(1));
4022  return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
4023  }
4024  }
4025 
4026  return SDValue();
4027 }
4028 
4029 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
4030 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
4031  const SDLoc &DL) {
4032  SDValue LL, LR, RL, RR, N0CC, N1CC;
4033  if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
4034  !isSetCCEquivalent(N1, RL, RR, N1CC))
4035  return SDValue();
4036 
4037  assert(N0.getValueType() == N1.getValueType() &&
4038  "Unexpected operand types for bitwise logic op");
4039  assert(LL.getValueType() == LR.getValueType() &&
4040  RL.getValueType() == RR.getValueType() &&
4041  "Unexpected operand types for setcc");
4042 
4043  // If we're here post-legalization or the logic op type is not i1, the logic
4044  // op type must match a setcc result type. Also, all folds require new
4045  // operations on the left and right operands, so those types must match.
4046  EVT VT = N0.getValueType();
4047  EVT OpVT = LL.getValueType();
4048  if (LegalOperations || VT.getScalarType() != MVT::i1)
4049  if (VT != getSetCCResultType(OpVT))
4050  return SDValue();
4051  if (OpVT != RL.getValueType())
4052  return SDValue();
4053 
4054  ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
4055  ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
4056  bool IsInteger = OpVT.isInteger();
4057  if (LR == RR && CC0 == CC1 && IsInteger) {
4058  bool IsZero = isNullOrNullSplat(LR);
4059  bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
4060 
4061  // All bits clear?
4062  bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
4063  // All sign bits clear?
4064  bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
4065  // Any bits set?
4066  bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
4067  // Any sign bits set?
4068  bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
4069 
4070  // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
4071  // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
4072  // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
4073  // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
4074  if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
4075  SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
4076  AddToWorklist(Or.getNode());
4077  return DAG.getSetCC(DL, VT, Or, LR, CC1);
4078  }
4079 
4080  // All bits set?
4081  bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
4082  // All sign bits set?
4083  bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
4084  // Any bits clear?
4085  bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
4086  // Any sign bits clear?
4087  bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
4088 
4089  // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
4090  // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
4091  // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
4092  // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
4093  if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
4094  SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
4095  AddToWorklist(And.getNode());
4096  return DAG.getSetCC(DL, VT, And, LR, CC1);
4097  }
4098  }
4099 
4100  // TODO: What is the 'or' equivalent of this fold?
4101  // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
4102  if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
4103  IsInteger && CC0 == ISD::SETNE &&
4104  ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
4105  (isAllOnesConstant(LR) && isNullConstant(RR)))) {
4106  SDValue One = DAG.getConstant(1, DL, OpVT);
4107  SDValue Two = DAG.getConstant(2, DL, OpVT);
4108  SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
4109  AddToWorklist(Add.getNode());
4110  return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
4111  }
4112 
4113  // Try more general transforms if the predicates match and the only user of
4114  // the compares is the 'and' or 'or'.
4115  if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
4116  N0.hasOneUse() && N1.hasOneUse()) {
4117  // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
4118  // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
4119  if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
4120  SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
4121  SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
4122  SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
4123  SDValue Zero = DAG.getConstant(0, DL, OpVT);
4124  return DAG.getSetCC(DL, VT, Or, Zero, CC1);
4125  }
4126 
4127  // Turn compare of constants whose difference is 1 bit into add+and+setcc.
4128  if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
4129  // Match a shared variable operand and 2 non-opaque constant operands.
4130  ConstantSDNode *C0 = isConstOrConstSplat(LR);
4131  ConstantSDNode *C1 = isConstOrConstSplat(RR);
4132  if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
4133  // Canonicalize larger constant as C0.
4134  if (C1->getAPIntValue().ugt(C0->getAPIntValue()))
4135  std::swap(C0, C1);
4136 
4137  // The difference of the constants must be a single bit.
4138  const APInt &C0Val = C0->getAPIntValue();
4139  const APInt &C1Val = C1->getAPIntValue();
4140  if ((C0Val - C1Val).isPowerOf2()) {
4141  // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) -->
4142  // setcc ((add X, -C1), ~(C0 - C1)), 0, ne/eq
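  // e.g. (and (setne X, 0), (setne X, 1)) --> (setne (and X, -2), 0)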
4143  SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT);
4144  SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC);
4145  SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT);
4146  SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC);
4147  SDValue Zero = DAG.getConstant(0, DL, OpVT);
4148  return DAG.getSetCC(DL, VT, And, Zero, CC0);
4149  }
4150  }
4151  }
4152  }
4153 
4154  // Canonicalize equivalent operands to LL == RL.
4155  if (LL == RR && LR == RL) {
4156  CC1 = ISD::getSetCCSwappedOperands(CC1);
4157  std::swap(RL, RR);
4158  }
4159 
4160  // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4161  // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4162  if (LL == RL && LR == RR) {
4163  ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
4164  : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
4165  if (NewCC != ISD::SETCC_INVALID &&
4166  (!LegalOperations ||
4167  (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
4168  TLI.isOperationLegal(ISD::SETCC, OpVT))))
4169  return DAG.getSetCC(DL, VT, LL, LR, NewCC);
4170  }
4171 
4172  return SDValue();
4173 }
4174 
4175 /// This contains all DAGCombine rules which reduce two values combined by
4176 /// an And operation to a single value. This makes them reusable in the context
4177 /// of visitSELECT(). Rules involving constants are not included as
4178 /// visitSELECT() already handles those cases.
4179 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
4180  EVT VT = N1.getValueType();
4181  SDLoc DL(N);
4182 
4183  // fold (and x, undef) -> 0
4184  if (N0.isUndef() || N1.isUndef())
4185  return DAG.getConstant(0, DL, VT);
4186 
4187  if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
4188  return V;
4189 
4190  if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
4191  VT.getSizeInBits() <= 64) {
4192  if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4193  if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
4194  // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
4195  // immediate for an add, but it is legal if its top c2 bits are set,
4196  // transform the ADD so the immediate doesn't need to be materialized
4197  // in a register.
4198  APInt ADDC = ADDI->getAPIntValue();
4199  APInt SRLC = SRLI->getAPIntValue();
4200  if (ADDC.getMinSignedBits() <= 64 &&
4201  SRLC.ult(VT.getSizeInBits()) &&
4202  !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4203  APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
4204  SRLC.getZExtValue());
4205  if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
4206  ADDC |= Mask;
4207  if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4208  SDLoc DL0(N0);
4209  SDValue NewAdd =
4210  DAG.getNode(ISD::ADD, DL0, VT,
4211  N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
4212  CombineTo(N0.getNode(), NewAdd);
4213  // Return N so it doesn't get rechecked!
4214  return SDValue(N, 0);
4215  }
4216  }
4217  }
4218  }
4219  }
4220  }
4221 
4222  // Reduce bit extract of low half of an integer to the narrower type.
4223  // (and (srl i64:x, K), KMask) ->
4224  // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
4225  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4226  if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
4227  if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4228  unsigned Size = VT.getSizeInBits();
4229  const APInt &AndMask = CAnd->getAPIntValue();
4230  unsigned ShiftBits = CShift->getZExtValue();
4231 
4232  // Bail out, this node will probably disappear anyway.
4233  if (ShiftBits == 0)
4234  return SDValue();
4235 
4236  unsigned MaskBits = AndMask.countTrailingOnes();
4237  EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
4238 
4239  if (AndMask.isMask() &&
4240  // Required bits must not span the two halves of the integer and
4241  // must fit in the half size type.
4242  (ShiftBits + MaskBits <= Size / 2) &&
4243  TLI.isNarrowingProfitable(VT, HalfVT) &&
4244  TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
4245  TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
4246  TLI.isTruncateFree(VT, HalfVT) &&
4247  TLI.isZExtFree(HalfVT, VT)) {
4248  // The isNarrowingProfitable check is to avoid regressions on PPC and
4249  // AArch64 which match a few 64-bit bit insert / bit extract patterns
4250  // on downstream users of this. Those patterns could probably be
4251  // extended to handle extensions mixed in.
4252 
4253  SDValue SL(N0);
4254  assert(MaskBits <= Size);
4255 
4256  // Extracting the highest bit of the low half.
4257  EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
4258  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
4259  N0.getOperand(0));
4260 
4261  SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
4262  SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
4263  SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
4264  SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
4265  return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
4266  }
4267  }
4268  }
4269  }
4270 
4271  return SDValue();
4272 }
4273 
4274 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
4275  EVT LoadResultTy, EVT &ExtVT) {
4276  if (!AndC->getAPIntValue().isMask())
4277  return false;
4278 
4279  unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
4280 
4281  ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4282  EVT LoadedVT = LoadN->getMemoryVT();
4283 
4284  if (ExtVT == LoadedVT &&
4285  (!LegalOperations ||
4286  TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
4287  // ZEXTLOAD will match without needing to change the size of the value being
4288  // loaded.
4289  return true;
4290  }
4291 
4292  // Do not change the width of a volatile load.
4293  if (LoadN->isVolatile())
4294  return false;
4295 
4296  // Do not generate loads of non-round integer types since these can
4297  // be expensive (and would be wrong if the type is not byte sized).
4298  if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
4299  return false;
4300 
4301  if (LegalOperations &&
4302  !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
4303  return false;
4304 
4305  if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
4306  return false;
4307 
4308  return true;
4309 }
4310 
4311 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
4312  ISD::LoadExtType ExtType, EVT &MemVT,
4313  unsigned ShAmt) {
4314  if (!LDST)
4315  return false;
4316  // Only allow byte offsets.
4317  if (ShAmt % 8)
4318  return false;
4319 
4320  // Do not generate loads of non-round integer types since these can
4321  // be expensive (and would be wrong if the type is not byte sized).
4322  if (!MemVT.isRound())
4323  return false;
4324 
4325  // Don't change the width of a volatile load.
4326  if (LDST->isVolatile())
4327  return false;
4328 
4329  // Verify that we are actually reducing a load width here.
4330  if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
4331  return false;
4332 
4333  // Ensure that this isn't going to produce an unsupported unaligned access.
4334  if (ShAmt &&
4335  !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
4336  LDST->getAddressSpace(), ShAmt / 8))
4337  return false;
4338 
4339  // It's not possible to generate a constant of extended or untyped type.
4340  EVT PtrType = LDST->getBasePtr().getValueType();
4341  if (PtrType == MVT::Untyped || PtrType.isExtended())
4342  return false;
4343 
4344  if (isa<LoadSDNode>(LDST)) {
4345  LoadSDNode *Load = cast<LoadSDNode>(LDST);
4346  // Don't transform one with multiple uses, this would require adding a new
4347  // load.
4348  if (!SDValue(Load, 0).hasOneUse())
4349  return false;
4350 
4351  if (LegalOperations &&
4352  !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
4353  return false;
4354 
4355  // For the transform to be legal, the load must produce only two values
4356  // (the value loaded and the chain). Don't transform a pre-increment
4357  // load, for example, which produces an extra value. Otherwise the
4358  // transformation is not equivalent, and the downstream logic to replace
4359  // uses gets things wrong.
4360  if (Load->getNumValues() > 2)
4361  return false;
4362 
4363  // If the load that we're shrinking is an extload and we're not just
4364  // discarding the extension we can't simply shrink the load. Bail.
4365  // TODO: It would be possible to merge the extensions in some cases.
4366  if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
4367  Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4368  return false;
4369 
4370  if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
4371  return false;
4372  } else {
4373  assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
4374  StoreSDNode *Store = cast<StoreSDNode>(LDST);
4375  // Can't write outside the original store
4376  if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4377  return false;
4378 
4379  if (LegalOperations &&
4380  !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
4381  return false;
4382  }
4383  return true;
4384 }
4385 
4386 bool DAGCombiner::SearchForAndLoads(SDNode *N,
4387  SmallVectorImpl<LoadSDNode*> &Loads,
4388  SmallPtrSetImpl<SDNode*> &NodesWithConsts,
4389  ConstantSDNode *Mask,
4390  SDNode *&NodeToMask) {
4391  // Recursively search for the operands, looking for loads which can be
4392  // narrowed.
4393  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
4394  SDValue Op = N->getOperand(i);
4395 
4396  if (Op.getValueType().isVector())
4397  return false;
4398 
4399  // Some constants may need fixing up later if they are too large.
4400  if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4401  if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
4402  (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
4403  NodesWithConsts.insert(N);
4404  continue;
4405  }
4406 
4407  if (!Op.hasOneUse())
4408  return false;
4409 
4410  switch(Op.getOpcode()) {
4411  case ISD::LOAD: {
4412  auto *Load = cast<LoadSDNode>(Op);
4413  EVT ExtVT;
4414  if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
4415  isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
4416 
4417  // ZEXTLOAD is already small enough.
4418  if (Load->getExtensionType() == ISD::ZEXTLOAD &&
4419  ExtVT.bitsGE(Load->getMemoryVT()))
4420  continue;
4421 
4422  // Use LE to convert equal sized loads to zext.
4423  if (ExtVT.bitsLE(Load->getMemoryVT()))
4424  Loads.push_back(Load);
4425 
4426  continue;
4427  }
4428  return false;
4429  }
4430  case ISD::ZERO_EXTEND:
4431  case ISD::AssertZext: {
4432  unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
4433  EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4434  EVT VT = Op.getOpcode() == ISD::AssertZext ?
4435  cast<VTSDNode>(Op.getOperand(1))->getVT() :
4436  Op.getOperand(0).getValueType();
4437 
4438  // We can accept extending nodes if the mask is wider or an equal
4439  // width to the original type.
4440  if (ExtVT.bitsGE(VT))
4441  continue;
4442  break;
4443  }
4444  case ISD::OR:
4445  case ISD::XOR:
4446  case ISD::AND:
4447  if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
4448  NodeToMask))
4449  return false;
4450  continue;
4451  }
4452 
4453  // Allow one node which will be masked along with any loads found.
4454  if (NodeToMask)
4455  return false;
4456 
4457  // Also ensure that the node to be masked only produces one data result.
4458  NodeToMask = Op.getNode();
4459  if (NodeToMask->getNumValues() > 1) {
4460  bool HasValue = false;
4461  for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
4462  MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
4463  if (VT != MVT::Glue && VT != MVT::Other) {
4464  if (HasValue) {
4465  NodeToMask = nullptr;
4466  return false;
4467  }
4468  HasValue = true;
4469  }
4470  }
4471  assert(HasValue && "Node to be masked has no data result?");
4472  }
4473  }
4474  return true;
4475 }
4476 
4477 bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
4478  auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
4479  if (!Mask)
4480  return false;
4481 
4482  if (!Mask->getAPIntValue().isMask())
4483  return false;
4484 
4485  // No need to do anything if the and directly uses a load.
4486  if (isa<LoadSDNode>(N->getOperand(0)))
4487  return false;
4488 
4490  SmallPtrSet<SDNode*, 2> NodesWithConsts;
4491  SDNode *FixupNode = nullptr;
4492  if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
4493  if (Loads.size() == 0)
4494  return false;
4495 
4496  LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
4497  SDValue MaskOp = N->getOperand(1);
4498 
4499  // If it exists, fixup the single node we allow in the tree that needs
4500  // masking.
4501  if (FixupNode) {
4502  LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
4503  SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
4504  FixupNode->getValueType(0),
4505  SDValue(FixupNode, 0), MaskOp);
4506  DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
4507  if (And.getOpcode() == ISD::AND)
4508  DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
4509  }
4510 
4511  // Narrow any constants that need it.
4512  for (auto *LogicN : NodesWithConsts) {
4513  SDValue Op0 = LogicN->getOperand(0);
4514  SDValue Op1 = LogicN->getOperand(1);
4515 
4516  if (isa<ConstantSDNode>(Op0))
4517  std::swap(Op0, Op1);
4518 
4519  SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
4520  Op1, MaskOp);
4521 
4522  DAG.UpdateNodeOperands(LogicN, Op0, And);
4523  }
4524 
4525  // Create narrow loads.
4526  for (auto *Load : Loads) {
4527  LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
4528  SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
4529  SDValue(Load, 0), MaskOp);
4530  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
4531  if (And.getOpcode() == ISD::AND)
4532  And = SDValue(
4533  DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
4534  SDValue NewLoad = ReduceLoadWidth(And.getNode());
4535  assert(NewLoad &&
4536  "Shouldn't be masking the load if it can't be narrowed");
4537  CombineTo(Load, NewLoad, NewLoad.getValue(1));
4538  }
4539  DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
4540  return true;
4541  }
4542  return false;
4543 }
4544 
4545 // Unfold
4546 // x & (-1 'logical shift' y)
4547 // To
4548 // (x 'opposite logical shift' y) 'logical shift' y
4549 // if it is better for performance.
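// e.g. (and x, (shl -1, y)) --> (shl (srl x, y), y)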
4550 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
4551  assert(N->getOpcode() == ISD::AND);
4552 
4553  SDValue N0 = N->getOperand(0);
4554  SDValue N1 = N->getOperand(1);
4555 
4556  // Do we actually prefer shifts over mask?
4557  if (!TLI.preferShiftsToClearExtremeBits(N0))
4558  return SDValue();
4559 
4560  // Try to match (-1 '[outer] logical shift' y)
4561  unsigned OuterShift;
4562  unsigned InnerShift; // The opposite direction to the OuterShift.
4563  SDValue Y; // Shift amount.
4564  auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
4565  if (!M.hasOneUse())
4566  return false;
4567  OuterShift = M->getOpcode();
4568  if (OuterShift == ISD::SHL)
4569  InnerShift = ISD::SRL;
4570  else if (OuterShift == ISD::SRL)
4571  InnerShift = ISD::SHL;
4572  else
4573  return false;
4574  if (!isAllOnesConstant(M->getOperand(0)))
4575  return false;
4576  Y = M->getOperand(1);
4577  return true;
4578  };
4579 
4580  SDValue X;
4581  if (matchMask(N1))
4582  X = N0;
4583  else if (matchMask(N0))
4584  X = N1;
4585  else
4586  return SDValue();
4587 
4588  SDLoc DL(N);
4589  EVT VT = N->getValueType(0);
4590 
4591  // tmp = x 'opposite logical shift' y
4592  SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
4593  // ret = tmp 'logical shift' y
4594  SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
4595 
4596  return T1;
4597 }
4598 
4599 SDValue DAGCombiner::visitAND(SDNode *N) {
4600  SDValue N0 = N->getOperand(0);
4601  SDValue N1 = N->getOperand(1);
4602  EVT VT = N1.getValueType();
4603 
4604  // x & x --> x
4605  if (N0 == N1)
4606  return N0;
4607 
4608  // fold vector ops
4609  if (VT.isVector()) {
4610  if (SDValue FoldedVOp = SimplifyVBinOp(N))
4611  return FoldedVOp;
4612 
4613  // fold (and x, 0) -> 0, vector edition
 4614  if (ISD::isBuildVectorAllZeros(N0.getNode()))
 4615  // do not return N0, because undef node may exist in N0
 4616  return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
 4617  SDLoc(N), N0.getValueType());
 4618  if (ISD::isBuildVectorAllZeros(N1.getNode()))
 4619  // do not return N1, because undef node may exist in N1
 4620  return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
 4621  SDLoc(N), N1.getValueType());
 4622 
 4623  // fold (and x, -1) -> x, vector edition
 4624  if (ISD::isBuildVectorAllOnes(N0.getNode()))
 4625  return N1;
 4626  if (ISD::isBuildVectorAllOnes(N1.getNode()))
 4627  return N0;
4628  }
4629 
4630  // fold (and c1, c2) -> c1&c2
 4631  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
 4632  ConstantSDNode *N1C = isConstOrConstSplat(N1);
 4633  if (N0C && N1C && !N1C->isOpaque())
4634  return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
4635  // canonicalize constant to RHS
 4636  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
 4637  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
 4638  return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
4639  // fold (and x, -1) -> x
4640  if (isAllOnesConstant(N1))
4641  return N0;
4642  // if (and x, c) is known to be zero, return 0
4643  unsigned BitWidth = VT.getScalarSizeInBits();
4644  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
4645  APInt::getAllOnesValue(BitWidth)))
4646  return DAG.getConstant(0, SDLoc(N), VT);
4647 
4648  if (SDValue NewSel = foldBinOpIntoSelect(N))
4649  return NewSel;
4650 
4651  // reassociate and
4652  if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
4653  return RAND;
4654 
4655  // Try to convert a constant mask AND into a shuffle clear mask.
4656  if (VT.isVector())
4657  if (SDValue Shuffle = XformToShuffleWithZero(N))
4658  return Shuffle;
4659 
4660  // fold (and (or x, C), D) -> D if (C & D) == D
4661  auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
4662  return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
4663  };
4664  if (N0.getOpcode() == ISD::OR &&
4665  ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
4666  return N1;
4667  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
4668  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
4669  SDValue N0Op0 = N0.getOperand(0);
4670  APInt Mask = ~N1C->getAPIntValue();
4671  Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
4672  if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
4673  SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
4674  N0.getValueType(), N0Op0);
4675 
4676  // Replace uses of the AND with uses of the Zero extend node.
4677  CombineTo(N, Zext);
4678 
4679  // We actually want to replace all uses of the any_extend with the
4680  // zero_extend, to avoid duplicating things. This will later cause this
4681  // AND to be folded.
4682  CombineTo(N0.getNode(), Zext);
4683  return SDValue(N, 0); // Return N so it doesn't get rechecked!
4684  }
4685  }
4686  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
4687  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
4688  // already be zero by virtue of the width of the base type of the load.
4689  //
4690  // the 'X' node here can either be nothing or an extract_vector_elt to catch
4691  // more cases.
4692  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4694  N0.getOperand(0).getOpcode() == ISD::LOAD &&
4695  N0.getOperand(0).getResNo() == 0) ||
4696  (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
4697  LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
4698  N0 : N0.getOperand(0) );
4699 
4700  // Get the constant (if applicable) the zero'th operand is being ANDed with.
4701  // This can be a pure constant or a vector splat, in which case we treat the
4702  // vector as a scalar and use the splat value.
 4703  APInt Constant = APInt::getNullValue(1);
 4704  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
4705  Constant = C->getAPIntValue();
4706  } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
4707  APInt SplatValue, SplatUndef;
4708  unsigned SplatBitSize;
4709  bool HasAnyUndefs;
4710  bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
4711  SplatBitSize, HasAnyUndefs);
4712  if (IsSplat) {
4713  // Undef bits can contribute to a possible optimisation if set, so
4714  // set them.
4715  SplatValue |= SplatUndef;
4716 
4717  // The splat value may be something like "0x00FFFFFF", which means 0 for
4718  // the first vector value and FF for the rest, repeating. We need a mask
4719  // that will apply equally to all members of the vector, so AND all the
4720  // lanes of the constant together.
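 // For example, a <4 x i8> splat stored as the i32 pattern 0x00FFFFFF folds to
 // 0x00 & 0xFF & 0xFF & 0xFF = 0x00, which is not all-ones, so the AND is kept.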
4721  EVT VT = Vector->getValueType(0);
4722  unsigned BitWidth = VT.getScalarSizeInBits();
4723 
4724  // If the splat value has been compressed to a bitlength lower
4725  // than the size of the vector lane, we need to re-expand it to
4726  // the lane size.
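 // For example, a 1-bit splat value of 1 targeting i8 lanes is re-expanded to
 // 0xFF by repeatedly OR-ing the value with itself shifted left.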
4727  if (BitWidth > SplatBitSize)
4728  for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
4729  SplatBitSize < BitWidth;
4730  SplatBitSize = SplatBitSize * 2)
4731  SplatValue |= SplatValue.shl(SplatBitSize);
4732 
4733  // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
4734  // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
4735  if (SplatBitSize % BitWidth == 0) {
4736  Constant = APInt::getAllOnesValue(BitWidth);
4737  for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
4738  Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
4739  }
4740  }
4741  }
4742 
4743  // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
4744  // actually legal and isn't going to get expanded, else this is a false
4745  // optimisation.
4746  bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
4747  Load->getValueType(0),
4748  Load->getMemoryVT());
4749 
4750  // Resize the constant to the same size as the original memory access before
4751  // extension. If it is still the AllOnesValue then this AND is completely
4752  // unneeded.
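 // For example, an i32 mask of 0x000000FF truncated to an i8 memory type is
 // 0xFF, i.e. all ones, so once the load is known (or made) zero-extending the
 // AND contributes nothing.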
4753  Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
4754 
4755  bool B;
4756  switch (Load->getExtensionType()) {
4757  default: B = false; break;
4758  case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
4759  case ISD::ZEXTLOAD:
4760  case ISD::NON_EXTLOAD: B = true; break;
4761  }
4762 
4763  if (B && Constant.isAllOnesValue()) {
4764  // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
4765  // preserve semantics once we get rid of the AND.
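 // For example, for (and (extload i8->i32 p), 0xFF): with the AND removed, the
 // upper 24 bits must still read as zero, so the any-extending load is rebuilt
 // below as a zextload.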
4766  SDValue NewLoad(Load, 0);
4767 
4768  // Fold the AND away. NewLoad may get replaced immediately.
4769  CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
4770 
4771  if (Load->getExtensionType() == ISD::EXTLOAD) {
4772  NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
4773  Load->getValueType(0), SDLoc(Load),
4774  Load->getChain(), Load->getBasePtr(),
4775  Load->getOffset(), Load->getMemoryVT(),
4776  Load->getMemOperand());
4777  // Replace uses of the EXTLOAD with the new ZEXTLOAD.
4778  if (Load->getNumValues() == 3) {
4779  // PRE/POST_INC loads have 3 values.
4780  SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
4781  NewLoad.getValue(2) };
4782  CombineTo(Load, To, 3, true);
4783  } else {
4784  CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
4785  }
4786  }
4787 
4788  return SDValue(N, 0); // Return N so it doesn't get rechecked!
4789  }
4790  }
4791 
4792  // fold (and (load x), 255) -> (zextload x, i8)
4793  // fold (and (extload x, i16), 255) -> (zextload x, i8)
4794  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
4795  if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
4796  (N0.getOpcode() == ISD::ANY_EXTEND &&
4797  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
4798  if (SDValue Res = ReduceLoadWidth(N)) {
4799  LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
4800  ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
4801  AddToWorklist(N);
4802  DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
4803  return SDValue(N, 0);
4804  }
4805  }
4806 
4807  if (Level >= AfterLegalizeTypes) {
4808  // Attempt to propagate the AND back up to the leaves which, if they're
4809  // loads, can be combined to narrow loads and the AND node can be removed.
4810  // Perform after legalization so that extend nodes will already be
4811  // combined into the loads.
4812  if (BackwardsPropagateMask(N, DAG)) {
4813  return SDValue(N, 0);
4814  }
4815  }
4816 
4817  if (SDValue Combined = visitANDLike(N0, N1, N))
4818  return Combined;
4819 
4820  // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
4821  if (N0.getOpcode() == N1.getOpcode())
4822  if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
4823  return V;
4824 
4825  // Masking the negated extension of a boolean is just the zero-extended
4826  // boolean:
4827  // and (sub 0, zext(bool X)), 1 --> zext(bool X)
4828  // and (sub 0, sext(bool X)), 1 --> zext(bool X)
4829  //
4830  // Note: the SimplifyDemandedBits fold below can make an information-losing
4831  // transform, and then we have no way to find this better fold.
4832  if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
4833  if (isNullOrNullSplat(N0.getOperand(0))) {
4834  SDValue SubRHS = N0.getOperand(1);
4835  if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
4836  SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
4837  return SubRHS;
4838  if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
4839  SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
4840  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
4841  }
4842  }
4843 
4844  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
4845  // fold (and (sra)) -> (and (srl)) when possible.
4846  if (SimplifyDemandedBits(SDValue(N, 0)))
4847  return SDValue(N, 0);
4848 
4849  // fold (zext_inreg (extload x)) -> (zextload x)
4850  if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
4851  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4852  EVT MemVT = LN0->getMemoryVT();
4853  // If we zero all the possible extended bits, then we can turn this into
4854  // a zextload if we are running before legalize or the operation is legal.
4855  unsigned BitWidth = N1.getScalarValueSizeInBits();
4856  if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
4857  BitWidth - MemVT.getScalarSizeInBits())) &&
4858  ((!LegalOperations && !LN0->isVolatile()) ||
4859  TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
4860  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
4861  LN0->getChain(), LN0->getBasePtr(),
4862  MemVT, LN0->getMemOperand());
4863  AddToWorklist(N);
4864  CombineTo(N0.getNode(), ExtL